v2 / vlib / builtin / utf8_test.v
168 lines · 142 sloc · 4.86 KB · 3642cd580846f35344ea0c2248e74adc47ad4698
Raw
// test_utf8_char_len checks the length reported by utf8_char_len for the
// ASCII byte `a` (expected 1) and for the first byte of 'п' (expected 2).
fn test_utf8_char_len() {
	ascii_len := utf8_char_len(`a`)
	assert ascii_len == 1
	println(ascii_len)
	cyrillic := 'п'
	lead_byte := cyrillic[0]
	assert utf8_char_len(lead_byte) == 2
}
7
// test_utf8_wide_char compares the rune `✔` with the string '✔': the
// stringified rune must equal the string, and its first three bytes must be
// e2 9c 94 in hex. The test body is skipped entirely under msvc.
fn test_utf8_wide_char() {
	$if msvc {
		// TODO: make this test pass with msvc too
		return
	}
	check_rune := `✔`
	check_str := '✔'
	println('r: ${check_rune}')
	println('s: ${check_str}')
	rune_as_str := check_rune.str()
	println('rstr: ${rune_as_str}')
	assert utf8_char_len(check_rune) == 1
	assert utf8_char_len(check_str[0]) == 3
	assert check_str == rune_as_str
	// Walk the raw byte pointer of the stringified rune; indexing a raw
	// pointer requires an unsafe block.
	raw := rune_as_str.str
	unsafe {
		for idx, want_hex in ['e2', '9c', '94'] {
			assert raw[idx].hex() == want_hex
		}
	}
}
29
// test_to_wide_latin converts 'abc 123' with to_wide() and checks each
// resulting unit, including the 0 that follows the last character.
fn test_to_wide_latin() {
	wide := 'abc 123'.to_wide()
	// 'a', 'b', 'c', ' ', '1', '2', '3', then the trailing 0.
	expected_units := [u16(97), 98, 99, 32, 49, 50, 51, 0]
	unsafe {
		for idx, unit in expected_units {
			assert wide[idx] == unit
		}
	}
}
44
// test_to_wide_cyrillic converts 'Проба' with to_wide() and checks each
// resulting unit, including the 0 that follows the last character.
fn test_to_wide_cyrillic() {
	wide := 'Проба'.to_wide()
	// One unit per character of 'Проба', then the trailing 0.
	expected_units := [u16(1055), 1088, 1086, 1073, 1072, 0]
	unsafe {
		for idx, unit in expected_units {
			assert wide[idx] == unit
		}
	}
}
57
// little_serial_number spells 'CL46I1A00649' as 2-byte units with the low
// byte first, followed by trailing zero bytes.
const little_serial_number = [u8(67), 0, 76, 0, 52, 0, 54, 0, 73, 0, 49, 0,
	65, 0, 48, 0, 48, 0, 54, 0, 52, 0, 57, 0,
	0, 0, 0]

// big_serial_number spells the same characters as 2-byte units with the high
// byte first, followed by trailing zero bytes.
const big_serial_number = [u8(0), 67, 0, 76, 0, 52, 0, 54, 0, 73, 0, 49,
	0, 65, 0, 48, 0, 48, 0, 54, 0, 52, 0, 57,
	0, 0, 0, 0]

// swide_serial_number is the string both byte arrays above are expected to
// decode to.
const swide_serial_number = 'CL46I1A00649'
64
// test_string_from_wide decodes the serial-number fixture that matches the
// target's byte order with string_from_wide and compares the result with
// swide_serial_number.
fn test_string_from_wide() {
	$if little_endian {
		// Low-byte-first fixture for little endian targets.
		decoded := unsafe { string_from_wide(little_serial_number.data) }
		assert decoded == swide_serial_number
	} $else {
		// High-byte-first fixture for every other target.
		decoded := unsafe { string_from_wide(big_serial_number.data) }
		assert decoded == swide_serial_number
	}
}
74
// test_string_from_wide2 decodes the serial-number fixture that matches the
// target's byte order with string_from_wide2, passing an explicit length of
// 12, and compares the result with swide_serial_number.
fn test_string_from_wide2() {
	unit_count := 12
	$if little_endian {
		// Low-byte-first fixture for little endian targets.
		decoded := unsafe { string_from_wide2(little_serial_number.data, unit_count) }
		assert decoded == swide_serial_number
	} $else {
		// High-byte-first fixture for every other target.
		decoded := unsafe { string_from_wide2(big_serial_number.data, unit_count) }
		assert decoded == swide_serial_number
	}
}
84
// test_reverse_cyrillic_with_string_from_wide round-trips 'Проба' through
// to_wide() and string_from_wide() and expects the original string back.
fn test_reverse_cyrillic_with_string_from_wide() {
	original := 'Проба'
	round_tripped := unsafe { string_from_wide(original.to_wide()) }
	assert round_tripped == original
}
91
// test_wide_to_ansi converts 'abc' to a wide buffer and expects
// wide_to_ansi to return the bytes 97, 98, 99 followed by a 0.
fn test_wide_to_ansi() {
	wide := 'abc'.to_wide()
	ansi_bytes := wide_to_ansi(wide)
	expected := [u8(97), 98, 99, 0]
	assert ansi_bytes == expected
}
96
// test_string_to_ansi_not_null_terminated expects 'abc' to convert to exactly
// the bytes 97, 98, 99, with no trailing 0.
fn test_string_to_ansi_not_null_terminated() {
	ansi_bytes := string_to_ansi_not_null_terminated('abc')
	expected := [u8(97), 98, 99]
	assert ansi_bytes == expected
}
100
// test_utf8_str_visible_length checks utf8_str_visible_length against a set
// of strings whose expected visible length differs from their byte length.
fn test_utf8_str_visible_length() {
	// Input string -> expected visible length. V maps iterate in insertion
	// order, so the cases run in the order listed here.
	expected_lengths := {
		'𝐀𝐁𝐂':                                 3
		'\u006E\u0303':                        1
		'\U0001F3F3\uFE0F\u200D\U0001F308':    2
		'ห์':                                   1
		'ปีเตอร์':                                5
		'👩🏽‍💻':                                 2
	}
	for text, want in expected_lengths {
		assert utf8_str_visible_length(text) == want
	}
}
109
// test_utf8_to_utf32_cases feeds the bytes of single-character strings to
// impl_utf8_to_utf32 and compares the result with the matching rune literal
// and, for several cases, with the numeric code point as well.
fn test_utf8_to_utf32_cases() {
	// decode passes the bytes of `s` and their count to impl_utf8_to_utf32.
	decode := fn (s string) rune {
		raw := s.bytes()
		return impl_utf8_to_utf32(&u8(raw.data), raw.len)
	}

	assert decode('A') == rune(`A`)
	assert decode('é') == rune(`é`)
	assert decode('€') == rune(`€`)

	gothic := decode('𐍈')
	assert gothic == rune(0x10348)
	assert gothic == rune(`𐍈`)

	cjk := decode('中')
	assert cjk == rune(0x4E2D)
	assert cjk == rune(`中`)

	// emoji, 4-byte UTF-8
	emoji := decode('😀')
	assert emoji == rune(0x1F600)
	assert emoji == `😀`

	assert decode('Ж') == rune(`Ж`)
	assert decode('م') == rune(`م`)

	// Code points 0x07FF and 0x0800.
	cp_07ff := decode('߿')
	assert cp_07ff == rune(0x07FF)
	assert cp_07ff == rune(`߿`)

	cp_0800 := decode('ࠀ')
	assert cp_0800 == rune(0x0800)
	assert cp_0800 == rune(`ࠀ`)

	// Code points 0xFFFF and 0x10000.
	cp_ffff := decode('￿')
	assert cp_ffff == rune(0xFFFF)
	assert cp_ffff == rune(`￿`)

	cp_10000 := decode('𐀀')
	assert cp_10000 == rune(0x10000)
	assert cp_10000 == rune(`𐀀`)

	// Code point 0x10FFFF.
	cp_10ffff := decode('􏿿')
	assert cp_10ffff == rune(0x10FFFF)
	assert cp_10ffff == rune(`􏿿`)
}
159
// test_utf8_to_utf32_invalid_length passes a 5-byte buffer to
// impl_utf8_to_utf32 and expects 0 back.
fn test_utf8_to_utf32_invalid_length() {
	// More than 4 bytes is invalid
	too_long := [u8(0xF0), 0x9F, 0x98, 0x80, 0x00]
	decoded := impl_utf8_to_utf32(&u8(too_long.data), too_long.len)
	assert decoded == 0
}
165
// test_utf8_to_utf32_empty passes the data pointer of an empty byte array
// with a length of 0 to impl_utf8_to_utf32 and expects 0 back.
fn test_utf8_to_utf32_empty() {
	no_bytes := []u8{}
	decoded := impl_utf8_to_utf32(&u8(no_bytes.data), 0)
	assert decoded == 0
}
169