Gitly


// test_utf8_char_len checks the length reported for the byte `a` and for
// the first byte of the string 'п', printing the former.
fn test_utf8_char_len() {
	ascii_len := utf8_char_len(`a`)
	assert ascii_len == 1
	println(ascii_len)
	cyrillic := 'п'
	first_byte := cyrillic[0]
	assert utf8_char_len(first_byte) == 2
}
7 
// test_utf8_wide_char compares the rune `✔` with the string '✔': both must
// stringify identically, and the resulting bytes must be e2 9c 94.
fn test_utf8_wide_char() {
	$if msvc {
		// TODO: make this test pass msvc too
		return
	}
	check_rune := `✔`
	check_str := '✔'
	println('r: ${check_rune}')
	println('s: ${check_str}')
	rune_text := check_rune.str()
	println('rstr: ${rune_text}')
	assert utf8_char_len(check_rune) == 1
	assert utf8_char_len(check_str[0]) == 3
	assert check_str == rune_text
	raw := rune_text.str
	// Expected hex form of each byte behind the raw string pointer.
	expected_hex := ['e2', '9c', '94']
	for idx, want in expected_hex {
		byte_hex := unsafe { raw[idx].hex() }
		assert byte_hex == want
	}
}
29 
// test_to_wide_latin converts 'abc 123' with to_wide() and checks every
// resulting unit, including the zero that follows the last character.
fn test_to_wide_latin() {
	text := 'abc 123'
	wide := text.to_wide()
	expected := [u16(97), 98, 99, 32, 49, 50, 51, 0]
	for i, want in expected {
		unit := unsafe { wide[i] }
		assert unit == want
	}
}
44 
// test_to_wide_cyrillic converts 'Проба' with to_wide() and checks the five
// resulting units plus the zero that follows them.
fn test_to_wide_cyrillic() {
	word := 'Проба'
	wide := word.to_wide()
	expected := [u16(1055), 1088, 1086, 1073, 1072, 0]
	for pos, want in expected {
		unit := unsafe { wide[pos] }
		assert unit == want
	}
}
57 
// Bytes of the wide form of swide_serial_number with the value byte of each
// pair first, followed by zero bytes. Used by the little-endian test branches.
const little_serial_number = [
	u8(67), 0, 76, 0, 52, 0, 54, 0,
	73, 0, 49, 0, 65, 0, 48, 0,
	48, 0, 54, 0, 52, 0, 57, 0,
	0, 0, 0]

// Same text with the zero byte of each pair first, followed by zero bytes.
// Used by the non-little-endian test branches.
const big_serial_number = [
	u8(0), 67, 0, 76, 0, 52, 0, 54,
	0, 73, 0, 49, 0, 65, 0, 48,
	0, 48, 0, 54, 0, 52, 0, 57,
	0, 0, 0, 0]

// The string both byte arrays above are expected to decode to.
const swide_serial_number = 'CL46I1A00649'
64 
// test_string_from_wide decodes the serial number byte array matching the
// target endianness and expects swide_serial_number back.
fn test_string_from_wide() {
	mut decoded := ''
	$if little_endian {
		decoded = unsafe { string_from_wide(little_serial_number.data) }
	} $else {
		decoded = unsafe { string_from_wide(big_serial_number.data) }
	}
	assert decoded == swide_serial_number
}
74 
// test_string_from_wide2 decodes the serial number byte array matching the
// target endianness, passing 12 as the second argument, and expects
// swide_serial_number back.
fn test_string_from_wide2() {
	unit_count := 12
	mut decoded := ''
	$if little_endian {
		decoded = unsafe { string_from_wide2(little_serial_number.data, unit_count) }
	} $else {
		decoded = unsafe { string_from_wide2(big_serial_number.data, unit_count) }
	}
	assert decoded == swide_serial_number
}
84 
// test_reverse_cyrillic_with_string_from_wide round-trips 'Проба' through
// to_wide() and string_from_wide() and expects the original string back.
fn test_reverse_cyrillic_with_string_from_wide() {
	original := 'Проба'
	round_trip := unsafe { string_from_wide(original.to_wide()) }
	assert round_trip == original
}
91 
// test_wide_to_ansi converts the wide form of 'abc' and expects the bytes
// 97, 98, 99 followed by a zero.
fn test_wide_to_ansi() {
	wide_abc := 'abc'.to_wide()
	ansi_bytes := wide_to_ansi(wide_abc)
	assert ansi_bytes == [u8(97), 98, 99, 0]
}
96 
// test_string_to_ansi_not_null_terminated expects 'abc' to convert to the
// bytes 97, 98, 99 with no trailing zero.
fn test_string_to_ansi_not_null_terminated() {
	ansi_bytes := string_to_ansi_not_null_terminated('abc')
	assert ansi_bytes == [u8(97), 98, 99]
}
100 
// test_utf8_str_visible_length checks utf8_str_visible_length against a
// table of inputs; expected_lengths[i] belongs to samples[i].
fn test_utf8_str_visible_length() {
	samples := [
		'𝐀𝐁𝐂',
		'\u006E\u0303',
		'\U0001F3F3\uFE0F\u200D\U0001F308',
		'ห์',
		'ปีเตอร์',
		'👩🏽‍💻',
	]
	expected_lengths := [3, 1, 2, 1, 5, 2]
	for i, sample in samples {
		assert utf8_str_visible_length(sample) == expected_lengths[i]
	}
}
109 
// test_utf8_to_utf32_cases decodes the bytes of single-character strings
// with impl_utf8_to_utf32 and compares each result with the expected rune,
// given as a rune literal and, where present, as a numeric code point.
fn test_utf8_to_utf32_cases() {
	test_case1 := 'A'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case1.data), test_case1.len) == rune(`A`)

	test_case2 := 'é'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case2.data), test_case2.len) == rune(`é`)

	test_case3 := '€'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case3.data), test_case3.len) == rune(`€`)

	test_case4 := '𐍈'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(0x10348)
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(`𐍈`)

	test_case5 := '中'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(0x4E2D)
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(`中`)

	// emoji, 4-byte UTF-8
	test_case6 := '😀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == rune(0x1F600)
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == `😀`

	test_case7 := 'Ж'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case7.data), test_case7.len) == rune(`Ж`)

	test_case8 := 'م'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case8.data), test_case8.len) == rune(`م`)

	test_case9 := '߿'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(0x07FF)
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(`߿`)

	test_case10 := 'ࠀ'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(0x0800)
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(`ࠀ`)

	// U+FFFF is spelled with an escape sequence: it has no visible glyph, so a
	// raw literal is easily lost when the file is copied or rendered, leaving
	// an empty string and an empty rune literal behind.
	test_case11 := '\uFFFF'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case11.data), test_case11.len) == rune(0xFFFF)
	assert impl_utf8_to_utf32(&u8(test_case11.data), test_case11.len) == rune(`\uFFFF`)

	test_case12 := '𐀀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(0x10000)
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(`𐀀`)

	test_case13 := '􏿿'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case13.data), test_case13.len) == rune(0x10FFFF)
	assert impl_utf8_to_utf32(&u8(test_case13.data), test_case13.len) == rune(`􏿿`)
}
159 
// test_utf8_to_utf32_invalid_length passes a five-byte input and expects
// impl_utf8_to_utf32 to return 0.
fn test_utf8_to_utf32_invalid_length() {
	// More than 4 bytes is invalid
	too_long := [u8(0xF0), 0x9F, 0x98, 0x80, 0x00]
	decoded := impl_utf8_to_utf32(&u8(too_long.data), too_long.len)
	assert decoded == 0
}
165 
// test_utf8_to_utf32_empty passes an empty byte array with length 0 and
// expects impl_utf8_to_utf32 to return 0.
fn test_utf8_to_utf32_empty() {
	no_bytes := []u8{}
	decoded := impl_utf8_to_utf32(&u8(no_bytes.data), 0)
	assert decoded == 0
}
169

// test_utf8_char_len checks the length reported for the byte `a` and for
// the first byte of the string 'п', printing the former.
fn test_utf8_char_len() {
	ascii_len := utf8_char_len(`a`)
	assert ascii_len == 1
	println(ascii_len)
	cyrillic := 'п'
	first_byte := cyrillic[0]
	assert utf8_char_len(first_byte) == 2
}
7
// test_utf8_wide_char compares the rune `✔` with the string '✔': both must
// stringify identically, and the resulting bytes must be e2 9c 94.
fn test_utf8_wide_char() {
	$if msvc {
		// TODO: make this test pass msvc too
		return
	}
	check_rune := `✔`
	check_str := '✔'
	println('r: ${check_rune}')
	println('s: ${check_str}')
	rune_text := check_rune.str()
	println('rstr: ${rune_text}')
	assert utf8_char_len(check_rune) == 1
	assert utf8_char_len(check_str[0]) == 3
	assert check_str == rune_text
	raw := rune_text.str
	// Expected hex form of each byte behind the raw string pointer.
	expected_hex := ['e2', '9c', '94']
	for idx, want in expected_hex {
		byte_hex := unsafe { raw[idx].hex() }
		assert byte_hex == want
	}
}
29
// test_to_wide_latin converts 'abc 123' with to_wide() and checks every
// resulting unit, including the zero that follows the last character.
fn test_to_wide_latin() {
	text := 'abc 123'
	wide := text.to_wide()
	expected := [u16(97), 98, 99, 32, 49, 50, 51, 0]
	for i, want in expected {
		unit := unsafe { wide[i] }
		assert unit == want
	}
}
44
// test_to_wide_cyrillic converts 'Проба' with to_wide() and checks the five
// resulting units plus the zero that follows them.
fn test_to_wide_cyrillic() {
	word := 'Проба'
	wide := word.to_wide()
	expected := [u16(1055), 1088, 1086, 1073, 1072, 0]
	for pos, want in expected {
		unit := unsafe { wide[pos] }
		assert unit == want
	}
}
57
// Bytes of the wide form of swide_serial_number with the value byte of each
// pair first, followed by zero bytes. Used by the little-endian test branches.
const little_serial_number = [
	u8(67), 0, 76, 0, 52, 0, 54, 0,
	73, 0, 49, 0, 65, 0, 48, 0,
	48, 0, 54, 0, 52, 0, 57, 0,
	0, 0, 0]

// Same text with the zero byte of each pair first, followed by zero bytes.
// Used by the non-little-endian test branches.
const big_serial_number = [
	u8(0), 67, 0, 76, 0, 52, 0, 54,
	0, 73, 0, 49, 0, 65, 0, 48,
	0, 48, 0, 54, 0, 52, 0, 57,
	0, 0, 0, 0]

// The string both byte arrays above are expected to decode to.
const swide_serial_number = 'CL46I1A00649'
64
// test_string_from_wide decodes the serial number byte array matching the
// target endianness and expects swide_serial_number back.
fn test_string_from_wide() {
	mut decoded := ''
	$if little_endian {
		decoded = unsafe { string_from_wide(little_serial_number.data) }
	} $else {
		decoded = unsafe { string_from_wide(big_serial_number.data) }
	}
	assert decoded == swide_serial_number
}
74
// test_string_from_wide2 decodes the serial number byte array matching the
// target endianness, passing 12 as the second argument, and expects
// swide_serial_number back.
fn test_string_from_wide2() {
	unit_count := 12
	mut decoded := ''
	$if little_endian {
		decoded = unsafe { string_from_wide2(little_serial_number.data, unit_count) }
	} $else {
		decoded = unsafe { string_from_wide2(big_serial_number.data, unit_count) }
	}
	assert decoded == swide_serial_number
}
84
// test_reverse_cyrillic_with_string_from_wide round-trips 'Проба' through
// to_wide() and string_from_wide() and expects the original string back.
fn test_reverse_cyrillic_with_string_from_wide() {
	original := 'Проба'
	round_trip := unsafe { string_from_wide(original.to_wide()) }
	assert round_trip == original
}
91
// test_wide_to_ansi converts the wide form of 'abc' and expects the bytes
// 97, 98, 99 followed by a zero.
fn test_wide_to_ansi() {
	wide_abc := 'abc'.to_wide()
	ansi_bytes := wide_to_ansi(wide_abc)
	assert ansi_bytes == [u8(97), 98, 99, 0]
}
96
// test_string_to_ansi_not_null_terminated expects 'abc' to convert to the
// bytes 97, 98, 99 with no trailing zero.
fn test_string_to_ansi_not_null_terminated() {
	ansi_bytes := string_to_ansi_not_null_terminated('abc')
	assert ansi_bytes == [u8(97), 98, 99]
}
100
// test_utf8_str_visible_length checks utf8_str_visible_length against a
// table of inputs; expected_lengths[i] belongs to samples[i].
fn test_utf8_str_visible_length() {
	samples := [
		'𝐀𝐁𝐂',
		'\u006E\u0303',
		'\U0001F3F3\uFE0F\u200D\U0001F308',
		'ห์',
		'ปีเตอร์',
		'👩🏽‍💻',
	]
	expected_lengths := [3, 1, 2, 1, 5, 2]
	for i, sample in samples {
		assert utf8_str_visible_length(sample) == expected_lengths[i]
	}
}
109
// test_utf8_to_utf32_cases decodes the bytes of single-character strings
// with impl_utf8_to_utf32 and compares each result with the expected rune,
// given as a rune literal and, where present, as a numeric code point.
fn test_utf8_to_utf32_cases() {
	test_case1 := 'A'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case1.data), test_case1.len) == rune(`A`)

	test_case2 := 'é'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case2.data), test_case2.len) == rune(`é`)

	test_case3 := '€'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case3.data), test_case3.len) == rune(`€`)

	test_case4 := '𐍈'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(0x10348)
	assert impl_utf8_to_utf32(&u8(test_case4.data), test_case4.len) == rune(`𐍈`)

	test_case5 := '中'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(0x4E2D)
	assert impl_utf8_to_utf32(&u8(test_case5.data), test_case5.len) == rune(`中`)

	// emoji, 4-byte UTF-8
	test_case6 := '😀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == rune(0x1F600)
	assert impl_utf8_to_utf32(&u8(test_case6.data), test_case6.len) == `😀`

	test_case7 := 'Ж'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case7.data), test_case7.len) == rune(`Ж`)

	test_case8 := 'م'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case8.data), test_case8.len) == rune(`م`)

	test_case9 := '߿'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(0x07FF)
	assert impl_utf8_to_utf32(&u8(test_case9.data), test_case9.len) == rune(`߿`)

	test_case10 := 'ࠀ'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(0x0800)
	assert impl_utf8_to_utf32(&u8(test_case10.data), test_case10.len) == rune(`ࠀ`)

	// U+FFFF is spelled with an escape sequence: it has no visible glyph, so a
	// raw literal is easily lost when the file is copied or rendered, leaving
	// an empty string and an empty rune literal behind.
	test_case11 := '\uFFFF'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case11.data), test_case11.len) == rune(0xFFFF)
	assert impl_utf8_to_utf32(&u8(test_case11.data), test_case11.len) == rune(`\uFFFF`)

	test_case12 := '𐀀'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(0x10000)
	assert impl_utf8_to_utf32(&u8(test_case12.data), test_case12.len) == rune(`𐀀`)

	test_case13 := '􏿿'.bytes()
	assert impl_utf8_to_utf32(&u8(test_case13.data), test_case13.len) == rune(0x10FFFF)
	assert impl_utf8_to_utf32(&u8(test_case13.data), test_case13.len) == rune(`􏿿`)
}
159
// test_utf8_to_utf32_invalid_length passes a five-byte input and expects
// impl_utf8_to_utf32 to return 0.
fn test_utf8_to_utf32_invalid_length() {
	// More than 4 bytes is invalid
	too_long := [u8(0xF0), 0x9F, 0x98, 0x80, 0x00]
	decoded := impl_utf8_to_utf32(&u8(too_long.data), too_long.len)
	assert decoded == 0
}
165
// test_utf8_to_utf32_empty passes an empty byte array with length 0 and
// expects impl_utf8_to_utf32 to return 0.
fn test_utf8_to_utf32_empty() {
	no_bytes := []u8{}
	decoded := impl_utf8_to_utf32(&u8(no_bytes.data), 0)
	assert decoded == 0
}
169