v / vlib / encoding / utf8 / utf8_util_test.v
137 lines · 113 sloc · 3.55 KB · 1957162c2a4545ec88c75f59d5c27c689d974a57
Raw
1import encoding.utf8
2
// test_utf8_util exercises case conversion, rune counting, punctuation
// detection and rune extraction on strings that mix 1- to 4-byte UTF-8
// sequences.
fn test_utf8_util() {
	// string test
	// The fixture must end in `æ€”` (2 + 3 + 3 bytes): that tail is what makes
	// the rune/byte counts asserted below (15 runes, 24 bytes) hold and what
	// upper-cases to the `Æ€”` tail of src_upper.
	src := 'ăĂ ôÔ testo æ€”' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
	src_upper := 'ĂĂ ÔÔ TESTO Æ€”' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
	src_lower := 'ăă ôô testo æ€”' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
	upper := utf8.to_upper(src)
	lower := utf8.to_lower(src)
	assert upper == src_upper
	assert lower == src_lower

	// Cyrillic letters change case; ASCII digits and `{` pass through unchanged.
	assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
	assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'

	// test len function: utf8.len counts runes, string.len counts raw bytes
	assert utf8.len('') == 0
	assert utf8.len('pippo') == 5
	assert utf8.len(src) == 15 // 29
	assert src.len == 24 // 49

	// western punctuation
	// Positions passed to is_punct are byte offsets into `a`:
	// .(0) a(1) b(2) c(3) ?(4) a(5) b(6) c(7) ò(8-9) à(10-11) è(12-13) .(14)
	a := '.abc?abcòàè.'
	assert utf8.is_punct(a, 0) == true
	assert utf8.is_punct('b', 0) == false
	assert utf8.is_rune_punct(0x002E) == true
	assert utf8.is_punct(a, 4) == true // ?
	assert utf8.is_punct(a, 14) == true // last .
	assert utf8.is_punct(a, 12) == false // è
	println('OK western')

	// global punctuation
	// Byte offsets into `b`: .(0) Ă(1-2) Ă(3-4) a(5) .(6) ...
	b := '.ĂĂa. ÔÔ TESTO Æ€'
	assert utf8.is_global_punct(b, 0) == true
	assert utf8.is_global_punct('.', 0) == true
	// NOTE(review): this repeats the western rune check from above; a
	// global-punctuation rune check may have been intended here - confirm.
	assert utf8.is_rune_punct(0x002E) == true
	assert utf8.is_global_punct(b, 6) == true // .
	assert utf8.is_global_punct(b, 1) == false // first byte of Ă, not punctuation

	// test utility functions
	// get_rune also takes a byte offset: a(0) ©(1-2) ★(3-5) 🚀(6-9)
	assert utf8.get_rune(b, 0) == 0x002E
	c := 'a©★🚀'
	assert utf8.get_rune(c, 0) == `a` // 1 byte
	assert utf8.get_rune(c, 1) == `©` // 2 bytes
	assert utf8.get_rune(c, 3) == `★` // 3 bytes
	assert utf8.get_rune(c, 6) == `🚀` // 4 bytes
}
48
// test_raw_indexing checks that utf8.raw_index addresses a string by rune
// position (not byte offset) and returns the rune at that position as a string.
fn test_raw_indexing() {
	// Two 3-byte CJK characters followed by plain ASCII: rune positions 0 and 1
	// are the non-ASCII characters, 2 through 8 are the ASCII ones.
	mixed := '我是V Lang!'
	mixed_expected := ['我', '是', 'V', ' ', 'L', 'a', 'n', 'g', '!']
	for pos, want in mixed_expected {
		assert utf8.raw_index(mixed, pos) == want
	}

	// One rune of each UTF-8 encoded length, from 1 byte up to 4 bytes.
	widths := 'a©★🚀'
	widths_expected := ['a', '©', '★', '🚀']
	for pos, want in widths_expected {
		assert utf8.raw_index(widths, pos) == want
	}
}
72
// test_reversed verifies that utf8.reverse reverses a string rune by rune,
// so multi-byte characters come out intact rather than byte-swapped.
fn test_reversed() {
	inputs := ['我是V Lang!', '你好世界hello world']
	wanted := ['!gnaL V是我', 'dlrow olleh界世好你']
	for i, text in inputs {
		assert utf8.reverse(text) == wanted[i]
	}
}
79
// test_is_control checks that no ASCII letter is reported as a control
// character, that U+0001 is, and that U+0100 is not.
fn test_is_control() {
	// A `a` .. `z` range is half-open and would never visit `z`; spelling the
	// alphabet out covers every letter, including the last one.
	for ra in 'abcdefghijklmnopqrstuvwxyz'.runes() {
		assert utf8.is_control(ra) == false
	}

	for ra in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.runes() {
		assert utf8.is_control(ra) == false
	}

	assert utf8.is_control('\x01'.runes()[0]) == true
	assert utf8.is_control('\u0100'.runes()[0]) == false
}
92
// test_is_letter checks that every ASCII letter, plus a few non-ASCII Latin
// letters, is classified as a letter.
fn test_is_letter() {
	// A `a` .. `z` range is half-open and would never visit `z`; spelling the
	// alphabet out covers every letter, including the last one.
	for ra in 'abcdefghijklmnopqrstuvwxyz'.runes() {
		assert utf8.is_letter(ra) == true
	}

	for ra in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.runes() {
		assert utf8.is_letter(ra) == true
	}

	// letters outside the ASCII range
	assert utf8.is_letter(`ɀ`) == true
	assert utf8.is_letter(`ȶ`) == true
	assert utf8.is_letter(`ȹ`) == true
}
106
// test_is_space checks that no ASCII letter is classified as white space and
// that several non-ASCII space characters are.
fn test_is_space() {
	// A `a` .. `z` range is half-open and would never visit `z`; spelling the
	// alphabet out covers every letter, including the last one.
	for ra in 'abcdefghijklmnopqrstuvwxyz'.runes() {
		assert utf8.is_space(ra) == false
	}

	for ra in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.runes() {
		assert utf8.is_space(ra) == false
	}

	assert utf8.is_space(`\u202f`) == true // narrow no-break space
	assert utf8.is_space(`\u2009`) == true // thin space
	assert utf8.is_space(`\u00A0`) == true // no-break space
}
120
// test_is_number checks that no ASCII letter is classified as a number, that
// every ASCII digit is, and that a few non-ASCII numerals are.
fn test_is_number() {
	// Ranges such as `a` .. `z` are half-open and never visit the upper bound;
	// spelling the characters out covers the whole set.
	for ra in 'abcdefghijklmnopqrstuvwxyz'.runes() {
		assert utf8.is_number(ra) == false
	}

	for ra in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.runes() {
		assert utf8.is_number(ra) == false
	}

	// All ten decimal digits; a `0` .. `1` range would only ever check `0`.
	for ra in '0123456789'.runes() {
		assert utf8.is_number(ra) == true
	}

	// numerals outside the ASCII range
	assert utf8.is_number(`\u2164`) == true
	assert utf8.is_number(`\u2188`) == true
	assert utf8.is_number(`\u3029`) == true
}
138