v / vlib / encoding / iconv / iconv_test.v
140 lines · 114 sloc · 6.32 KB · b5b93b092b8c72e76ff578650e01950fcc7c2314
Raw
1// vtest build: !docker-ubuntu-musl // needs libiconv to be installed
2import encoding.iconv
3import os
4
// test_vstring_to_encoding checks the bytes returned by `iconv.vstring_to_encoding`
// for a V string converted to several target encodings.
fn test_vstring_to_encoding() {
	// an empty string converts to an empty byte array
	assert iconv.vstring_to_encoding('', 'UTF-8')! == []u8{}

	// plain ASCII text, one code unit per character
	assert iconv.vstring_to_encoding('abc', 'UTF-8')! == [u8(97), 98, 99]

	// 2-byte code units, little endian then big endian
	assert iconv.vstring_to_encoding('abc', 'UTF-16LE')! == [u8(97), 0, 98, 0, 99, 0]
	assert iconv.vstring_to_encoding('abc', 'UTF-16BE')! == [u8(0), 97, 0, 98, 0, 99]

	// 4-byte code units, little endian then big endian
	utf32le_bytes := iconv.vstring_to_encoding('abc', 'UTF-32LE')!
	assert utf32le_bytes == [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32be_bytes := iconv.vstring_to_encoding('abc', 'UTF-32BE')!
	assert utf32be_bytes == [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]

	// Windows LOCAL: ANSI encoding
	// Linux LOCAL: UTF-8 encoding
	assert iconv.vstring_to_encoding('abc', 'LOCAL')! == [u8(97), 98, 99]

	// an unknown encoding name must yield an error, never a value
	if unexpected := iconv.vstring_to_encoding('abc', 'encoding_not_exist') {
		assert false, 'encoding_not_exist'
	}

	// mixed ASCII/Chinese text converted to GB2312
	if gb_bytes := iconv.vstring_to_encoding('V大法好abc', 'GB2312') {
		assert gb_bytes == [u8(86), 180, 243, 183, 168, 186, 195, 97, 98, 99]
	} else {
		// some platforms do not support GB2312, skip
		assert true
	}
}
40
// test_encoding_to_vstring checks that `iconv.encoding_to_vstring` turns bytes
// in several source encodings back into the expected V string.
fn test_encoding_to_vstring() {
	// an empty byte array decodes to an empty string
	assert iconv.encoding_to_vstring([], 'UTF-8')! == ''

	// the same ASCII text in each UTF flavour
	ascii_bytes := [u8(97), 98, 99]
	assert iconv.encoding_to_vstring(ascii_bytes, 'UTF-8')! == 'abc'

	utf16le_bytes := [u8(97), 0, 98, 0, 99, 0]
	assert iconv.encoding_to_vstring(utf16le_bytes, 'UTF-16LE')! == 'abc'

	utf16be_bytes := [u8(0), 97, 0, 98, 0, 99]
	assert iconv.encoding_to_vstring(utf16be_bytes, 'UTF-16BE')! == 'abc'

	utf32le_bytes := [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	assert iconv.encoding_to_vstring(utf32le_bytes, 'UTF-32LE')! == 'abc'

	utf32be_bytes := [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	assert iconv.encoding_to_vstring(utf32be_bytes, 'UTF-32BE')! == 'abc'

	// Windows LOCAL: ANSI encoding
	// Linux LOCAL: UTF-8 encoding
	assert iconv.encoding_to_vstring([u8(97), 98, 99], 'LOCAL')! == 'abc'

	// an unknown encoding name must yield an error, never a value
	if unexpected := iconv.encoding_to_vstring([u8(97), 98, 99], 'encoding_not_exist') {
		assert false, 'encoding_not_exist'
	}

	// GB2312 bytes for mixed ASCII/Chinese text
	gb_bytes := [u8(86), 180, 243, 183, 168, 186, 195, 97, 98, 99]
	if decoded := iconv.encoding_to_vstring(gb_bytes, 'GB2312') {
		assert decoded == 'V大法好abc'
	} else {
		// some platforms do not support GB2312, skip
		assert true
	}
}
80
// test_create_utf_string_with_bom checks that `iconv.create_utf_string_with_bom`
// returns the payload preceded by the byte order mark of the named encoding.
fn test_create_utf_string_with_bom() {
	// vfmt off
	// UTF-8: EF BB BF
	utf8_payload := [u8(97), 98, 99]
	utf8_expected := [u8(0xEF), 0xBB, 0xBF, 97, 98, 99]
	assert iconv.create_utf_string_with_bom(utf8_payload, 'UTF-8') == utf8_expected

	// UTF-16LE: FF FE
	utf16le_payload := [u8(97), 0, 98, 0, 99, 0]
	utf16le_expected := [u8(0xFF), 0xFE, 97, 0, 98, 0, 99, 0]
	assert iconv.create_utf_string_with_bom(utf16le_payload, 'UTF-16LE') == utf16le_expected

	// UTF-16BE: FE FF
	utf16be_payload := [u8(0), 97, 0, 98, 0, 99]
	utf16be_expected := [u8(0xFE), 0xFF, 0, 97, 0, 98, 0, 99]
	assert iconv.create_utf_string_with_bom(utf16be_payload, 'UTF-16BE') == utf16be_expected

	// UTF-32LE: FF FE 00 00
	utf32le_payload := [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32le_expected := [u8(0xFF), 0xFE, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	assert iconv.create_utf_string_with_bom(utf32le_payload, 'UTF-32LE') == utf32le_expected

	// UTF-32BE: 00 00 FE FF
	utf32be_payload := [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	utf32be_expected := [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	assert iconv.create_utf_string_with_bom(utf32be_payload, 'UTF-32BE') == utf32be_expected
	// vfmt on
}
91
// test_remove_utf_string_with_bom checks that `iconv.remove_utf_string_with_bom`
// returns the input without the leading byte order mark of the named encoding.
fn test_remove_utf_string_with_bom() {
	// vfmt off
	// UTF-8: EF BB BF
	utf8_input := [u8(0xEF), 0xBB, 0xBF, 97, 98, 99]
	utf8_expected := [u8(97), 98, 99]
	assert iconv.remove_utf_string_with_bom(utf8_input, 'UTF-8') == utf8_expected

	// UTF-16LE: FF FE
	utf16le_input := [u8(0xFF), 0xFE, 97, 0, 98, 0, 99, 0]
	utf16le_expected := [u8(97), 0, 98, 0, 99, 0]
	assert iconv.remove_utf_string_with_bom(utf16le_input, 'UTF-16LE') == utf16le_expected

	// UTF-16BE: FE FF
	utf16be_input := [u8(0xFE), 0xFF, 0, 97, 0, 98, 0, 99]
	utf16be_expected := [u8(0), 97, 0, 98, 0, 99]
	assert iconv.remove_utf_string_with_bom(utf16be_input, 'UTF-16BE') == utf16be_expected

	// UTF-32LE: FF FE 00 00
	utf32le_input := [u8(0xFF), 0xFE, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32le_expected := [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	assert iconv.remove_utf_string_with_bom(utf32le_input, 'UTF-32LE') == utf32le_expected

	// UTF-32BE: 00 00 FE FF
	utf32be_input := [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	utf32be_expected := [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	assert iconv.remove_utf_string_with_bom(utf32be_input, 'UTF-32BE') == utf32be_expected
	// vfmt on
}
102
// my_test_read_file_encoding_write_file_encoding round-trips `txt` through a
// scratch file written in `encoding` (with a byte order mark when `bom` is true)
// and checks that:
//   1. the raw bytes found in the file are equal to `bytes`;
//   2. decoding those bytes (after stripping the BOM when `bom` is true) gives `txt`;
//   3. `iconv.read_file_encoding` on the same file gives `txt` too.
// Errors returned by the iconv/os calls are propagated to the caller.
fn my_test_read_file_encoding_write_file_encoding(txt string, encoding string, bom bool, bytes []u8) ! {
	// Keep the scratch file out of the current working directory, and make its
	// name specific to this process, so that concurrent runs do not collide.
	tmp_path := os.join_path(os.vtmp_dir(), 'iconv_tmp_${os.getpid()}.txt')
	// Safety net: when one of the `!` calls below returns early with an error,
	// the explicit removal at the end is never reached, so clean up here.
	defer {
		if os.exists(tmp_path) {
			os.rm(tmp_path) or {}
		}
	}
	iconv.write_file_encoding(tmp_path, txt, encoding, bom)!
	// read bytes directly from file
	mut bytes_ref := os.read_file_array[u8](tmp_path)
	assert bytes_ref == bytes
	if bom {
		// drop the byte order mark before decoding the raw bytes
		bytes_ref = iconv.remove_utf_string_with_bom(bytes_ref, encoding)
	}
	str_ref := iconv.encoding_to_vstring(bytes_ref, encoding)!
	assert str_ref.bytes() == txt.bytes()
	str_conv := iconv.read_file_encoding(tmp_path, encoding)!
	assert str_conv == txt
	// On the success path a failing removal is still reported to the caller.
	os.rm(tmp_path)!
}
117
// test_read_file_encoding_write_file_encoding runs the file round-trip helper
// for the same sample text in every UTF flavour, once without and once with a
// byte order mark, passing the exact bytes expected in the written file.
fn test_read_file_encoding_write_file_encoding() ! {
	// vfmt off
	sample := 'V大法好abc'

	// UTF-8
	utf8_plain := [u8(86), 229, 164, 167, 230, 179, 149, 229, 165, 189, 97, 98, 99]
	utf8_bom := [u8(0xEF), 0xBB, 0xBF, 86, 229, 164, 167, 230, 179, 149, 229, 165, 189, 97, 98, 99]
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-8', false, utf8_plain)!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-8', true, utf8_bom)!

	// UTF-16LE
	utf16le_plain := [u8(86), 0, 39, 89, 213, 108, 125, 89, 97, 0, 98, 0, 99, 0]
	utf16le_bom := [u8(0xFF), 0xFE, 86, 0, 39, 89, 213, 108, 125, 89, 97, 0, 98, 0, 99, 0]
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16LE', false, utf16le_plain)!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16LE', true, utf16le_bom)!

	// UTF-16BE
	utf16be_plain := [u8(0), 86, 89, 39, 108, 213, 89, 125, 0, 97, 0, 98, 0, 99]
	utf16be_bom := [u8(0xFE), 0xFF, 0, 86, 89, 39, 108, 213, 89, 125, 0, 97, 0, 98, 0, 99]
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16BE', false, utf16be_plain)!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16BE', true, utf16be_bom)!

	// UTF-32LE
	utf32le_plain := [u8(86), 0, 0, 0, 39, 89, 0, 0, 213, 108, 0, 0, 125, 89, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32le_bom := [u8(0xFF), 0xFE, 0, 0, 86, 0, 0, 0, 39, 89, 0, 0, 213, 108, 0, 0, 125, 89, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32LE', false, utf32le_plain)!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32LE', true, utf32le_bom)!

	// UTF-32BE
	utf32be_plain := [u8(0), 0, 0, 86, 0, 0, 89, 39, 0, 0, 108, 213, 0, 0, 89, 125, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	utf32be_bom := [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 86, 0, 0, 89, 39, 0, 0, 108, 213, 0, 0, 89, 125, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32BE', false, utf32be_plain)!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32BE', true, utf32be_bom)!
	// vfmt on
}
141