| 1 | // vtest build: !docker-ubuntu-musl // needs libiconv to be installed |
| 2 | import encoding.iconv |
| 3 | import os |
| 4 | |
// test_vstring_to_encoding feeds V strings to `iconv.vstring_to_encoding` and
// compares the returned bytes with hand-written expectations per encoding.
fn test_vstring_to_encoding() {
	// The empty string must encode to an empty byte array.
	no_bytes := iconv.vstring_to_encoding('', 'UTF-8')!
	assert no_bytes == []

	// Expected bytes for 'abc', keyed by target encoding.
	// V maps iterate in insertion order, so the cases run top to bottom.
	abc_expected := {
		'UTF-8':    [u8(97), 98, 99]
		'UTF-16LE': [u8(97), 0, 98, 0, 99, 0]
		'UTF-16BE': [u8(0), 97, 0, 98, 0, 99]
		'UTF-32LE': [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
		'UTF-32BE': [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	}
	for target, want in abc_expected {
		got := iconv.vstring_to_encoding('abc', target)!
		assert got == want
	}

	// 'LOCAL' means the ANSI encoding on Windows and UTF-8 on Linux;
	// plain ASCII is expected to come out as the same bytes either way.
	local_bytes := iconv.vstring_to_encoding('abc', 'LOCAL')!
	assert local_bytes == [u8(97), 98, 99]

	// An unknown encoding name must produce an error, never a value.
	if unexpected := iconv.vstring_to_encoding('abc', 'encoding_not_exist') {
		assert false, 'encoding_not_exist'
	}

	if gb2312_bytes := iconv.vstring_to_encoding('V大法好abc', 'GB2312') {
		assert gb2312_bytes == [u8(86), 180, 243, 183, 168, 186, 195, 97, 98, 99]
	} else {
		// GB2312 is not available on every platform; treat that as a skip.
		assert true
	}
}
| 40 | |
// test_encoding_to_vstring feeds encoded byte arrays to
// `iconv.encoding_to_vstring` and checks the decoded V string.
fn test_encoding_to_vstring() {
	// An empty byte array must decode to the empty string.
	no_text := iconv.encoding_to_vstring([], 'UTF-8')!
	assert no_text == ''

	// Encoded forms of 'abc', keyed by source encoding.
	// V maps iterate in insertion order, so the cases run top to bottom.
	abc_encoded := {
		'UTF-8':    [u8(97), 98, 99]
		'UTF-16LE': [u8(97), 0, 98, 0, 99, 0]
		'UTF-16BE': [u8(0), 97, 0, 98, 0, 99]
		'UTF-32LE': [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
		'UTF-32BE': [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	}
	for source, raw in abc_encoded {
		decoded := iconv.encoding_to_vstring(raw, source)!
		assert decoded == 'abc'
	}

	// 'LOCAL' means the ANSI encoding on Windows and UTF-8 on Linux;
	// plain ASCII bytes are expected to decode to the same text either way.
	local_text := iconv.encoding_to_vstring([u8(97), 98, 99], 'LOCAL')!
	assert local_text == 'abc'

	// An unknown encoding name must produce an error, never a value.
	if unexpected := iconv.encoding_to_vstring([u8(97), 98, 99], 'encoding_not_exist') {
		assert false, 'encoding_not_exist'
	}

	gb2312_raw := [u8(86), 180, 243, 183, 168, 186, 195, 97, 98, 99]
	if gb2312_text := iconv.encoding_to_vstring(gb2312_raw, 'GB2312') {
		assert gb2312_text == 'V大法好abc'
	} else {
		// GB2312 is not available on every platform; treat that as a skip.
		assert true
	}
}
| 80 | |
// test_create_utf_string_with_bom checks that `iconv.create_utf_string_with_bom`
// returns the input bytes preceded by the byte order mark of the named UTF encoding.
fn test_create_utf_string_with_bom() {
	// Formatting is disabled here because vfmt lays these long literals out oddly.
	// vfmt off
	utf8_plain := [u8(97), 98, 99]
	utf8_marked := [u8(0xEF), 0xBB, 0xBF, 97, 98, 99]
	assert iconv.create_utf_string_with_bom(utf8_plain, 'UTF-8') == utf8_marked

	utf16le_plain := [u8(97), 0, 98, 0, 99, 0]
	utf16le_marked := [u8(0xFF), 0xFE, 97, 0, 98, 0, 99, 0]
	assert iconv.create_utf_string_with_bom(utf16le_plain, 'UTF-16LE') == utf16le_marked

	utf16be_plain := [u8(0), 97, 0, 98, 0, 99]
	utf16be_marked := [u8(0xFE), 0xFF, 0, 97, 0, 98, 0, 99]
	assert iconv.create_utf_string_with_bom(utf16be_plain, 'UTF-16BE') == utf16be_marked

	utf32le_plain := [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32le_marked := [u8(0xFF), 0xFE, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	assert iconv.create_utf_string_with_bom(utf32le_plain, 'UTF-32LE') == utf32le_marked

	utf32be_plain := [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	utf32be_marked := [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	assert iconv.create_utf_string_with_bom(utf32be_plain, 'UTF-32BE') == utf32be_marked
	// vfmt on
}
| 91 | |
// test_remove_utf_string_with_bom checks that `iconv.remove_utf_string_with_bom`
// returns the input bytes without the leading byte order mark of the named UTF encoding.
fn test_remove_utf_string_with_bom() {
	// Formatting is disabled here because vfmt lays these long literals out oddly.
	// vfmt off
	utf8_marked := [u8(0xEF), 0xBB, 0xBF, 97, 98, 99]
	utf8_plain := [u8(97), 98, 99]
	assert iconv.remove_utf_string_with_bom(utf8_marked, 'UTF-8') == utf8_plain

	utf16le_marked := [u8(0xFF), 0xFE, 97, 0, 98, 0, 99, 0]
	utf16le_plain := [u8(97), 0, 98, 0, 99, 0]
	assert iconv.remove_utf_string_with_bom(utf16le_marked, 'UTF-16LE') == utf16le_plain

	utf16be_marked := [u8(0xFE), 0xFF, 0, 97, 0, 98, 0, 99]
	utf16be_plain := [u8(0), 97, 0, 98, 0, 99]
	assert iconv.remove_utf_string_with_bom(utf16be_marked, 'UTF-16BE') == utf16be_plain

	utf32le_marked := [u8(0xFF), 0xFE, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	utf32le_plain := [u8(97), 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0]
	assert iconv.remove_utf_string_with_bom(utf32le_marked, 'UTF-32LE') == utf32le_plain

	utf32be_marked := [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	utf32be_plain := [u8(0), 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99]
	assert iconv.remove_utf_string_with_bom(utf32be_marked, 'UTF-32BE') == utf32be_plain
	// vfmt on
}
| 102 | |
// my_test_read_file_encoding_write_file_encoding round-trips `txt` through a
// scratch file and checks every stage:
//   1. `iconv.write_file_encoding` must put exactly `bytes` on disk;
//   2. those raw bytes (minus the BOM when `bom` is true) must decode back to `txt`;
//   3. `iconv.read_file_encoding` must return `txt` for the same file.
// Any error from the iconv or os calls is propagated to the caller.
fn my_test_read_file_encoding_write_file_encoding(txt string, encoding string, bom bool, bytes []u8) ! {
	tmp_path := 'iconv_tmp.txt'
	// Best-effort cleanup for the error paths: if one of the `!` calls below
	// propagates, the scratch file would otherwise be left in the working directory.
	// On the success path the explicit `os.rm` at the end has already removed it.
	defer {
		if os.exists(tmp_path) {
			os.rm(tmp_path) or {}
		}
	}

	iconv.write_file_encoding(tmp_path, txt, encoding, bom)!

	// Read the raw bytes straight from disk, bypassing iconv.
	mut bytes_ref := os.read_file_array[u8](tmp_path)
	assert bytes_ref == bytes

	// Strip the BOM before decoding by hand.
	if bom {
		bytes_ref = iconv.remove_utf_string_with_bom(bytes_ref, encoding)
	}
	str_ref := iconv.encoding_to_vstring(bytes_ref, encoding)!
	assert str_ref.bytes() == txt.bytes()

	str_conv := iconv.read_file_encoding(tmp_path, encoding)!
	assert str_conv == txt

	// Removal failures on the success path are still reported to the caller.
	os.rm(tmp_path)!
}
| 117 | |
// test_read_file_encoding_write_file_encoding runs the file round-trip helper
// for each UTF encoding, once without and once with a byte order mark, passing
// the exact bytes expected on disk for the sample text.
fn test_read_file_encoding_write_file_encoding() ! {
	// Mixed ASCII / CJK sample shared by every case.
	sample := 'V大法好abc'

	// vfmt off
	// UTF-8
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-8', false, [u8(86), 229, 164, 167, 230, 179, 149, 229, 165, 189, 97, 98, 99])!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-8', true, [u8(0xEF), 0xBB, 0xBF, 86, 229, 164, 167, 230, 179, 149, 229, 165, 189, 97, 98, 99])!

	// UTF-16LE
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16LE', false, [u8(86), 0, 39, 89, 213, 108, 125, 89, 97, 0, 98, 0, 99, 0])!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16LE', true, [u8(0xFF), 0xFE, 86, 0, 39, 89, 213, 108, 125, 89, 97, 0, 98, 0, 99, 0])!

	// UTF-16BE
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16BE', false, [u8(0), 86, 89, 39, 108, 213, 89, 125, 0, 97, 0, 98, 0, 99])!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-16BE', true, [u8(0xFE), 0xFF, 0, 86, 89, 39, 108, 213, 89, 125, 0, 97, 0, 98, 0, 99])!

	// UTF-32LE
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32LE', false, [u8(86), 0, 0, 0, 39, 89, 0, 0, 213, 108, 0, 0, 125, 89, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0])!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32LE', true, [u8(0xFF), 0xFE, 0, 0, 86, 0, 0, 0, 39, 89, 0, 0, 213, 108, 0, 0, 125, 89, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0])!

	// UTF-32BE
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32BE', false, [u8(0), 0, 0, 86, 0, 0, 89, 39, 0, 0, 108, 213, 0, 0, 89, 125, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99])!
	my_test_read_file_encoding_write_file_encoding(sample, 'UTF-32BE', true, [u8(0), 0, 0xFE, 0xFF, 0, 0, 0, 86, 0, 0, 89, 39, 0, 0, 108, 213, 0, 0, 89, 125, 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99])!
	// vfmt on
}
| 141 | |