Gitly


1 module iconv
2 
3 // Module iconv provides functions to convert between vstring(UTF8) and other encodings.
4 import os
5 
// reverse_u16 returns `src` with its two bytes swapped.
@[inline]
fn reverse_u16(src u16) u16 {
    high_to_low := src >> 8 // upper byte moved into the lower position
    low_to_high := src << 8 // lower byte moved into the upper position
    return u16(high_to_low | low_to_high)
}
10 
// reverse_u32 returns `src` with the order of its four bytes reversed.
@[inline]
fn reverse_u32(src u32) u32 {
    byte0_to_3 := src << 24 // lowest byte becomes the highest
    byte1_to_2 := (src << 8) & 0x00FF_0000
    byte2_to_1 := (src >> 8) & 0x0000_FF00
    byte3_to_0 := src >> 24 // highest byte becomes the lowest
    return u32(byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3)
}
15 
// vstring_to_encoding converts the V string `str` (UTF-8) to a byte array in the `tocode` encoding.
// `tocode` is compared in upper case. 'LOCAL' is mapped to 'ANSI' on Windows and to 'UTF-8' elsewhere.
// The bare names 'UTF16', 'UTF-16', 'UTF32' and 'UTF-32' are rejected with an error;
// use one of the explicit UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE variants instead.
// Any error returned by `conv` is propagated to the caller.
// tips: use `iconv --list` to check for supported encodings
pub fn vstring_to_encoding(str string, tocode string) ![]u8 {
    mut encoding_name := tocode.to_upper()
    if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
        // The message lists the encoding names exactly as they have to be spelled.
        return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
    }
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    return conv(encoding_name, 'UTF-8', str.str, str.len)
}
32 
// encoding_to_vstring converts `bytes`, interpreted in the `fromcode` encoding, to a V string (UTF-8).
// `fromcode` is compared in upper case. 'LOCAL' is mapped to 'ANSI' on Windows and to 'UTF-8' elsewhere.
// The bare names 'UTF16', 'UTF-16', 'UTF32' and 'UTF-32' are rejected with an error;
// use one of the explicit UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE variants instead.
// Any error returned by `conv` is propagated to the caller.
// tips: use `iconv --list` to check for supported encodings
pub fn encoding_to_vstring(bytes []u8, fromcode string) !string {
    mut encoding_name := fromcode.to_upper()
    if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
        // The message lists the encoding names exactly as they have to be spelled.
        return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
    }
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    mut dst := conv('UTF-8', encoding_name, bytes.data, bytes.len)!
    dst << 0 // add a tail zero, so the buffer can be read as a C string
    // NOTE(review): the result is built from a C string, so the text presumably ends at the
    // first zero byte in `dst` - confirm whether inputs containing U+0000 have to be supported.
    return unsafe { cstring_to_vstring(dst.data) }
}
51 
// create_utf_string_with_bom returns a copy of `src` with the BOM of `utf_type` put in front of it.
// `utf_type` is compared in upper case; 'LOCAL' means 'ANSI' on Windows and 'UTF-8' elsewhere.
// utf8    => EF BB BF
// utf16le => FF FE
// utf16be => FE FF
// utf32le => FF FE 00 00
// utf32be => 00 00 FE FF
// For any other `utf_type`, an unchanged copy of `src` is returned.
pub fn create_utf_string_with_bom(src []u8, utf_type string) []u8 {
    mut name := utf_type.to_upper()
    if name == 'LOCAL' {
        $if windows {
            name = 'ANSI'
        } $else {
            name = 'UTF-8'
        }
    }
    // Select the marker first; an empty marker means "nothing to prepend".
    bom := match name {
        'UTF8', 'UTF-8' { [u8(0xEF), 0xBB, 0xBF] }
        'UTF16LE', 'UTF-16LE' { [u8(0xFF), 0xFE] }
        'UTF16BE', 'UTF-16BE' { [u8(0xFE), 0xFF] }
        'UTF32LE', 'UTF-32LE' { [u8(0xFF), 0xFE, 0, 0] }
        'UTF32BE', 'UTF-32BE' { [u8(0), 0, 0xFE, 0xFF] }
        else { []u8{} }
    }
    mut out := []u8{cap: bom.len + src.len}
    out << bom
    out << src
    return out
}
89 
// remove_utf_string_with_bom returns a copy of `src` without the leading BOM of `utf_type`.
// `utf_type` is compared in upper case; 'LOCAL' means 'ANSI' on Windows and 'UTF-8' elsewhere.
// utf8    => EF BB BF is removed
// utf16le => FF FE is removed
// utf16be => FE FF is removed
// utf32le => FF FE 00 00 is removed
// utf32be => 00 00 FE FF is removed
// When `src` does not start with that BOM, or `utf_type` is none of the above,
// an unchanged copy of `src` is returned. An input that consists of the BOM only
// yields an empty array.
@[direct_array_access]
pub fn remove_utf_string_with_bom(src []u8, utf_type string) []u8 {
    mut encoding_name := utf_type.to_upper()
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    bom := match encoding_name {
        'UTF8', 'UTF-8' { [u8(0xEF), 0xBB, 0xBF] }
        'UTF16LE', 'UTF-16LE' { [u8(0xFF), 0xFE] }
        'UTF16BE', 'UTF-16BE' { [u8(0xFE), 0xFF] }
        'UTF32LE', 'UTF-32LE' { [u8(0xFF), 0xFE, 0, 0] }
        'UTF32BE', 'UTF-32BE' { [u8(0), 0, 0xFE, 0xFF] }
        else { []u8{} }
    }
    // `src.len == bom.len` is allowed on purpose: a buffer holding nothing but the BOM
    // must still get it stripped. This check also keeps the unchecked indexing below in bounds.
    if bom.len == 0 || src.len < bom.len {
        return src.clone()
    }
    for i in 0 .. bom.len {
        if src[i] != bom[i] {
            return src.clone()
        }
    }
    return src[bom.len..].clone()
}
150 
// write_file_encoding converts `text` to `encoding` and writes the resulting bytes to the file at `path`.
// If `path` already exists, it will be overwritten.
// When `bom` is true and the upper-cased `encoding` starts with 'UTF', the bytes are first passed
// through `create_utf_string_with_bom`; otherwise they are written as they are.
// Errors from the conversion and from writing the file are propagated.
pub fn write_file_encoding(path string, text string, encoding string, bom bool) ! {
    converted := vstring_to_encoding(text, encoding)!
    wants_bom := bom && encoding.to_upper().starts_with('UTF')
    if !wants_bom {
        os.write_file_array(path, converted)!
        return
    }
    with_bom := create_utf_string_with_bom(converted, encoding)
    os.write_file_array(path, with_bom)!
}
162 
// read_file_encoding reads the file at `path`, strips a leading BOM matching `encoding`
// (see `remove_utf_string_with_bom`), and returns the contents converted from `encoding` to a V string.
// An error is returned when the file cannot be read, or when the conversion fails.
pub fn read_file_encoding(path string, encoding string) !string {
    // os.read_bytes reports read failures as errors, so a missing or unreadable file
    // is not mistaken for an empty one.
    encoding_bytes := os.read_bytes(path)!
    encoding_without_bom_bytes := remove_utf_string_with_bom(encoding_bytes, encoding)
    return encoding_to_vstring(encoding_without_bom_bytes, encoding)!
}
169

1	module iconv
2
3	// Module iconv provides functions to convert between vstring(UTF8) and other encodings.
4	import os
5
// reverse_u16 returns `src` with its two bytes swapped.
@[inline]
fn reverse_u16(src u16) u16 {
    high_to_low := src >> 8 // upper byte moved into the lower position
    low_to_high := src << 8 // lower byte moved into the upper position
    return u16(high_to_low | low_to_high)
}
10
// reverse_u32 returns `src` with the order of its four bytes reversed.
@[inline]
fn reverse_u32(src u32) u32 {
    byte0_to_3 := src << 24 // lowest byte becomes the highest
    byte1_to_2 := (src << 8) & 0x00FF_0000
    byte2_to_1 := (src >> 8) & 0x0000_FF00
    byte3_to_0 := src >> 24 // highest byte becomes the lowest
    return u32(byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3)
}
15
// vstring_to_encoding converts the V string `str` (UTF-8) to a byte array in the `tocode` encoding.
// `tocode` is compared in upper case. 'LOCAL' is mapped to 'ANSI' on Windows and to 'UTF-8' elsewhere.
// The bare names 'UTF16', 'UTF-16', 'UTF32' and 'UTF-32' are rejected with an error;
// use one of the explicit UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE variants instead.
// Any error returned by `conv` is propagated to the caller.
// tips: use `iconv --list` to check for supported encodings
pub fn vstring_to_encoding(str string, tocode string) ![]u8 {
    mut encoding_name := tocode.to_upper()
    if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
        // The message lists the encoding names exactly as they have to be spelled.
        return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
    }
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    return conv(encoding_name, 'UTF-8', str.str, str.len)
}
32
// encoding_to_vstring converts `bytes`, interpreted in the `fromcode` encoding, to a V string (UTF-8).
// `fromcode` is compared in upper case. 'LOCAL' is mapped to 'ANSI' on Windows and to 'UTF-8' elsewhere.
// The bare names 'UTF16', 'UTF-16', 'UTF32' and 'UTF-32' are rejected with an error;
// use one of the explicit UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE variants instead.
// Any error returned by `conv` is propagated to the caller.
// tips: use `iconv --list` to check for supported encodings
pub fn encoding_to_vstring(bytes []u8, fromcode string) !string {
    mut encoding_name := fromcode.to_upper()
    if encoding_name in ['UTF16', 'UTF32', 'UTF-16', 'UTF-32']! {
        // The message lists the encoding names exactly as they have to be spelled.
        return error('please use UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE instead')
    }
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    mut dst := conv('UTF-8', encoding_name, bytes.data, bytes.len)!
    dst << 0 // add a tail zero, so the buffer can be read as a C string
    // NOTE(review): the result is built from a C string, so the text presumably ends at the
    // first zero byte in `dst` - confirm whether inputs containing U+0000 have to be supported.
    return unsafe { cstring_to_vstring(dst.data) }
}
51
// create_utf_string_with_bom returns a copy of `src` with the BOM of `utf_type` put in front of it.
// `utf_type` is compared in upper case; 'LOCAL' means 'ANSI' on Windows and 'UTF-8' elsewhere.
// utf8    => EF BB BF
// utf16le => FF FE
// utf16be => FE FF
// utf32le => FF FE 00 00
// utf32be => 00 00 FE FF
// For any other `utf_type`, an unchanged copy of `src` is returned.
pub fn create_utf_string_with_bom(src []u8, utf_type string) []u8 {
    mut name := utf_type.to_upper()
    if name == 'LOCAL' {
        $if windows {
            name = 'ANSI'
        } $else {
            name = 'UTF-8'
        }
    }
    // Select the marker first; an empty marker means "nothing to prepend".
    bom := match name {
        'UTF8', 'UTF-8' { [u8(0xEF), 0xBB, 0xBF] }
        'UTF16LE', 'UTF-16LE' { [u8(0xFF), 0xFE] }
        'UTF16BE', 'UTF-16BE' { [u8(0xFE), 0xFF] }
        'UTF32LE', 'UTF-32LE' { [u8(0xFF), 0xFE, 0, 0] }
        'UTF32BE', 'UTF-32BE' { [u8(0), 0, 0xFE, 0xFF] }
        else { []u8{} }
    }
    mut out := []u8{cap: bom.len + src.len}
    out << bom
    out << src
    return out
}
89
// remove_utf_string_with_bom returns a copy of `src` without the leading BOM of `utf_type`.
// `utf_type` is compared in upper case; 'LOCAL' means 'ANSI' on Windows and 'UTF-8' elsewhere.
// utf8    => EF BB BF is removed
// utf16le => FF FE is removed
// utf16be => FE FF is removed
// utf32le => FF FE 00 00 is removed
// utf32be => 00 00 FE FF is removed
// When `src` does not start with that BOM, or `utf_type` is none of the above,
// an unchanged copy of `src` is returned. An input that consists of the BOM only
// yields an empty array.
@[direct_array_access]
pub fn remove_utf_string_with_bom(src []u8, utf_type string) []u8 {
    mut encoding_name := utf_type.to_upper()
    if encoding_name == 'LOCAL' {
        $if windows {
            encoding_name = 'ANSI'
        } $else {
            encoding_name = 'UTF-8'
        }
    }
    bom := match encoding_name {
        'UTF8', 'UTF-8' { [u8(0xEF), 0xBB, 0xBF] }
        'UTF16LE', 'UTF-16LE' { [u8(0xFF), 0xFE] }
        'UTF16BE', 'UTF-16BE' { [u8(0xFE), 0xFF] }
        'UTF32LE', 'UTF-32LE' { [u8(0xFF), 0xFE, 0, 0] }
        'UTF32BE', 'UTF-32BE' { [u8(0), 0, 0xFE, 0xFF] }
        else { []u8{} }
    }
    // `src.len == bom.len` is allowed on purpose: a buffer holding nothing but the BOM
    // must still get it stripped. This check also keeps the unchecked indexing below in bounds.
    if bom.len == 0 || src.len < bom.len {
        return src.clone()
    }
    for i in 0 .. bom.len {
        if src[i] != bom[i] {
            return src.clone()
        }
    }
    return src[bom.len..].clone()
}
150
// write_file_encoding converts `text` to `encoding` and writes the resulting bytes to the file at `path`.
// If `path` already exists, it will be overwritten.
// When `bom` is true and the upper-cased `encoding` starts with 'UTF', the bytes are first passed
// through `create_utf_string_with_bom`; otherwise they are written as they are.
// Errors from the conversion and from writing the file are propagated.
pub fn write_file_encoding(path string, text string, encoding string, bom bool) ! {
    converted := vstring_to_encoding(text, encoding)!
    wants_bom := bom && encoding.to_upper().starts_with('UTF')
    if !wants_bom {
        os.write_file_array(path, converted)!
        return
    }
    with_bom := create_utf_string_with_bom(converted, encoding)
    os.write_file_array(path, with_bom)!
}
162
// read_file_encoding reads the file at `path`, strips a leading BOM matching `encoding`
// (see `remove_utf_string_with_bom`), and returns the contents converted from `encoding` to a V string.
// An error is returned when the file cannot be read, or when the conversion fails.
pub fn read_file_encoding(path string, encoding string) !string {
    // os.read_bytes reports read failures as errors, so a missing or unreadable file
    // is not mistaken for an empty one.
    encoding_bytes := os.read_bytes(path)!
    encoding_without_bom_bytes := remove_utf_string_with_bom(encoding_bytes, encoding)
    return encoding_to_vstring(encoding_without_bom_bytes, encoding)!
}
169