Gitly


1 module iconv
2 
3 // Module iconv provides functions to convert V strings (UTF-8) to and from other encodings.
4 // This iconv implementation uses the Win32 API to do the conversion.
5 // Idea from https://github.com/win-iconv/win-iconv
6 
// Win32 functions used by this module.
// GetACP / GetOEMCP supply the code page for the names '', 'CP_ACP', 'ANSI' and
// 'CP_OEMCP' in name_to_codepage().
7 fn C.GetACP() i32
8 fn C.GetOEMCP() i32
// WideCharToMultiByte: `src` points at UTF-16 data even though it is declared as
// &u8 here; per the Win32 documentation `src_len` is a count of 16-bit units
// (not bytes) and `dst_len` is a count of bytes.
9 fn C.WideCharToMultiByte(codepage u32, dwflags u32, src &u8, src_len i32, dst &u8, dst_len i32, default_char &u8, used_default_char &bool) i32
// MultiByteToWideChar: `dst` receives UTF-16 data even though it is declared as
// &u8 here; per the Win32 documentation `src_len` is a count of bytes and
// `dst_len` is a count of 16-bit units (not bytes).
10 fn C.MultiByteToWideChar(codepage u32, dwflags u32, src &u8, src_len i32, dst &u8, dst_len i32) i32
11 
// Codepage_Alias is one row of the `codepage_alias` table: it associates an
// encoding name with a code page number.
12 struct Codepage_Alias {
13     codepage int // code page number returned by name_to_codepage() for `name`
14     name     string // encoding name; compared against the upper-cased lookup key
15 }
16 
// codepage_alias maps encoding names to code page numbers.
// name_to_codepage() scans it from top to bottom and returns the first entry
// whose `name` equals the upper-cased lookup key. Consequences:
//   - every `name` in this table must be written in uppercase;
//   - when a name is listed more than once, only its first entry is ever used;
//   - names of the form 'CP<number>' or '<number>' are resolved numerically by
//     name_to_codepage() before this table is consulted.
17 const codepage_alias = [
18     // NOTE: the name field MUST be in uppercase!
19     // vfmt off
20     Codepage_Alias{65001, 'CP65001'},
21     Codepage_Alias{65001, 'UTF8'},
22     Codepage_Alias{65001, 'UTF-8'},
23 
24     Codepage_Alias{1200, 'CP1200'},
25     Codepage_Alias{1200, 'UTF16LE'},
26     Codepage_Alias{1200, 'UTF-16LE'},
27     Codepage_Alias{1200, 'UCS2LE'},
28     Codepage_Alias{1200, 'UCS-2LE'},
29     Codepage_Alias{1200, 'UCS-2-INTERNAL'},
30     Codepage_Alias{1200, 'UNICODE'},    // for iconv
31 
32     Codepage_Alias{1201, 'CP1201'},
33     Codepage_Alias{1201, 'UTF16BE'},
34     Codepage_Alias{1201, 'UTF-16BE'},
35     Codepage_Alias{1201, 'UCS2BE'},
36     Codepage_Alias{1201, 'UCS-2BE'},
37     Codepage_Alias{1201, 'UNICODEFFFE'},
38 
39     Codepage_Alias{12000, 'CP12000'},
40     Codepage_Alias{12000, 'UTF32LE'},
41     Codepage_Alias{12000, 'UTF-32LE'},
42     Codepage_Alias{12000, 'UCS4LE'},
43     Codepage_Alias{12000, 'UCS-4LE'},
44 
45     Codepage_Alias{12001, 'CP12001'},
46     Codepage_Alias{12001, 'UTF32BE'},
47     Codepage_Alias{12001, 'UTF-32BE'},
48     Codepage_Alias{12001, 'UCS4BE'},
49     Codepage_Alias{12001, 'UCS-4BE'},
50 
51 //#ifndef GLIB_COMPILATION
52 //    // (disabled alternative: endian-less names default to big endian)
53 //     * Default is big endian.
54 //     * See rfc2781 4.3 Interpreting text labelled as UTF-16.
55 //
56 //    Codepage_Alias{1201, 'UTF16'},
57 //    Codepage_Alias{1201, 'UTF-16'},
58 //    Codepage_Alias{1201, 'UCS2'},
59 //    Codepage_Alias{1201, 'UCS-2'},
60 //    Codepage_Alias{12001, 'UTF32'},
61 //    Codepage_Alias{12001, 'UTF-32'},
62 //    Codepage_Alias{12001, 'UCS-4'},
63 //    Codepage_Alias{12001, 'UCS4'},
64 //#else
65     // Default is little endian, because the platform is
66     Codepage_Alias{1200, 'UTF16'},
67     Codepage_Alias{1200, 'UTF-16'},
68     Codepage_Alias{1200, 'UCS2'},
69     Codepage_Alias{1200, 'UCS-2'},
70     Codepage_Alias{12000, 'UTF32'},
71     Codepage_Alias{12000, 'UTF-32'},
72     Codepage_Alias{12000, 'UCS4'},
73     Codepage_Alias{12000, 'UCS-4'},
74 //#endif
75 
76     // copy from libiconv `iconv -l`
77     // !IsValidCodePage(367) — presumably: Windows does not accept 367, so its names map to 20127
78     Codepage_Alias{20127, 'ANSI_X3.4-1968'},
79     Codepage_Alias{20127, 'ANSI_X3.4-1986'},
80     Codepage_Alias{20127, 'ASCII'},
81     Codepage_Alias{20127, 'CP367'},
82     Codepage_Alias{20127, 'IBM367'},
83     Codepage_Alias{20127, 'ISO-IR-6'},
84     Codepage_Alias{20127, 'ISO646-US'},
85     Codepage_Alias{20127, 'ISO_646.IRV:1991'},
86     Codepage_Alias{20127, 'US'},
87     Codepage_Alias{20127, 'US-ASCII'},
88     Codepage_Alias{20127, 'CSASCII'},
89 
90     // !IsValidCodePage(819)
91     Codepage_Alias{1252, 'CP819'},
92     Codepage_Alias{1252, 'IBM819'},
93     Codepage_Alias{28591, 'ISO-8859-1'},
94     Codepage_Alias{28591, 'ISO-IR-100'},
95     Codepage_Alias{28591, 'ISO8859-1'},
96     Codepage_Alias{28591, 'ISO_8859-1'},
97     Codepage_Alias{28591, 'ISO_8859-1:1987'},
98     Codepage_Alias{28591, 'L1'},
99     Codepage_Alias{28591, 'LATIN1'},
100     Codepage_Alias{28591, 'CSISOLATIN1'},
101 
102     Codepage_Alias{1250, 'CP1250'},
103     Codepage_Alias{1250, 'MS-EE'},
104     Codepage_Alias{1250, 'WINDOWS-1250'},
105 
106     Codepage_Alias{1251, 'CP1251'},
107     Codepage_Alias{1251, 'MS-CYRL'},
108     Codepage_Alias{1251, 'WINDOWS-1251'},
109 
110     Codepage_Alias{1252, 'CP1252'},
111     Codepage_Alias{1252, 'MS-ANSI'},
112     Codepage_Alias{1252, 'WINDOWS-1252'},
113 
114     Codepage_Alias{1253, 'CP1253'},
115     Codepage_Alias{1253, 'MS-GREEK'},
116     Codepage_Alias{1253, 'WINDOWS-1253'},
117 
118     Codepage_Alias{1254, 'CP1254'},
119     Codepage_Alias{1254, 'MS-TURK'},
120     Codepage_Alias{1254, 'WINDOWS-1254'},
121 
122     Codepage_Alias{1255, 'CP1255'},
123     Codepage_Alias{1255, 'MS-HEBR'},
124     Codepage_Alias{1255, 'WINDOWS-1255'},
125 
126     Codepage_Alias{1256, 'CP1256'},
127     Codepage_Alias{1256, 'MS-ARAB'},
128     Codepage_Alias{1256, 'WINDOWS-1256'},
129 
130     Codepage_Alias{1257, 'CP1257'},
131     Codepage_Alias{1257, 'WINBALTRIM'},
132     Codepage_Alias{1257, 'WINDOWS-1257'},
133 
134     Codepage_Alias{1258, 'CP1258'},
135     Codepage_Alias{1258, 'WINDOWS-1258'},
136 
137     Codepage_Alias{850, '850'},
138     Codepage_Alias{850, 'CP850'},
139     Codepage_Alias{850, 'IBM850'},
140     Codepage_Alias{850, 'CSPC850MULTILINGUAL'},
141 
142     // !IsValidCodePage(862)
143     Codepage_Alias{862, '862'},
144     Codepage_Alias{862, 'CP862'},
145     Codepage_Alias{862, 'IBM862'},
146     Codepage_Alias{862, 'CSPC862LATINHEBREW'},
147 
148     Codepage_Alias{866, '866'},
149     Codepage_Alias{866, 'CP866'},
150     Codepage_Alias{866, 'IBM866'},
151     Codepage_Alias{866, 'CSIBM866'},
152 
153     // !IsValidCodePage(154)
154     Codepage_Alias{154, 'CP154'},
155     Codepage_Alias{154, 'CYRILLIC-ASIAN'},
156     Codepage_Alias{154, 'PT154'},
157     Codepage_Alias{154, 'PTCP154'},
158     Codepage_Alias{154, 'CSPTCP154'},
159 
160     // !IsValidCodePage(1133)
161     Codepage_Alias{1133, 'CP1133'},
162     Codepage_Alias{1133, 'IBM-CP1133'},
163 
164     Codepage_Alias{874, 'CP874'},
165     Codepage_Alias{874, 'WINDOWS-874'},
166 
167     // !IsValidCodePage(51932)
168     Codepage_Alias{51932, 'CP51932'},
169     Codepage_Alias{51932, 'MS51932'},
170     Codepage_Alias{51932, 'WINDOWS-51932'},
171     Codepage_Alias{51932, 'EUC-JP'},
172 
173     Codepage_Alias{932, 'CP932'},
174     Codepage_Alias{932, 'MS932'},
175     Codepage_Alias{932, 'SHIFFT_JIS'}, // sic: spelled with a double F; 'SHIFT_JIS' and 'SHIFT-JIS' are listed further down
176     Codepage_Alias{932, 'SHIFFT_JIS-MS'}, // sic: same double-F spelling
177     Codepage_Alias{932, 'SJIS'},
178     Codepage_Alias{932, 'SJIS-MS'},
179     Codepage_Alias{932, 'SJIS-OPEN'},
180     Codepage_Alias{932, 'SJIS-WIN'},
181     Codepage_Alias{932, 'WINDOWS-31J'},
182     Codepage_Alias{932, 'WINDOWS-932'},
183     Codepage_Alias{932, 'CSWINDOWS31J'},
184 
185     Codepage_Alias{50221, 'CP50221'},
186     Codepage_Alias{50221, 'ISO-2022-JP'},
187     Codepage_Alias{50221, 'ISO-2022-JP-MS'},
188     Codepage_Alias{50221, 'ISO2022-JP'},
189     Codepage_Alias{50221, 'ISO2022-JP-MS'},
190     Codepage_Alias{50221, 'MS50221'},
191     Codepage_Alias{50221, 'WINDOWS-50221'},
192 
193     Codepage_Alias{936, 'CP936'},
194     Codepage_Alias{936, 'GBK'},
195     Codepage_Alias{936, 'MS936'},
196     Codepage_Alias{936, 'WINDOWS-936'},
197 
198     Codepage_Alias{950, 'CP950'},
199     Codepage_Alias{950, 'BIG5'},
200     Codepage_Alias{950, 'BIG5HKSCS'},
201     Codepage_Alias{950, 'BIG5-HKSCS'},
202 
203     Codepage_Alias{949, 'CP949'},
204     Codepage_Alias{949, 'UHC'},
205     Codepage_Alias{949, 'EUC-KR'},
206 
207     Codepage_Alias{1361, 'CP1361'},
208     Codepage_Alias{1361, 'JOHAB'},
209 
210     Codepage_Alias{437, '437'},
211     Codepage_Alias{437, 'CP437'},
212     Codepage_Alias{437, 'IBM437'},
213     Codepage_Alias{437, 'CSPC8CODEPAGE437'},
214 
215     Codepage_Alias{737, 'CP737'},
216 
217     Codepage_Alias{775, 'CP775'},
218     Codepage_Alias{775, 'IBM775'},
219     Codepage_Alias{775, 'CSPC775BALTIC'},
220 
221     Codepage_Alias{852, '852'},
222     Codepage_Alias{852, 'CP852'},
223     Codepage_Alias{852, 'IBM852'},
224     Codepage_Alias{852, 'CSPCP852'},
225 
226     // !IsValidCodePage(853)
227     Codepage_Alias{853, 'CP853'},
228 
229     Codepage_Alias{855, '855'},
230     Codepage_Alias{855, 'CP855'},
231     Codepage_Alias{855, 'IBM855'},
232     Codepage_Alias{855, 'CSIBM855'},
233 
234     Codepage_Alias{857, '857'},
235     Codepage_Alias{857, 'CP857'},
236     Codepage_Alias{857, 'IBM857'},
237     Codepage_Alias{857, 'CSIBM857'},
238 
239     // !IsValidCodePage(858)
240     Codepage_Alias{858, 'CP858'},
241 
242     Codepage_Alias{860, '860'},
243     Codepage_Alias{860, 'CP860'},
244     Codepage_Alias{860, 'IBM860'},
245     Codepage_Alias{860, 'CSIBM860'},
246 
247     Codepage_Alias{861, '861'},
248     Codepage_Alias{861, 'CP-IS'},
249     Codepage_Alias{861, 'CP861'},
250     Codepage_Alias{861, 'IBM861'},
251     Codepage_Alias{861, 'CSIBM861'},
252 
253     Codepage_Alias{863, '863'},
254     Codepage_Alias{863, 'CP863'},
255     Codepage_Alias{863, 'IBM863'},
256     Codepage_Alias{863, 'CSIBM863'},
257 
258     Codepage_Alias{864, 'CP864'},
259     Codepage_Alias{864, 'IBM864'},
260     Codepage_Alias{864, 'CSIBM864'},
261 
262     Codepage_Alias{865, '865'},
263     Codepage_Alias{865, 'CP865'},
264     Codepage_Alias{865, 'IBM865'},
265     Codepage_Alias{865, 'CSIBM865'},
266 
267     Codepage_Alias{869, '869'},
268     Codepage_Alias{869, 'CP-GR'},
269     Codepage_Alias{869, 'CP869'},
270     Codepage_Alias{869, 'IBM869'},
271     Codepage_Alias{869, 'CSIBM869'},
272 
273     // !IsValidCodePage(1152) — NOTE(review): the entry below is 1125; the number in this comment looks like a typo
274     Codepage_Alias{1125, 'CP1125'},
275 
276     //
277     // Code Page Identifiers, see
278     // https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
279      
280     Codepage_Alias{37, 'IBM037'}, // IBM EBCDIC US-Canada
281     Codepage_Alias{437, 'IBM437'}, // OEM United States
282     Codepage_Alias{500, 'IBM500'}, // IBM EBCDIC International
283     Codepage_Alias{708, 'ASMO-708'}, // Arabic (ASMO 708)
284     // 709         Arabic (ASMO-449+, BCON V4)
285     // 710         Arabic - Transparent Arabic
286     Codepage_Alias{720, 'DOS-720'}, // Arabic (Transparent ASMO); Arabic (DOS)
287     Codepage_Alias{737, 'IBM737'}, // OEM Greek (formerly 437G); Greek (DOS)
288     Codepage_Alias{775, 'IBM775'}, // OEM Baltic; Baltic (DOS)
289     Codepage_Alias{850, 'IBM850'}, // OEM Multilingual Latin 1; Western European (DOS)
290     Codepage_Alias{852, 'IBM852'}, // OEM Latin 2; Central European (DOS)
291     Codepage_Alias{855, 'IBM855'}, // OEM Cyrillic (primarily Russian)
292     Codepage_Alias{857, 'IBM857'}, // OEM Turkish; Turkish (DOS)
293     Codepage_Alias{858, 'IBM00858'}, // OEM Multilingual Latin 1 + Euro symbol
294     Codepage_Alias{860, 'IBM860'}, // OEM Portuguese; Portuguese (DOS)
295     Codepage_Alias{861, 'IBM861'}, // OEM Icelandic; Icelandic (DOS)
296     Codepage_Alias{862, 'DOS-862'}, // OEM Hebrew; Hebrew (DOS)
297     Codepage_Alias{863, 'IBM863'}, // OEM French Canadian; French Canadian (DOS)
298     Codepage_Alias{864, 'IBM864'}, // OEM Arabic; Arabic (864)
299     Codepage_Alias{865, 'IBM865'}, // OEM Nordic; Nordic (DOS)
300     Codepage_Alias{866, 'CP866'}, // OEM Russian; Cyrillic (DOS)
301     Codepage_Alias{869, 'IBM869'}, // OEM Modern Greek; Greek, Modern (DOS)
302     Codepage_Alias{870, 'IBM870'}, // IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
303     Codepage_Alias{874, 'WINDOWS-874'}, // ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)
304     Codepage_Alias{875, 'CP875'}, // IBM EBCDIC Greek Modern
305     Codepage_Alias{932, 'SHIFT_JIS'}, // ANSI/OEM Japanese; Japanese (Shift-JIS)
306     Codepage_Alias{932, 'SHIFT-JIS'}, // alternative name for it
307     Codepage_Alias{936, 'GB2312'}, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
308     Codepage_Alias{949, 'KS_C_5601-1987'}, // ANSI/OEM Korean (Unified Hangul Code)
309     Codepage_Alias{950, 'BIG5'}, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
310     Codepage_Alias{950, 'BIG5HKSCS'}, // ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS)
311     Codepage_Alias{950, 'BIG5-HKSCS'}, // alternative name for it
312     Codepage_Alias{1026, 'IBM1026'}, // IBM EBCDIC Turkish (Latin 5)
313     Codepage_Alias{1047, 'IBM01047'}, // IBM EBCDIC Latin 1/Open System
314     Codepage_Alias{1140, 'IBM01140'}, // IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
315     Codepage_Alias{1141, 'IBM01141'}, // IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
316     Codepage_Alias{1142, 'IBM01142'}, // IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
317     Codepage_Alias{1143, 'IBM01143'}, // IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
318     Codepage_Alias{1144, 'IBM01144'}, // IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
319     Codepage_Alias{1145, 'IBM01145'}, // IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
320     Codepage_Alias{1146, 'IBM01146'}, // IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
321     Codepage_Alias{1147, 'IBM01147'}, // IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
322     Codepage_Alias{1148, 'IBM01148'}, // IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
323     Codepage_Alias{1149, 'IBM01149'}, // IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
324     Codepage_Alias{1200, 'UTF-16'}, // Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
325     Codepage_Alias{1201, 'UNICODEFFFE'}, // Unicode UTF-16, big endian byte order; available only to managed applications
326     Codepage_Alias{1250, 'WINDOWS-1250'}, // ANSI Central European; Central European (Windows)
327     Codepage_Alias{1251, 'WINDOWS-1251'}, // ANSI Cyrillic; Cyrillic (Windows)
328     Codepage_Alias{1252, 'WINDOWS-1252'}, // ANSI Latin 1; Western European (Windows)
329     Codepage_Alias{1253, 'WINDOWS-1253'}, // ANSI Greek; Greek (Windows)
330     Codepage_Alias{1254, 'WINDOWS-1254'}, // ANSI Turkish; Turkish (Windows)
331     Codepage_Alias{1255, 'WINDOWS-1255'}, // ANSI Hebrew; Hebrew (Windows)
332     Codepage_Alias{1256, 'WINDOWS-1256'}, // ANSI Arabic; Arabic (Windows)
333     Codepage_Alias{1257, 'WINDOWS-1257'}, // ANSI Baltic; Baltic (Windows)
334     Codepage_Alias{1258, 'WINDOWS-1258'}, // ANSI/OEM Vietnamese; Vietnamese (Windows)
335     Codepage_Alias{1361, 'JOHAB'}, // Korean (Johab)
336     Codepage_Alias{10000, 'MACINTOSH'}, // MAC Roman; Western European (Mac)
337     Codepage_Alias{10001, 'X-MAC-JAPANESE'}, // Japanese (Mac)
338     Codepage_Alias{10002, 'X-MAC-CHINESETRAD'}, // MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
339     Codepage_Alias{10003, 'X-MAC-KOREAN'}, // Korean (Mac)
340     Codepage_Alias{10004, 'X-MAC-ARABIC'}, // Arabic (Mac)
341     Codepage_Alias{10005, 'X-MAC-HEBREW'}, // Hebrew (Mac)
342     Codepage_Alias{10006, 'X-MAC-GREEK'}, // Greek (Mac)
343     Codepage_Alias{10007, 'X-MAC-CYRILLIC'}, // Cyrillic (Mac)
344     Codepage_Alias{10008, 'X-MAC-CHINESESIMP'}, // MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
345     Codepage_Alias{10010, 'X-MAC-ROMANIAN'}, // Romanian (Mac)
346     Codepage_Alias{10017, 'X-MAC-UKRAINIAN'}, // Ukrainian (Mac)
347     Codepage_Alias{10021, 'X-MAC-THAI'}, // Thai (Mac)
348     Codepage_Alias{10029, 'X-MAC-CE'}, // MAC Latin 2; Central European (Mac)
349     Codepage_Alias{10079, 'X-MAC-ICELANDIC'}, // Icelandic (Mac)
350     Codepage_Alias{10081, 'X-MAC-TURKISH'}, // Turkish (Mac)
351     Codepage_Alias{10082, 'X-MAC-CROATIAN'}, // Croatian (Mac)
352     Codepage_Alias{12000, 'UTF-32'}, // Unicode UTF-32, little endian byte order; available only to managed applications
353     Codepage_Alias{12001, 'UTF-32BE'}, // Unicode UTF-32, big endian byte order; available only to managed applications
354     Codepage_Alias{20000, 'X-CHINESE_CNS'}, // CNS Taiwan; Chinese Traditional (CNS)
355     Codepage_Alias{20001, 'X-CP20001'}, // TCA Taiwan
356     Codepage_Alias{20002, 'X_CHINESE-ETEN'}, // Eten Taiwan; Chinese Traditional (Eten)
357     Codepage_Alias{20003, 'X-CP20003'}, // IBM5550 Taiwan
358     Codepage_Alias{20004, 'X-CP20004'}, // TeleText Taiwan
359     Codepage_Alias{20005, 'X-CP20005'}, // Wang Taiwan
360     Codepage_Alias{20105, 'X-IA5'}, // IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
361     Codepage_Alias{20106, 'X-IA5-GERMAN'}, // IA5 German (7-bit)
362     Codepage_Alias{20107, 'X-IA5-SWEDISH'}, // IA5 Swedish (7-bit)
363     Codepage_Alias{20108, 'X-IA5-NORWEGIAN'}, // IA5 Norwegian (7-bit)
364     Codepage_Alias{20127, 'US-ASCII'}, // US-ASCII (7-bit)
365     Codepage_Alias{20261, 'X-CP20261'}, // T.61
366     Codepage_Alias{20269, 'X-CP20269'}, // ISO 6937 Non-Spacing Accent
367     Codepage_Alias{20273, 'IBM273'}, // IBM EBCDIC Germany
368     Codepage_Alias{20277, 'IBM277'}, // IBM EBCDIC Denmark-Norway
369     Codepage_Alias{20278, 'IBM278'}, // IBM EBCDIC Finland-Sweden
370     Codepage_Alias{20280, 'IBM280'}, // IBM EBCDIC Italy
371     Codepage_Alias{20284, 'IBM284'}, // IBM EBCDIC Latin America-Spain
372     Codepage_Alias{20285, 'IBM285'}, // IBM EBCDIC United Kingdom
373     Codepage_Alias{20290, 'IBM290'}, // IBM EBCDIC Japanese Katakana Extended
374     Codepage_Alias{20297, 'IBM297'}, // IBM EBCDIC France
375     Codepage_Alias{20420, 'IBM420'}, // IBM EBCDIC Arabic
376     Codepage_Alias{20423, 'IBM423'}, // IBM EBCDIC Greek
377     Codepage_Alias{20424, 'IBM424'}, // IBM EBCDIC Hebrew
378     Codepage_Alias{20833, 'X-EBCDIC-KOREANEXTENDED'}, // IBM EBCDIC Korean Extended
379     Codepage_Alias{20838, 'IBM-THAI'}, // IBM EBCDIC Thai
380     Codepage_Alias{20866, 'KOI8-R'}, // Russian (KOI8-R); Cyrillic (KOI8-R)
381     Codepage_Alias{20871, 'IBM871'}, // IBM EBCDIC Icelandic
382     Codepage_Alias{20880, 'IBM880'}, // IBM EBCDIC Cyrillic Russian
383     Codepage_Alias{20905, 'IBM905'}, // IBM EBCDIC Turkish
384     Codepage_Alias{20924, 'IBM00924'}, // IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
385     Codepage_Alias{20932, 'EUC-JP'}, // Japanese (JIS 0208-1990 and 0121-1990). NOTE(review): never selected by name — 'EUC-JP' already matches the 51932 entry above
386     Codepage_Alias{20936, 'X-CP20936'}, // Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
387     Codepage_Alias{20949, 'X-CP20949'}, // Korean Wansung
388     Codepage_Alias{21025, 'CP1025'}, // IBM EBCDIC Cyrillic Serbian-Bulgarian
389     // 21027         (deprecated)
390     Codepage_Alias{21866, 'KOI8-U'}, // Ukrainian (KOI8-U); Cyrillic (KOI8-U)
391     Codepage_Alias{28591, 'ISO-8859-1'}, // ISO 8859-1 Latin 1; Western European (ISO)
392     Codepage_Alias{28591, 'ISO8859-1'}, // ISO 8859-1 Latin 1; Western European (ISO)
393     Codepage_Alias{28591, 'ISO_8859-1'},
394     Codepage_Alias{28591, 'ISO_8859_1'},
395     Codepage_Alias{28592, 'ISO-8859-2'}, // ISO 8859-2 Central European; Central European (ISO)
396     Codepage_Alias{28592, 'ISO8859-2'}, // ISO 8859-2 Central European; Central European (ISO)
397     Codepage_Alias{28592, 'ISO_8859-2'},
398     Codepage_Alias{28592, 'ISO_8859_2'},
399     Codepage_Alias{28593, 'ISO-8859-3'}, // ISO 8859-3 Latin 3
400     Codepage_Alias{28593, 'ISO8859-3'}, // ISO 8859-3 Latin 3
401     Codepage_Alias{28593, 'ISO_8859-3'},
402     Codepage_Alias{28593, 'ISO_8859_3'},
403     Codepage_Alias{28594, 'ISO-8859-4'}, // ISO 8859-4 Baltic
404     Codepage_Alias{28594, 'ISO8859-4'}, // ISO 8859-4 Baltic
405     Codepage_Alias{28594, 'ISO_8859-4'},
406     Codepage_Alias{28594, 'ISO_8859_4'},
407     Codepage_Alias{28595, 'ISO-8859-5'}, // ISO 8859-5 Cyrillic
408     Codepage_Alias{28595, 'ISO8859-5'}, // ISO 8859-5 Cyrillic
409     Codepage_Alias{28595, 'ISO_8859-5'},
410     Codepage_Alias{28595, 'ISO_8859_5'},
411     Codepage_Alias{28596, 'ISO-8859-6'}, // ISO 8859-6 Arabic
412     Codepage_Alias{28596, 'ISO8859-6'}, // ISO 8859-6 Arabic
413     Codepage_Alias{28596, 'ISO_8859-6'},
414     Codepage_Alias{28596, 'ISO_8859_6'},
415     Codepage_Alias{28597, 'ISO-8859-7'}, // ISO 8859-7 Greek
416     Codepage_Alias{28597, 'ISO8859-7'}, // ISO 8859-7 Greek
417     Codepage_Alias{28597, 'ISO_8859-7'},
418     Codepage_Alias{28597, 'ISO_8859_7'},
419     Codepage_Alias{28598, 'ISO-8859-8'}, // ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
420     Codepage_Alias{28598, 'ISO8859-8'}, // ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
421     Codepage_Alias{28598, 'ISO_8859-8'},
422     Codepage_Alias{28598, 'ISO_8859_8'},
423     Codepage_Alias{28599, 'ISO-8859-9'}, // ISO 8859-9 Turkish
424     Codepage_Alias{28599, 'ISO8859-9'}, // ISO 8859-9 Turkish
425     Codepage_Alias{28599, 'ISO_8859-9'},
426     Codepage_Alias{28599, 'ISO_8859_9'},
427     Codepage_Alias{28603, 'ISO-8859-13'}, // ISO 8859-13 Estonian
428     Codepage_Alias{28603, 'ISO8859-13'}, // ISO 8859-13 Estonian
429     Codepage_Alias{28603, 'ISO_8859-13'},
430     Codepage_Alias{28603, 'ISO_8859_13'},
431     Codepage_Alias{28605, 'ISO-8859-15'}, // ISO 8859-15 Latin 9
432     Codepage_Alias{28605, 'ISO8859-15'}, // ISO 8859-15 Latin 9
433     Codepage_Alias{28605, 'ISO_8859-15'},
434     Codepage_Alias{28605, 'ISO_8859_15'},
435     Codepage_Alias{29001, 'X-EUROPA'}, // Europa 3
436     Codepage_Alias{38598, 'ISO-8859-8-I'}, // ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
437     Codepage_Alias{38598, 'ISO8859-8-I'}, // ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
438     Codepage_Alias{38598, 'ISO_8859-8-I'},
439     Codepage_Alias{38598, 'ISO_8859_8-I'},
440     Codepage_Alias{50220, 'ISO-2022-JP'}, // ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS). NOTE(review): never selected by name — 'ISO-2022-JP' already matches the 50221 entry above
441     Codepage_Alias{50221, 'CSISO2022JP'}, // ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
442     Codepage_Alias{50222, 'ISO-2022-JP'}, // ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI). NOTE(review): never selected by name, same reason as 50220
443     Codepage_Alias{50225, 'ISO-2022-KR'}, // ISO 2022 Korean
444     Codepage_Alias{50225, 'ISO2022-KR'}, // ISO 2022 Korean
445     Codepage_Alias{50227, 'X-CP50227'}, // ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
446     // 50229         ISO 2022 Traditional Chinese
447     // 50930         EBCDIC Japanese (Katakana) Extended
448     // 50931         EBCDIC US-Canada and Japanese
449     // 50933         EBCDIC Korean Extended and Korean
450     // 50935         EBCDIC Simplified Chinese Extended and Simplified Chinese
451     // 50936         EBCDIC Simplified Chinese
452     // 50937         EBCDIC US-Canada and Traditional Chinese
453     // 50939         EBCDIC Japanese (Latin) Extended and Japanese
454     Codepage_Alias{51932, 'EUC-JP'}, // EUC Japanese
455     Codepage_Alias{51936, 'EUC-CN'}, // EUC Simplified Chinese; Chinese Simplified (EUC)
456     Codepage_Alias{51949, 'EUC-KR'}, // EUC Korean. NOTE(review): never selected by name — 'EUC-KR' already matches the 949 entry above
457     // 51950         EUC Traditional Chinese
458     Codepage_Alias{52936, 'HZ-GB-2312'}, // HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
459     Codepage_Alias{54936, 'GB18030'}, // Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
460     Codepage_Alias{57002, 'X-ISCII-DE'}, // ISCII Devanagari
461     Codepage_Alias{57003, 'X-ISCII-BE'}, // ISCII Bengali
462     Codepage_Alias{57004, 'X-ISCII-TA'}, // ISCII Tamil
463     Codepage_Alias{57005, 'X-ISCII-TE'}, // ISCII Telugu
464     Codepage_Alias{57006, 'X-ISCII-AS'}, // ISCII Assamese
465     Codepage_Alias{57007, 'X-ISCII-OR'}, // ISCII Oriya
466     Codepage_Alias{57008, 'X-ISCII-KA'}, // ISCII Kannada
467     Codepage_Alias{57009, 'X-ISCII-MA'}, // ISCII Malayalam
468     Codepage_Alias{57010, 'X-ISCII-GU'}, // ISCII Gujarati
469     Codepage_Alias{57011, 'X-ISCII-PA'}, // ISCII Punjabi
470     Codepage_Alias{65000, 'UTF-7'}, // Unicode (UTF-7)
471     Codepage_Alias{65001, 'UTF-8'}, // Unicode (UTF-8)
472     // vfmt on
473 ]
474 
// name_to_codepage resolves the encoding `name` (case-insensitive) to a code
// page number.
//
// Resolution order:
//   1. the exact string 'UTF-8' (fast path that skips the upper-casing),
//   2. '', 'CP_ACP', 'ANSI' -> C.GetACP(); 'CP_OEMCP' -> C.GetOEMCP(),
//   3. 'WCHAR_T' -> 1200,
//   4. 'CP<number>', a bare '<number>', or the debug form 'XX<number>',
//   5. the first entry of `codepage_alias` whose name matches.
//
// Returns -1 when the name cannot be resolved. Numeric forms are returned as
// parsed, without validation, so callers must still reject values <= 0.
fn name_to_codepage(name string) int {
	// performance hack: the most common encoding needs no normalisation
	if name == 'UTF-8' {
		return 65001
	}

	name_upper := name.to_upper()
	if name_upper == '' || name_upper == 'CP_ACP' || name_upper == 'ANSI' {
		return C.GetACP()
	}
	if name_upper == 'CP_OEMCP' {
		return C.GetOEMCP()
	}
	if name_upper.len < 2 {
		return -1
	}
	if name_upper == 'WCHAR_T' {
		return 1200
	}
	// CP123
	if name_upper.starts_with('CP') {
		codepage := name_upper[2..].int()
		if codepage != 0 {
			return codepage
		}
		// The suffix is not a number (e.g. 'CP-IS', 'CP-GR'). Returning 0 here
		// would make those aliases unreachable, so fall through to the table.
	}
	if name_upper.is_int() {
		return name_upper.int()
	}
	// XX123 for debug. The prefix must be tested in uppercase because
	// `name_upper` has already been upper-cased; a lowercase 'xx' never matches.
	if name_upper.starts_with('XX') {
		return name_upper[2..].int()
	}

	for alias in codepage_alias {
		if alias.name == name_upper {
			return alias.codepage
		}
	}
	return -1
}
513 
// utf32_to_utf16 converts the UTF-32 data at `src` (`src_len` bytes) to UTF-16
// and returns the encoded bytes.
//
// `is_src_little_endian` / `is_dst_little_endian` select the byte order of the
// input and output units; units are byte-swapped with reverse_u32/reverse_u16
// when the corresponding flag is false.
// Only complete 4-byte units are consumed: `src_len / 4` units are read and any
// trailing bytes are ignored.
// Code points above U+FFFF are written as a surrogate pair. A value above
// U+10FFFF cannot be represented in UTF-16 and yields an error.
// NOTE(review): values in the surrogate range U+D800..U+DFFF are copied through
// unchanged rather than rejected.
//
// Algorithm reference: https://www.cnblogs.com/findumars/p/6376034.html
@[direct_array_access]
fn utf32_to_utf16(src &u8, src_len int, is_src_little_endian bool, is_dst_little_endian bool) ![]u8 {
	// Every 4-byte input unit produces at most two 2-byte output units, so the
	// output never needs more than `src_len` bytes.
	mut dst := []u8{len: src_len}
	mut sptr := unsafe { &u32(src) }
	mut dptr := &u16(dst.data)
	unit_count := src_len / 4
	mut dst_idx := 0
	for src_idx in 0 .. unit_count {
		mut c := unsafe { sptr[src_idx] }
		if !is_src_little_endian {
			c = reverse_u32(c)
		}
		if c <= 0xFFFF {
			// BMP: one UTF-16 unit
			mut t := u16(c)
			if !is_dst_little_endian {
				t = reverse_u16(t)
			}
			unsafe {
				dptr[dst_idx] = t
			}
			dst_idx++
		} else if c <= 0x10FFFF {
			// Supplementary planes: surrogate pair. `(c >> 10) - 0x40` equals
			// `(c - 0x10000) >> 10`, which stays within 0..0x3FF up to U+10FFFF.
			mut high := u16(0xD800 + (c >> 10) - 0x40)
			mut low := u16(0xDC00 + (c & 0x03FF))
			if !is_dst_little_endian {
				high = reverse_u16(high)
				low = reverse_u16(low)
			}
			unsafe {
				dptr[dst_idx] = high
				dptr[dst_idx + 1] = low
			}
			dst_idx += 2
		} else {
			return error('invalid UTF-32LE encoding')
		}
	}
	// keep only the bytes actually written
	dst.trim(dst_idx * 2)
	return dst
}
569 
// utf16_to_utf32 converts the UTF-16 data at `src` (`src_len` bytes) to UTF-32
// and returns the encoded bytes.
//
// `is_src_little_endian` / `is_dst_little_endian` select the byte order of the
// input and output units; units are byte-swapped with reverse_u16/reverse_u32
// when the corresponding flag is false.
// Only complete 2-byte units are consumed: `src_len / 2` units are read and a
// trailing odd byte is ignored.
// Returns an error for ill-formed input: a low surrogate without a preceding
// high surrogate, or a high surrogate that is not followed by a low surrogate
// (including one at the very end of the input).
//
// Algorithm reference: https://www.cnblogs.com/findumars/p/6376034.html
@[direct_array_access]
fn utf16_to_utf32(src &u8, src_len int, is_src_little_endian bool, is_dst_little_endian bool) ![]u8 {
	// Every 2-byte input unit produces at most one 4-byte output unit.
	mut dst := []u8{len: src_len * 2}
	mut sptr := unsafe { &u16(src) }
	mut dptr := &u32(dst.data)
	unit_count := src_len / 2
	mut src_idx := 0
	mut dst_idx := 0
	for src_idx < unit_count {
		mut w1 := unsafe { sptr[src_idx] }
		if !is_src_little_endian {
			w1 = reverse_u16(w1)
		}
		src_idx++
		mut t := u32(w1)
		if w1 >= 0xD800 && w1 <= 0xDFFF {
			// A pair must start with a high surrogate and needs one more unit.
			if w1 >= 0xDC00 || src_idx == unit_count {
				return error('invalid UTF-16LE encoding')
			}
			mut w2 := unsafe { sptr[src_idx] }
			if !is_src_little_endian {
				w2 = reverse_u16(w2)
			}
			if w2 < 0xDC00 || w2 > 0xDFFF {
				// unpaired high surrogate
				return error('invalid UTF-16LE encoding')
			}
			// Consume the low surrogate as well; otherwise the next iteration
			// would read it again and reject it as a lone low surrogate.
			src_idx++
			// Widen before shifting so the arithmetic cannot be truncated to 16 bits.
			t = u32(w2 & 0x03FF) + ((u32(w1 & 0x03FF) + 0x40) << 10)
		}
		if !is_dst_little_endian {
			t = reverse_u32(t)
		}
		unsafe {
			dptr[dst_idx] = t
		}
		dst_idx++
	}
	// keep only the bytes actually written
	dst.trim(dst_idx * 4)
	return dst
}
630 
// conv converts `src_len` bytes at `src` from the `fromcode` encoding to the
// `tocode` encoding and returns the converted bytes.
//
// Both names are resolved with name_to_codepage(). When they resolve to the same
// code page the input is returned as a copy. Otherwise the input is first
// brought to UTF-16LE (code page 1200) and then encoded into the target:
//   - UTF-16BE (1201) and UTF-32 (12000/12001) are handled in this module,
//   - every other code page goes through MultiByteToWideChar /
//     WideCharToMultiByte.
//
// Errors: negative `src_len`, an unresolvable `fromcode` / `tocode`, ill-formed
// UTF-16/UTF-32 input, or a failing Win32 conversion call. The Win32 path
// treats a result of 0 characters/bytes as a failure.
@[direct_array_access]
fn conv(tocode string, fromcode string, src &u8, src_len int) ![]u8 {
	if src_len < 0 {
		return error('src length error')
	}
	src_codepage := name_to_codepage(fromcode)
	dst_codepage := name_to_codepage(tocode)
	if src_codepage <= 0 {
		return error('fromcode ${fromcode} does not exist')
	}
	if dst_codepage <= 0 {
		return error('tocode ${tocode} does not exist')
	}

	if src_codepage == dst_codepage {
		// clone src
		mut dst_buf := []u8{len: src_len}
		unsafe { vmemcpy(dst_buf.data, src, src_len) }
		return dst_buf
	}

	mut unicode := []u8{}
	// src codepage => Unicode (UTF-16LE bytes in `unicode`)
	match src_codepage {
		1200 {
			// src already in Unicode(UTF-16LE) encoding, just clone src
			unsafe {
				unicode.grow_len(src_len)
				vmemcpy(unicode.data, src, src_len)
			}
		}
		1201 {
			// Windows does not support UTF-16BE
			// byte swap each 16 bit character element
			unsafe {
				unicode.grow_len(src_len)
				vmemcpy(unicode.data, src, src_len)
			}
			mut eptr := &u16(unicode.data)
			for i in 0 .. src_len / 2 {
				unsafe {
					eptr[i] = reverse_u16(eptr[i])
				}
			}
		}
		12000 {
			// Windows does not support UTF-32LE
			unicode = utf32_to_utf16(src, src_len, true, true)!
		}
		12001 {
			// Windows does not support UTF-32BE
			unicode = utf32_to_utf16(src, src_len, false, true)!
		}
		else {
			// First call: ask how many UTF-16 units the result needs.
			char_num := C.MultiByteToWideChar(src_codepage, 0, src, src_len, 0, 0)
			if char_num == 0 {
				return error('MultiByteToWideChar fail: src contain zero ${fromcode} character')
			}
			unsafe { unicode.grow_len(char_num * 2) } // every char take 2 bytes
			// The destination size is a count of UTF-16 units, not bytes.
			written := C.MultiByteToWideChar(src_codepage, 0, src, src_len, unicode.data,
				char_num)
			if written == 0 {
				return error('MultiByteToWideChar fail: can not convert src from ${fromcode}')
			}
		}
	}

	mut dst := []u8{}
	// Unicode => dst codepage
	match dst_codepage {
		1200 {
			// dst codepage is Unicode, just return unicode
			return unicode
		}
		1201 {
			// Windows does not support UTF-16BE
			// byte swap each 16 bit character element
			mut eptr := &u16(unicode.data)
			for i in 0 .. unicode.len / 2 {
				unsafe {
					eptr[i] = reverse_u16(eptr[i])
				}
			}
			return unicode
		}
		12000 {
			// Windows does not support UTF-32LE
			dst = utf16_to_utf32(unicode.data, unicode.len, true, true)!
		}
		12001 {
			// Windows does not support UTF-32BE
			dst = utf16_to_utf32(unicode.data, unicode.len, true, false)!
		}
		else {
			// `unicode.len` is in bytes; the API wants the number of UTF-16 units.
			wchar_num := unicode.len / 2
			// First call: ask how many bytes the result needs.
			dst_len := C.WideCharToMultiByte(dst_codepage, 0, unicode.data, wchar_num, 0,
				0, 0, 0)
			if dst_len == 0 {
				return error('WideCharToMultiByte fail: src contain zero unicode character')
			}
			unsafe { dst.grow_len(dst_len) }
			// Use the same unit count as the sizing call. Passing the byte length
			// here would read past the end of `unicode` and overflow `dst`.
			written := C.WideCharToMultiByte(dst_codepage, 0, unicode.data, wchar_num,
				dst.data, dst.len, 0, 0)
			if written == 0 {
				return error('WideCharToMultiByte fail: can not convert to ${tocode}')
			}
		}
	}

	return dst
}
735