v2 / vlib / builtin / utf8.c.v
120 lines · 113 sloc · 3.85 KB · 80538516b34a0d2a254b56b1d306e6f3af9187c4
Raw
1module builtin
2
3import strings
4
// cp_acp is the code page identifier for the ANSI code page (CP_ACP).
// It is passed to the Windows conversion APIs used in this file.
const cp_acp = 0

// cp_utf8 is the code page identifier for UTF-8 (CP_UTF8).
// It is passed to the Windows conversion APIs used in this file.
const cp_utf8 = 65001
7
// ToWideConfig holds the optional parameters of string.to_wide().
@[params]
pub struct ToWideConfig {
pub:
	// from_ansi chooses the source code page in the windows branch of to_wide():
	// true selects cp_acp, false (the default) selects cp_utf8.
	from_ansi bool
}
13
// to_wide returns a pointer to an UTF-16 version of the string receiver.
// In V, strings are encoded using UTF-8 internally, but on windows most APIs,
// that accept strings, need them to be in UTF-16 encoding.
// The returned pointer of .to_wide(), has a type of &u16, and is suitable
// for passing to Windows APIs that expect LPWSTR or wchar_t* parameters.
// The returned buffer is always terminated by a 0 code unit.
// On windows, `param.from_ansi` selects cp_acp instead of cp_utf8 as the source
// encoding. On other platforms, the receiver is decoded to runes; code points
// above U+FFFF are written as a surrogate pair (2 code units), and values above
// U+10FFFF are replaced by U+FFFD.
// See also MultiByteToWideChar ( https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar )
// See also builtin.wchar.from_string/1, for a version, that produces a
// platform dependant L"" C style wchar_t* wide string.
pub fn (_str string) to_wide(param ToWideConfig) &u16 {
	$if windows {
		unsafe {
			src_encoding := if param.from_ansi { cp_acp } else { cp_utf8 }
			// Size query: no output buffer is passed, the result is used as the allocation size.
			num_chars := (C.MultiByteToWideChar(src_encoding, 0, &char(_str.str), _str.len, 0, 0))
			mut wstr := &u16(malloc_noscan((num_chars + 1) * 2)) // sizeof(wchar_t)
			if wstr != 0 {
				C.MultiByteToWideChar(src_encoding, 0, &char(_str.str), _str.len, wstr, num_chars)
				// Write the terminating 0 code unit right after the converted text.
				C.memset(&u8(wstr) + num_chars * 2, 0, 2)
			}
			return wstr
		}
	} $else {
		srunes := _str.runes()
		// First pass: count the UTF-16 code units needed, since a code point
		// above U+FFFF (but still inside the Unicode range) takes two of them.
		mut n_units := 0
		for r in srunes {
			cp := u32(r)
			if cp > 0xFFFF && cp <= 0x10FFFF {
				n_units += 2
			} else {
				n_units++
			}
		}
		unsafe {
			mut result := &u16(vcalloc_noscan((n_units + 1) * 2))
			mut pos := 0
			for r in srunes {
				cp := u32(r)
				if cp > 0x10FFFF {
					// Not representable in UTF-16: use the replacement character.
					result[pos] = u16(0xFFFD)
					pos++
				} else if cp > 0xFFFF {
					// Split the 20 bit offset into a high and a low surrogate.
					offset := cp - 0x10000
					result[pos] = u16(0xD800 + (offset >> 10))
					result[pos + 1] = u16(0xDC00 + (offset & 0x3FF))
					pos += 2
				} else {
					result[pos] = u16(cp)
					pos++
				}
			}
			result[pos] = 0
			return result
		}
	}
}
46
// string_from_wide creates a V string, encoded in UTF-8, from a windows style,
// 0 terminated string, encoded in UTF-16. It has to scan the input for the
// terminating 0 code unit first, and only then it delegates the conversion to
// string_from_wide2/2. If you already know the length of the input, prefer
// calling string_from_wide2/2 directly, to avoid that extra scan.
// See also builtin.wchar.to_string/1, for a version that eases working with
// the platform dependent &wchar_t L"" strings.
@[manualfree; unsafe]
pub fn string_from_wide(_wstr &u16) string {
	$if windows {
		// Let the C runtime find the terminator.
		n := unsafe { C.wcslen(_wstr) }
		return unsafe { string_from_wide2(_wstr, int(n)) }
	} $else {
		// Count the code units that precede the first 0 one.
		mut n := 0
		for {
			if unsafe { _wstr[n] } == 0 {
				break
			}
			n++
		}
		return unsafe { string_from_wide2(_wstr, n) }
	}
}
69
// string_from_wide2 creates a V string, encoded in UTF-8, given a windows
// style string, encoded in UTF-16. It is more efficient, compared to
// string_from_wide, but it requires you to know the input string length
// (in UTF-16 code units), and to pass it as the second argument.
// On non windows platforms, a high surrogate that is immediately followed by a
// low surrogate is decoded into a single code point; every other code unit
// (including an unpaired surrogate) is written as its own rune.
// See also builtin.wchar.to_string2/2, for a version that eases working
// with the platform dependent &wchar_t L"" strings.
@[manualfree; unsafe]
pub fn string_from_wide2(_wstr &u16, len int) string {
	$if windows {
		unsafe {
			// Size query: no output buffer is passed, the result is used as the allocation size.
			num_chars := C.WideCharToMultiByte(cp_utf8, 0, _wstr, len, 0, 0, 0, 0)
			mut str_to := malloc_noscan(num_chars + 1)
			if str_to != 0 {
				C.WideCharToMultiByte(cp_utf8, 0, _wstr, len, &char(str_to), num_chars, 0, 0)
				// Write the terminating 0 byte right after the converted text.
				C.memset(str_to + num_chars, 0, 1)
			}
			return tos2(str_to)
		}
	} $else {
		mut sb := strings.new_builder(len)
		mut i := 0
		for i < len {
			unit := unsafe { u32(_wstr[i]) }
			i++
			// A high surrogate needs a following low surrogate to form a code point.
			if unit >= 0xD800 && unit <= 0xDBFF && i < len {
				low := unsafe { u32(_wstr[i]) }
				if low >= 0xDC00 && low <= 0xDFFF {
					sb.write_rune(rune(0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00)))
					i++
					continue
				}
			}
			sb.write_rune(rune(unit))
		}
		res := sb.str()
		unsafe { sb.free() }
		return res
	}
}
99
// wide_to_ansi creates an ANSI encoded byte array, given a windows style,
// 0 terminated string, encoded in UTF-16.
// On windows, it uses cp_acp (the ANSI code page identifier) as the destination
// encoding, and it returns an empty array, when the size query reports 0 bytes.
// NOTE: on other platforms, it returns the bytes of the UTF-8 encoded V string,
// produced by string_from_wide/1, followed by one extra trailing byte.
pub fn wide_to_ansi(_wstr &u16) []u8 {
	$if windows {
		// Size query: no output buffer is passed, the result is the buffer length to allocate.
		needed := C.WideCharToMultiByte(cp_acp, 0, _wstr, -1, 0, 0, 0, 0)
		if needed == 0 {
			return []u8{}
		}
		mut buf := []u8{len: needed}
		C.WideCharToMultiByte(cp_acp, 0, _wstr, -1, &char(buf.data), buf.len, 0, 0)
		return buf
	} $else {
		utf8 := unsafe { string_from_wide(_wstr) }
		// One byte more than the text itself; only utf8.len bytes are copied in.
		mut buf := []u8{len: utf8.len + 1}
		unsafe { vmemcpy(buf.data, utf8.str, utf8.len) }
		return buf
	}
	return []u8{} // TODO: remove this, bug?
}
121