v2 / vlib / builtin / wchar / wchar.c.v
120 lines · 106 sloc · 3.82 KB · 008aaad99981918c51194d7aaaaaccb4c258f244
Raw
1module wchar
2
3import strings
4
5#include <wchar.h>
6
// C.wchar_t is the C wide character type, that comes from the `#include <wchar.h>` above.
// It is marked with @[typedef], since in C `wchar_t` is a typedef name, and not a `struct wchar_t`.
@[typedef]
pub struct C.wchar_t {}
9
// Character is an alias of the platform dependent C.wchar_t type, that makes it easier
// to work with wide characters from V code.
// Note: the size of C.wchar_t varies between platforms; it is 2 bytes on windows,
// and usually 4 bytes elsewhere.
pub type Character = C.wchar_t
14
// zero is the Character with the value 0. In wide C style L"" strings, it is the
// terminator, that marks the end of the string.
pub const zero = from_rune(0)
17
// str returns the string representation of the given Character.
// The Character is first converted to a V rune, and then that rune is stringified.
pub fn (a Character) str() string {
	r := a.to_rune()
	return r.str()
}
22
// == is an equality operator, that makes comparing Characters easier.
// Both sides are widened to u64, and then the resulting integers are compared.
// TODO: the default == operator, that V generates, does not work for C.wchar_t .
@[inline]
pub fn (a Character) == (b Character) bool {
	lhs := u64(a)
	rhs := u64(b)
	return lhs == rhs
}
29
// to_rune creates a V rune, given a Character.
// The Character is converted by value, so only the bytes of the Character itself are read,
// no matter how wide C.wchar_t is on the current platform. Reinterpreting `&c` as a `&rune`
// instead, would read 4 bytes, even where a Character is only 2 bytes wide.
@[inline]
pub fn (c Character) to_rune() rune {
	$if windows {
		// C.wchar_t is 2 bytes on windows, so only the low 16 bits are kept.
		return rune(c) & 0xFFFF
	} $else {
		return rune(c)
	}
}
39
// from_rune creates a Character, given a V rune.
// The memory of the rune is reinterpreted as a Character, so only the first
// `sizeof(Character)` bytes of the rune end up in the result.
@[inline]
pub fn from_rune(r rune) Character {
	unsafe {
		pc := &Character(&r)
		return *pc
	}
}
45
// length_in_characters returns the number of Characters in the given wchar_t* wide C style
// L"" string. The terminating `zero` Character is not counted.
// The buffer is scanned until the first `zero`, so it must be properly terminated.
// Example: assert unsafe { wchar.length_in_characters(wchar.from_string('abc')) } == 3
// See also `length_in_bytes` .
@[unsafe]
pub fn length_in_characters(p voidptr) int {
	chars := &Character(p)
	mut count := 0
	for {
		is_terminator := unsafe { chars[count] == zero }
		if is_terminator {
			break
		}
		count++
	}
	return count
}
58
// length_in_bytes returns the length in bytes of the given wchar_t* wide C style L"" string.
// It is the length in characters (see `length_in_characters`), multiplied by `sizeof(Character)`.
// Note: since the size of wchar_t differs between platforms, the same text will have
// a different length in bytes on them too, i.e.
// unsafe { wchar.length_in_bytes(wchar.from_string('abc')) } will be 12 on unix, but
// 6 on windows.
@[unsafe]
pub fn length_in_bytes(p voidptr) int {
	char_count := unsafe { length_in_characters(p) }
	char_size := int(sizeof(Character))
	return char_count * char_size
}
68
// to_string creates a V string, encoded in UTF-8, given a wchar_t* wide C style L"" string.
// The length of the input is found by scanning for its 0 terminator, so the input
// must be terminated. The conversion itself is done by `to_string2`.
// Note, that the size of wchar_t is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
@[unsafe]
pub fn to_string(p voidptr) string {
	return unsafe { to_string2(p, length_in_characters(p)) }
}
84
// to_string2 creates a V string, encoded in UTF-8, given a `C.wchar_t*` wide C style
// L"" string, and its length `len`, counted in Characters (not in bytes).
// It returns an empty string, when `p` is nil, or when `len` is not positive.
// Each Character is converted to a rune on its own, and then written as UTF-8.
// Note, that the size of `C.wchar_t` is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide2` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
@[manualfree; unsafe]
pub fn to_string2(p voidptr, len int) string {
	// A negative `len` must not reach strings.new_builder/1 as a capacity,
	// and a nil `p` must not be dereferenced in the loop below.
	if p == unsafe { nil } || len <= 0 {
		return ''
	}
	pc := &Character(p)
	mut sb := strings.new_builder(len)
	defer {
		// this function is @[manualfree], so the builder is released by hand
		unsafe { sb.free() }
	}
	for i := 0; i < len; i++ {
		u := unsafe { rune(pc[i]) }
		sb.write_rune(u)
	}
	// take the result first, the deferred free of the builder runs afterwards
	res := sb.str()
	return res
}
105
// from_string converts the V string (in UTF-8 encoding), into a newly allocated
// platform specific buffer of C.wchar_t .
// The conversion is done by processing each rune of the input string 1 by 1,
// through `from_rune`. The buffer has space for one more Character after the runes,
// which is set to `zero`, so the result is always terminated.
// The returned buffer is allocated with vcalloc_noscan, and it is not freed here.
// NOTE(review): from_rune keeps only `sizeof(Character)` bytes of each rune, so where
// C.wchar_t is 2 bytes, runes above 0xFFFF presumably do not survive the conversion - verify.
@[manualfree]
pub fn from_string(s string) &Character {
	srunes := s.runes()
	defer {
		// this function is @[manualfree], so the temporary array of runes has to be
		// released by hand, otherwise it leaks, when no garbage collector is used
		unsafe { srunes.free() }
	}
	unsafe {
		mut result := &Character(vcalloc_noscan((srunes.len + 1) * int(sizeof(Character))))
		for i, r in srunes {
			result[i] = from_rune(r)
		}
		result[srunes.len] = zero
		return result
	}
}
121