| 1 | module wchar |
| 2 | |
| 3 | import strings |
| 4 | |
| 5 | #include <wchar.h> |
| 6 | |
// C.wchar_t is the V side declaration of the C `wchar_t` type, that comes from <wchar.h>.
// No fields are declared here; the type is only used as an opaque value.
// `@[typedef]` marks it as a C typedef name, so that it is referred to as `wchar_t`
// and not as `struct wchar_t` in the generated C code.
@[typedef]
pub struct C.wchar_t {}
| 9 | |
// Character is a type, that eases working with the platform dependent C.wchar_t type.
// It is a plain alias, so a `&Character` can be passed wherever C expects a `wchar_t*`.
// Note: the size of C.wchar_t varies between platforms, it is 2 bytes on windows,
// and usually 4 bytes elsewhere.
pub type Character = C.wchar_t
| 14 | |
// zero is the Character with a value of 0 (it is made with `from_rune(0)`).
// In C L"" strings, it represents the string end character (terminator).
pub const zero = from_rune(0)
| 17 | |
// str returns the V string (UTF-8 encoded) representation of the given Character.
// The Character is first converted to a rune, then that rune is converted to a string.
pub fn (a Character) str() string {
	r := a.to_rune()
	return r.str()
}
| 22 | |
// == compares 2 Characters for equality, by comparing their numeric values,
// after both of them are widened to u64.
// TODO: the default == operator, that V generates, does not work for C.wchar_t .
@[inline]
pub fn (a Character) == (b Character) bool {
	left := u64(a)
	right := u64(b)
	return left == right
}
| 29 | |
// to_rune creates a V rune, given a Character.
// The conversion is done with a value cast, so only the bytes that belong
// to the Character itself are read. Reinterpreting `&c` as a `&rune` would read
// 4 bytes from a value that is just 2 bytes wide on windows.
@[inline]
pub fn (c Character) to_rune() rune {
	$if windows {
		// C.wchar_t is 2 bytes on windows, so keep only the low 16 bits
		return rune(c) & 0xFFFF
	} $else {
		return rune(c)
	}
}
| 39 | |
// from_rune creates a Character, given a V rune.
// The memory of the rune is reinterpreted as a Character, and then copied out,
// so only the first `sizeof(Character)` bytes of the rune are used.
@[inline]
pub fn from_rune(r rune) Character {
	unsafe {
		pc := &Character(&r)
		return *pc
	}
}
| 45 | |
// length_in_characters returns the length of the given wchar_t* wide C style L"" string.
// The length is the count of the Characters before the first `zero` terminator,
// i.e. the terminator itself is not counted.
// Example: assert unsafe { wchar.length_in_characters(wchar.from_string('abc')) } == 3
// See also `length_in_bytes` .
@[unsafe]
pub fn length_in_characters(p voidptr) int {
	chars := &Character(p)
	mut count := 0
	for {
		if unsafe { chars[count] == zero } {
			break
		}
		count++
	}
	return count
}
| 58 | |
// length_in_bytes returns the length of the given wchar_t* wide C style L"" string in bytes.
// It is the result of `length_in_characters`, multiplied by the size of a single Character,
// so the terminator is not included in it.
// Note that the size of wchar_t is different on the different platforms, thus the length in
// bytes for the same data converted from UTF-8 to a &Character buffer, will be different as well.
// i.e. unsafe { wchar.length_in_bytes(wchar.from_string('abc')) } will be 12 on unix, but
// 6 on windows.
@[unsafe]
pub fn length_in_bytes(p voidptr) int {
	count := unsafe { length_in_characters(p) }
	character_size := int(sizeof(Character))
	return count * character_size
}
| 68 | |
// to_string creates a V string, encoded in UTF-8, given a wchar_t*
// wide C style L"" string. The length of the input is found by searching for
// its 0 terminator, so the input *must* be 0 terminated.
// Note, that the size of wchar_t is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
@[unsafe]
pub fn to_string(p voidptr) string {
	return unsafe { to_string2(p, length_in_characters(p)) }
}
| 84 | |
// to_string2 creates a V string, encoded in UTF-8, given a `C.wchar_t*`
// wide C style L"" string, and its length `len` in Characters.
// Note, that the size of `C.wchar_t` is platform-dependent,
// and is *2 bytes* on windows, while *4* on most everything else.
// On windows, where the Characters are UTF-16 code units, a high surrogate that is
// followed by a low surrogate, is decoded to a single code point. Unpaired surrogates
// are written as they are.
// Unless you are interfacing with a C library, that does specifically use wchar_t,
// consider using string_from_wide2 instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
@[manualfree; unsafe]
pub fn to_string2(p voidptr, len int) string {
	pc := &Character(p)
	mut sb := strings.new_builder(len)
	defer {
		unsafe { sb.free() }
	}
	mut i := 0
	for i < len {
		mut code := unsafe { u32(pc[i]) }
		i++
		$if windows {
			// a code point above U+FFFF is stored as 2 consecutive 16 bit Characters
			if code >= 0xD800 && code <= 0xDBFF && i < len {
				low := unsafe { u32(pc[i]) }
				if low >= 0xDC00 && low <= 0xDFFF {
					code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
					i++
				}
			}
		}
		sb.write_rune(rune(code))
	}
	res := sb.str()
	return res
}
| 105 | |
// from_string converts the V string (in UTF-8 encoding), into a newly allocated
// platform specific buffer of C.wchar_t . The buffer is 0 terminated.
// The conversion is done by processing each rune of the input string 1 by 1.
// On windows, where C.wchar_t is 2 bytes, a rune in the range U+10000 ... U+10FFFF
// is stored as an UTF-16 surrogate pair (2 Characters), instead of being truncated.
// The temporary array of runes is freed before returning; the returned buffer
// is *not* freed by this function.
@[manualfree]
pub fn from_string(s string) &Character {
	srunes := s.runes()
	defer {
		// the function is marked with `manualfree`, so the temporary array has to be released here
		unsafe { srunes.free() }
	}
	mut units := srunes.len
	$if windows {
		// each rune, that needs a surrogate pair, takes 1 more Character
		for r in srunes {
			if u32(r) > 0xFFFF && u32(r) <= 0x10FFFF {
				units++
			}
		}
	}
	unsafe {
		mut result := &Character(vcalloc_noscan((units + 1) * int(sizeof(Character))))
		mut pos := 0
		for r in srunes {
			$if windows {
				if u32(r) > 0xFFFF && u32(r) <= 0x10FFFF {
					v := u32(r) - 0x10000
					result[pos] = from_rune(rune(0xD800 + (v >> 10)))
					result[pos + 1] = from_rune(rune(0xDC00 + (v & 0x3FF)))
					pos += 2
					continue
				}
			}
			result[pos] = from_rune(r)
			pos++
		}
		result[pos] = zero
		return result
	}
}
| 121 | |