v2 / vlib / builtin / rune.v
142 lines · 131 sloc · 3.35 KB · 8986645ee93015bf9b44e7839c3a3370aff4f51b
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license that can be found in the LICENSE file.
3module builtin
4
5import strings
6
// This was never working correctly. The issue is now fixed; however,
// the type checks in the checker still need to be updated.
// If you uncomment the line below, you will see the issue.
// type rune = int
11
// str returns the rune `c` as a string, obtained by passing its
// code point (as a u32) to `utf32_to_str`.
pub fn (c rune) str() string {
	code_point := u32(c)
	return utf32_to_str(code_point)
}
16
// string joins all the runes of the array `ra` into a single string.
@[manualfree]
pub fn (ra []rune) string() string {
	// Start the builder with an initial capacity of one slot per rune.
	mut builder := strings.new_builder(ra.len)
	builder.write_runes(ra)
	joined := builder.str()
	// The result has already been extracted, so the builder is released by hand
	// (this function is marked @[manualfree]).
	unsafe { builder.free() }
	return joined
}
26
// repeat returns a string consisting of `count` consecutive copies of the rune `c`.
// A `count` of zero or less yields an empty string.
pub fn (c rune) repeat(count int) string {
	if count <= 0 {
		return ''
	}
	if count == 1 {
		return c.str()
	}
	// Encode the rune once into a fixed 5-byte local buffer,
	// then let string.repeat produce the copies.
	mut scratch := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(c), mut &scratch[0]) }
	return encoded.repeat(count)
}
38
// bytes returns the bytes that `utf32_decode_to_buffer` writes for the rune `c`.
@[manualfree; markused]
pub fn (c rune) bytes() []u8 {
	// Reserve capacity for 5 bytes and let the encoder write straight into the array's storage.
	mut encoded := []u8{cap: 5}
	mut dst := &u8(encoded.data)
	written := unsafe { utf32_decode_to_buffer(u32(c), mut dst) }
	// The encoder reports how many bytes it wrote; that becomes the array length.
	encoded.len = written
	return encoded
}
47
// length_in_bytes reports how many bytes are needed to store the code point `c`:
// 1 up to 0x7F, 2 up to 0x7FF, 3 up to 0xFFFF and 4 up to 0x10FFFF.
// It returns -1 when `c` is not a valid code point, i.e. when it lies in the
// surrogate range 0xD800..0xDFFF or is above 0x10FFFF.
pub fn (c rune) length_in_bytes() int {
	cp := u32(c)
	// Reject everything above the maximum valid unicode code point.
	if cp > 0x10FFFF {
		return -1
	}
	// Reject the surrogate range.
	if cp >= 0xD800 && cp <= 0xDFFF {
		return -1
	}
	if cp < 0x80 {
		return 1
	}
	if cp < 0x800 {
		return 2
	}
	if cp < 0x10000 {
		return 3
	}
	return 4
}
67
// to_upper returns the uppercase form of the rune `c`.
// Runes below 0x80 are handled inline: `a`..`z` are shifted down by 32,
// everything else is returned unchanged. Runes from 0x80 upwards are
// delegated to `map_to(.to_upper)`.
pub fn (c rune) to_upper() rune {
	if c >= 0x80 {
		return c.map_to(.to_upper)
	}
	is_ascii_lower := c >= `a` && c <= `z`
	if is_ascii_lower {
		return c - 32
	}
	return c
}
78
// to_lower returns the lowercase form of the rune `c`.
// Runes below 0x80 are handled inline: `A`..`Z` are shifted up by 32,
// everything else is returned unchanged. Runes from 0x80 upwards are
// delegated to `map_to(.to_lower)`.
pub fn (c rune) to_lower() rune {
	if c >= 0x80 {
		return c.map_to(.to_lower)
	}
	is_ascii_upper := c >= `A` && c <= `Z`
	if is_ascii_upper {
		return c + 32
	}
	return c
}
89
// to_title returns the title-case form of the rune `c`.
// Runes below 0x80 are handled inline: `a`..`z` are shifted down by 32
// (the same result as to_upper), everything else is returned unchanged.
// Runes from 0x80 upwards are delegated to `map_to(.to_title)`.
pub fn (c rune) to_title() rune {
	if c >= 0x80 {
		return c.map_to(.to_title)
	}
	is_ascii_lower := c >= `a` && c <= `z`
	if is_ascii_lower {
		return c - 32
	}
	return c
}
100
// map_to applies the case conversion selected by `mode`
// (.to_upper, .to_lower or .to_title) to the rune `c`, using the `rune_maps` table.
// The table is read in rows of `rune_maps_columns_in_row` entries, of which this
// function uses: [0] range start, [1] range end, [2] the offset for
// .to_upper/.to_title, [3] the offset for any other mode.
// A rune that falls inside no row's range is returned unchanged.
// NOTE(review): the binary search presumes the rows are sorted by range and do
// not overlap - confirm against the table definition.
@[direct_array_access]
fn (c rune) map_to(mode MapMode) rune {
	mut lo := 0
	mut hi := rune_maps.len / rune_maps_columns_in_row
	// Binary search for the row whose [start, end] range contains `c`.
	for lo < hi {
		mid := (lo + hi) / 2
		row := unsafe { &rune_maps[mid * rune_maps_columns_in_row] }
		if c < u32(unsafe { *row }) {
			// `c` is before this row's range: continue in the lower half.
			hi = mid
			continue
		}
		if c > u32(unsafe { *(row + 1) }) {
			// `c` is after this row's range: continue in the upper half.
			lo = mid + 1
			continue
		}
		// `c` lies inside this row's range: pick the offset column for the requested mode.
		offset := if mode == .to_upper || mode == .to_title {
			unsafe { *(row + 2) }
		} else {
			unsafe { *(row + 3) }
		}
		if offset == rune_maps_ul {
			// upper, lower, upper, lower, ... sequence:
			// `pos` is the position of `c` inside its (upper, lower) pair.
			pos := (c - unsafe { *row }) % 2
			if mode == .to_lower {
				return c + 1 - pos
			}
			return c - pos
		}
		if offset == rune_maps_utl {
			// upper, title, lower, upper, title, lower, ... sequence:
			// `pos` is the position of `c` inside its (upper, title, lower) triple.
			pos := (c - unsafe { *row }) % 3
			match mode {
				.to_upper {
					return c - pos
				}
				.to_lower {
					return c + 2 - pos
				}
				else {
					return c + 1 - pos
				}
			}
		}
		// Ordinary row: the stored value is a plain offset to add to the rune.
		return c + offset
	}
	return c
}
143