| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license that can be found in the LICENSE file. |
| 3 | module builtin |
| 4 | |
| 5 | import strings |
| 6 | |
// NOTE: declaring `rune` through the alias below never worked correctly.
// The underlying issue is now fixed, however the type checks in the checker
// still need to be updated. If you uncomment the line below, you will see the issue.
| 10 | // type rune = int |
| 11 | |
// str returns the rune `c` as a string, by handing its code point to `utf32_to_str`.
pub fn (c rune) str() string {
	code_point := u32(c)
	return utf32_to_str(code_point)
}
| 16 | |
// string builds a single string out of the runes in the array `ra`, in order.
@[manualfree]
pub fn (ra []rune) string() string {
	mut builder := strings.new_builder(ra.len)
	builder.write_runes(ra)
	joined := builder.str()
	// This function is marked `@[manualfree]`, so the builder is released explicitly,
	// after its content has been taken out above.
	unsafe { builder.free() }
	return joined
}
| 26 | |
// repeat returns a string made of `count` consecutive copies of the rune `c`.
// A `count` of zero or less yields an empty string.
pub fn (c rune) repeat(count int) string {
	if count <= 0 {
		return ''
	}
	if count == 1 {
		return c.str()
	}
	// Encode the rune once into a local fixed-size buffer, then repeat that string.
	mut scratch := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(c), mut &scratch[0]) }
	return encoded.repeat(count)
}
| 38 | |
// bytes returns a new byte array holding what `utf32_decode_to_buffer` writes for the rune `c`.
@[manualfree; markused]
pub fn (c rune) bytes() []u8 {
	// Reserve room for up to 5 bytes, then let the helper write straight into
	// the array's storage; its return value becomes the array length.
	mut encoded := []u8{cap: 5}
	mut dst := &u8(encoded.data)
	encoded.len = unsafe { utf32_decode_to_buffer(u32(c), mut dst) }
	return encoded
}
| 47 | |
// length_in_bytes returns the number of bytes needed to store the code point `c`:
// 1 up to 0x7F, 2 up to 0x7FF, 3 up to 0xFFFF and 4 up to 0x10FFFF.
// It returns -1 for surrogates (0xD800..0xDFFF) and for values above 0x10FFFF.
pub fn (c rune) length_in_bytes() int {
	cp := u32(c)
	// Reject everything that is not a valid code point first:
	// values past the maximum (0x10FFFF) and the surrogate range.
	if cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF) {
		return -1
	}
	if cp < 0x80 {
		return 1
	}
	if cp < 0x800 {
		return 2
	}
	if cp < 0x10000 {
		return 3
	}
	return 4
}
| 67 | |
// to_upper returns the uppercase form of the rune `c`.
// ASCII runes are handled inline; everything from 0x80 up is delegated to `map_to`.
pub fn (c rune) to_upper() rune {
	if c >= 0x80 {
		return c.map_to(.to_upper)
	}
	if c < `a` || c > `z` {
		// ASCII, but not a lowercase letter: nothing to convert.
		return c
	}
	// `a`..`z` sit exactly 32 positions after `A`..`Z`.
	return c - 32
}
| 78 | |
// to_lower returns the lowercase form of the rune `c`.
// ASCII runes are handled inline; everything from 0x80 up is delegated to `map_to`.
pub fn (c rune) to_lower() rune {
	if c >= 0x80 {
		return c.map_to(.to_lower)
	}
	if c < `A` || c > `Z` {
		// ASCII, but not an uppercase letter: nothing to convert.
		return c
	}
	// `A`..`Z` sit exactly 32 positions before `a`..`z`.
	return c + 32
}
| 89 | |
// to_title returns the title case form of the rune `c`.
// For ASCII runes this is the same as the uppercase form and is computed inline;
// everything from 0x80 up is delegated to `map_to`.
pub fn (c rune) to_title() rune {
	if c >= 0x80 {
		return c.map_to(.to_title)
	}
	if c < `a` || c > `z` {
		// ASCII, but not a lowercase letter: nothing to convert.
		return c
	}
	// `a`..`z` sit exactly 32 positions after `A`..`Z`.
	return c - 32
}
| 100 | |
// map_to converts the rune `c` according to `mode` (.to_upper/.to_lower/.to_title),
// by looking it up in the `rune_maps` table.
// The table is read as consecutive rows of `rune_maps_columns_in_row` entries each:
// entry 0 is the first rune of a range, entry 1 the last rune of that range (inclusive),
// entry 2 is the value used for .to_upper and .to_title, entry 3 the value used for .to_lower.
// That value is either an offset that is added to `c`, or one of the markers
// `rune_maps_ul` / `rune_maps_utl`, which denote ranges of interleaved case variants.
// If no row contains `c`, the rune is returned unchanged.
// NOTE(review): the search presumably relies on the rows being sorted by range start
// and not overlapping - confirm against the definition of `rune_maps`.
@[direct_array_access]
fn (c rune) map_to(mode MapMode) rune {
	// `start` and `end` are row indices (not element indices); `end` is exclusive.
	mut start := 0
	mut end := rune_maps.len / rune_maps_columns_in_row
	// Binary search over the rows
	for start < end {
		middle := (start + end) / 2
		// Pointer to the first entry of the middle row.
		cur_map := unsafe { &rune_maps[middle * rune_maps_columns_in_row] }
		if c >= u32(unsafe { *cur_map }) && c <= u32(unsafe { *(cur_map + 1) }) {
			// `c` lies inside the range of this row; pick the entry matching the mode.
			offset := if mode in [.to_upper, .to_title] {
				unsafe { *(cur_map + 2) }
			} else {
				unsafe { *(cur_map + 3) }
			}
			if offset == rune_maps_ul {
				// upper, lower, upper, lower, ... sequence
				// cnt is the position of `c` inside its pair: 0 => upper, 1 => lower.
				cnt := (c - unsafe { *cur_map }) % 2
				if mode == .to_lower {
					return c + 1 - cnt
				}
				return c - cnt
			} else if offset == rune_maps_utl {
				// upper, title, lower, upper, title, lower, ... sequence
				// cnt is the position of `c` inside its triple: 0 => upper, 1 => title, 2 => lower.
				cnt := (c - unsafe { *cur_map }) % 3
				if mode == .to_upper {
					return c - cnt
				} else if mode == .to_lower {
					return c + 2 - cnt
				}
				return c + 1 - cnt
			}
			// Regular row: the selected entry is a plain offset.
			return c + offset
		}
		// Not in this row: continue in the half that can still contain `c`.
		if c < u32(unsafe { *cur_map }) {
			end = middle
		} else {
			start = middle + 1
		}
	}
	// No row matched, so there is no mapping for `c`.
	return c
}
| 143 | |