| 1 | module strconv |
| 2 | |
| 3 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 4 | // Use of this source code is governed by an MIT license |
| 5 | // that can be found in the LICENSE file. |
| 6 | // TODO: use options, or some way to return default with error. |
// int_size is the size in bits of an int or uint value.
// The parsers in this file substitute it whenever a caller passes a bit size of 0.
// Commented-out alternatives left in the file:
// int_size = 32 << (~u32(0) >> 63)
// max_u64 = u64(u64(1 << 63) - 1)
const int_size = 32
| 11 | |
// byte_to_lower returns `c` with bit 5 (value 32) set.
// For the ASCII letters `A`..`Z` this is the matching lowercase letter.
// No range check is performed: every other byte goes through the same OR.
@[inline]
pub fn byte_to_lower(c u8) u8 {
	ascii_case_bit := u8(0x20)
	return c | ascii_case_bit
}
| 16 | |
// common_parse_uint parses `s` with common_parse_uint2 and turns its numeric
// status code into a V error.
// An error is returned only when the status is non-zero and at least one of
// `error_on_non_digit` / `error_on_high_digit` is true; when both flags are
// false the value produced by common_parse_uint2 is returned unchanged.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, status := common_parse_uint2(s, _base, _bit_size)
	// TODO: error_on_non_digit and error_on_high_digit have no difference
	report_failure := error_on_non_digit || error_on_high_digit
	if status == 0 || !report_failure {
		return value
	}
	if status == -1 {
		return error('common_parse_uint: wrong base ${_base} for ${s}')
	}
	if status == -2 {
		return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
	}
	if status == -3 {
		return error('common_parse_uint: integer overflow ${s}')
	}
	// every remaining (positive) status is a syntax error
	return error('common_parse_uint: syntax error ${s}')
}
| 32 | |
// common_parse_uint2 parses `s` as an unsigned integer and reports failures
// through a numeric status code instead of a V error.
//
// `_base` must be 0 or in 2..36. With 0 the base is taken from the text:
// on strings of at least 3 bytes a `0b`, `0o` or `0x` prefix (either case,
// optionally followed by one `_`) selects base 2, 8 or 16; on shorter strings
// a leading `0` that is not followed by a decimal digit selects base 8;
// everything else is read as decimal.
// `_bit_size` must be in 0..64, where 0 means `int_size`.
// Single underscores between digits are skipped.
//
// The first returned value contains the parsed value,
// the second returned value contains the status code:
//   0  = OK
//   >0 = syntax error. For an invalid digit it is the index of that byte + 1
//        and the value parsed so far is returned; an empty string and a
//        misplaced underscore inside the number both report 1.
//   -1 = wrong base
//   -2 = wrong bit size
//   -3 = overflow (the returned value is the maximum for the bit size)
@[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s == '' {
		return u64(0), 1
	}

	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0

	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			ch := if s.len > 1 { s[1] | 32 } else { `0` }
			if s.len >= 3 {
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}

				// check for underscore after the base prefix
				if s[start_index] == `_` {
					start_index++
					// an underscore has to be followed by a digit, exactly like
					// the trailing underscore check in the digit loop below
					if start_index >= s.len {
						return u64(0), start_index
					}
				}
			}
			// manage leading zeros in decimal base's numbers
			// otherwise it is an octal for C compatibility
			// TODO: Check if this behaviour is logically right
			else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				base = 10
				start_index++
			} else {
				base = 8
				start_index++
			}
		}
	}

	// The digit mapping below only produces the values 0..35, and the cutoff
	// computation needs a base of at least 2, so reject everything else.
	if base < 2 || base > 36 {
		return u64(0), -1
	}

	if bit_size == 0 {
		bit_size = int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}
	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	cutoff := max_u64 / u64(base) + u64(1)
	max_val := if bit_size == 64 { max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	basem1 := base - 1

	mut n := u64(0)
	for i in start_index .. s.len {
		mut c := s[i]

		// manage underscore inside the number
		if c == `_` {
			// an underscore may neither open nor close the digit sequence
			if i == start_index || i >= (s.len - 1) {
				return u64(0), 1
			}
			// two underscores in a row are not allowed
			if s[i - 1] == `_` || s[i + 1] == `_` {
				return u64(0), 1
			}

			continue
		}

		// sub_count stays 0 only for bytes below `A`, which must then be 0..9
		mut sub_count := 0

		// get the 0-9 digit
		c -= 48 // subtract the rune `0`

		// check if we are in the superior base rune interval [A..Z]
		if c >= 17 { // (65 - 48)
			sub_count++
			c -= 7 // subtract the `A` - `0` rune to obtain the value of the digit

			// check if we are in the superior base rune interval [a..z]
			if c >= 42 { // (97 - 7 - 48)
				sub_count++
				c -= 32 // subtract the `a` - `0` rune to obtain the value of the digit
			}
		}

		// check for digit over base
		if c > basem1 || (sub_count == 0 && c > 9) {
			return n, i + 1
		}

		// check if we are in the cutoff zone
		if n >= cutoff {
			// n*base overflows
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(c)
		if n1 < n || n1 > max_val {
			// n+v overflows
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
| 148 | |
// parse_uint is the unsigned counterpart of parse_int.
// It forwards to common_parse_uint with both error flags enabled, so every
// non-zero parser status is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	parsed := common_parse_uint(s, _base, _bit_size, true, true)!
	return parsed
}
| 153 | |
// common_parse_int parses an optionally signed integer from `_s`.
// A single leading `+` or `-` is stripped, the remaining text is parsed by
// common_parse_uint (which receives both error flags), and the magnitude is
// then checked against the signed range of `_bit_size` bits (0 means `int_size`).
// When the magnitude does not fit that range, an error is returned if
// `error_on_high_digit` is true; otherwise the result is clamped to the
// nearest bound. An empty `_s` returns 0 without an error.
@[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len == 0 {
		return i64(0)
	}
	bit_size := if _bit_size == 0 { int_size } else { _bit_size }

	// Pick off the leading sign by re-pointing one byte further, without copying.
	first := _s[0]
	negative := first == `-`
	mut digits := _s
	if negative || first == `+` {
		unsafe {
			digits = tos(_s.str + 1, _s.len - 1)
		}
	}

	// Convert unsigned and check range.
	magnitude := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if magnitude == 0 {
		return i64(0)
	}

	// TODO: check should u64(bit_size-1) be size of int (32)?
	// limit is 2^(bit_size-1): the largest magnitude of a negative value,
	// and one more than the largest positive value.
	limit := u64(1) << u64(bit_size - 1)
	if negative {
		if magnitude > limit {
			if error_on_high_digit {
				return error('common_parse_int: integer overflow ${_s}')
			}
			return -i64(limit)
		}
		return -i64(magnitude)
	}
	if magnitude >= limit {
		if error_on_high_digit {
			return error('common_parse_int: integer overflow ${_s}')
		}
		return i64(limit - u64(1))
	}
	return i64(magnitude)
}
| 206 | |
// parse_int interprets a string `_s` in the given `base` and bit size
// and returns the corresponding value as an i64.
// An optional leading `+` or `-` is accepted; an empty string yields 0.
//
// If `base` is 0, the base is derived from the string itself by
// common_parse_uint2 (`0b`, `0o` and `0x` prefixes select 2, 8 and 16).
//
// `_bit_size` specifies the integer type that the result must fit into.
// Bit sizes 0, 8, 16, 32, and 64 correspond to int, i8, i16, i32, and i64.
// If `_bit_size` is below 0 or above 64, an error is returned.
//
// NOTE: it calls common_parse_int with error_on_high_digit = false, so a
// magnitude that fits into `_bit_size` unsigned bits but not into the signed
// range is clamped to the nearest signed bound instead of raising an error.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	result := common_parse_int(_s, base, _bit_size, true, false)!
	return result
}
| 222 | |
// atoi_common_check runs the checks shared by all atoi variants on `s`:
// it must not be empty, at least one byte must follow an optional leading
// `+` or `-`, and neither the first byte after the sign nor the last byte of
// the string may be an underscore.
// It returns the sign (+1 or -1) and the index of the first byte after the
// sign, or an error describing the failed check.
@[direct_array_access]
fn atoi_common_check(s string) !(i64, int) {
	if s.len == 0 {
		return error('strconv.atoi: parsing "": empty string')
	}

	lead := s[0]
	has_sign := lead == `-` || lead == `+`
	sign := if lead == `-` { i64(-1) } else { i64(1) }
	first_digit := if has_sign { 1 } else { 0 }

	if first_digit >= s.len {
		return error('strconv.atoi: parsing "${s}": no number after sign')
	}

	starts_with_underscore := s[first_digit] == `_`
	if starts_with_underscore || s[s.len - 1] == `_` {
		return error('strconv.atoi: parsing "${s}": values cannot start or end with underscores')
	}
	return sign, first_digit
}
| 252 | |
// atoi_common is the shared radix 10 parser behind atoi, atoi8, atoi16 and atoi32.
// `s` is the string to parse; `type_min` / `type_max` are the inclusive bounds
// of the target type. The value is accumulated with the sign already applied
// and the bounds are checked after every digit.
// Single underscores between digits are skipped, two in a row are an error.
@[direct_array_access]
fn atoi_common(s string, type_min i64, type_max i64) !i64 {
	sign, first := atoi_common_check(s)!
	mut acc := i64(0)
	mut prev_was_underscore := false
	for pos in first .. s.len {
		digit := s[pos] - `0`
		if digit == 47 { // 47 = ascii(`_`) - ascii(`0`) = 95 - 48.
			if prev_was_underscore {
				return error('strconv.atoi: parsing "${s}": consecutives underscores are not allowed')
			}
			prev_was_underscore = true
			continue
		}
		// bytes below `0` wrap around in u8 arithmetic and end up above 9 as well
		if digit > 9 {
			return error('strconv.atoi: parsing "${s}": invalid radix 10 character')
		}
		prev_was_underscore = false
		acc = (acc * 10) + (digit * sign)
		if sign == 1 && acc > type_max {
			return error('strconv.atoi: parsing "${s}": integer overflow')
		}
		if acc < type_min {
			return error('strconv.atoi: parsing "${s}": integer underflow')
		}
	}
	return acc
}
| 286 | |
// atoi converts a radix 10 string to an int.
// It delegates to atoi_common with the bounds i64_min_int32 .. i64_max_int32
// and returns its error unchanged when parsing fails.
pub fn atoi(s string) !int {
	parsed := atoi_common(s, i64_min_int32, i64_max_int32)!
	return int(parsed)
}
| 292 | |
// atoi8 converts a radix 10 string to an i8, using the same parser as atoi.
// returns an i8 [-128 .. 127] or an error.
pub fn atoi8(s string) !i8 {
	parsed := atoi_common(s, min_i8, max_i8)!
	return i8(parsed)
}
| 298 | |
// atoi16 converts a radix 10 string to an i16, using the same parser as atoi.
// returns an i16 [-32768 .. 32767] or an error.
pub fn atoi16(s string) !i16 {
	parsed := atoi_common(s, min_i16, max_i16)!
	return i16(parsed)
}
| 304 | |
// atoi32 converts a radix 10 string to an i32, using the same parser as atoi.
// returns an i32 [-2147483648 .. 2147483647] or an error.
pub fn atoi32(s string) !i32 {
	parsed := atoi_common(s, min_i32, max_i32)!
	return i32(parsed)
}
| 310 | |
// atoi64 converts a radix 10 string to the i64 type.
// The value is built up with the sign already applied; each step goes through
// safe_mul10_64bits and safe_add_64bits so that leaving the i64 range is
// reported as an error.
// returns an i64 [-9223372036854775808 .. 9223372036854775807] or an error.
@[direct_array_access]
pub fn atoi64(s string) !i64 {
	sign, first := atoi_common_check(s)!
	mut acc := i64(0)
	mut prev_was_underscore := false
	for pos in first .. s.len {
		digit := s[pos] - `0`
		if digit == 47 { // 47 = ascii(`_`) - ascii(`0`) = 95 - 48.
			if prev_was_underscore {
				return error('strconv.atoi64: parsing "${s}": consecutives underscores are not allowed')
			}
			prev_was_underscore = true
			continue
		}
		if digit > 9 {
			return error('strconv.atoi64: parsing "${s}": invalid radix 10 character')
		}
		prev_was_underscore = false
		scaled := safe_mul10_64bits(acc) or {
			return error('strconv.atoi64: parsing "${s}": ${err}')
		}
		acc = safe_add_64bits(scaled, int(digit * sign)) or {
			return error('strconv.atoi64: parsing "${s}": ${err}')
		}
	}
	return acc
}
| 339 | |
// safe_add_64bits performs a signed 64 bits addition and returns an error
// instead of wrapping around: 'integer overflow' when a + b would exceed
// max_i64, 'integer underflow' when it would fall below min_i64.
@[inline]
fn safe_add_64bits(a i64, b i64) !i64 {
	if a > 0 {
		// room left above a before max_i64 is reached
		headroom := max_i64 - a
		if b > headroom {
			return error('integer overflow')
		}
	} else if a < 0 {
		// room left below a before min_i64 is reached (a value <= 0)
		legroom := min_i64 - a
		if b < legroom {
			return error('integer underflow')
		}
	}
	return a + b
}
| 351 | |
// safe_mul10_64bits returns a * 10, or an error when the product would leave
// the i64 range: 'integer overflow' above max_i64, 'integer underflow' below
// min_i64.
@[inline]
fn safe_mul10_64bits(a i64) !i64 {
	// largest / smallest inputs whose tenfold value still fits in an i64
	upper := max_i64 / 10
	lower := min_i64 / 10
	if a > upper {
		return error('integer overflow')
	}
	if a < lower {
		return error('integer underflow')
	}
	return a * 10
}
| 364 | |
// i64_min_int32 and i64_max_int32 are the inclusive bounds that atoi passes
// to atoi_common; they evaluate to -2147483648 and 2147483647 as i64 values.
// Both are written as "neighbouring value +/- 1" rather than as plain literals.
const i64_min_int32 = i64(-2147483647) - 1 // msvc has a bug that treats just i64(min_int) as 2147483648 :-(; this is a workaround for it
const i64_max_int32 = i64(2147483646) + 1
| 367 | |