| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module strings |
| 5 | |
// Builder is used to efficiently append many strings to a large,
// dynamically growing buffer, and then to use the resulting large string.
// Using a string builder is much better for performance and memory usage
// than repeatedly concatenating strings.
// It is declared as an alias of `[]u8`.
pub type Builder = []u8
| 11 | |
// new_builder creates an empty string builder, whose backing buffer is
// created with a capacity of `initial_size` bytes.
// The `.noslices` flag is set on the underlying array before it is returned.
pub fn new_builder(initial_size int) Builder {
	mut sb := Builder([]u8{cap: initial_size})
	unsafe {
		sb.flags.set(.noslices)
	}
	return sb
}
| 18 | |
// reuse_as_plain_u8_array allows using the Builder instance as a plain []u8 return value.
// It is useful when you have accumulated data in the builder, that you want to
// pass/access as []u8 later, without copying or freeing the buffer.
// Note: you *should NOT use* the string builder instance after calling this method.
// Use only the return value after calling this method.
@[unsafe]
pub fn (mut b Builder) reuse_as_plain_u8_array() []u8 {
	// Clear the `.noslices` flag, that new_builder() sets on the underlying array.
	unsafe { b.flags.clear(.noslices) }
	return *b
}
| 29 | |
// write_ptr appends `len` bytes, read from the memory that `ptr` points to,
// to the accumulated buffer. A `len` of 0 is a no-op.
@[unsafe]
pub fn (mut b Builder) write_ptr(ptr &u8, len int) {
	if len != 0 {
		unsafe { b.push_many(ptr, len) }
	}
}
| 38 | |
// write_rune appends the single rune `r` to the accumulated buffer.
// The rune is converted with `utf32_to_str_no_malloc` into a small local
// buffer first; if that conversion produces an empty result, nothing is appended.
@[manualfree]
pub fn (mut b Builder) write_rune(r rune) {
	mut utf8_buf := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(r), mut &utf8_buf[0]) }
	if encoded.len != 0 {
		unsafe { b.push_many(encoded.str, encoded.len) }
	}
}
| 49 | |
// write_runes appends every rune of `runes`, in order, to the accumulated buffer.
// A single local conversion buffer is reused for all of the runes; runes whose
// conversion produces an empty result are skipped.
pub fn (mut b Builder) write_runes(runes []rune) {
	mut utf8_buf := [5]u8{}
	for current in runes {
		encoded := unsafe { utf32_to_str_no_malloc(u32(current), mut &utf8_buf[0]) }
		if encoded.len != 0 {
			unsafe { b.push_many(encoded.str, encoded.len) }
		}
	}
}
| 61 | |
// write_u8 appends the single byte `data` to the end of the accumulated buffer.
@[inline]
pub fn (mut b Builder) write_u8(data u8) {
	b << data
}
| 67 | |
// write_byte appends the single byte `data` to the end of the accumulated buffer.
// It behaves exactly like `write_u8`.
@[inline]
pub fn (mut b Builder) write_byte(data u8) {
	b.write_u8(data)
}
| 73 | |
// write_decimal appends the decimal text representation of `n` to the builder,
// most significant digit first (6123 is written as `6`, `1`, `2`, `3`), with a
// leading `-` for negative numbers.
// The digits are produced in a fixed size local buffer, so no dynamic allocation
// is done for the conversion itself. The only exception is `min_i64`, which can
// not be negated, and is written through its `.str()` representation instead.
@[direct_array_access]
pub fn (mut b Builder) write_decimal(n i64) {
	if n == 0 {
		b.write_u8(0x30)
		return
	}
	if n == min_i64 {
		b.write_string(n.str())
		return
	}

	is_negative := n < 0
	mut rest := if is_negative { -n } else { n }
	// The digits are generated least significant first, so the buffer is
	// filled backwards, starting from its last slot.
	mut digits := [25]u8{}
	mut pos := 24
	for rest != 0 {
		digits[pos] = u8(rest % 10) + 0x30
		rest = rest / 10
		pos--
	}
	if is_negative {
		digits[pos] = `-`
		pos--
	}
	// `pos` is now one slot before the first written byte.
	unsafe { b.write_ptr(&digits[pos + 1], 24 - pos) }
}
| 104 | |
// write appends all of the bytes in `data` to the builder, and returns how many
// bytes were written (always `data.len`). The result type matches what the
// io.Writer interface expects, which is why a count is returned at all.
pub fn (mut b Builder) write(data []u8) !int {
	written := data.len
	if written != 0 {
		unsafe { b.push_many(data.data, written) }
	}
	return written
}
| 113 | |
// drain_builder appends all of the content of the `other` builder to `b`
// (when `other` is not empty), frees the buffer of `other`, and then
// re-initialises `other` with a new builder of capacity `other_new_cap`,
// so that `other` is ready to receive new content.
@[manualfree]
pub fn (mut b Builder) drain_builder(mut other Builder, other_new_cap int) {
	if other.len > 0 {
		b << *other
	}
	// The old buffer of `other` is released explicitly, before it is replaced.
	unsafe { other.free() }
	other = new_builder(other_new_cap)
}
| 124 | |
// byte_at returns the byte, located at the given index `n`.
// Note: it can panic, if there are not enough bytes in the strings builder yet.
@[inline]
pub fn (b &Builder) byte_at(n int) u8 {
	// The builder is viewed as a plain `[]u8`, and then indexed.
	return unsafe { (&[]u8(b))[n] }
}
| 131 | |
// write_string appends the string `s` to the buffer.
// An empty string is a no-op.
@[expand_simple_interpolation; inline]
pub fn (mut b Builder) write_string(s string) {
	if s.len != 0 {
		unsafe { b.push_many(s.str, s.len) }
	}
}
| 144 | |
// write_string2 appends the string `s1`, followed by the string `s2`, to the buffer.
// Empty strings are skipped.
@[inline]
pub fn (mut b Builder) write_string2(s1 string, s2 string) {
	b.write_string(s1)
	b.write_string(s2)
}
| 155 | |
// go_back discards the last `n` bytes from the buffer.
// A value of `n` that is <= 0 leaves the buffer unchanged.
// A value of `n` that is >= the current length empties the buffer, instead of
// passing a negative index to `trim`.
// NOTE(review): `array.trim` is assumed to not validate negative indexes, which
// would leave the builder with a negative length - confirm against vlib/builtin.
pub fn (mut b Builder) go_back(n int) {
	if n <= 0 {
		return
	}
	if n >= b.len {
		b.trim(0)
		return
	}
	b.trim(b.len - n)
}
| 160 | |
// spart returns a copy of `n` bytes of the buffer, starting at `start_pos`, as a string.
// The copy is made into a freshly allocated block of `n + 1` bytes, so that a
// terminating 0 byte can be stored after the content.
// Note: `start_pos` and `n` are not validated here; the caller is responsible
// for passing a range that is inside the buffer.
@[inline]
pub fn (b &Builder) spart(start_pos int, n int) string {
	unsafe {
		mut copy := malloc_noscan(n + 1)
		src := &u8(b.data) + start_pos
		vmemcpy(copy, src, n)
		copy[n] = 0
		return tos(copy, n)
	}
}
| 171 | |
// cut_last removes the last `n` bytes from the buffer, and returns them as a string.
// The requested count is clamped to the content that is actually available:
// - if `n` is <= 0, or the buffer is empty, nothing is removed and '' is returned.
// - if `n` is larger than the buffer length, the whole content is cut and returned.
// Without the clamping, a too large `n` would produce a negative start position,
// i.e. a copy from memory located before the start of the buffer.
pub fn (mut b Builder) cut_last(n int) string {
	if n <= 0 || b.len == 0 {
		return ''
	}
	count := if n > b.len { b.len } else { n }
	cut_pos := b.len - count
	res := b.spart(cut_pos, count)
	b.trim(cut_pos)
	return res
}
| 179 | |
// cut_to cuts everything located after position `pos` from the buffer, and returns it.
// If `pos` is greater than the builder length, it returns an empty string,
// and the buffer is left as it is.
pub fn (mut b Builder) cut_to(pos int) string {
	return if pos > b.len { '' } else { b.cut_last(b.len - pos) }
}
| 189 | |
// go_back_to resets the buffer to the given position `pos`, by trimming it to that length.
// Note: `pos` should be less than the existing buffer length.
pub fn (mut b Builder) go_back_to(pos int) {
	b.trim(pos)
}
| 195 | |
// writeln appends the string `s` to the buffer, followed by a newline character.
// For an empty `s`, only the newline is appended.
@[inline]
pub fn (mut b Builder) writeln(s string) {
	if s.len != 0 {
		unsafe { b.push_many(s.str, s.len) }
	}
	b << u8(`\n`)
}
| 208 | |
// writeln2 appends two lines to the buffer: `s1` + `\n`, and then `s2` + `\n`.
// An empty string still contributes its newline.
@[inline]
pub fn (mut b Builder) writeln2(s1 string, s2 string) {
	b.writeln(s1)
	b.writeln(s2)
}
| 221 | |
// last_n returns a copy of the last `n` bytes of the buffer as a string.
// The buffer itself is not modified.
// If `n` is greater than the buffer length, an empty string is returned.
// Example: with a buffer containing 'hello world', last_n(5) returns 'world'.
pub fn (b &Builder) last_n(n int) string {
	return if n > b.len { '' } else { b.spart(b.len - n, n) }
}
| 230 | |
// after returns a copy of the buffer content, starting from position `n`
// up to the end of the buffer, as a string. The buffer itself is not modified.
// If `n` is greater than or equal to the buffer length, an empty string is returned.
// Example: with a buffer containing 'hello world', after(6) returns 'world'.
pub fn (b &Builder) after(n int) string {
	return if n >= b.len { '' } else { b.spart(n, b.len - n) }
}
| 239 | |
// str returns a copy of all of the accumulated buffer content as a string.
// Note: after a call to b.str(), the builder b will be empty, and can be used again.
// The returned string *owns* its own separate copy of the data, that was
// accumulated in the string builder before the .str() call.
pub fn (mut b Builder) str() string {
	// Append a terminating 0 byte first, so that it becomes part of the copy.
	b << u8(0)
	total := b.len
	dup := unsafe { &u8(memdup_noscan(b.data, total)) }
	// The reported string length excludes the terminating 0 byte.
	res := unsafe { dup.vstring_with_len(total - 1) }
	b.clear()
	return res
}
| 251 | |
// ensure_cap ensures that the buffer has enough space for at least `n` bytes,
// by growing the buffer if necessary. It delegates to the `ensure_cap` method
// of the underlying `[]u8` array.
pub fn (mut b Builder) ensure_cap(n int) {
	// Work through the underlying array pointer, instead of taking a pointer
	// cast to the alias receiver. This keeps self-hosted builds from generating
	// an invalid `&b` cast in C.
	mut arr := unsafe { &[]u8(b) }
	arr.ensure_cap(n)
}
| 260 | |
// grow_len increases the length of the buffer by `n` bytes, after ensuring
// that the capacity is large enough for the new length.
// Values of `n` that are <= 0 leave the buffer unchanged.
// Note: only the length is changed here; nothing is written to the added bytes.
@[unsafe]
pub fn (mut b Builder) grow_len(n int) {
	if n > 0 {
		target_len := b.len + n
		b.ensure_cap(target_len)
		unsafe {
			b.len = target_len
		}
	}
}
| 274 | |
// free frees the memory block, used for the buffer.
// It does nothing, when the data pointer of the builder is 0.
// Note: do not use the builder, after a call to free().
@[unsafe]
pub fn (mut b Builder) free() {
	if b.data != 0 {
		// The builder is accessed through a pointer to the underlying `[]u8`,
		// and the `free` method of that array is called.
		mut arr := unsafe { &[]u8(b) }
		unsafe { arr.free() }
	}
}
| 284 | |
// write_repeated_rune appends `count` copies of the rune `r` to the accumulated buffer.
// Nothing is appended, when `count` is <= 0, or when the conversion of the rune
// produces an empty result.
// The rune is converted only once; the capacity for all of the copies is
// reserved up front, and the bytes are then written directly into the buffer.
@[direct_array_access]
pub fn (mut b Builder) write_repeated_rune(r rune, count int) {
	if count <= 0 {
		return
	}

	mut utf8_buf := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(r), mut &utf8_buf[0]) }
	if encoded.len == 0 {
		return
	}

	// Single byte result: the whole run can be filled with one vmemset call.
	if encoded.len == 1 {
		b.ensure_cap(b.len + count)
		unsafe {
			vmemset(&u8(b.data) + b.len, utf8_buf[0], count)
			b.len += count
		}
		return
	}

	// Multi byte result: copy the converted bytes `count` times, back to back.
	total_bytes := count * encoded.len
	b.ensure_cap(b.len + total_bytes)
	mut dst := unsafe { &u8(b.data) + b.len }
	for _ in 0 .. count {
		unsafe {
			vmemcpy(dst, encoded.str, encoded.len)
			dst += encoded.len
		}
	}
	unsafe {
		b.len += total_bytes
	}
}
| 322 | |
// IndentParam holds the configuration parameters for the indent() method.
// It is marked with @[params], so that its fields can be passed as trailing
// named arguments, e.g. `b.indent(s, indent_count: 2)`.
@[params]
pub struct IndentParam {
pub mut:
	block_start    rune = `{` // Character that starts a new block (+ indent)
	block_end      rune = `}` // Character that ends the current block (- indent)
	indent_char    rune = ` ` // Character used for indentation (space or tab)
	indent_count   int  = 4   // Number of indent_char per indentation level
	starting_level int // Initial indentation level (0 = no initial indent)
}
| 333 | |
// IndentState represents the current parsing state of the indent() method.
enum IndentState {
	normal    // Normal state, processing regular characters
	in_string // Inside a string literal, where block delimiters and whitespace are not interpreted
}
| 339 | |
// indent formats a string by applying structured indentation based on block delimiters.
// It processes the input string `s` and writes the formatted output to the `Builder` `b`.
// The function preserves content inside string literals (both single and double quotes) and
// configures indentation behavior through the `param` structure.
//
// Key behaviors:
// 1. Removes existing indentation at the beginning of lines.
// 2. Applies new indentation based on block nesting levels.
// 3. Ignores block delimiters and formatting characters inside string literals.
// 4. Keeps empty blocks (e.g., {}) on the same line.
// 5. Inserts newlines after `block_start` and before `block_end` (except for empty blocks).
// 6. Maintains existing line breaks from the input.
//
// Notes:
// - The input is processed byte by byte, and the input bytes are copied to the output
//   unchanged, so multi-byte UTF-8 sequences pass through intact.
// - `block_start` and `block_end` are compared against single input bytes, so only
//   single-byte delimiters can match.
// - Inside a string literal, a backslash escapes the byte that follows it. A quote
//   that is preceded by an escaped backslash (e.g. the last quote of "a\\") does
//   terminate the literal.
//
// Example:
// ```v
// import strings
// input := 'User{name:"John" settings:{theme:"dark"}}'
// mut b := strings.new_builder(64)
// b.indent(input, indent_count: 2)
// println(b.str()) // Formatted output: 'User{\n  name:"John" settings:{\n    theme:"dark"\n  }\n}'
// ```
@[direct_array_access]
pub fn (mut b Builder) indent(s string, param IndentParam) {
	if s.len == 0 {
		return
	}

	mut state := IndentState.normal
	mut indent_level := param.starting_level
	mut string_char := `\0`
	// true, when the previous byte inside a string literal was an unescaped backslash
	mut escaped := false
	mut at_line_start := true
	for i := 0; i < s.len; i++ {
		// `ch` is the raw input byte, that is copied to the output.
		// `c` is the same value as a rune, used only for the comparisons below.
		ch := s[i]
		c := rune(ch)
		match state {
			// Normal state: process characters outside of string literals
			.normal {
				match c {
					`"`, `'` { // Note: quote characters for editor display "
						state = .in_string
						string_char = c
						escaped = false
						// Add indentation if at the start of a line
						if at_line_start {
							b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
							at_line_start = false
						}
						// Write the opening quote
						b.write_u8(ch)
					}
					param.block_start {
						// Start of a new block
						// Add indentation if at the start of a line
						if at_line_start {
							b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
							at_line_start = false
						}

						// Write the block start character
						b.write_u8(ch)

						// Check for empty block (e.g., {})
						// Empty blocks stay on the same line
						if i + 1 < s.len && s[i + 1] == param.block_end {
							b.write_u8(s[i + 1])
							i++
						} else {
							// Non-empty block: increase indentation and add newline
							indent_level++
							b.write_rune(`\n`)
							at_line_start = true
						}
					}
					param.block_end {
						// End of a block
						// Decrease indentation level (but not below 0)
						if indent_level > 0 {
							indent_level--
						}

						// If not at the start of a line, add a newline
						if !at_line_start {
							b.write_rune(`\n`)
						}

						// Add indentation for the block end
						b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
						at_line_start = false

						b.write_u8(ch)
					}
					` `, `\t`, `\r`, `\n` {
						// Whitespace characters
						// Only write whitespace if not at the start of a line
						if !at_line_start {
							b.write_u8(ch)
						}

						// Newline resets the line start flag
						if c == `\n` {
							at_line_start = true
						}
					}
					else {
						// Any other byte, including the bytes of multi-byte UTF-8 sequences
						// Add indentation if at the start of a line
						if at_line_start {
							b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
							at_line_start = false
						}
						b.write_u8(ch)
					}
				}
			}
			.in_string {
				// Inside a string literal: preserve all bytes as-is
				b.write_u8(ch)

				if escaped {
					// This byte was escaped by the preceding backslash, so it can
					// neither terminate the literal, nor start a new escape.
					escaped = false
				} else if c == `\\` {
					escaped = true
				} else if c == string_char {
					// Unescaped quote, matching the opening one: end of the literal
					state = .normal
					string_char = `\0`
				}
			}
		}
	}
}
| 471 | |