v / vlib / strings / builder.c.v
470 lines · 426 sloc · 12.56 KB · 8e35f4d9848f7ad35d857a187dddbfd2eca5e19d
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module strings
5
// strings.Builder is used to efficiently append many strings to a large
// dynamically growing buffer, and then to use the resulting large string.
// Using a string builder is much better for performance and memory usage,
// than repeatedly concatenating strings.
// A Builder is an alias of a plain `[]u8` byte array.
pub type Builder = []u8
11
// new_builder creates an empty string builder, whose underlying byte buffer
// starts with a capacity of `initial_size` bytes.
// The `.noslices` flag is set on the buffer before it is returned.
pub fn new_builder(initial_size int) Builder {
	mut sb := Builder([]u8{cap: initial_size})
	unsafe {
		sb.flags.set(.noslices)
	}
	return sb
}
18
// reuse_as_plain_u8_array hands over the accumulated buffer as a plain []u8 value.
// It is useful when the data gathered in the builder has to be passed around or
// accessed as a []u8 later, without copying or freeing the buffer.
// The `.noslices` flag, set by new_builder, is cleared before the buffer is returned.
// NB: you *should NOT use* the string builder instance after calling this method.
// Use only the return value after calling this method.
@[unsafe]
pub fn (mut b Builder) reuse_as_plain_u8_array() []u8 {
	unsafe {
		b.flags.clear(.noslices)
	}
	return *b
}
29
// write_ptr appends `len` bytes, read from the memory that `ptr` points to,
// to the accumulated buffer. A `len` of 0 is a no-op.
@[unsafe]
pub fn (mut b Builder) write_ptr(ptr &u8, len int) {
	if len != 0 {
		unsafe { b.push_many(ptr, len) }
	}
}
38
// write_rune appends the UTF-8 encoding of the single rune `r` to the accumulated buffer.
// The rune is encoded into a small stack buffer, so no heap allocation is needed for it.
@[manualfree]
pub fn (mut b Builder) write_rune(r rune) {
	mut utf8_buf := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(r), mut &utf8_buf[0]) }
	// an empty encoding means that there is nothing to append
	if encoded.len != 0 {
		unsafe { b.push_many(encoded.str, encoded.len) }
	}
}
49
// write_runes appends the UTF-8 encoding of each of the given `runes`, in order,
// to the accumulated buffer. Runes that encode to nothing are skipped.
pub fn (mut b Builder) write_runes(runes []rune) {
	// a single stack buffer is reused for encoding every rune
	mut utf8_buf := [5]u8{}
	for current in runes {
		encoded := unsafe { utf32_to_str_no_malloc(u32(current), mut &utf8_buf[0]) }
		if encoded.len != 0 {
			unsafe { b.push_many(encoded.str, encoded.len) }
		}
	}
}
61
// write_u8 appends the single byte `data` to the end of the accumulated buffer.
@[inline]
pub fn (mut b Builder) write_u8(data u8) {
	b << data
}
67
// write_byte appends the single byte `data` to the end of the accumulated buffer.
// It behaves exactly like write_u8.
@[inline]
pub fn (mut b Builder) write_byte(data u8) {
	b.write_u8(data)
}
73
// write_decimal appends a decimal representation of the number `n` into the builder `b`,
// without dynamic allocation. The higher order digits come first, i.e. 6123 will be written
// with the digit `6` first, then `1`, then `2` and `3` last.
// Negative numbers are prefixed with `-`. All i64 values, including the minimum one,
// are converted through the same allocation free path.
@[direct_array_access]
pub fn (mut b Builder) write_decimal(n i64) {
	if n == 0 {
		b.write_u8(0x30)
		return
	}

	// Convert the unsigned magnitude of `n`. The minimum i64 value can not be negated
	// inside an i64, so for negative numbers, `-(n + 1)` is computed first (it always
	// fits), and the missing 1 is added back after the conversion to u64.
	mut x := if n < 0 { u64(-(n + 1)) + 1 } else { u64(n) }

	// 19 digits + an optional sign fit comfortably in 25 bytes.
	// The buffer is filled from its end towards its start.
	mut buf := [25]u8{}
	mut i := 24
	for x != 0 {
		buf[i] = u8(x % 10) + 0x30
		x /= 10
		i--
	}
	if n < 0 {
		buf[i] = `-`
		i--
	}
	// `i` points just before the first written byte
	unsafe { b.write_ptr(&buf[i + 1], 24 - i) }
}
104
// write appends all the bytes of `data` to the string builder, and returns how many
// bytes were written. It has this signature, because it implements the io.Writer interface.
pub fn (mut b Builder) write(data []u8) !int {
	if data.len > 0 {
		unsafe { b.push_many(data.data, data.len) }
	}
	return data.len
}
113
// drain_builder appends all of the content accumulated in `other` to `b`, then
// frees the buffer of `other`, and re-initialises `other` with a new builder of
// capacity `other_new_cap`, so that `other` is ready to receive new content.
@[manualfree]
pub fn (mut b Builder) drain_builder(mut other Builder, other_new_cap int) {
	// there is nothing to copy from an empty builder
	if other.len > 0 {
		b << *other
	}
	// release the old buffer, before replacing `other` with a fresh builder
	unsafe { other.free() }
	other = new_builder(other_new_cap)
}
124
// byte_at returns the byte, located at the given index `n` in the buffer.
// Note: it can panic, if there are not enough bytes in the strings builder yet.
@[inline]
pub fn (b &Builder) byte_at(n int) u8 {
	unsafe {
		return (&[]u8(b))[n]
	}
}
131
// write_string appends the bytes of the string `s` to the buffer.
// Appending an empty string is a no-op.
@[expand_simple_interpolation; inline]
pub fn (mut b Builder) write_string(s string) {
	if s.len != 0 {
		unsafe { b.push_many(s.str, s.len) }
	}
}
144
// write_string2 appends the string `s1`, followed by the string `s2`, to the buffer.
// Empty strings are skipped.
@[inline]
pub fn (mut b Builder) write_string2(s1 string, s2 string) {
	// write_string already ignores empty strings
	b.write_string(s1)
	b.write_string(s2)
}
155
// go_back discards the last `n` bytes from the buffer.
// A non positive `n` leaves the buffer unchanged.
// When `n` is larger than the current length, the whole buffer is discarded,
// instead of leaving the builder with a negative length.
pub fn (mut b Builder) go_back(n int) {
	if n <= 0 {
		return
	}
	if n >= b.len {
		b.trim(0)
		return
	}
	b.trim(b.len - n)
}
160
// spart returns a copy of `n` bytes of the buffer, starting at `start_pos`, as a string.
// The returned string owns its own zero terminated memory block.
// An empty string is returned, when `n` is not positive, or when the requested range
// does not lie completely inside the accumulated content.
@[inline]
pub fn (b &Builder) spart(start_pos int, n int) string {
	// `n > b.len - start_pos` is used instead of `start_pos + n > b.len`,
	// so that the range check itself can not overflow
	if n <= 0 || start_pos < 0 || start_pos > b.len || n > b.len - start_pos {
		return ''
	}
	unsafe {
		mut x := malloc_noscan(n + 1)
		vmemcpy(x, &u8(b.data) + start_pos, n)
		x[n] = 0
		return tos(x, n)
	}
}
171
// cut_last cuts the last `n` bytes from the buffer and returns them.
// A non positive `n` cuts nothing, and returns an empty string.
// When `n` is larger than the current length, the whole content is cut and returned,
// instead of reading before the start of the buffer.
pub fn (mut b Builder) cut_last(n int) string {
	if n <= 0 {
		return ''
	}
	count := if n > b.len { b.len } else { n }
	cut_pos := b.len - count
	res := b.spart(cut_pos, count)
	b.trim(cut_pos)
	return res
}
179
// cut_to cuts the content located after `pos`, and returns it.
// If `pos` is greater than the builder length, it returns an empty string,
// and leaves the builder unchanged.
// A negative `pos` is treated as 0, i.e. the whole content is cut and returned.
pub fn (mut b Builder) cut_to(pos int) string {
	if pos > b.len {
		return ''
	}
	// clamp, so that cut_last is never asked for more bytes than there are
	start := if pos < 0 { 0 } else { pos }
	return b.cut_last(b.len - start)
}
189
// go_back_to resets the buffer to the given position `pos`.
// Note: pos should be less than the existing buffer length.
// A negative `pos` is treated as 0, so that the builder can not end up
// with a negative length.
pub fn (mut b Builder) go_back_to(pos int) {
	target := if pos < 0 { 0 } else { pos }
	b.trim(target)
}
195
// writeln appends the string `s` to the buffer, followed by a newline character.
// For an empty `s`, only the newline is appended.
@[inline]
pub fn (mut b Builder) writeln(s string) {
	if s.len != 0 {
		unsafe { b.push_many(s.str, s.len) }
	}
	b << u8(`\n`)
}
208
// writeln2 appends two lines to the buffer: `s1` + `\n`, and then `s2` + `\n`.
// An empty string still produces its newline.
@[inline]
pub fn (mut b Builder) writeln2(s1 string, s2 string) {
	if s1.len != 0 {
		unsafe { b.push_many(s1.str, s1.len) }
	}
	b << u8(`\n`)
	if s2.len != 0 {
		unsafe { b.push_many(s2.str, s2.len) }
	}
	b << u8(`\n`)
}
221
// last_n returns a copy of the last `n` bytes of the buffer as a string.
// The buffer itself is not modified.
// An empty string is returned, when `n` is not positive, or when `n` is larger
// than the current length of the buffer.
// Example: for a buffer containing 'hello world', last_n(5) returns 'world'.
pub fn (b &Builder) last_n(n int) string {
	if n <= 0 || n > b.len {
		return ''
	}
	return b.spart(b.len - n, n)
}
230
// after returns a copy of the buffer content, that starts at position `n`, as a string.
// The buffer itself is not modified.
// An empty string is returned, when `n` is at or beyond the end of the buffer.
// A negative `n` is treated as 0, i.e. the whole content is returned.
// Example: for a buffer containing 'hello world', after(6) returns 'world'.
pub fn (b &Builder) after(n int) string {
	if n >= b.len {
		return ''
	}
	// clamp, so that nothing before the start of the buffer is ever read
	start := if n < 0 { 0 } else { n }
	return b.spart(start, b.len - start)
}
239
// str returns a copy of all of the accumulated buffer content as a string.
// Note: after a call to b.str(), the builder b will be empty, and could be used again.
// The returned string *owns* its own separate copy of the accumulated data that was in
// the string builder, before the .str() call.
pub fn (mut b Builder) str() string {
	content_len := b.len
	// append a zero byte, so that the duplicated data is zero terminated
	b << u8(0)
	copied := unsafe { &u8(memdup_noscan(b.data, content_len + 1)) }
	// the terminating zero byte is not counted in the length of the result
	result := unsafe { copied.vstring_with_len(content_len) }
	b.clear()
	return result
}
251
// ensure_cap ensures that the buffer has enough space for at least `n` bytes,
// by growing the buffer if necessary. It delegates to the `ensure_cap` method
// of the underlying `[]u8` array.
pub fn (mut b Builder) ensure_cap(n int) {
	// Work through the underlying array pointer, instead of taking a pointer
	// cast to the alias receiver. This keeps self-hosted builds from generating
	// an invalid `&b` cast in C.
	mut arr := unsafe { &[]u8(b) }
	arr.ensure_cap(n)
}
260
// grow_len extends the length of the buffer by `n` bytes, after making sure that
// the capacity is large enough for the new length. The content of the added bytes
// is not written by this method. A non positive `n` is a no-op.
@[unsafe]
pub fn (mut b Builder) grow_len(n int) {
	if n > 0 {
		target_len := b.len + n
		b.ensure_cap(target_len)
		unsafe {
			b.len = target_len
		}
	}
}
274
// free releases the memory block, used for the buffer.
// It does nothing, when the builder has no data block.
// Note: do not use the builder, after a call to free().
@[unsafe]
pub fn (mut b Builder) free() {
	if b.data == 0 {
		return
	}
	unsafe {
		// free through the underlying array, not through the alias receiver
		mut arr := &[]u8(b)
		arr.free()
	}
}
284
// write_repeated_rune appends `count` copies of the rune `r` to the accumulated buffer.
// The rune is encoded to UTF-8 only once, and the needed capacity is reserved up front.
// A non positive `count`, or a rune with an empty encoding, is a no-op.
@[direct_array_access]
pub fn (mut b Builder) write_repeated_rune(r rune, count int) {
	if count <= 0 {
		return
	}

	mut utf8_buf := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(r), mut &utf8_buf[0]) }
	width := encoded.len
	if width == 0 {
		return
	}

	// single byte encoding: fill the new range with that byte in one go
	if width == 1 {
		b.ensure_cap(b.len + count)
		unsafe {
			vmemset(&u8(b.data) + b.len, utf8_buf[0], count)
			b.len += count
		}
		return
	}

	// multi byte encoding: copy the encoded bytes `count` times, one after another
	total_bytes := count * width
	b.ensure_cap(b.len + total_bytes)
	unsafe {
		// the destination is computed only after ensure_cap, which may move the data
		mut dst := &u8(b.data) + b.len
		for _ in 0 .. count {
			vmemcpy(dst, encoded.str, width)
			dst += width
		}
		b.len += total_bytes
	}
}
322
// IndentParam holds the configuration parameters for the indent() method.
// It is marked with `@[params]`, and every field, except `starting_level`,
// has an explicit default value.
@[params]
pub struct IndentParam {
pub mut:
	block_start    rune = `{` // Character that starts a new block (+ indent)
	block_end      rune = `}` // Character that ends a block (- indent)
	indent_char    rune = ` ` // Character used for indentation (space or tab)
	indent_count   int  = 4 // Number of indent_char per indentation level
	starting_level int // Initial indentation level (0 = no initial indent)
}
333
// IndentState represents the current parsing state of the indent() method.
// The state switches to `in_string` on an opening quote, and back to `normal`
// on the matching closing quote.
enum IndentState {
	normal    // Normal state, processing regular characters
	in_string // Inside a string literal, ignoring formatting characters
}
339
// indent formats a string by applying structured indentation based on block delimiters.
// It processes the input string `s` and writes the formatted output to the `Builder` `b`.
// The function preserves content inside string literals (both single and double quotes) and
// configures indentation behavior through the `param` structure.
//
// Key behaviors:
// 1. Removes existing indentation at the beginning of lines.
// 2. Applies new indentation based on block nesting levels.
// 3. Ignores block delimiters and formatting characters inside string literals.
// 4. Keeps empty blocks (e.g., {}) on the same line.
// 5. Inserts newlines after `block_start` and before `block_end` (except for empty blocks).
// 6. Maintains existing line breaks from the input.
//
// Notes:
// - The input is scanned byte by byte, so `block_start` and `block_end` should be
//   ASCII characters. Bytes of multi-byte UTF-8 sequences are never treated as
//   delimiters, and are copied to the output unchanged.
// - Inside a string literal, a backslash escapes the byte that follows it, so `\"`
//   does not end the literal, while the quote after `\\` does.
//
// Example:
// ```v
// import strings
// input := 'User{name:"John" settings:{theme:"dark"}}'
// mut b := strings.new_builder(64)
// b.indent(input, indent_count: 2)
// println(b.str()) // Formatted output: 'User{\n  name:"John" settings:{\n    theme:"dark"\n  }\n}'
// ```
@[direct_array_access]
pub fn (mut b Builder) indent(s string, param IndentParam) {
	if s.len == 0 {
		return
	}

	mut state := IndentState.normal
	mut indent_level := param.starting_level
	mut string_char := `\0`
	mut at_line_start := true
	// true, when the previous byte inside a string literal was an unescaped backslash
	mut escaped := false
	for i := 0; i < s.len; i++ {
		ch := s[i]
		c := rune(ch)
		match state {
			// Normal state: process characters outside of string literals
			.normal {
				if ch >= 0x80 {
					// A byte of a multi-byte UTF-8 sequence: it is regular content.
					// Copy it as a raw byte, since re-encoding it as a rune would
					// corrupt the sequence that it belongs to.
					if at_line_start {
						b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
						at_line_start = false
					}
					b.write_u8(ch)
				} else {
					match c {
						`"`, `'` {
							// Start of a string literal
							state = .in_string
							string_char = c
							escaped = false
							// Add indentation if at the start of a line
							if at_line_start {
								b.write_repeated_rune(param.indent_char,
									indent_level * param.indent_count)
								at_line_start = false
							}
							// Write the opening quote
							b.write_u8(ch)
						}
						param.block_start {
							// Start of a new block
							// Add indentation if at the start of a line
							if at_line_start {
								b.write_repeated_rune(param.indent_char,
									indent_level * param.indent_count)
								at_line_start = false
							}

							// Write the block start character
							b.write_u8(ch)

							// Check for empty block (e.g., {})
							// Empty blocks stay on the same line
							if i + 1 < s.len && s[i + 1] == param.block_end {
								b.write_u8(s[i + 1])
								i++
							} else {
								// Non-empty block: increase indentation and add newline
								indent_level++
								b.write_u8(`\n`)
								at_line_start = true
							}
						}
						param.block_end {
							// End of a block
							// Decrease indentation level (but not below 0)
							if indent_level > 0 {
								indent_level--
							}

							// If not at the start of a line, add a newline
							if !at_line_start {
								b.write_u8(`\n`)
							}

							// Add indentation for the block end
							b.write_repeated_rune(param.indent_char, indent_level * param.indent_count)
							at_line_start = false

							b.write_u8(ch)
						}
						` `, `\t`, `\r`, `\n` {
							// Whitespace characters
							// Only write whitespace if not at the start of a line,
							// which drops the existing indentation of the input
							if !at_line_start {
								b.write_u8(ch)
							}

							// Newline resets the line start flag
							if c == `\n` {
								at_line_start = true
							}
						}
						else {
							// Any other character
							// Add indentation if at the start of a line
							if at_line_start {
								b.write_repeated_rune(param.indent_char,
									indent_level * param.indent_count)
								at_line_start = false
							}
							b.write_u8(ch)
						}
					}
				}
			}
			.in_string {
				// Inside a string literal: preserve all bytes as-is
				b.write_u8(ch)

				// Check for string termination. The byte must match the opening quote,
				// and must not be escaped. Tracking the escape state, instead of only
				// looking at the previous byte, handles escaped backslashes correctly.
				if escaped {
					escaped = false
				} else if ch == `\\` {
					escaped = true
				} else if c == string_char {
					state = .normal
					string_char = `\0`
				}
			}
		}
	}
}
471