From 359e28da67a7838094265af74be8570743b046d6 Mon Sep 17 00:00:00 2001
From: Alexander Medvednikov
Date: Wed, 25 Mar 2026 16:42:24 +0300
Subject: [PATCH] cgen: optimize string concatenation chains (fixes #276)

Flatten `a + b + c + ...` chains of 3 or more string operands into one
`string_plus_many` call, which allocates the result once instead of once
per `+`.

Only operands whose type is the builtin `string` itself are folded.
Aliases of `string` keep the regular operator path, so an alias that
defines its own `+` is still honoured.

---
 vlib/builtin/string.v                         | 30 ++++++++
 vlib/v/gen/c/infix.v                          | 72 +++++++++++++++++++
 ...ring_concat_chain_optimization.c.must_have |  2 +
 .../string_concat_chain_optimization.vv       | 12 ++++
 4 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 vlib/v/gen/c/testdata/string_concat_chain_optimization.c.must_have
 create mode 100644 vlib/v/gen/c/testdata/string_concat_chain_optimization.vv

diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v
index 0a337a7d9..435b96e6b 100644
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -918,6 +918,36 @@ fn (s string) + (a string) string {
 	return res
 }
 
+// string_plus_many concatenates the `data_len` strings stored contiguously at
+// `input_base` into one new string, using a single allocation.
+// Parts with a non-positive `len` are treated as empty and contribute no bytes.
+// It is called from the C code that cgen emits for `a + b + c + ...` chains.
+@[direct_array_access; markused]
+fn string_plus_many(data_len int, input_base &string) string {
+	mut new_len := 0
+	for i := 0; i < data_len; i++ {
+		part := unsafe { input_base[i] }
+		new_len += if part.len > 0 { part.len } else { 0 }
+	}
+	mut res := string{
+		str: unsafe { malloc_noscan(new_len + 1) }
+		len: new_len
+	}
+	mut offset := 0
+	unsafe {
+		for i := 0; i < data_len; i++ {
+			part := input_base[i]
+			part_len := if part.len > 0 { part.len } else { 0 }
+			if part_len > 0 {
+				vmemcpy(res.str + offset, part.str, part_len)
+				offset += part_len
+			}
+		}
+		res.str[new_len] = 0 // V strings are not null terminated, but just in case
+	}
+	return res
+}
+
 // for `s + s2 + s3`, an optimization (faster than string_plus(string_plus(s1, s2), s3))
 @[direct_array_access]
 fn (s string) plus_two(a string, b string) string {
diff --git a/vlib/v/gen/c/infix.v b/vlib/v/gen/c/infix.v
index 3c1160962..dfd66a0ef 100644
--- a/vlib/v/gen/c/infix.v
+++ b/vlib/v/gen/c/infix.v
@@ -1163,7 +1163,75 @@ fn (mut g Gen) gen_interface_is_op(node ast.InfixExpr) {
 
-// infix_expr_arithmetic_op generates code for `+`, `-`, `*`, `/`, and `%`
-// It handles operator overloading when necessary
+// is_string_type reports whether `typ` resolves to the builtin `string` symbol.
+// Aliases of `string` are rejected on purpose: an alias may define its own `+`,
+// and folding such operands into `string_plus_many` would bypass that overload.
+fn (mut g Gen) is_string_type(typ ast.Type) bool {
+	return g.unwrap(typ).sym.kind == .string
+}
+
+// is_string_concat_infix reports whether `node` is a `+` expression whose
+// resolved left and right operand types both satisfy `is_string_type`.
+fn (mut g Gen) is_string_concat_infix(node ast.InfixExpr) bool {
+	if node.op != .plus {
+		return false
+	}
+	left_type := g.type_resolver.get_type_or_default(node.left, node.left_type)
+	right_type := g.type_resolver.get_type_or_default(node.right, node.right_type)
+	return g.is_string_type(left_type) && g.is_string_type(right_type)
+}
+
+// collect_string_concat_parts appends the operands of a string `+` chain to
+// `parts` in left-to-right source order. Nested string concatenations and
+// parenthesized expressions are flattened; any other expression is one operand.
+fn (mut g Gen) collect_string_concat_parts(expr ast.Expr, mut parts []ast.Expr) {
+	match expr {
+		ast.InfixExpr {
+			if g.is_string_concat_infix(expr) {
+				g.collect_string_concat_parts(expr.left, mut parts)
+				g.collect_string_concat_parts(expr.right, mut parts)
+				return
+			}
+		}
+		ast.ParExpr {
+			g.collect_string_concat_parts(expr.expr, mut parts)
+			return
+		}
+		else {}
+	}
+	parts << expr
+}
+
+// gen_string_concat_many tries to emit `node` as a single
+// `builtin__string_plus_many(n, _MOV((string[n]){...}))` call.
+// It returns false without writing anything when `node` is not a string
+// concatenation or flattens to fewer than 3 operands; the caller must then
+// generate the expression through the regular path.
+fn (mut g Gen) gen_string_concat_many(node ast.InfixExpr) bool {
+	if !g.is_string_concat_infix(node) {
+		return false
+	}
+	mut parts := []ast.Expr{}
+	g.collect_string_concat_parts(ast.Expr(node), mut parts)
+	if parts.len < 3 {
+		return false
+	}
+	g.write('builtin__string_plus_many(${parts.len}, _MOV((string[${parts.len}]){')
+	for i, part in parts {
+		part_type := g.type_resolver.get_type_or_default(part, part.type())
+		g.expr_with_cast(part, part_type, ast.string_type)
+		if i < parts.len - 1 {
+			g.write(', ')
+		}
+	}
+	g.write('}))')
+	return true
+}
+
+// infix_expr_arithmetic_op generates code for `+`, `-`, `*`, `/`, and `%`
+// It handles operator overloading when necessary
 fn (mut g Gen) infix_expr_arithmetic_op(node ast.InfixExpr) {
+	if g.gen_string_concat_many(node) {
+		return
+	}
 	left_type := g.resolved_expr_type(node.left, node.left_type)
 	right_type := g.resolved_expr_type(node.right, node.right_type)
 	$if trace_ci_fixes ? {
diff --git a/vlib/v/gen/c/testdata/string_concat_chain_optimization.c.must_have b/vlib/v/gen/c/testdata/string_concat_chain_optimization.c.must_have
new file mode 100644
index 000000000..2897bea31
--- /dev/null
+++ b/vlib/v/gen/c/testdata/string_concat_chain_optimization.c.must_have
@@ -0,0 +1,2 @@
+builtin__string_plus_many(3, _MOV((string[3]){a, b, c}))
+builtin__string_plus_many(4, _MOV((string[4]){a, b, c, d}))
diff --git a/vlib/v/gen/c/testdata/string_concat_chain_optimization.vv b/vlib/v/gen/c/testdata/string_concat_chain_optimization.vv
new file mode 100644
index 000000000..cbc0aca3e
--- /dev/null
+++ b/vlib/v/gen/c/testdata/string_concat_chain_optimization.vv
@@ -0,0 +1,12 @@
+fn join_three(a string, b string, c string) string {
+	return a + b + c
+}
+
+fn join_four(a string, b string, c string, d string) string {
+	return a + b + c + d
+}
+
+fn main() {
+	assert join_three('a', 'b', 'c') == 'abc'
+	assert join_four('a', 'b', 'c', 'd') == 'abcd'
+}
-- 
2.39.5