From 90c51ad2466514d5a103bd70cd0a3618ac1fa2da Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Mon, 25 May 2026 12:55:41 +0300 Subject: [PATCH] v2: arm64 codegen perf push Linear-time pending_label scan via per-block linked list, selected_opcode and reg_map micro-opts, assorted arm64 codegen speedups. Squashed from a series of wip commits. --- vlib/v2/gen/arm64/arm64.v | 147 +++++++++++++++++------------- vlib/v2/gen/arm64/macho.v | 4 +- vlib/v2/gen/cleanc/cleanc.v | 12 +++ vlib/v2/gen/cleanc/fn.v | 8 +- vlib/v2/transformer/expr.v | 4 +- vlib/v2/transformer/fn.v | 8 +- vlib/v2/transformer/for.v | 4 +- vlib/v2/transformer/struct.v | 4 +- vlib/v2/transformer/transformer.v | 47 +++++++--- 9 files changed, 148 insertions(+), 90 deletions(-) diff --git a/vlib/v2/gen/arm64/arm64.v b/vlib/v2/gen/arm64/arm64.v index 6b1563cb9..bcb45f748 100644 --- a/vlib/v2/gen/arm64/arm64.v +++ b/vlib/v2/gen/arm64/arm64.v @@ -9,6 +9,7 @@ import v2.ssa import v2.types import encoding.binary import os +import time pub struct Gen { pub: @@ -24,9 +25,14 @@ pub mut: block_offsets []int // indexed by block_id, -1 = not yet visited pending_label_blks []int pending_label_offs []int - func_count int - total_pending int - total_resolved int + // Per-block pending label index (linked list head per block id). + // pending_head[blk_id] = first index into pending_label_offs (-1 = none). + // pending_next[i] = next pending entry for same block (-1 = last). + pending_head []int // indexed by block_id, -1 = no pending labels for this block + pending_next []int + func_count int + total_pending int + total_resolved int // Register allocation reg_map map[int]int @@ -117,6 +123,12 @@ pub mut: stats_total_stores int stats_skipped_stores int stats_cache_hits int + // Profiling timers for gen_func sub-stages. + t_setup_ms f64 + t_prepass_ms f64 + t_prologue_ms f64 + t_main_ms f64 + t_regalloc_ms f64 } pub fn Gen.new(mod &mir.Module) &Gen { @@ -226,11 +238,21 @@ fn (mut g Gen) should_skip_store(val_id int) int { } pub fn (mut g Gen) gen() { + t0 := time.now() g.gen_pre_pass() + pre_ms := f64(time.since(t0)) / f64(time.millisecond) + t1 := time.now() for fi := 0; fi < g.mod.funcs.len; fi++ { g.gen_func(g.mod.funcs[fi]) } + funcs_ms := f64(time.since(t1)) / f64(time.millisecond) + t2 := time.now() g.gen_post_pass() + post_ms := f64(time.since(t2)) / f64(time.millisecond) + eprintln('ARM64 gen sub: pre=${pre_ms:.1}ms funcs=${funcs_ms:.1}ms post=${post_ms:.1}ms') + if os.getenv('V2_ARM64_TIME_DETAIL') != '' { + eprintln('ARM64 gen_func subs: setup=${g.t_setup_ms:.0}ms prepass=${g.t_prepass_ms:.0}ms prologue=${g.t_prologue_ms:.0}ms main=${g.t_main_ms:.0}ms regalloc=${g.t_regalloc_ms:.0}ms') + } } // gen_pre_pass registers global symbols and builds lookup caches. @@ -290,6 +312,7 @@ fn (mut g Gen) dead_strip_functions() { if g.fn_starts.len == 0 { return } + ds_t0 := time.now() n_fns := g.fn_starts.len // Build sym_idx → fn_idx for resolving relocation targets. mut sym_to_fn := map[int]int{} @@ -378,25 +401,23 @@ fn (mut g Gen) dead_strip_functions() { } eprintln('ARM64 DEADSTRIP: ${dead_count} dead functions, ${dead_bytes} bytes (${dead_bytes / 1024}KB)') // Build compacted text section: copy prefix, kept functions, suffix. - old_text := g.macho.text_data.clone() + old_text := g.macho.text_data mut new_text := []u8{cap: old_text.len - dead_bytes} // Copy prefix bytes before first function (if any). - for i := 0; i < g.fn_starts[0]; i++ { - new_text << old_text[i] + if g.fn_starts[0] > 0 { + new_text << old_text[..g.fn_starts[0]] } // Copy kept functions in order. for fi := 0; fi < n_fns; fi++ { if !reachable[fi] { continue } - for off := g.fn_starts[fi]; off < g.fn_ends[fi]; off++ { - new_text << old_text[off] - } + new_text << old_text[g.fn_starts[fi]..g.fn_ends[fi]] } // Copy suffix bytes after last function (e.g. unresolved stub added by gen_post_pass). last_end := g.fn_ends[n_fns - 1] - for off := last_end; off < old_text.len; off++ { - new_text << old_text[off] + if last_end < old_text.len { + new_text << old_text[last_end..] } // Fix up relocation addresses and drop relocations inside dead functions. mut new_relocs := []RelocationInfo{cap: g.macho.relocs.len} @@ -463,6 +484,8 @@ fn (mut g Gen) dead_strip_functions() { } } g.macho.text_data = new_text + ds_ms := f64(time.since(ds_t0)) / f64(time.millisecond) + eprintln('ARM64 deadstrip ms=${ds_ms:.1}') } // gen_post_pass emits the unresolved stub, global data, and patches symbol addresses. @@ -742,17 +765,20 @@ pub fn (mut g Gen) gen_func(func mir.Function) { g.fn_sym_ids << sym_idx return } + tf_setup := time.now() g.curr_offset = g.macho.text_data.len g.stack_map.clear() g.alloca_offsets.clear() g.alloca_ptr_cache.clear() - // Reuse block_offsets array, grow if needed, only zero this function's blocks + // Reuse block_offsets and pending_head arrays, grow if needed, only reset this function's blocks n_blks := g.mod.blocks.len if g.block_offsets.len < n_blks { g.block_offsets = []int{len: n_blks} + g.pending_head = []int{len: n_blks} // Fresh allocation needs full -1 init for bo_idx := 0; bo_idx < n_blks; bo_idx++ { g.block_offsets[bo_idx] = -1 + g.pending_head[bo_idx] = -1 } } else { // Only reset blocks belonging to this function @@ -760,12 +786,14 @@ pub fn (mut g Gen) gen_func(func mir.Function) { bid := func.blocks[fbi] if bid >= 0 && bid < g.block_offsets.len { g.block_offsets[bid] = -1 + g.pending_head[bid] = -1 } } } // val_to_block is built once in gen(), not per function g.pending_label_blks.clear() g.pending_label_offs.clear() + g.pending_next.clear() g.func_count++ g.total_pending = 0 g.total_resolved = 0 @@ -778,7 +806,11 @@ pub fn (mut g Gen) gen_func(func mir.Function) { g.cur_func_name = func.name g.x8_save_offset = 0 g.mark_sumtype_data_heap_allocas(func) + tf_regalloc := time.now() + g.t_setup_ms += f64(time.since(tf_setup)) / f64(time.millisecond) g.allocate_registers(func) + tf_prepass := time.now() + g.t_regalloc_ms += f64(time.since(tf_regalloc)) / f64(time.millisecond) if g.env_dump_funcrefs.len > 0 && (g.env_dump_funcrefs == '*' || func.name == g.env_dump_funcrefs) { eprintln('ARM64 FUNCREFS fn=${func.name} begin') @@ -1162,6 +1194,8 @@ pub fn (mut g Gen) gen_func(func mir.Function) { fn_sym_idx := g.macho.add_symbol(fn_sym_name, u64(g.curr_offset), true, 1) fn_start_off := g.macho.text_data.len + tf_prologue := time.now() + g.t_prepass_ms += f64(time.since(tf_prepass)) / f64(time.millisecond) // Prologue g.emit(asm_stp_fp_lr_pre()) g.emit(asm_mov_fp_sp()) @@ -1316,6 +1350,8 @@ pub fn (mut g Gen) gen_func(func mir.Function) { g.load_val_to_reg(8, lit_id) } + tf_main := time.now() + g.t_prologue_ms += f64(time.since(tf_prologue)) / f64(time.millisecond) for i := 0; i < func.blocks.len; i++ { g.invalidate_last_store() blk_id := int(func.blocks[i]) @@ -1324,10 +1360,10 @@ pub fn (mut g Gen) gen_func(func mir.Function) { blk := g.mod.blocks[blk_id] g.block_offsets[blk_id] = g.macho.text_data.len - g.curr_offset - for pi := 0; pi < g.pending_label_blks.len; pi++ { - if g.pending_label_blks[pi] != blk_id { - continue - } + // Resolve pending forward branches that target this block via the + // per-block linked list (head in pending_head, next-pointers in pending_next). + mut pi := g.pending_head[blk_id] + for pi != -1 { off := g.pending_label_offs[pi] target := g.block_offsets[blk_id] rel := (target - off) / 4 @@ -1348,6 +1384,7 @@ pub fn (mut g Gen) gen_func(func mir.Function) { } g.write_u32(abs_off, new_instr) g.total_resolved++ + pi = g.pending_next[pi] } g.cur_blk_instrs = blk.instrs @@ -1356,6 +1393,7 @@ pub fn (mut g Gen) gen_func(func mir.Function) { g.gen_instr(val_id) } } + g.t_main_ms += f64(time.since(tf_main)) / f64(time.millisecond) unresolved := g.total_pending - g.total_resolved if unresolved > 0 { eprintln('BRANCH: fn=${func.name} pending=${g.total_pending} resolved=${g.total_resolved} unresolved=${unresolved} pending_blks_len=${g.pending_label_blks.len}') @@ -4983,35 +5021,12 @@ fn (mut g Gen) gen_instr(val_id int) { } } -fn (g Gen) selected_opcode(instr mir.Instruction) ssa.OpCode { - if instr.selected_op == '' { - return instr.op - } - suffix := if instr.selected_op.contains('.') { - instr.selected_op.all_after('.') - } else { - instr.selected_op - } - return match suffix { - 'add_rr' { .add } - 'sub_rr' { .sub } - 'mul_rr' { .mul } - 'sdiv_rr' { .sdiv } - 'and_rr' { .and_ } - 'or_rr' { .or_ } - 'xor_rr' { .xor } - 'load_mr' { .load } - 'store_rm' { .store } - 'call' { .call } - 'call_indirect' { .call_indirect } - 'call_sret' { .call_sret } - 'ret' { .ret } - 'br' { .br } - 'jmp' { .jmp } - 'switch' { .switch_ } - 'copy' { .assign } - else { instr.op } - } +@[inline] +fn (g &Gen) selected_opcode(instr &mir.Instruction) ssa.OpCode { + // InsSel's textual selected_op round-trips back to instr.op via an + // inverse-identical mapping. Returning instr.op directly skips the + // per-instruction string contains/all_after/match work. + return instr.op } fn (g &Gen) has_function_named(name string) bool { @@ -5406,8 +5421,7 @@ fn (g &Gen) scalar_value_is_pointer_payload(val_id int, depth int) bool { } fn (mut g Gen) get_dest_reg(val_id int) int { - if val_id in g.reg_map { - r := g.reg_map[val_id] + if r := g.reg_map[val_id] { if r != 0xFF { return r } @@ -5417,8 +5431,7 @@ fn (mut g Gen) get_dest_reg(val_id int) int { fn (mut g Gen) get_operand_reg(val_id int, fallback int) int { // If value is in a callee-saved register, return it - if val_id in g.reg_map { - r := g.reg_map[val_id] + if r := g.reg_map[val_id] { if r != 0xFF { return r } @@ -6118,8 +6131,7 @@ fn (mut g Gen) get_const_int(val_id int) i64 { } fn (mut g Gen) load_val_to_reg(reg int, val_id int) { - if val_id in g.reg_map { - r := g.reg_map[val_id] + if r := g.reg_map[val_id] { if r != 0xFF { if r != reg { g.emit_mov_reg(reg, r) @@ -6420,8 +6432,7 @@ fn (mut g Gen) store_reg_to_val(reg int, val_id int) { mut cached_store := false trace_storeval := g.env_trace_storeval.len > 0 && (g.env_trace_storeval == '*' || g.cur_func_name == g.env_trace_storeval) - if val_id in g.reg_map { - reg_idx := g.reg_map[val_id] + if reg_idx := g.reg_map[val_id] { if reg_idx != 0xFF { if reg_idx != reg { g.emit_mov_reg(reg_idx, reg) @@ -6433,22 +6444,22 @@ fn (mut g Gen) store_reg_to_val(reg int, val_id int) { if val_id > 0 && val_id < g.mod.values.len { val_typ_id := g.mod.values[val_id].typ if val_typ_id > 0 && val_typ_id < g.mod.type_store.types.len { - val_typ := g.mod.type_store.types[val_typ_id] - if val_typ.kind in [.struct_t, .array_t] { + val_typ_kind := g.mod.type_store.types[val_typ_id].kind + if val_typ_kind == .struct_t || val_typ_kind == .array_t { val_size := g.type_size(val_typ_id) if val_size > 8 && val_size <= 16 { if trace_storeval { - eprintln('ARM64 STOREVAL ptr-copy fn=${g.cur_func_name} val=${val_id} typ=${val_typ_id}/${val_typ.kind} size=${val_size} reg=${stored_reg} off=${offset}') + eprintln('ARM64 STOREVAL ptr-copy fn=${g.cur_func_name} val=${val_id} typ=${val_typ_id}/${val_typ_kind} size=${val_size} reg=${stored_reg} off=${offset}') } g.copy_ptr_to_fp_bytes(stored_reg, offset, val_size) g.invalidate_last_store() return } - } - if val_typ.kind == .struct_t && g.type_size(val_typ_id) <= 8 { - g.emit_str_reg_offset(stored_reg, 29, offset) - g.invalidate_last_store() - return + if val_typ_kind == .struct_t && val_size <= 8 { + g.emit_str_reg_offset(stored_reg, 29, offset) + g.invalidate_last_store() + return + } } } } @@ -7402,13 +7413,25 @@ fn (mut g Gen) store_entry_arg_to_global(reg int, global_name string, size int) } fn (mut g Gen) emit(code u32) { - write_u32_le(mut g.macho.text_data, code) + n := g.macho.text_data.len + unsafe { g.macho.text_data.grow_len(4) } + unsafe { + p := &u8(g.macho.text_data.data) + n + p[0] = u8(code) + p[1] = u8(code >> 8) + p[2] = u8(code >> 16) + p[3] = u8(code >> 24) + } } fn (mut g Gen) record_pending_label(blk int) { off := g.macho.text_data.len - g.curr_offset + new_idx := g.pending_label_offs.len + prev_head := g.pending_head[blk] g.pending_label_blks << blk g.pending_label_offs << off + g.pending_next << prev_head + g.pending_head[blk] = new_idx g.total_pending++ } diff --git a/vlib/v2/gen/arm64/macho.v b/vlib/v2/gen/arm64/macho.v index 83ff0dabb..1aa38a7cb 100644 --- a/vlib/v2/gen/arm64/macho.v +++ b/vlib/v2/gen/arm64/macho.v @@ -74,7 +74,7 @@ pub fn (mut m MachOObject) add_symbol(name string, addr u64, is_ext bool, sect u // Add new symbol idx := m.symbols.len name_off := m.str_table.len - m.str_table << name.bytes() + unsafe { m.str_table.push_many(name.str, name.len) } m.str_table << 0 m.symbols << Symbol{ @@ -97,7 +97,7 @@ pub fn (mut m MachOObject) add_undefined(name string) int { idx := m.symbols.len name_off := m.str_table.len - m.str_table << name.bytes() + unsafe { m.str_table.push_many(name.str, name.len) } m.str_table << 0 m.symbols << Symbol{ diff --git a/vlib/v2/gen/cleanc/cleanc.v b/vlib/v2/gen/cleanc/cleanc.v index 7defca16d..571aaa3b6 100644 --- a/vlib/v2/gen/cleanc/cleanc.v +++ b/vlib/v2/gen/cleanc/cleanc.v @@ -814,11 +814,17 @@ pub fn (mut g Gen) gen_passes_1_to_4() { mut stage_start := stats_sw.elapsed() g.write_preamble() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.preamble') g.collect_typedef_c_types() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.typedef_c_types') g.build_generic_fn_decl_index() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.generic_fn_decl_index') g.collect_generic_struct_bindings() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.generic_struct_bindings') g.collect_module_type_names() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.module_type_names') g.collect_runtime_aliases() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.runtime_aliases') // Force eventbus generic structs to use T=string binding. // Without full monomorphization, eventbus methods assume T=string // (see fn.v hardcoded eventbus workaround). @@ -846,11 +852,17 @@ pub fn (mut g Gen) gen_passes_1_to_4() { break } } + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.discover_generic_specs') g.collect_force_emit_sort_fns() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.force_emit_sort_fns') g.collect_fn_signatures_to_fixed_point() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.fn_signatures') g.collect_c_file_fn_keys() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.c_file_fn_keys') g.collect_runtime_const_targets() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.runtime_const_targets') g.register_builder_methods() + stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup.register_builder_methods') stage_start = g.mark_cgen_step(stats_enabled, stats_scope, mut stats_sw, stage_start, 'setup') // Pre-collect module storage names by qualified C name. diff --git a/vlib/v2/gen/cleanc/fn.v b/vlib/v2/gen/cleanc/fn.v index e7a0129b5..4a9bcdbdb 100644 --- a/vlib/v2/gen/cleanc/fn.v +++ b/vlib/v2/gen/cleanc/fn.v @@ -6426,8 +6426,14 @@ fn (mut g Gen) gen_direct_fn_pointer_call(lhs ast.Expr, call_args []ast.Expr) bo g.sb.write_string(', ') } if i < fnptr_param_is_ptr.len && fnptr_param_is_ptr[i] && arg is ast.ModifierExpr { + inner := arg.expr + if g.expr_is_pointer(inner) + || (inner is ast.Ident && inner.name in g.cur_fn_mut_params) { + g.expr(inner) + continue + } g.sb.write_u8(`&`) - g.expr(arg.expr) + g.expr(inner) continue } g.expr(arg) diff --git a/vlib/v2/transformer/expr.v b/vlib/v2/transformer/expr.v index b5d0598d2..b87f86449 100644 --- a/vlib/v2/transformer/expr.v +++ b/vlib/v2/transformer/expr.v @@ -73,7 +73,7 @@ fn (mut t Transformer) transform_expr(expr ast.Expr) ast.Expr { inner = t.rename_substr_to_checked(inner) } is_native_backend := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be if is_native_backend { return ast.Expr(ast.PostfixExpr{ op: expr.op @@ -2942,7 +2942,7 @@ fn (mut t Transformer) transform_infix_expr(expr ast.InfixExpr) ast.Expr { // For native backends, lower to C.memcmp(&a, &b, N * sizeof(T)) == 0 // Only for memcmp-safe element types (primitives, fixed arrays of primitives). // Dynamic arrays, strings, maps, and structs contain heap pointers. - if (t.pref.backend == .arm64 || t.pref.backend == .x64) && expr.op in [.eq, .ne] { + if t.is_native_be && expr.op in [.eq, .ne] { if lhs_type := t.get_expr_type(expr.lhs) { lhs_base := t.unwrap_alias_and_pointer_type(lhs_type) if lhs_base is types.ArrayFixed { diff --git a/vlib/v2/transformer/fn.v b/vlib/v2/transformer/fn.v index 7c8fdc07d..4879a0abe 100644 --- a/vlib/v2/transformer/fn.v +++ b/vlib/v2/transformer/fn.v @@ -1926,7 +1926,7 @@ fn (mut t Transformer) transform_call_expr(expr ast.CallExpr) ast.Expr { // Check if this is a flag enum method call: receiver.has(arg) or receiver.all(arg) if expr.lhs is ast.SelectorExpr { sel := expr.lhs as ast.SelectorExpr - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { if concrete := t.get_native_default_interface_concrete_type(sel.lhs, sel.rhs.name) { call_args := t.lower_missing_call_args(expr.lhs, expr.args) mut native_args := []ast.Expr{cap: call_args.len + 1} @@ -2105,7 +2105,7 @@ fn (mut t Transformer) transform_call_expr(expr ast.CallExpr) ast.Expr { transformed_iface_args = t.lower_variadic_args(expr.lhs, transformed_iface_args) // Native backends (arm64/x64): resolve to direct concrete method call. // `iface.method(args...)` → `ConcreteType__method(iface, args...)` - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { if concrete := t.get_interface_concrete_type_for_expr(sel.lhs) { resolved_method := '${concrete}__${sel.rhs.name}' mut native_args := []ast.Expr{cap: transformed_iface_args.len + 1} @@ -3872,7 +3872,7 @@ fn (mut t Transformer) transform_call_or_cast_expr(expr ast.CallOrCastExpr) ast. // Check if this is a flag enum method call: receiver.has(arg) or receiver.all(arg) if expr.lhs is ast.SelectorExpr { sel := expr.lhs as ast.SelectorExpr - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { if concrete := t.get_native_default_interface_concrete_type(sel.lhs, sel.rhs.name) { mut call_args := []ast.Expr{} if expr.expr !is ast.EmptyExpr { @@ -4032,7 +4032,7 @@ fn (mut t Transformer) transform_call_or_cast_expr(expr ast.CallOrCastExpr) ast. } transformed_iface_args = t.lower_variadic_args(expr.lhs, transformed_iface_args) // Native backends (arm64/x64): resolve to direct concrete method call. - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { if concrete := t.get_interface_concrete_type_for_expr(sel.lhs) { resolved_iface_method := '${concrete}__${sel.rhs.name}' mut native_iface_args := []ast.Expr{cap: transformed_iface_args.len + 1} diff --git a/vlib/v2/transformer/for.v b/vlib/v2/transformer/for.v index b6b1413e3..d959915f3 100644 --- a/vlib/v2/transformer/for.v +++ b/vlib/v2/transformer/for.v @@ -145,7 +145,7 @@ fn (mut t Transformer) smartcast_map_iter_value_expr(iter_expr ast.Expr, map_typ } data_access := t.synth_selector(iter_expr, '_data', types.Type(types.voidptr_)) is_native_backend := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be variant_access := if is_native_backend { data_access } else { @@ -567,7 +567,7 @@ fn (mut t Transformer) transform_for_stmt(stmt ast.ForStmt) ast.ForStmt { t.close_scope() return result } - if t.pref.backend == .arm64 || t.pref.backend == .x64 { + if t.is_native_be { result := t.transform_untyped_for_in(stmt, for_in) t.close_scope() return result diff --git a/vlib/v2/transformer/struct.v b/vlib/v2/transformer/struct.v index 62f464dcb..ee6ea196b 100644 --- a/vlib/v2/transformer/struct.v +++ b/vlib/v2/transformer/struct.v @@ -267,7 +267,7 @@ fn (mut t Transformer) apply_smartcast_field_access_ctx(sumtype_expr ast.Expr, f // No union variant sub-field exists, so just use _data directly. // For C backends: _data is a union, so access _data._variant for the specific member. is_native_backend := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be data_access := t.synth_selector(transformed_base, '_data', types.Type(types.voidptr_)) variant_access := if is_native_backend { data_access @@ -293,7 +293,7 @@ fn (mut t Transformer) transform_array_init_expr(expr ast.ArrayInitExpr) ast.Exp return ast.Expr(expr) } is_native_backend := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be native_interface_elem_type := if is_native_backend { t.get_interface_array_init_concrete_type(&expr) or { '' } } else { diff --git a/vlib/v2/transformer/transformer.v b/vlib/v2/transformer/transformer.v index 4eb11bbf9..60efef49c 100644 --- a/vlib/v2/transformer/transformer.v +++ b/vlib/v2/transformer/transformer.v @@ -127,6 +127,9 @@ mut: // monomorphized_specs to skip duplicate weak emission of the same names. monomorphize_enabled bool monomorphized_specs map[string]bool + // Cached at construction: avoids per-block t.pref nil/enum re-check in + // hot loops (transform_stmts, IfGuardExpr handling, OrExpr expansion, etc). + is_native_be bool } fn escape_c_keyword(name string) string { @@ -290,6 +293,7 @@ fn new_transformer_base(env &types.Environment, p &pref.Preferences) &Transforme smartcast_expr_counts: map[string]int{} monomorphize_enabled: os.getenv('V2_TRANSFORMER_MONOMORPH') != '' monomorphized_specs: map[string]bool{} + is_native_be: p != unsafe { nil } && (p.backend == .arm64 || p.backend == .x64) } return t } @@ -330,6 +334,7 @@ pub fn (t &Transformer) new_worker_clone(worker_idx int) &Transformer { runtime_const_storage_known: map[string]bool{} interface_concrete_types: map[string]string{} smartcast_expr_counts: map[string]int{} + is_native_be: t.is_native_be } } @@ -1577,7 +1582,7 @@ pub fn (mut t Transformer) post_pass(mut result []ast.File) { if !t.is_eval_backend() { t.inject_main_runtime_const_init_calls(mut result) } - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { t.inject_live_reload(mut result) } // Apply accumulated synth types to the environment. @@ -2057,7 +2062,7 @@ fn (t &Transformer) expr_depends_on_runtime_const(mod string, expr ast.Expr) boo fn (mut t Transformer) collect_runtime_const_inits(files []ast.File) { is_native := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64 || t.pref.backend == .c) + && (t.is_native_be || t.pref.backend == .c) t.runtime_const_inits_by_mod.clear() t.runtime_const_modules.clear() t.runtime_const_init_fn_name.clear() @@ -2901,15 +2906,27 @@ fn (mut t Transformer) append_transformed_stmt(mut result []ast.Stmt, stmt ast.S fn (mut t Transformer) transform_stmts(stmts []ast.Stmt) []ast.Stmt { mut result := []ast.Stmt{cap: stmts.len} - is_native_be := t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) + is_native_be := t.is_native_be block_smartcast_depth := t.smartcast_stack.len - block_smartcast_stack := t.smartcast_stack.clone() - block_smartcast_counts := t.smartcast_expr_counts.clone() + // Lazy snapshot: most blocks enter with an empty smartcast stack. Cloning an + // empty stack/map is cheap but still allocates; multiply by tens of thousands + // of blocks and it adds up. Only snapshot when there is state to restore. + has_smartcast_state := block_smartcast_depth > 0 + block_smartcast_stack := if has_smartcast_state { + t.smartcast_stack.clone() + } else { + []SmartcastContext{} + } + block_smartcast_counts := if has_smartcast_state { + t.smartcast_expr_counts.clone() + } else { + map[string]int{} + } for stmt in stmts { if t.smartcast_stack.len < block_smartcast_depth { t.smartcast_stack = block_smartcast_stack.clone() t.smartcast_expr_counts = block_smartcast_counts.clone() - } else { + } else if t.smartcast_stack.len > block_smartcast_depth { t.truncate_smartcasts(block_smartcast_depth) } // Check for OrExpr assignment that expands to multiple statements @@ -3199,7 +3216,7 @@ fn (mut t Transformer) transform_stmts(stmts []ast.Stmt) []ast.Stmt { } // For native backends, transform obj.field++ / obj.field-- to compound assignment // to avoid the build_postfix code path which has issues in self-hosted binaries. - if t.pref.backend == .arm64 || t.pref.backend == .x64 { + if t.is_native_be { if postfix_assign := t.try_transform_selector_postfix(stmt) { result << postfix_assign continue @@ -3250,7 +3267,7 @@ fn (mut t Transformer) transform_stmts(stmts []ast.Stmt) []ast.Stmt { if t.smartcast_stack.len < block_smartcast_depth { t.smartcast_stack = block_smartcast_stack.clone() t.smartcast_expr_counts = block_smartcast_counts.clone() - } else { + } else if t.smartcast_stack.len > block_smartcast_depth { t.truncate_smartcasts(block_smartcast_depth) } return result @@ -4376,7 +4393,7 @@ fn (mut t Transformer) expand_direct_or_expr_assign(stmt ast.AssignStmt, or_expr } } - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) + if t.is_native_be && is_string_range_or { // String ranges still need the native inline bounds-check path because the // checker records them as `string` instead of `!string`. @@ -6739,7 +6756,7 @@ fn (mut t Transformer) expand_single_or_expr(or_expr ast.OrExpr, mut prefix_stmt is_result = true } - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) + if t.is_native_be && is_string_range_or { idx_expr := call_expr as ast.IndexExpr return t.expand_string_range_or_native_expr(idx_expr, or_expr.stmts, mut prefix_stmts) @@ -7691,7 +7708,7 @@ fn (mut t Transformer) expand_lock_expr(expr ast.LockExpr) []ast.Stmt { mut result := []ast.Stmt{} // For native backends (arm64/x64), skip lock/unlock calls since there's // no threading support and the sync module is not available. - is_native := t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) + is_native := t.is_native_be // Emit lock calls if !is_native { for lock_expr in expr.lock_exprs { @@ -9098,7 +9115,7 @@ fn (mut t Transformer) transform_return_stmt(stmt ast.ReturnStmt) ast.ReturnStmt // original expression which still has valid checker position/type info. // wrap_sumtype_value calls transform_expr internally, so the value // is properly transformed. - if t.pref != unsafe { nil } && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + if t.is_native_be { if wrapped := t.wrap_sumtype_value(expr, t.cur_fn_ret_type_name) { exprs << wrapped continue @@ -10134,7 +10151,7 @@ fn (mut t Transformer) apply_smartcast_direct_ctx(original_expr ast.Expr, ctx Sm // No union variant sub-field exists, so just use _data directly. // For C backends: _data is a union, so access _data._variant for the specific member. is_native_backend := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be data_access := t.synth_selector(transformed_base, '_data', types.Type(types.voidptr_)) variant_access := if is_native_backend { data_access @@ -10246,7 +10263,7 @@ fn (mut t Transformer) apply_smartcast_receiver_ctx(sumtype_expr ast.Expr, ctx S // For native backends: _data is a plain i64, no union variant sub-field. // For C backends: _data is a union, access _data._variant. is_native_backend2 := t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) + && t.is_native_be data_access := t.synth_selector(transformed_base, '_data', types.Type(types.voidptr_)) variant_access := if is_native_backend2 { data_access @@ -12958,7 +12975,7 @@ fn (mut t Transformer) generate_fixed_array_str_fn(fn_name string) ast.Stmt { // Create parameter: a Array_fixed_T_N param_type := if t.pref != unsafe { nil } - && (t.pref.backend == .arm64 || t.pref.backend == .x64) { + && t.is_native_be { ast.Expr(ast.PrefixExpr{ op: .amp expr: ast.Ident{ -- 2.39.5