From 8c0528cdad40ff465b6a1dd3535de9564338f677 Mon Sep 17 00:00:00 2001
From: Alexander Medvednikov
Date: Wed, 11 Mar 2026 16:18:18 +0300
Subject: [PATCH] json: encodes differently unicode strings yet decoded well (fixes #25103)

---
 vlib/json/json_primitives.c.v              | 69 +++++++++++++++++++++-
 vlib/json/tests/json_encode_primite_test.v | 14 +++++
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/vlib/json/json_primitives.c.v b/vlib/json/json_primitives.c.v
index e1e718d11..29d54c720 100644
--- a/vlib/json/json_primitives.c.v
+++ b/vlib/json/json_primitives.c.v
@@ -36,6 +36,8 @@ fn C.cJSON_CreateBool(bool) &C.cJSON
 
 fn C.cJSON_CreateString(&char) &C.cJSON
 
+fn C.cJSON_CreateRaw(&char) &C.cJSON
+
 fn C.cJSON_Parse(&char) &C.cJSON
 
 fn C.cJSON_PrintUnformatted(&C.cJSON) &char
@@ -251,12 +253,75 @@ fn encode_bool(val bool) &C.cJSON {
 
 @[markused]
 fn encode_rune(val rune) &C.cJSON {
-	return C.cJSON_CreateString(&char(val.str().str))
+	return C.cJSON_CreateRaw(&char(json_ascii_string(val.str()).str))
 }
 
 @[markused]
 fn encode_string(val string) &C.cJSON {
-	return C.cJSON_CreateString(&char(val.str))
+	return C.cJSON_CreateRaw(&char(json_ascii_string(val).str))
+}
+
+// json_ascii_string returns `val` as a double-quoted JSON string literal that contains only ASCII.
+// `"`, `\` and the control characters \b, \f, \n, \r, \t use their short escapes; every other
+// rune below 0x20 or above 0x7f is written as a lowercase `\uXXXX` escape, and runes above
+// 0xffff as a UTF-16 surrogate pair. The result is what encode_string hands to C.cJSON_CreateRaw.
+fn json_ascii_string(val string) string {
+	// The capacity covers the unescaped bytes plus the two quotes; escapes grow the array on demand.
+	mut output := []u8{cap: val.len + 2}
+	output << `"`
+	for character in val.runes() {
+		match character {
+			`"` {
+				output << `\\`
+				output << `"`
+			}
+			`\\` {
+				output << `\\`
+				output << `\\`
+			}
+			`\b` {
+				output << `\\`
+				output << `b`
+			}
+			`\f` {
+				output << `\\`
+				output << `f`
+			}
+			`\n` {
+				output << `\\`
+				output << `n`
+			}
+			`\r` {
+				output << `\\`
+				output << `r`
+			}
+			`\t` {
+				output << `\\`
+				output << `t`
+			}
+			else {
+				if character < 0x20 || character > 0x7f {
+					if character <= 0xffff {
+						// One UTF-16 code unit is enough, so a single 4-digit escape is written.
+						output << `\\`
+						output << `u`
+						hex_string := '${u32(character):04x}'
+						unsafe { output.push_many(hex_string.str, 4) }
+					} else {
+						// Above 0xffff: split into a UTF-16 high/low surrogate pair, both lowercase hex.
+						unicode_point_low := u32(character) - 0x10000
+						surrogate_pair := '\\u${0xD800 + ((unicode_point_low >> 10) & 0x3FF):04x}\\u${
+							0xDC00 + (unicode_point_low & 0x3FF):04x}'
+						unsafe { output.push_many(surrogate_pair.str, surrogate_pair.len) }
+					}
+				} else {
+					output << u8(character)
+				}
+			}
+		}
+	}
+	output << `"`
+	return output.bytestr()
 }
 
 // json_float_to_raw_string uses V's float formatter so json.encode keeps exact float round-trips.
diff --git a/vlib/json/tests/json_encode_primite_test.v b/vlib/json/tests/json_encode_primite_test.v
index b68318b64..4875dc0bb 100644
--- a/vlib/json/tests/json_encode_primite_test.v
+++ b/vlib/json/tests/json_encode_primite_test.v
@@ -8,6 +8,10 @@ type MyInt = int
 type MyString = string
 type MySumType = MyString | int | string
 
+struct UnicodeString {
+	emoji string
+}
+
 fn test_alias_to_primitive() {
 	mut test := Test{
 		field: MyString('foo')
@@ -38,3 +42,13 @@ fn test_alias_to_primitive() {
 	encoded = json.encode(test3)
 	assert dump(encoded) == '1000'
 }
+
+fn test_encode_unicode_as_ascii_escape_sequences() {
+	valid_json := r'{"emoji":"\u3007"}'
+	decoded := json.decode(UnicodeString, valid_json)!
+	assert decoded.emoji == '〇'
+	assert json.encode(UnicodeString{
+		emoji: '〇'
+	}) == valid_json
+	assert json.encode('😀') == r'"\ud83d\ude00"'
+}
-- 
2.39.5