From 3329919144bc9ac8b8a26a629ddfbbe3aa32dc0f Mon Sep 17 00:00:00 2001 From: Turiiya <34311583+ttytm@users.noreply.github.com> Date: Wed, 6 Sep 2023 08:37:51 +0200 Subject: [PATCH] encoding.html: improve hex unescaping, add test (#19279) --- vlib/encoding/html/escape.v | 23 ++++++++++++----------- vlib/encoding/html/escape_test.v | 1 + 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/vlib/encoding/html/escape.v b/vlib/encoding/html/escape.v index 1bdf11a8c..b30780fb2 100644 --- a/vlib/encoding/html/escape.v +++ b/vlib/encoding/html/escape.v @@ -51,7 +51,7 @@ fn unescape_all(input string) string { mut result := []rune{} runes := input.runes() mut i := 0 - outer: for i < runes.len { + for i < runes.len { if runes[i] == `&` { mut j := i + 1 for j < runes.len && runes[j] != `;` { @@ -59,21 +59,22 @@ fn unescape_all(input string) string { } if j < runes.len && runes[i + 1] == `#` { // Numeric escape sequences (e.g., &#39; or &#x27;) - code := runes[i + 2..j].string() - if code[0] == `x` { + if runes[i + 2] == `x` || runes[i + 2] == `X` { // Hexadecimal escape sequence - for c in code[1..]
{ - if !c.is_hex_digit() { - // Leave invalid sequences unchanged - result << runes[i..j + 1] - i = j + 1 - continue outer + if v := hex.decode(runes[i + 3..j].string()) { + mut n := u16(0) + for x in v { + n = n * 256 + x } + result << n + } else { + // Leave invalid sequences unchanged + result << runes[i..j + 1] + i = j + 1 } - result << hex.decode(code[1..]) or { []u8{} }.bytestr().runes() } else { // Decimal escape sequence - if v := strconv.atoi(code) { + if v := strconv.atoi(runes[i + 2..j].string()) { result << v } else { // Leave invalid sequences unchanged diff --git a/vlib/encoding/html/escape_test.v b/vlib/encoding/html/escape_test.v index f94b52b23..d23b76dd2 100644 --- a/vlib/encoding/html/escape_test.v +++ b/vlib/encoding/html/escape_test.v @@ -48,6 +48,7 @@ fn test_unescape_html() { fn test_unescape_all_html() { // Test different formats assert html.unescape(''''', all: true) == "'''" + assert html.unescape('⩔ = ⩔ = ⩔ = ⩔', all: true) == '⩔ = ⩔ = ⩔ = ⩔' // Converse escape tests assert html.unescape('<>&', all: true) == '<>&' assert html.unescape('No change', all: true) == 'No change' -- 2.39.5