v2 / vlib / encoding / html / escape.v
101 lines · 94 sloc · 2.64 KB · 008aaad99981918c51194d7aaaaaccb4c258f244
Raw
1module html
2
3import encoding.hex
4import strconv
5
// EscapeConfig holds the optional settings accepted by `escape`.
// The `@[params]` attribute lets callers pass the fields as trailing
// named arguments, e.g. `escape(s, quote: false)`.
@[params]
pub struct EscapeConfig {
pub:
	// quote selects whether double and single quotes are translated as well.
	// It defaults to true.
	quote bool = true
}
11
// UnescapeConfig holds the optional settings accepted by `unescape`.
// It embeds `EscapeConfig`, so the `quote` field is available here too.
@[params]
pub struct UnescapeConfig {
	EscapeConfig
pub:
	// all makes `unescape` delegate to the full decoder that handles named,
	// decimal and hexadecimal references. It defaults to false.
	all bool
}
18
// Replacement tables in the flat `[from, to, from, to, ...]` layout expected
// by `string.replace_each`.

// escape_seq maps the characters that are always escaped to their entities.
const escape_seq = ['&', '&amp;', '<', '&lt;', '>', '&gt;']

// escape_quote_seq maps double and single quotes to numeric references.
const escape_quote_seq = ['"', '&#34;', "'", '&#39;']

// unescape_seq is the inverse of `escape_seq`.
const unescape_seq = ['&amp;', '&', '&lt;', '<', '&gt;', '>']

// unescape_quote_seq is the inverse of `escape_quote_seq`.
const unescape_quote_seq = ['&#34;', '"', '&#39;', "'"]
23
// escape returns a copy of `input` in which the HTML special characters
// "<", ">" and "&" are replaced using `escape_seq`. When `config.quote` is
// true (the default), double and single quotes are additionally replaced
// using `escape_quote_seq`.
// **Note:** non-ASCII characters such as accented letters are passed through
// untouched; only the characters listed in the two tables are rewritten.
pub fn escape(input string, config EscapeConfig) string {
	escaped := input.replace_each(escape_seq)
	if !config.quote {
		return escaped
	}
	return escaped.replace_each(escape_quote_seq)
}
36
// unescape converts entities like "&lt;" to "<". By default it is the converse of `escape`.
// If `quote` is set to false, the quote references are left untouched.
// If `all` is set to true, it handles named, numeric, and hex values - for example,
// `'&apos;'`, `'&#39;'`, and `'&#x27;'` then unescape to "'".
pub fn unescape(input string, config UnescapeConfig) string {
	if config.all {
		return unescape_all(input)
	}
	if !config.quote {
		return input.replace_each(unescape_seq)
	}
	// Decode both tables in a single pass. Running a second `replace_each`
	// over the output of the first would decode twice: "&amp;#39;" would
	// become "&#39;" and then "'", so unescape(escape(s)) would not equal s.
	mut pairs := unescape_seq.clone()
	pairs << unescape_quote_seq
	return input.replace_each(pairs)
}
49
// unescape_all decodes every `&...;` character reference found in `input`:
// - named references, looked up in `named_references`;
// - decimal references such as `&#39;`;
// - hexadecimal references such as `&#x27;` (the `x` may be upper case).
// Anything that is not a recognised, well-formed reference is copied to the
// result unchanged. This includes a `&` that has no terminating `;`, numeric
// references without digits, and numeric values outside 0..0x10FFFF.
fn unescape_all(input string) string {
	// Highest valid Unicode code point.
	max_code_point := i64(0x10FFFF)
	runes := input.runes()
	mut result := []rune{cap: runes.len}
	mut i := 0
	for i < runes.len {
		if runes[i] != `&` {
			result << runes[i]
			i++
			continue
		}
		// Locate the `;` that would terminate this reference.
		mut j := i + 1
		for j < runes.len && runes[j] != `;` {
			j++
		}
		if j >= runes.len {
			// No `;` remains in the input, so neither this `&` nor any later
			// one can start a complete reference: copy the tail verbatim.
			result << runes[i..]
			break
		}
		// The candidate reference as written, `&` and `;` included.
		raw := runes[i..j + 1]
		if runes[i + 1] == `#` {
			// Numeric escape sequences (e.g., &#39; or &#x27;).
			// `code` stays negative when the digits cannot be parsed.
			mut code := i64(-1)
			if runes[i + 2] == `x` || runes[i + 2] == `X` {
				// Hexadecimal escape sequence
				digits := runes[i + 3..j].string()
				// An empty digit string is not a number; skip decoding it.
				if digits.len > 0 {
					if decoded := hex.decode(digits) {
						// Fold the decoded bytes, most significant first, into
						// one value. Stop once it leaves the Unicode range so
						// the accumulator cannot overflow.
						mut n := i64(0)
						for b in decoded {
							n = n * 256 + i64(b)
							if n > max_code_point {
								break
							}
						}
						code = n
					}
				}
			} else {
				// Decimal escape sequence
				if v := strconv.atoi(runes[i + 2..j].string()) {
					code = i64(v)
				}
			}
			if code >= 0 && code <= max_code_point {
				result << rune(code)
			} else {
				// Leave invalid sequences unchanged
				result << raw
			}
		} else {
			// Named entity (e.g., &lt;)
			entity := runes[i + 1..j].string()
			if v := named_references[entity] {
				result << v
			} else {
				// Leave unknown entities unchanged
				result << raw
			}
		}
		i = j + 1
	}
	return result.string()
}
102