From 39ec1134fadd4749fc15cdc721ce978a637dde32 Mon Sep 17 00:00:00 2001 From: Turiiya <34311583+tobealive@users.noreply.github.com> Date: Sun, 7 May 2023 02:55:02 +0200 Subject: [PATCH] net.html: fix parsing of nested quoted strings in code tags (#18123) --- vlib/net/html/parser.v | 23 ++++++++++------------- vlib/net/html/parser_test.v | 2 +- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/vlib/net/html/parser.v b/vlib/net/html/parser.v index c54f1590f..6de0f7b77 100644 --- a/vlib/net/html/parser.v +++ b/vlib/net/html/parser.v @@ -111,32 +111,29 @@ fn (mut parser Parser) generate_tag() { pub fn (mut parser Parser) split_parse(data string) { parser.init() for chr in data { - // returns true if byte is a " or ' is_quote := chr == `"` || chr == `'` string_code := match chr { - `"` { 1 } // " - `'` { 2 } // ' + `"` { 1 } + `'` { 2 } else { 0 } } - if parser.lexical_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code + if parser.lexical_attributes.open_code { // verify if open_code is complete and handle string code parser.lexical_attributes.lexeme_builder.write_u8(chr) if parser.lexical_attributes.open_string > 0 && parser.lexical_attributes.open_string == string_code { parser.lexical_attributes.open_string = 0 - } else if is_quote { - parser.lexical_attributes.open_string = string_code - } else if chr == `>` { // only execute verification if is a > // here will verify < to know if code tag is finished + } else if chr == `>` { // code tag is finished name_close_tag := '${parser.lexical_attributes.opened_code_type}>' if parser.builder_str().to_lower().ends_with(name_close_tag) { parser.lexical_attributes.open_code = false - // need to modify lexeme_builder to add script text as a content in next loop (not gave error in dom) + // modify lexeme_builder to include script text as content in the next loop parser.lexical_attributes.lexeme_builder.go_back(name_close_tag.len) parser.lexical_attributes.current_tag.closed = true parser.lexical_attributes.current_tag.close_type = .new_tag } } } else if parser.lexical_attributes.open_comment { - if chr == `>` && parser.verify_end_comment(false) { // close tag '>' + if chr == `>` && parser.verify_end_comment(false) { // parser.print_debug(parser.builder_str() + " >> " + parser.lexical_attributes.line_count.str()) parser.lexical_attributes.lexeme_builder.go_back_to(0) parser.lexical_attributes.open_comment = false @@ -174,10 +171,10 @@ pub fn (mut parser Parser) split_parse(data string) { if parser.lexical_attributes.lexeme_builder.len == 0 && is_quote { parser.lexical_attributes.open_string = string_code parser.lexical_attributes.lexeme_builder.write_u8(chr) - } else if chr == `>` { // close tag > + } else if chr == `>` { complete_lexeme := parser.builder_str().to_lower() parser.lexical_attributes.current_tag.closed = (complete_lexeme.len > 0 - && complete_lexeme[complete_lexeme.len - 1] == `/`) // if equals to / + && complete_lexeme[complete_lexeme.len - 1] == `/`) if complete_lexeme.len > 0 && complete_lexeme[0] == `/` { parser.dom.close_tags[complete_lexeme] = true } @@ -207,7 +204,7 @@ pub fn (mut parser Parser) split_parse(data string) { } else { parser.lexical_attributes.current_tag.attributes[complete_lexeme] = '' parser.lexical_attributes.current_tag.last_attribute = '' - if chr == `=` { // if was a = + if chr == `=` { parser.lexical_attributes.current_tag.last_attribute = complete_lexeme } } @@ -216,7 +213,7 @@ pub fn (mut parser Parser) split_parse(data string) { if parser.builder_str() == '!--' { parser.lexical_attributes.open_comment = true } - } else if chr == `<` { // open tag '<' + } else if chr == `<` { temp_string := parser.builder_str() if parser.lexical_attributes.lexeme_builder.len >= 1 { if parser.lexical_attributes.current_tag.name.len > 1 diff --git a/vlib/net/html/parser_test.v b/vlib/net/html/parser_test.v index 60ae631cd..e20ca529d 100644 --- a/vlib/net/html/parser_test.v +++ b/vlib/net/html/parser_test.v @@ -62,7 +62,7 @@ fn test_giant_string() { fn test_script_tag() { mut parser := Parser{} - script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n" + script_content := '\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log("Quoted \'message\'");}\n' temp_html := '
' parser.parse_html(temp_html) assert parser.tags[2].content.len == script_content.len -- 2.39.5