From 3094cf12ec589dd1a0c63b37010c156c581dd373 Mon Sep 17 00:00:00 2001
From: JalonSolov <JalonSolov@gmail.com>
Date: Thu, 23 Apr 2026 21:42:32 -0400
Subject: [PATCH] x.markdown: small cleanups, speedups (#26965)

* x.markdown: small cleanups, speedups

* x.markdown: more small changes (new -> Markdown.new, new_block_parser -> BlockParser.new, to_html_opts merged into to_html)
---
 vlib/x/markdown/html.v          | 11 ------
 vlib/x/markdown/markdown.v      | 19 ++++------
 vlib/x/markdown/markdown_test.v | 40 ++++++---------------
 vlib/x/markdown/node.v          | 52 +++++++---------------------
 vlib/x/markdown/parser.v        | 57 ++++++++----------------------
 vlib/x/markdown/util.v          | 61 ++++++++++++++++++++++++++++++---
 6 files changed, 101 insertions(+), 139 deletions(-)
diff --git a/vlib/x/markdown/html.v b/vlib/x/markdown/html.v
index 7f64d52a9..f23e1986a 100644
--- a/vlib/x/markdown/html.v
+++ b/vlib/x/markdown/html.v
@@ -60,7 +60,6 @@ fn (mut r HTMLRenderer) render_node(node &Node) {
 		.definition_term { r.render_definition_term(node) }
 		.definition_desc { r.render_definition_desc(node) }
 		.footnote_def {} // rendered in the footnote section
-		// Inline nodes.
 		.text { r.render_text(node) }
 		.emphasis { r.render_emphasis(node) }
 		.strong { r.render_strong(node) }
@@ -92,8 +91,6 @@ fn (mut r HTMLRenderer) render_inline(src string) {
 	}
 }
 
-// ---- Block elements ----
-
 fn (mut r HTMLRenderer) render_heading(node &Node) {
 	tag := 'h${node.level}'
 	if node.id.len > 0 {
@@ -203,8 +200,6 @@ fn (mut r HTMLRenderer) render_html_block(node &Node) {
 	}
 }
 
-// ---- Table ----
-
 fn (mut r HTMLRenderer) render_table(node &Node) {
 	r.sb.write_string('<table>\n')
 	r.render_children(node)
@@ -249,8 +244,6 @@ fn (mut r HTMLRenderer) render_table_cell(node &Node) {
 	r.sb.write_string('</${cell_tag}>\n')
 }
 
-// ---- Definition list ----
-
 fn (mut r HTMLRenderer) render_definition_list(node &Node) {
 	r.sb.write_string('<dl>\n')
 	r.render_children(node)
@@ -272,8 +265,6 @@ fn (mut r HTMLRenderer) render_definition_desc(node &Node) {
 	r.sb.write_string('</dd>\n')
 }
 
-// ---- Footnotes ----
-
 fn (mut r HTMLRenderer) render_footnote_ref(node &Node) {
 	label := node.fn_label
 	// Assign an ordinal on first encounter.
@@ -302,8 +293,6 @@ fn (mut r HTMLRenderer) render_footnotes_section() {
 	r.sb.write_string('</ol>\n</section>\n')
 }
 
-// ---- Inline elements ----
-
 fn (mut r HTMLRenderer) render_text(node &Node) {
 	content := if r.opts.typographer {
 		smart_punctuate(node.literal)
diff --git a/vlib/x/markdown/markdown.v b/vlib/x/markdown/markdown.v
index a8a5f4ba5..78c8daeda 100644
--- a/vlib/x/markdown/markdown.v
+++ b/vlib/x/markdown/markdown.v
@@ -93,9 +93,9 @@ pub mut:
 	ref_map map[string]LinkRef
 }
 
-// new creates a Markdown processor with the given options.
+// Markdown.new creates a Markdown processor with the given options.
 // All extensions in opts.extensions are applied immediately.
-pub fn new(opts Options) Markdown {
+pub fn Markdown.new(opts Options) Markdown {
 	mut m := Markdown{
 		opts:    opts
 		ref_map: map[string]LinkRef{}
@@ -106,16 +106,9 @@ pub fn new(opts Options) Markdown {
 	return m
 }
 
-// to_html converts the markdown source to HTML using default settings
-// (CommonMark only, no extensions, raw HTML stripped).
-pub fn to_html(src string) string {
-	mut md := new(Options{})
-	return md.convert(src)
-}
-
-// to_html_opts converts the markdown source to HTML with the given options.
-pub fn to_html_opts(src string, opts Options) string {
-	mut md := new(opts)
+// to_html converts the markdown source to HTML using a fresh Markdown processor built from opts.
+pub fn to_html(src string, opts Options) string {
+	mut md := Markdown.new(opts)
 	return md.convert(src)
 }
 
@@ -133,7 +126,7 @@ pub fn (mut m Markdown) convert(src string) string {
 // Link reference definitions collected during parsing are cached so that
 // subsequent parse/convert calls on the same Markdown instance share them.
 pub fn (mut m Markdown) parse(src string) &Node {
-	mut p := new_block_parser(src, m.opts, m.ref_map)
+	mut p := BlockParser.new(src, m.opts, m.ref_map)
 	doc := p.parse()
 	for k, v in p.ref_map {
 		m.ref_map[k] = v
diff --git a/vlib/x/markdown/markdown_test.v b/vlib/x/markdown/markdown_test.v
index 6293b3571..e27de7735 100644
--- a/vlib/x/markdown/markdown_test.v
+++ b/vlib/x/markdown/markdown_test.v
@@ -137,9 +137,7 @@ fn test_shortcut_reference_still_resolves_normally() {
 
 fn test_gfm_table_header_uses_th_cells() {
 	src := '| a | b |\n| --- | --- |\n| 1 | 2 |'
-	html := to_html_opts(src, Options{
-		extensions: gfm()
-	})
+	html := to_html(src, extensions: gfm())
 	assert html.contains('<thead>')
 	assert html.contains('<th>a</th>')
 	assert html.contains('<th>b</th>')
@@ -225,9 +223,7 @@ fn test_setext_heading_multiline_text() {
 
 fn test_task_list() {
 	src := '- [ ] unchecked\n- [x] checked\n- [X] also checked'
-	html := to_html_opts(src, Options{
-		task_list: true
-	})
+	html := to_html(src, task_list: true)
 	assert html.contains('<input type="checkbox" disabled="">')
 	assert html.contains('<input type="checkbox" disabled="" checked="">')
 	assert html.contains('unchecked')
@@ -244,29 +240,25 @@ fn test_task_list_not_applied_without_extension() {
 fn test_task_list_marker_requires_space_after_closing_bracket() {
 	// GFM task markers are [ ]/[x]/[X] followed by whitespace or end of item.
 	src := '- [x]ok\n- [ ]todo'
-	html := to_html_opts(src, Options{
-		task_list: true
-	})
+	html := to_html(src, task_list: true)
 	assert !html.contains('<input')
 	assert html.contains('[x]ok')
 	assert html.contains('[ ]todo')
 }
 
 fn test_task_list_xhtml_checkbox_self_closing() {
-	html := to_html_opts('- [x] done', Options{
+	html := to_html('- [x] done',
 		task_list:     true
 		renderer_opts: RendererOptions{
 			xhtml: true
 		}
-	})
+	)
 	assert html.contains('<input type="checkbox" disabled="" checked="" />')
 }
 
 fn test_footnote_definition_inside_list_item_is_preserved() {
 	src := '- item[^note]\n\n  [^note]: footnote in list\n\noutside[^note]'
-	html := to_html_opts(src, Options{
-		footnotes: true
-	})
+	html := to_html(src, footnotes: true)
 	assert html.contains('item<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
 	assert html.contains('outside<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
 	assert html.contains('<li id="fn-note">footnote in list')
@@ -275,9 +267,7 @@ fn test_footnote_definition_inside_list_item_is_preserved() {
 
 fn test_footnote_definition_inside_blockquote_is_preserved() {
 	src := '> quote[^q]\n>\n> [^q]: footnote in quote'
-	html := to_html_opts(src, Options{
-		footnotes: true
-	})
+	html := to_html(src, footnotes: true)
 	assert html.contains('quote<sup><a href="#fn-q" id="fnref-q">1</a></sup>')
 	assert html.contains('<li id="fn-q">footnote in quote')
 	assert html.contains('<a href="#fnref-q">&#x21A9;</a></li>')
@@ -308,9 +298,7 @@ fn test_link_ref_def_multiline_no_title_next_line_is_content() {
 }
 
 fn test_gfm_helper_sets_core_extension_flags() {
-	md := new(Options{
-		extensions: gfm()
-	})
+	md := Markdown.new(extensions: gfm())
 	assert md.opts.tables
 	assert md.opts.strikethrough
 	assert md.opts.linkify
@@ -318,19 +306,13 @@ fn test_gfm_helper_sets_core_extension_flags() {
 }
 
 fn test_individual_extension_helpers_set_flags() {
-	md_footnote := new(Options{
-		extensions: [Extension(footnote())]
-	})
+	md_footnote := Markdown.new(extensions: [Extension(footnote())])
 	assert md_footnote.opts.footnotes
 
-	md_typographer := new(Options{
-		extensions: [Extension(typographer())]
-	})
+	md_typographer := Markdown.new(extensions: [Extension(typographer())])
 	assert md_typographer.opts.typographer
 
-	md_definition_list := new(Options{
-		extensions: [Extension(definition_list())]
-	})
+	md_definition_list := Markdown.new(extensions: [Extension(definition_list())])
 	assert md_definition_list.opts.definition_list
 }
 
diff --git a/vlib/x/markdown/node.v b/vlib/x/markdown/node.v
index 720c90e1b..f03fb7170 100644
--- a/vlib/x/markdown/node.v
+++ b/vlib/x/markdown/node.v
@@ -7,9 +7,7 @@ import strings
 
 // NodeKind identifies what kind of AST node a Node represents.
 pub enum NodeKind {
-	// ------- document root -------
 	document
-	// ------- block elements -------
 	heading
 	paragraph
 	blockquote
@@ -20,19 +18,15 @@ pub enum NodeKind {
 	thematic_break
 	html_block
 	link_ref_def
-	// GFM block extensions
 	table
 	table_head
 	table_body
 	table_row
 	table_cell
-	// Definition list (Pandoc-style)
 	definition_list
 	definition_term
 	definition_desc
-	// Footnote definition block
 	footnote_def
-	// ------- inline elements -------
 	text
 	emphasis
 	strong
@@ -43,11 +37,8 @@ pub enum NodeKind {
 	raw_html
 	hard_break
 	soft_break
-	// GFM inline extensions
 	strikethrough
-	// Footnote reference inline
 	footnote_ref
-	// Task list checkbox (inline, first child of a list_item)
 	task_checkbox
 }
 
@@ -64,39 +55,22 @@ pub enum Alignment {
 @[heap]
 pub struct Node {
 pub mut:
-	kind NodeKind
-	// ----- block-level fields -----
-	// heading: 1–6
-	level int
-	// list: true when there are no blank lines between items
-	is_tight bool
-	// list: true for ordered (1. 2. 3.), false for bullet (- * +)
+	kind       NodeKind
+	level      int // heading: 1-6
+	is_tight   bool // list: true when there are no blank lines between items
 	is_ordered bool
-	// list: starting number of an ordered list
 	list_start int = 1
-	// fenced_code: the info string after the opening fence (e.g. "go")
 	fence_info string
-	// ----- inline-level fields -----
-	// text / code_span / raw_html / html_block: literal string content
-	literal string
-	// link / image: URL destination
-	dest string
-	// link / image: optional title
-	title string
-	// link: reference label (for reference-style links)
-	label string
-	// task_checkbox: true when the checkbox is checked ([x])
-	checked bool
-	// table_cell: column alignment
-	align Alignment
-	// heading: optional explicit or auto-generated id attribute
-	id string
-	// footnote_ref / footnote_def: footnote label
-	fn_label string
-	// footnote_def: 1-based ordinal assigned during rendering
-	fn_index int
-	// ----- tree structure -----
-	children []&Node
+	literal    string // text / code_span / raw_html / html_block: literal string content
+	dest       string // link / image: URL destination
+	title      string // link / image: optional title
+	label      string // link: reference label (for reference-style links)
+	checked    bool // task_checkbox: true when the checkbox is checked ([x])
+	align      Alignment // table_cell: column alignment
+	id         string // heading: optional explicit or auto-generated id attribute
+	fn_label   string // footnote_ref / footnote_def: footnote label
+	fn_index   int // footnote_def: 1-based ordinal assigned during rendering
+	children   []&Node // tree structure: child nodes
 }
 
 // new_node allocates and returns a new Node of the given kind.
diff --git a/vlib/x/markdown/parser.v b/vlib/x/markdown/parser.v
index 43127ea75..44f62feb3 100644
--- a/vlib/x/markdown/parser.v
+++ b/vlib/x/markdown/parser.v
@@ -5,6 +5,19 @@ module markdown
 
 import strings
 
+// block_level_tags lists HTML tags that start an HTML block (type 6).
+// vfmt off
+const block_level_tags = [
+	'address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body', 'caption', 'center',
+	'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset',
+	'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5',
+	'h6', 'head', 'header', 'hr', 'html', 'iframe', 'legend', 'li', 'link', 'main', 'menu',
+	'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'search',
+	'section', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'track',
+	'ul'
+]
+// vfmt on
+
 // BlockParser parses markdown block structure line by line into an AST.
 // After block parsing, inline content is parsed for every leaf node.
 struct BlockParser {
@@ -16,8 +29,8 @@ mut:
 	fn_defs map[string]&Node
 }
 
-// new_block_parser creates a BlockParser for the given source.
-fn new_block_parser(src string, opts Options, ref_map map[string]LinkRef) BlockParser {
+// BlockParser.new creates a BlockParser for the given source.
+fn BlockParser.new(src string, opts Options, ref_map map[string]LinkRef) BlockParser {
 	normalized := src.replace('\r\n', '\n').replace('\r', '\n')
 	lines := normalized.split('\n')
 	mut refs := map[string]LinkRef{}
@@ -179,8 +192,6 @@ fn (mut p BlockParser) parse_blocks(mut parent Node, indent int) {
 	}
 }
 
-// ---- Thematic break ----
-
 // is_thematic_break returns true if line is a valid thematic break
 // (three or more -, *, or _ with optional spaces).
 fn is_thematic_break(line string) bool {
@@ -204,8 +215,6 @@ fn is_thematic_break(line string) bool {
 	return count >= 3
 }
 
-// ---- ATX headings ----
-
 // try_atx_heading attempts to parse an ATX heading from line.
 // Returns the heading node on success.
 fn (mut p BlockParser) try_atx_heading(line string) ?&Node {
@@ -241,8 +250,6 @@ fn (mut p BlockParser) try_atx_heading(line string) ?&Node {
 	return node
 }
 
-// ---- Fenced code blocks ----
-
 // try_fenced_code attempts to parse a fenced code block starting at p.pos.
 fn (mut p BlockParser) try_fenced_code(line string, indent int) ?&Node {
 	fence_char, fence_len := detect_fence(line)
@@ -297,8 +304,6 @@ fn detect_fence(line string) (u8, int) {
 	return 0, 0
 }
 
-// ---- Indented code block ----
-
 // parse_indented_code collects lines that are indented by at least (indent+4)
 // spaces (or blank) into an indented code block.
 fn (mut p BlockParser) parse_indented_code(indent int) &Node {
@@ -328,17 +333,6 @@ fn (mut p BlockParser) parse_indented_code(indent int) &Node {
 	return node
 }
 
-// ---- HTML blocks ----
-
-// block_level_tags lists HTML tags that start an HTML block (type 6).
-const block_level_tags = ['address', 'article', 'aside', 'base', 'basefont', 'blockquote', 'body',
-	'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dialog', 'dir', 'div', 'dl', 'dt',
-	'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
-	'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe', 'legend', 'li', 'link', 'main',
-	'menu', 'menuitem', 'meta', 'nav', 'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'search',
-	'section', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'track',
-	'ul']
-
 // try_html_block attempts to parse an HTML block starting at p.pos.
 fn (mut p BlockParser) try_html_block(line string, indent int) ?&Node {
 	html_type := detect_html_block_type(line)
@@ -478,8 +472,6 @@ fn is_complete_html_tag(s string) bool {
 	return end == s.len - 1 || s[end + 1..].trim_space().len == 0
 }
 
-// ---- Link reference definitions ----
-
 // try_link_ref_def attempts to parse a link reference definition at p.pos.
 // CommonMark allows the title to appear on the next line when the destination
 // is alone on the first line.  Returns true and advances p.pos if successful.
@@ -537,9 +529,6 @@ fn (mut p BlockParser) try_link_ref_def(line string) bool {
 					title = parsed_title
 					extra_lines = 1
 				}
-				// If the next line starts with something that is not a title
-				// delimiter, we simply leave `title` empty and do not consume
-				// that line (it will be parsed as the next block).
 			}
 		}
 	}
@@ -626,8 +615,6 @@ fn parse_link_title(s string) (string, string) {
 	return '', s
 }
 
-// ---- Blockquote ----
-
 // parse_blockquote parses a blockquote block and returns a blockquote node.
 fn (mut p BlockParser) parse_blockquote(indent int) &Node {
 	mut bq_lines := []string{}
@@ -659,8 +646,6 @@ fn (mut p BlockParser) parse_blockquote(indent int) &Node {
 	return node
 }
 
-// ---- Lists ----
-
 // ListMarker holds parsed list marker information.
 struct ListMarker {
 	is_ordered  bool
@@ -926,8 +911,6 @@ fn (mut p BlockParser) parse_list_item(base_indent int) &Node {
 	return item
 }
 
-// ---- Tables (GFM) ----
-
 // try_table attempts to parse a GFM table starting at p.pos.
 // A table requires a header row, an alignment row (|---|), then data rows.
 fn (mut p BlockParser) try_table(indent int) ?&Node {
@@ -1082,8 +1065,6 @@ fn split_table_cells(line string) []string {
 	return cells
 }
 
-// ---- Definition list ----
-
 // try_definition_list attempts to parse a definition list starting at p.pos.
 fn (mut p BlockParser) try_definition_list(indent int) ?&Node {
 	if p.pos + 1 >= p.lines.len {
@@ -1127,8 +1108,6 @@ fn (mut p BlockParser) try_definition_list(indent int) ?&Node {
 	return dl
 }
 
-// ---- Footnote definitions ----
-
 // try_footnote_def attempts to parse a footnote definition starting at p.pos.
 fn (mut p BlockParser) try_footnote_def(line string, indent int) bool {
 	if !line.starts_with('[^') {
@@ -1164,8 +1143,6 @@ fn (mut p BlockParser) try_footnote_def(line string, indent int) bool {
 	return true
 }
 
-// ---- Paragraph / Setext heading ----
-
 // parse_paragraph parses a paragraph block, upgrading it to a setext heading
 // if the immediately following line is a setext underline (=== or ---).
 fn (mut p BlockParser) parse_paragraph(indent int) &Node {
@@ -1237,10 +1214,6 @@ fn is_setext_underline(line string) bool {
 	return true
 }
 
-// ---- Inline parsing kick-off ----
-// After block parsing, leaf node .literal fields contain raw inline text.
-// The inline parser is invoked lazily by the HTML renderer.
-
 // unescape_string decodes CommonMark backslash escapes in s.
 fn unescape_string(s string) string {
 	if !s.contains('\\') {
diff --git a/vlib/x/markdown/util.v b/vlib/x/markdown/util.v
index 402e56d3c..28b336231 100644
--- a/vlib/x/markdown/util.v
+++ b/vlib/x/markdown/util.v
@@ -5,6 +5,57 @@ module markdown
 
 import strings
 
+// unicode_space lists whitespace-like code points; note that a u8 lookup can only match entries <= 0xFF.
+// vfmt off
+const unicode_space = [
+	` `, // space
+	`\t`, // tab
+	0x0a, // LF
+	0x0b, // Vertical Tab
+	0x0c, // FF
+	0x0d, // CR
+	0x0085, // next line
+	0x00A0, // no-break space
+	0x1680, // ogham space mark
+	0x180E, // mongolian vowel separator
+	0x2000, // en quad
+	0x2001, // em quad
+	0x2002, // en space
+	0x2003, // em space
+	0x2004, // three-per-em space
+	0x2005, // four-per-em space
+	0x2006, // six-per-em space
+	0x2007, // figure space
+	0x2008, // punctuation space
+	0x2009, // thin space
+	0x200A, // hair space
+	0x200B, // zero width space
+	0x200C, // zero width non-joiner
+	0x200D, // zero width joiner
+	0x2028, // line separator
+	0x2029, // paragraph separator
+	0x202F, // narrow no-break space
+	0x205F, // medium mathematical space
+	0x2060, // word joiner
+	0x3000, // ideographic space
+	0xFEFF, // zero width non-breaking space
+]!
+
+// ascii_punct lists ASCII punctuation characters
+const ascii_punct = [
+	`!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `-`, `.`, `/`, `:`,
+	`;`, `<`, `=`, `>`, `?`, `@`, `[`, `\\`, `]`, `^`, `_`, `\``, `{`, `|`, `}`, `~`,
+]!
+
+// alpha lists ASCII letters a-z and A-Z
+const alpha = [
+	`a`, `b`, `c`, `d`, `e`, `f`, `g`, `h`, `i`, `j`, `k`, `l`, `m`,
+	`n`, `o`, `p`, `q`, `r`, `s`, `t`, `u`, `v`, `w`, `x`, `y`, `z`,
+	`A`, `B`, `C`, `D`, `E`, `F`, `G`, `H`, `I`, `J`, `K`, `L`, `M`,
+	`N`, `O`, `P`, `Q`, `R`, `S`, `T`, `U`, `V`, `W`, `X`, `Y`, `Z`,
+]!
+// vfmt on
+
 // html_escape replaces HTML special characters in s with their entity equivalents.
 fn html_escape(s string) string {
 	if s.index_any('&<>"') == -1 {
@@ -79,26 +130,26 @@ fn ascii_lower(c u8) u8 {
 // is_unicode_space returns true for CommonMark Unicode whitespace.
 @[inline]
 fn is_unicode_space(c u8) bool {
-	return c == ` ` || c == `\t` || c == `\n` || c == `\r` || c == 0x0c || c == 0x0b
+	return c == ` ` || (c >= `\t` && c <= `\r`) // ASCII only: 0x85/0xA0 are UTF-8 continuation bytes
 }
 
 // is_ascii_punct returns true if c is an ASCII punctuation character.
 @[inline]
 fn is_ascii_punct(c u8) bool {
-	return (c >= `!` && c <= `/`) || (c >= `:` && c <= `@`) || (c >= `[` && c <= 96)
-		|| (c >= `{` && c <= `~`)
+	return c in ascii_punct
 }
 
+const digits = [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`]!
 // is_digit returns true if c is an ASCII decimal digit.
 @[inline]
 fn is_digit(c u8) bool {
-	return c >= `0` && c <= `9`
+	return c in digits
 }
 
 // is_alpha returns true if c is an ASCII letter.
 @[inline]
 fn is_alpha(c u8) bool {
-	return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`)
+	return c in alpha
 }
 
 // is_alnum returns true if c is an ASCII letter or digit.
-- 
2.39.5