From a92b5a36ba604b704fd85a7554c60454cdbe298b Mon Sep 17 00:00:00 2001
From: Alexander Medvednikov <alexander@medvednikov.com>
Date: Wed, 15 Apr 2026 15:56:46 +0300
Subject: [PATCH] vdoc: fix markdown rendering differing from GitHub (fixes
 #20534)

---
 cmd/tools/vdoc/document/doc_test.v | 21 ++++++++
 cmd/tools/vdoc/document/utils.v    | 17 ++++++
 cmd/tools/vdoc/html.v              | 85 +++++++++++++++++++++++++++++-
 cmd/tools/vdoc/vdoc_test.v         | 16 ++++++
 4 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/cmd/tools/vdoc/document/doc_test.v b/cmd/tools/vdoc/document/doc_test.v
index 7162f7f34..c6cf89037 100644
--- a/cmd/tools/vdoc/document/doc_test.v
+++ b/cmd/tools/vdoc/document/doc_test.v
@@ -92,3 +92,24 @@ fn test_merge_doc_comments_keeps_blockquotes_on_separate_lines() {
 	]
 	assert doc.merge_doc_comments(comments).trim_space() == '> **Note**\n> line one\n> line two'
 }
+
+// README text must come back from merge_doc_comments byte-for-byte.
+fn test_merge_doc_comments_preserves_readme_markdown() {
+	readme := '# Description
+
+`regex` is a small but powerful regular expression library,
+written in pure V.
+
+1. The basic atomic elements of this regex engine are the tokens.
+   In a query string a simple character is a token.
+
+> **Note**
+> `regex` is *not* PCRE compatible.
+'
+	assert readme == doc.merge_doc_comments([
+		doc.DocComment{
+			is_readme: true
+			text:      readme
+		},
+	])
+}
diff --git a/cmd/tools/vdoc/document/utils.v b/cmd/tools/vdoc/document/utils.v
index 067fd90fb..d2561ed57 100644
--- a/cmd/tools/vdoc/document/utils.v
+++ b/cmd/tools/vdoc/document/utils.v
@@ -46,6 +46,9 @@ pub fn merge_doc_comments(comments []DocComment) string {
 	if comments.len == 0 {
 		return ''
 	}
+	if raw_markdown := merge_raw_markdown_comments(comments) {
+		return raw_markdown
+	}
 	mut doc_comments := []string{}
 	for i := comments.len - 1; i >= 0; i-- {
 		if comments[i].is_multi {
@@ -143,6 +146,20 @@ pub fn merge_doc_comments(comments []DocComment) string {
 	return comment
 }
 
+// merge_raw_markdown_comments returns the texts of `comments` joined by
+// newlines in their original order. It returns none unless every comment
+// has `is_readme` set.
+fn merge_raw_markdown_comments(comments []DocComment) ?string {
+	if comments.any(!it.is_readme) {
+		return none
+	}
+	// Comments flagged `is_multi` are left out of the result.
+	kept := comments.filter(!it.is_multi)
+	// Leading `\x01` bytes are trimmed from each remaining text.
+	texts := kept.map(it.text.trim_left('\x01'))
+	return texts.join('\n')
+}
+
 // stmt_signature returns the signature of a given `ast.Stmt` node.
 pub fn (mut d Doc) stmt_signature(stmt ast.Stmt) string {
 	match stmt {
diff --git a/cmd/tools/vdoc/html.v b/cmd/tools/vdoc/html.v
index 2c4b87951..0d3714d01 100644
--- a/cmd/tools/vdoc/html.v
+++ b/cmd/tools/vdoc/html.v
@@ -731,21 +731,29 @@ fn (vd &VDoc) doc_node_html(dn doc.DocNode, link string, md_link_base string, he
 }
 
 fn prepare_markdown_for_html(text string) string {
-	if !text.contains('>') {
+	if !text.contains('\n') {
 		return text
 	}
 	lines := text.split_into_lines()
 	mut prepared := []string{cap: lines.len}
 	mut is_codeblock := false
+	mut prev_line := ''
 	for i, line in lines {
 		trimmed := line.trim_space()
 		if trimmed.starts_with('```') {
 			prepared << line
 			is_codeblock = !is_codeblock
+			prev_line = line
 			continue
 		}
 		if is_codeblock {
 			prepared << line
+			prev_line = line
+			continue
+		}
+		if line_continues_previous_block(prev_line, line) && prepared.len > 0 {
+			prepared[prepared.len - 1] += ' ' + trimmed
+			prev_line = line
 			continue
 		}
 		next_line := if i + 1 < lines.len { lines[i + 1] } else { '' }
@@ -754,10 +762,85 @@ fn prepare_markdown_for_html(text string) string {
 		} else {
 			prepared << line
 		}
+		prev_line = line
 	}
 	return prepared.join('\n')
 }
 
+// line_continues_previous_block reports whether `line` is a soft-wrapped
+// continuation of `prev_line` that may be appended to it with one space.
+fn line_continues_previous_block(prev_line string, line string) bool {
+	prev_trimmed := prev_line.trim_space()
+	if prev_trimmed == '' || line.trim_space() == '' {
+		return false
+	}
+	// Two trailing spaces are a hard break; joining would silently drop it.
+	if prev_line.ends_with('  ') {
+		return false
+	}
+	if markdown_line_starts_new_block(line) {
+		return false
+	}
+	for prefix in ['>', '```', '#', '|'] {
+		if prev_trimmed.starts_with(prefix) {
+			return false
+		}
+	}
+	return !markdown_line_is_horizontal_rule(prev_trimmed)
+}
+
+// markdown_line_starts_new_block reports whether `line` opens a block of its
+// own: `#`, `>`, `|`, a fence, a rule, 4+ columns of indent or a list item.
+fn markdown_line_starts_new_block(line string) bool {
+	stripped := line.trim_space()
+	// Checked first so that whitespace-only lines never count as indented.
+	if stripped == '' {
+		return false
+	}
+	if stripped[0] in [`#`, `>`, `|`] || stripped.starts_with('```') {
+		return true
+	}
+	return markdown_line_is_horizontal_rule(stripped) || markdown_indent_width(line) >= 4
+		|| markdown_line_is_list_item(stripped)
+}
+
+// markdown_line_is_list_item accepts `- `, `* `, `+ ` bullets and `N. ` with any digit count.
+fn markdown_line_is_list_item(line string) bool {
+	rest := line.trim_left('0123456789')
+	bullet := line.len > 1 && line[1] == ` ` && line[0] in [`-`, `*`, `+`]
+	return bullet || (rest.len < line.len && rest.starts_with('. '))
+}
+
+// markdown_line_is_horizontal_rule reports whether `line`, ignoring spaces,
+// consists of at least three repetitions of one of `-`, `=`, `*`, `_`, `~`.
+fn markdown_line_is_horizontal_rule(line string) bool {
+	compact := line.replace(' ', '')
+	if compact.len < 3 {
+		return false
+	}
+	marker := compact[0]
+	if marker !in [`-`, `=`, `*`, `_`, `~`] {
+		return false
+	}
+	return compact.bytes().all(it == marker)
+}
+
+// markdown_indent_width returns the width of the leading whitespace of `line`.
+// Every space counts as one column and every tab as four; tabs are not
+// aligned to tab stops.
+fn markdown_indent_width(line string) int {
+	mut columns := 0
+	for b in line {
+		step := if b == ` ` { 1 } else if b == `\t` { 4 } else { 0 }
+		// The first byte that is neither a space nor a tab ends the indent.
+		if step == 0 {
+			break
+		}
+		columns += step
+	}
+	return columns
+}
+
 fn blockquote_line_needs_hard_break(line string, next_line string) bool {
 	if line.ends_with('  ') {
 		return false
diff --git a/cmd/tools/vdoc/vdoc_test.v b/cmd/tools/vdoc/vdoc_test.v
index 5d3f02963..154039e60 100644
--- a/cmd/tools/vdoc/vdoc_test.v
+++ b/cmd/tools/vdoc/vdoc_test.v
@@ -275,3 +275,19 @@ fn test_prepare_markdown_for_html_skips_fenced_code_blocks() {
 	input := '```sh\n> prompt\n> next\n```'
 	assert prepare_markdown_for_html(input) == input
 }
+
+// Soft-wrapped list lines must keep the space between the joined words after
+// rendering: the output has to contain `tokens. In`, never `tokens.In`.
+fn test_markdown_renderer_preserves_wrapped_readme_markdown() ! {
+	input := '1. The basic atomic elements of this regex engine are the tokens.\n   In a query string a simple character is a token.\n\n- The basic element **is the token not the sequence of symbols**,\n  and the most simple token, is a single character.\n\n- `|` **the OR operator acts on tokens,** for example `abc|ebc` is not\n  `abc` OR `ebc`.'
+	mut renderer := markdown.HtmlRenderer{
+		transformer: &MdHtmlCodeHighlighter{
+			table: ast.new_table()
+		}
+	}
+	html := markdown.render(prepare_markdown_for_html(input), mut renderer)!
+	assert !['tokens.In', 'mostsimple', 'not<code>abc</code>'].any(html.contains(it))
+	assert html.contains('tokens. In a query string a simple character is a token.')
+	assert html.contains('the most simple token')
+	assert html.contains('is not <code>abc</code> OR <code>ebc</code>')
+}
-- 
2.39.5