From a92b5a36ba604b704fd85a7554c60454cdbe298b Mon Sep 17 00:00:00 2001
From: Alexander Medvednikov
Date: Wed, 15 Apr 2026 15:56:46 +0300
Subject: [PATCH] vdoc: fix markdown rendering differing from GitHub (fixes #20534)

---
Review note (not part of the commit message): markdown_line_is_list_item now
accepts ordered markers of 1 to 9 digits followed by `.` or `)`, so that items
such as `10. foo` are no longer folded into the previous line. The new helpers
also gained doc comments. The blob ids on the `index` lines below were not
regenerated after this edit.

 cmd/tools/vdoc/document/doc_test.v |  21 ++++++
 cmd/tools/vdoc/document/utils.v    |  20 ++++++
 cmd/tools/vdoc/html.v              | 105 ++++++++++++++++++++++++++++-
 cmd/tools/vdoc/vdoc_test.v         |  26 ++++++++
 4 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/cmd/tools/vdoc/document/doc_test.v b/cmd/tools/vdoc/document/doc_test.v
index 7162f7f34..c6cf89037 100644
--- a/cmd/tools/vdoc/document/doc_test.v
+++ b/cmd/tools/vdoc/document/doc_test.v
@@ -92,3 +92,24 @@ fn test_merge_doc_comments_keeps_blockquotes_on_separate_lines() {
 	]
 	assert doc.merge_doc_comments(comments).trim_space() == '> **Note**\n> line one\n> line two'
 }
+
+fn test_merge_doc_comments_preserves_readme_markdown() {
+	readme := '# Description
+
+`regex` is a small but powerful regular expression library,
+written in pure V.
+
+1. The basic atomic elements of this regex engine are the tokens.
+   In a query string a simple character is a token.
+
+> **Note**
+> `regex` is *not* PCRE compatible.
+'
+	comments := [
+		doc.DocComment{
+			is_readme: true
+			text:      readme
+		},
+	]
+	assert doc.merge_doc_comments(comments) == readme
+}
diff --git a/cmd/tools/vdoc/document/utils.v b/cmd/tools/vdoc/document/utils.v
index 067fd90fb..d2561ed57 100644
--- a/cmd/tools/vdoc/document/utils.v
+++ b/cmd/tools/vdoc/document/utils.v
@@ -46,6 +46,9 @@ pub fn merge_doc_comments(comments []DocComment) string {
 	if comments.len == 0 {
 		return ''
 	}
+	if raw_markdown := merge_raw_markdown_comments(comments) {
+		return raw_markdown
+	}
 	mut doc_comments := []string{}
 	for i := comments.len - 1; i >= 0; i-- {
 		if comments[i].is_multi {
@@ -143,6 +146,23 @@ pub fn merge_doc_comments(comments []DocComment) string {
 	return comment
 }
 
+// merge_raw_markdown_comments returns the texts of the non-multiline `comments`,
+// stripped of leading `\x01` bytes and joined with newlines in their original order.
+// It returns `none` unless every comment is marked as `is_readme`.
+fn merge_raw_markdown_comments(comments []DocComment) ?string {
+	if !comments.all(it.is_readme) {
+		return none
+	}
+	mut raw_markdown := []string{}
+	for i := comments.len - 1; i >= 0; i-- {
+		if comments[i].is_multi {
+			continue
+		}
+		raw_markdown << comments[i].text.trim_left('\x01')
+	}
+	return raw_markdown.reverse().join('\n')
+}
+
 // stmt_signature returns the signature of a given `ast.Stmt` node.
 pub fn (mut d Doc) stmt_signature(stmt ast.Stmt) string {
 	match stmt {
diff --git a/cmd/tools/vdoc/html.v b/cmd/tools/vdoc/html.v
index 2c4b87951..0d3714d01 100644
--- a/cmd/tools/vdoc/html.v
+++ b/cmd/tools/vdoc/html.v
@@ -731,21 +731,29 @@ fn (vd &VDoc) doc_node_html(dn doc.DocNode, link string, md_link_base string, he
 }
 
 fn prepare_markdown_for_html(text string) string {
-	if !text.contains('>') {
+	if !text.contains('\n') {
 		return text
 	}
 	lines := text.split_into_lines()
 	mut prepared := []string{cap: lines.len}
 	mut is_codeblock := false
+	mut prev_line := ''
 	for i, line in lines {
 		trimmed := line.trim_space()
 		if trimmed.starts_with('```') {
 			prepared << line
 			is_codeblock = !is_codeblock
+			prev_line = line
 			continue
 		}
 		if is_codeblock {
 			prepared << line
+			prev_line = line
+			continue
+		}
+		if line_continues_previous_block(prev_line, line) && prepared.len > 0 {
+			prepared[prepared.len - 1] += ' ' + trimmed
+			prev_line = line
 			continue
 		}
 		next_line := if i + 1 < lines.len { lines[i + 1] } else { '' }
@@ -754,10 +762,105 @@ fn prepare_markdown_for_html(text string) string {
 		} else {
 			prepared << line
 		}
+		prev_line = line
 	}
 	return prepared.join('\n')
 }
 
+// line_continues_previous_block reports whether `line` is a soft-wrapped continuation
+// of `prev_line`, so that the two can be joined with a single space before rendering.
+// Blank lines, blockquotes, code fences and lines that open a new block never continue;
+// nothing continues a heading, a table row or a rule either.
+fn line_continues_previous_block(prev_line string, line string) bool {
+	prev_trimmed := prev_line.trim_space()
+	trimmed := line.trim_space()
+	if prev_trimmed == '' || trimmed == '' {
+		return false
+	}
+	if prev_trimmed.starts_with('>') || trimmed.starts_with('>') {
+		return false
+	}
+	if prev_trimmed.starts_with('```') || trimmed.starts_with('```') {
+		return false
+	}
+	if markdown_line_starts_new_block(line) {
+		return false
+	}
+	if prev_trimmed.starts_with('#') || prev_trimmed.starts_with('|')
+		|| markdown_line_is_horizontal_rule(prev_trimmed) {
+		return false
+	}
+	return true
+}
+
+// markdown_line_starts_new_block reports whether `line` opens a block of its own:
+// a heading, blockquote, table row, code fence, rule, list item, or a line indented
+// by at least 4 columns.
+fn markdown_line_starts_new_block(line string) bool {
+	trimmed := line.trim_space()
+	if trimmed == '' {
+		return false
+	}
+	if trimmed.starts_with('#') || trimmed.starts_with('>') || trimmed.starts_with('|')
+		|| trimmed.starts_with('```') || markdown_line_is_horizontal_rule(trimmed) {
+		return true
+	}
+	if markdown_indent_width(line) >= 4 {
+		return true
+	}
+	return markdown_line_is_list_item(trimmed)
+}
+
+// markdown_line_is_list_item reports whether the already trimmed `line` starts with a
+// list marker: a `-`, `*` or `+` bullet, or 1 to 9 digits followed by `.` or `)`.
+// The marker has to be followed by a space.
+fn markdown_line_is_list_item(line string) bool {
+	if line.len > 1 && line[1] == ` ` && line[0] in [`-`, `*`, `+`] {
+		return true
+	}
+	// Count the leading digits; CommonMark caps ordered list numbers at 9 digits.
+	mut digits := 0
+	for digits < line.len && digits < 9 && line[digits].is_digit() {
+		digits++
+	}
+	return digits > 0 && digits + 1 < line.len && line[digits] in [`.`, `)`]
+		&& line[digits + 1] == ` `
+}
+
+// markdown_line_is_horizontal_rule reports whether `line`, ignoring spaces, consists
+// of at least 3 repetitions of one of `-`, `=`, `*`, `_` or `~` and nothing else.
+fn markdown_line_is_horizontal_rule(line string) bool {
+	line_no_spaces := line.replace(' ', '')
+	if line_no_spaces.len < 3 {
+		return false
+	}
+	for ch in ['-', '=', '*', '_', '~'] {
+		if line_no_spaces.starts_with(ch.repeat(3))
+			&& line_no_spaces.count(ch) == line_no_spaces.len {
+			return true
+		}
+	}
+	return false
+}
+
+// markdown_indent_width returns the width of the leading whitespace of `line`,
+// counting each space as 1 column and each tab as 4 columns.
+fn markdown_indent_width(line string) int {
+	mut width := 0
+	for ch in line {
+		if ch == ` ` {
+			width++
+			continue
+		}
+		if ch == `\t` {
+			width += 4
+			continue
+		}
+		break
+	}
+	return width
+}
+
 fn blockquote_line_needs_hard_break(line string, next_line string) bool {
 	if line.ends_with(' ') {
 		return false
diff --git a/cmd/tools/vdoc/vdoc_test.v b/cmd/tools/vdoc/vdoc_test.v
index 5d3f02963..154039e60 100644
--- a/cmd/tools/vdoc/vdoc_test.v
+++ b/cmd/tools/vdoc/vdoc_test.v
@@ -275,3 +275,29 @@ fn test_prepare_markdown_for_html_skips_fenced_code_blocks() {
 	input := '```sh\n> prompt\n> next\n```'
 	assert prepare_markdown_for_html(input) == input
 }
+
+fn test_markdown_renderer_preserves_wrapped_readme_markdown() ! {
+	input := '1. The basic atomic elements of this regex engine are the tokens.\n In a query string a simple character is a token.\n\n- The basic element **is the token not the sequence of symbols**,\n and the most simple token, is a single character.\n\n- `|` **the OR operator acts on tokens,** for example `abc|ebc` is not\n `abc` OR `ebc`.'
+	mut renderer := markdown.HtmlRenderer{
+		transformer: &MdHtmlCodeHighlighter{
+			table: ast.new_table()
+		}
+	}
+	out := markdown.render(prepare_markdown_for_html(input), mut renderer)!
+	assert !out.contains('tokens.In')
+	assert !out.contains('mostsimple')
+	assert !out.contains('notabc')
+	assert out.contains('tokens. In a query string a simple character is a token.')
+	assert out.contains('the most simple token')
+	assert out.contains('is not abc OR ebc')
+}
+
+fn test_markdown_line_is_list_item_accepts_commonmark_ordered_markers() {
+	assert markdown_line_is_list_item('- item')
+	assert markdown_line_is_list_item('1. item')
+	assert markdown_line_is_list_item('10. item')
+	assert markdown_line_is_list_item('2) item')
+	assert !markdown_line_is_list_item('10.5 is a number')
+	assert !markdown_line_is_list_item('1234567890. has too many digits')
+	assert !markdown_line_is_list_item('plain text')
+}
-- 
2.39.5