| 1 | // Copyright 2026 The V Language. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module markdown |
| 5 | |
// test_to_html_heading checks that `#` and `##` prefixed lines render as
// <h1> and <h2> elements respectively.
fn test_to_html_heading() {
	expected := {
		'# Hello':  '<h1>Hello</h1>\n'
		'## World': '<h2>World</h2>\n'
	}
	for src, want in expected {
		assert to_html(src) == want
	}
}
| 10 | |
// test_to_html_paragraph checks that a plain line of text is wrapped in <p>.
fn test_to_html_paragraph() {
	rendered := to_html('Hello world')
	assert rendered == '<p>Hello world</p>\n'
}
| 14 | |
// test_to_html_thematic_break checks that `---` on its own renders as <hr>.
fn test_to_html_thematic_break() {
	rendered := to_html('---')
	assert rendered == '<hr>\n'
}
| 18 | |
// test_to_html_emphasis checks that single-asterisk emphasis emits an <em> tag.
fn test_to_html_emphasis() {
	assert to_html('*em*').contains('<em>')
}
| 23 | |
// test_to_html_strong checks that double-asterisk emphasis emits a <strong> tag.
fn test_to_html_strong() {
	assert to_html('**bold**').contains('<strong>')
}
| 28 | |
// test_to_html_code_span checks that a backtick span renders as a <code>
// element wrapping the span's text.
fn test_to_html_code_span() {
	html := to_html('`code`')
	// Match the whole element: a bare contains('code') is already satisfied by
	// the tag name inside '<code>' and would not prove the content was emitted.
	assert html.contains('<code>code</code>')
}
| 34 | |
// test_to_html_link checks that an inline link produces an anchor with the
// destination as href and that the link text is present in the output.
fn test_to_html_link() {
	rendered := to_html('[link](https://example.com)')
	for fragment in ['<a href="https://example.com">', 'link'] {
		assert rendered.contains(fragment)
	}
}
| 40 | |
// test_html_escape_in_text checks that a literal `<` in running text is
// HTML-escaped in the rendered output.
fn test_html_escape_in_text() {
	html := to_html('A < B')
	// A bare '<' would match the opening '<p>' tag, so look for the escaped
	// form and make sure the raw text did not pass through untouched.
	assert html.contains('&lt;')
	assert !html.contains('A < B')
}
| 45 | |
// test_named_entities_are_decoded_before_render checks that named entity
// references in the source are decoded to their characters, and that the
// renderer then escapes the result where HTML requires it.
fn test_named_entities_are_decoded_before_render() {
	// The input must be the entity reference itself; passing the already
	// decoded character would not exercise entity decoding at all.
	assert to_html('&copy;') == '<p>©</p>\n'
	// `&amp;` decodes to `&`, which has to be escaped again on output.
	assert to_html('&amp;') == '<p>&amp;</p>\n'
}
| 50 | |
// test_unknown_named_entity_is_left_as_literal_text checks that an `&...;`
// sequence that is not a known entity is kept as text instead of being decoded.
fn test_unknown_named_entity_is_left_as_literal_text() {
	// The input starts with `&not`, which a lenient decoder would turn into `¬`;
	// the expected output keeps the text and only escapes the ampersand.
	assert to_html('&not_a_real_entity;') == '<p>&amp;not_a_real_entity;</p>\n'
}
| 54 | |
// test_numeric_entities_are_decoded checks that decimal and hexadecimal
// numeric character references are both decoded to the referenced character.
fn test_numeric_entities_are_decoded() {
	// U+00A9 written once in decimal and once in hex; both must yield `©`.
	assert to_html('&#169; &#xA9;') == '<p>© ©</p>\n'
}
| 58 | |
// test_empty_input checks that rendering an empty document yields an empty string.
fn test_empty_input() {
	rendered := to_html('')
	assert rendered.len == 0
}
| 62 | |
// test_multiline_paragraph checks that two consecutive text lines still produce
// a paragraph that contains the first line's text.
fn test_multiline_paragraph() {
	rendered := to_html('line one\nline two')
	for fragment in ['<p>', 'line one'] {
		assert rendered.contains(fragment)
	}
}
| 68 | |
// test_fenced_code checks that a backtick-fenced block with an info string
// renders a <code element containing the fenced source text.
fn test_fenced_code() {
	rendered := to_html('```go\nfn main() {}\n```')
	for fragment in ['<code', 'fn main'] {
		assert rendered.contains(fragment)
	}
}
| 74 | |
// test_list checks that a `-` bullet renders an unordered list with one item.
fn test_list() {
	rendered := to_html('- item')
	for fragment in ['<ul>', '<li>', 'item'] {
		assert rendered.contains(fragment)
	}
}
| 81 | |
// test_ordered_list checks that a `1.` marker renders an ordered list with one item.
fn test_ordered_list() {
	rendered := to_html('1. first')
	for fragment in ['<ol>', '<li>', 'first'] {
		assert rendered.contains(fragment)
	}
}
| 88 | |
// test_ordered_list_marker_requires_whitespace_or_eol checks that `1.` or `1)`
// immediately followed by text stays a plain paragraph instead of a list.
fn test_ordered_list_marker_requires_whitespace_or_eol() {
	for literal in ['1.test', '1)test'] {
		assert to_html(literal) == '<p>${literal}</p>\n'
	}
}
| 93 | |
// test_ordered_list_marker_allows_space_tab_or_eol checks that an ordered list
// marker followed by a space, a tab, or the end of the line starts a list.
fn test_ordered_list_marker_allows_space_tab_or_eol() {
	single_item := '<ol>\n<li>item</li>\n</ol>\n'
	assert to_html('1. item') == single_item
	assert to_html('1)\titem') == single_item
	// A marker with nothing after it yields an empty list item.
	assert to_html('1.') == '<ol>\n<li></li>\n</ol>\n'
}
| 99 | |
// test_blockquote checks that a `>` prefixed line renders a <blockquote>
// element that contains the quoted text as a paragraph.
fn test_blockquote() {
	html := to_html('> quote')
	assert html.contains('<blockquote>')
	// contains('quote') alone is already satisfied by the tag name, so match
	// the quoted paragraph to prove the content itself was rendered.
	assert html.contains('<p>quote</p>')
}
| 105 | |
// test_list_multiple_items checks that two consecutive bullets both appear in
// an unordered list.
fn test_list_multiple_items() {
	rendered := to_html('- item 1\n- item 2')
	for fragment in ['<ul>', 'item 1', 'item 2'] {
		assert rendered.contains(fragment)
	}
}
| 112 | |
// test_invalid_link_ref_def_does_not_create_reference checks that a definition
// whose `<...>` destination is never closed does not register a reference, so
// the later `[bad]` stays literal text.
fn test_invalid_link_ref_def_does_not_create_reference() {
	rendered := to_html('[bad]: <https://example.com\n\n[bad]')
	assert rendered.contains('<a href=') == false
	assert rendered.contains('[bad]')
}
| 119 | |
// test_valid_link_ref_def_is_resolved checks that a well-formed definition with
// an angle-bracket destination turns the shortcut reference into a link.
fn test_valid_link_ref_def_is_resolved() {
	rendered := to_html('[ok]: <https://example.com>\n\n[ok]')
	assert rendered.contains('<a href="https://example.com">ok</a>')
}
| 125 | |
// test_full_reference_does_not_fallback_to_shortcut_when_label_is_undefined
// checks that `[text][missing]` stays literal even though `[text]` alone is a
// defined label.
fn test_full_reference_does_not_fallback_to_shortcut_when_label_is_undefined() {
	rendered := to_html('[text]: https://example.com/text\n\n[text][missing]')
	assert rendered == '<p>[text][missing]</p>\n'
}
| 131 | |
// test_shortcut_reference_still_resolves_normally checks that a bare `[text]`
// resolves against its definition and renders as a link.
fn test_shortcut_reference_still_resolves_normally() {
	rendered := to_html('[text]: https://example.com/text\n\n[text]')
	assert rendered == '<p><a href="https://example.com/text">text</a></p>\n'
}
| 137 | |
// test_gfm_table_header_uses_th_cells checks that, with the gfm() extensions
// enabled, the header row of a pipe table is rendered inside <thead> using <th>.
fn test_gfm_table_header_uses_th_cells() {
	table_src := '| a | b |\n| --- | --- |\n| 1 | 2 |'
	rendered := to_html(table_src, extensions: gfm())
	for fragment in ['<thead>', '<th>a</th>', '<th>b</th>'] {
		assert rendered.contains(fragment)
	}
}
| 145 | |
// test_emphasis_underscore_intraword_does_not_emphasize checks that underscores
// touching word characters are rendered as literal text, not emphasis.
fn test_emphasis_underscore_intraword_does_not_emphasize() {
	for literal in ['foo_bar_baz', 'foo_bar_', '_foo_bar'] {
		assert to_html(literal) == '<p>${literal}</p>\n'
	}
}
| 151 | |
// test_emphasis_star_delimiters_still_emphasize checks that `*` delimiters
// inside a word do produce emphasis.
fn test_emphasis_star_delimiters_still_emphasize() {
	rendered := to_html('a*b*c')
	assert rendered == '<p>a<em>b</em>c</p>\n'
}
| 155 | |
// test_emphasis_triple_delimiters checks that runs of three `*` or `_` render
// as <em> wrapping <strong>, including when `***` sits inside a word.
fn test_emphasis_triple_delimiters() {
	cases := [
		['***foo***', '<p><em><strong>foo</strong></em></p>\n'],
		['___foo___', '<p><em><strong>foo</strong></em></p>\n'],
		['foo***bar***baz', '<p>foo<em><strong>bar</strong></em>baz</p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
| 161 | |
// test_emphasis_nested_mixed_runs checks nesting of <em> inside <strong> and
// vice versa when delimiter runs of different lengths are combined.
fn test_emphasis_nested_mixed_runs() {
	expected := {
		'**foo *bar***':     '<p><strong>foo <em>bar</em></strong></p>\n'
		'*foo **bar***':     '<p><em>foo <strong>bar</strong></em></p>\n'
		'*foo**bar**baz*':   '<p><em>foo<strong>bar</strong>baz</em></p>\n'
		'*foo **bar** baz*': '<p><em>foo <strong>bar</strong> baz</em></p>\n'
		'**foo *bar* baz**': '<p><strong>foo <em>bar</em> baz</strong></p>\n'
	}
	for src, want in expected {
		assert to_html(src) == want
	}
}
| 169 | |
// test_emphasis_multiple_of_three_resolution checks how a leading `***` run is
// split between <em> and <strong> depending on which closer comes first.
fn test_emphasis_multiple_of_three_resolution() {
	cases := [
		['***foo** bar*', '<p><em><strong>foo</strong> bar</em></p>\n'],
		['***foo* bar**', '<p><strong><em>foo</em> bar</strong></p>\n'],
		['***foo**bar*', '<p><em><strong>foo</strong>bar</em></p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
| 175 | |
// test_emphasis_underscore_punctuation_flanking checks that `_` next to
// punctuation can open/close emphasis, while `__` touching word characters
// is left as literal text.
fn test_emphasis_underscore_punctuation_flanking() {
	assert to_html('foo-_(bar)_') == '<p>foo-<em>(bar)</em></p>\n'
	for literal in ['foo__bar__baz', 'foo__bar__', '__foo__bar'] {
		assert to_html(literal) == '<p>${literal}</p>\n'
	}
}
| 182 | |
// test_setext_heading_leading_spaces checks that a setext underline is still
// recognised when it is indented by one, two, or three spaces.
fn test_setext_heading_leading_spaces() {
	// CommonMark allows 0-3 leading spaces on the setext underline; each case
	// uses a different indent so the whole allowed range is exercised.
	assert to_html('Foo\n ===') == '<h1>Foo</h1>\n'
	assert to_html('Foo\n  ---') == '<h2>Foo</h2>\n'
	assert to_html('Foo\n   ===') == '<h1>Foo</h1>\n'
}
| 189 | |
// test_emphasis_leftover_delimiters_are_literal checks that delimiters left
// without a partner are emitted as plain text.
fn test_emphasis_leftover_delimiters_are_literal() {
	expected := {
		'*a**b**':  '<p>*a<strong>b</strong></p>\n'
		'**a**b*':  '<p><strong>a</strong>b*</p>\n'
		'*foo bar': '<p>*foo bar</p>\n'
	}
	for src, want in expected {
		assert to_html(src) == want
	}
}
| 196 | |
// test_emphasis_mixed_star_underscore checks that `*` and `_` delimiters are
// matched independently, so one kind can nest inside the other.
fn test_emphasis_mixed_star_underscore() {
	star_outer := to_html('*foo _bar_ baz*')
	assert star_outer == '<p><em>foo <em>bar</em> baz</em></p>\n'

	underscore_outer := to_html('__foo *bar* baz__')
	assert underscore_outer == '<p><strong>foo <em>bar</em> baz</strong></p>\n'
}
| 202 | |
// test_link_ref_def_with_leading_spaces checks that a link reference definition
// indented by one, two, or three spaces is still recognised and resolved.
fn test_link_ref_def_with_leading_spaces() {
	// CommonMark allows 0-3 leading spaces before a link ref def; the three
	// cases use 1, 2 and 3 spaces so each permitted indent is covered.
	assert to_html(' [foo]: https://example.com\n\n[foo]') == '<p><a href="https://example.com">foo</a></p>\n'
	assert to_html('  [bar]: https://example.org\n\n[bar]') == '<p><a href="https://example.org">bar</a></p>\n'
	assert to_html('   [baz]: https://v-lang.io\n\n[baz]') == '<p><a href="https://v-lang.io">baz</a></p>\n'
}
| 209 | |
// test_link_ref_def_with_four_leading_spaces_is_not_a_ref checks that a
// definition-looking line indented by four spaces is not registered as a
// reference, so the later `[foo]` does not become a link.
fn test_link_ref_def_with_four_leading_spaces_is_not_a_ref() {
	// Four leading spaces start an indented code block, not a reference definition.
	// The literal must really carry four spaces; with fewer it is a valid definition.
	src := '    [foo]: https://example.com\n\n[foo]'
	html := to_html(src)
	assert !html.contains('<a href=')
	assert html.contains('[foo]: https://example.com')
}
| 217 | |
// test_setext_heading_multiline_text checks that a two-line setext heading
// keeps the line break between its lines inside the <h1>.
fn test_setext_heading_multiline_text() {
	rendered := to_html('Foo\nbar\n===')
	assert rendered == '<h1>Foo\nbar</h1>\n'
}
| 223 | |
// test_task_list checks that, with task_list enabled, `[ ]` renders an
// unchecked checkbox and both `[x]` and `[X]` render checked checkboxes.
fn test_task_list() {
	src := '- [ ] unchecked\n- [x] checked\n- [X] also checked'
	html := to_html(src, task_list: true)
	// Count the inputs instead of only testing presence, so the lowercase and
	// uppercase markers are each verified to produce a checked box.
	assert html.count('<input type="checkbox" disabled="">') == 1
	assert html.count('<input type="checkbox" disabled="" checked="">') == 2
	assert html.contains('unchecked')
	// 'checked' alone is a substring of 'unchecked' and of the checked attribute,
	// so look for text that only the third item carries.
	assert html.contains('also checked')
}
| 232 | |
// test_task_list_not_applied_without_extension checks that, with default
// options, a `[ ]` marker is kept as text and no checkbox is emitted.
fn test_task_list_not_applied_without_extension() {
	rendered := to_html('- [ ] item')
	assert rendered.contains('<input') == false
	assert rendered.contains('[ ] item')
}
| 239 | |
// test_task_list_marker_requires_space_after_closing_bracket checks that
// `[x]` / `[ ]` directly followed by text is not treated as a task marker,
// even with task_list enabled.
fn test_task_list_marker_requires_space_after_closing_bracket() {
	rendered := to_html('- [x]ok\n- [ ]todo', task_list: true)
	assert rendered.contains('<input') == false
	// Both markers must survive verbatim in the output.
	for literal in ['[x]ok', '[ ]todo'] {
		assert rendered.contains(literal)
	}
}
| 248 | |
// test_task_list_xhtml_checkbox_self_closing checks that with the renderer's
// xhtml option set, the task checkbox is emitted as a self-closing tag.
fn test_task_list_xhtml_checkbox_self_closing() {
	xhtml_opts := RendererOptions{
		xhtml: true
	}
	rendered := to_html('- [x] done', task_list: true, renderer_opts: xhtml_opts)
	assert rendered.contains('<input type="checkbox" disabled="" checked="" />')
}
| 258 | |
// test_footnote_definition_inside_list_item_is_preserved checks that a footnote
// defined inside a list item is collected and can be referenced both from the
// item and from a later top-level paragraph.
fn test_footnote_definition_inside_list_item_is_preserved() {
	// The definition is indented by two spaces (the width of the `- ` marker) so
	// that it belongs to the list item rather than to the top-level document.
	src := '- item[^note]\n\n  [^note]: footnote in list\n\noutside[^note]'
	html := to_html(src, footnotes: true)
	assert html.contains('item<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
	assert html.contains('outside<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
	assert html.contains('<li id="fn-note">footnote in list')
	assert html.contains('<a href="#fnref-note">↩</a></li>')
}
| 267 | |
// test_footnote_definition_inside_blockquote_is_preserved checks that a
// footnote defined inside a blockquote is rendered with its reference,
// its list entry, and its back-link.
fn test_footnote_definition_inside_blockquote_is_preserved() {
	rendered := to_html('> quote[^q]\n>\n> [^q]: footnote in quote', footnotes: true)
	fragments := [
		'quote<sup><a href="#fn-q" id="fnref-q">1</a></sup>',
		'<li id="fn-q">footnote in quote',
		'<a href="#fnref-q">↩</a></li>',
	]
	for fragment in fragments {
		assert rendered.contains(fragment)
	}
}
| 275 | |
// test_link_ref_def_multiline_title checks that a double-quoted title on the
// line after the destination is attached to the reference definition.
fn test_link_ref_def_multiline_title() {
	rendered := to_html('[foo]: /url\n"a title"\n\n[foo]')
	for fragment in ['<a href="/url"', 'title="a title"', '>foo</a>'] {
		assert rendered.contains(fragment)
	}
}
| 284 | |
// test_link_ref_def_multiline_title_single_quotes checks that a single-quoted
// title on the line after the destination is attached to the definition.
fn test_link_ref_def_multiline_title_single_quotes() {
	rendered := to_html("[bar]: /path\n'my title'\n\n[bar]")
	for fragment in ['<a href="/path"', 'title="my title"'] {
		assert rendered.contains(fragment)
	}
}
| 291 | |
// test_link_ref_def_multiline_no_title_next_line_is_content checks that text
// following a title-less definition is rendered as ordinary content and the
// reference still resolves without a title attribute.
fn test_link_ref_def_multiline_no_title_next_line_is_content() {
	rendered := to_html('[baz]: /url\n\nsome text\n\n[baz]')
	for fragment in ['<a href="/url">baz</a>', 'some text'] {
		assert rendered.contains(fragment)
	}
}
| 299 | |
// test_gfm_helper_sets_core_extension_flags checks that constructing Markdown
// with gfm() turns on the tables, strikethrough, linkify and task_list options.
fn test_gfm_helper_sets_core_extension_flags() {
	md := Markdown.new(extensions: gfm())
	flags := [md.opts.tables, md.opts.strikethrough, md.opts.linkify, md.opts.task_list]
	for enabled in flags {
		assert enabled
	}
}
| 307 | |
// test_individual_extension_helpers_set_flags checks that each single-extension
// helper, passed on its own, enables its matching option flag.
fn test_individual_extension_helpers_set_flags() {
	with_footnote := Markdown.new(extensions: [Extension(footnote())])
	with_typographer := Markdown.new(extensions: [Extension(typographer())])
	with_definition_list := Markdown.new(extensions: [Extension(definition_list())])

	assert with_footnote.opts.footnotes
	assert with_typographer.opts.typographer
	assert with_definition_list.opts.definition_list
}
| 318 | |
// test_emphasis_goldmark_parity_edge_cases pins the exact output for a set of
// irregular mixed `*` / `_` delimiter inputs.
fn test_emphasis_goldmark_parity_edge_cases() {
	cases := [
		['_a* __*_* b b', '<p><em>a* __*</em>* b b</p>\n'],
		['* bb _ *__*a* a_', '<ul>\n<li>bb _ *__<em>a</em> a_</li>\n</ul>\n'],
		['baa _ a*aba**_ba', '<p>baa _ a*aba**_ba</p>\n'],
		['_a_*_b**_aba*', '<p><em>a</em><em>_b**_aba</em></p>\n'],
		['x_ ***b*ab*bb_a*a a', '<p>x_ <em><em><em>b</em>ab</em>bb_a</em>a a</p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
| 326 | |
// test_to_plaintext_basic_blocks_and_inlines checks the plaintext form of a
// heading with a non-ASCII character and of a paragraph with emphasis and a link.
fn test_to_plaintext_basic_blocks_and_inlines() {
	plain := to_plaintext('# Héllo\n\nA *b* [site](https://example.com)')
	for fragment in ['# Héllo', 'A *b* site (https://example.com)'] {
		assert plain.contains(fragment)
	}
}
| 332 | |
// test_to_plaintext_task_list checks that, with task_list enabled, the
// plaintext output contains the ☐ and ☑ symbols along with the item text.
fn test_to_plaintext_task_list() {
	plain := to_plaintext('- [ ] todo\n- [x] done', task_list: true)
	for fragment in ['☐', '☑', 'todo', 'done'] {
		assert plain.contains(fragment)
	}
}
| 340 | |
// test_to_plaintext_footnotes checks that a footnote reference becomes a
// bracketed number and that the note body is listed under a Footnotes heading.
fn test_to_plaintext_footnotes() {
	plain := to_plaintext('Text[^n]\n\n[^n]: note body', footnotes: true)
	for fragment in ['Text[1]', 'Footnotes:', '[1] note body'] {
		assert plain.contains(fragment)
	}
}
| 347 | |
// test_to_plaintext_table_rows_are_separated checks that the header and body
// rows of a table land on separate lines in the plaintext output.
fn test_to_plaintext_table_rows_are_separated() {
	plain := to_plaintext('| a | b |\n|---|---|\n| 1 | 2 |', extensions: gfm())
	for fragment in ['a | b |', '1 | 2 |', 'a | b | \n1 | 2 |'] {
		assert plain.contains(fragment)
	}
	// The two rows must not be run together on a single line.
	assert plain.contains('a | b | 1 | 2 |') == false
}
| 355 | |
// test_to_plaintext_blockquote_footnotes_share_global_order checks that a
// footnote referenced from inside a blockquote is numbered [1] and listed in
// the Footnotes section of the plaintext output.
fn test_to_plaintext_blockquote_footnotes_share_global_order() {
	plain := to_plaintext('> quote[^q]\n\n[^q]: note body', footnotes: true)
	for fragment in ['> quote[1]', 'Footnotes:', '[1] note body'] {
		assert plain.contains(fragment)
	}
}
| 362 | |