v2 / vlib / x / markdown / markdown_test.v
361 lines · 305 sloc · 11.74 KB · 46c3d7f13d605a08603985fe4e6f82f2a8771775
Raw
// Copyright 2026 The V Language. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
4module markdown
5
// test_to_html_heading checks that level-1 and level-2 ATX headings
// render to the matching <h1>/<h2> elements.
fn test_to_html_heading() {
	cases := [
		['# Hello', '<h1>Hello</h1>\n'],
		['## World', '<h2>World</h2>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
10
// test_to_html_paragraph checks that a single line of text is wrapped in <p>.
fn test_to_html_paragraph() {
	rendered := to_html('Hello world')
	assert rendered == '<p>Hello world</p>\n'
}
14
// test_to_html_thematic_break checks that `---` renders as a horizontal rule.
fn test_to_html_thematic_break() {
	rendered := to_html('---')
	assert rendered == '<hr>\n'
}
18
// test_to_html_emphasis checks that single-star emphasis emits an <em> tag.
fn test_to_html_emphasis() {
	assert to_html('*em*').contains('<em>')
}
23
// test_to_html_strong checks that double-star emphasis emits a <strong> tag.
fn test_to_html_strong() {
	assert to_html('**bold**').contains('<strong>')
}
28
// test_to_html_code_span checks that a backtick span emits a <code> tag
// and keeps its content.
fn test_to_html_code_span() {
	rendered := to_html('`code`')
	for needle in ['<code>', 'code'] {
		assert rendered.contains(needle)
	}
}
34
// test_to_html_link checks that an inline link emits an anchor with the
// destination as href and keeps the link text.
fn test_to_html_link() {
	rendered := to_html('[link](https://example.com)')
	for needle in ['<a href="https://example.com">', 'link'] {
		assert rendered.contains(needle)
	}
}
40
// test_html_escape_in_text checks that a literal `<` in paragraph text is
// escaped in the rendered HTML.
fn test_html_escape_in_text() {
	html := to_html('A < B')
	// Look for the escaped form: a bare '<' would always match because of
	// the surrounding <p> tag and would make this assertion vacuous.
	assert html.contains('&lt;')
}
45
// test_named_entities_are_decoded_before_render checks that known named
// entity references are decoded and the result is then HTML-escaped on output.
fn test_named_entities_are_decoded_before_render() {
	// The inputs must be written as entity references; a literal '©' or '&'
	// would not exercise entity decoding at all.
	assert to_html('&copy;') == '<p>©</p>\n'
	// '&amp;' decodes to '&', which the renderer escapes back to '&amp;'.
	assert to_html('&amp;') == '<p>&amp;</p>\n'
}
50
// test_unknown_named_entity_is_left_as_literal_text checks that a name that is
// not a valid entity is kept as text, with its ampersand escaped on output.
fn test_unknown_named_entity_is_left_as_literal_text() {
	// The input is the raw '&not_a_real_entity;' sequence (not a decoded '¬'),
	// and the literal ampersand must come out as '&amp;'.
	assert to_html('&not_a_real_entity;') == '<p>&amp;not_a_real_entity;</p>\n'
}
54
// test_numeric_entities_are_decoded checks that decimal and hexadecimal
// numeric character references are decoded to the referenced code point.
fn test_numeric_entities_are_decoded() {
	// U+00A9 (copyright sign) written once in decimal and once in hex.
	assert to_html('&#169; &#xA9;') == '<p>© ©</p>\n'
}
58
// test_empty_input checks that rendering an empty document yields an empty string.
fn test_empty_input() {
	rendered := to_html('')
	assert rendered == ''
}
62
// test_multiline_paragraph checks that two consecutive lines are rendered
// inside a paragraph and that the first line's text is kept.
fn test_multiline_paragraph() {
	rendered := to_html('line one\nline two')
	for needle in ['<p>', 'line one'] {
		assert rendered.contains(needle)
	}
}
68
// test_fenced_code checks that a backtick-fenced block emits a <code element
// and keeps the code text.
fn test_fenced_code() {
	rendered := to_html('```go\nfn main() {}\n```')
	for needle in ['<code', 'fn main'] {
		assert rendered.contains(needle)
	}
}
74
// test_list checks that a single bullet item renders as <ul>/<li> with its text.
fn test_list() {
	rendered := to_html('- item')
	for needle in ['<ul>', '<li>', 'item'] {
		assert rendered.contains(needle)
	}
}
81
// test_ordered_list checks that a single numbered item renders as <ol>/<li>
// with its text.
fn test_ordered_list() {
	rendered := to_html('1. first')
	for needle in ['<ol>', '<li>', 'first'] {
		assert rendered.contains(needle)
	}
}
88
// test_ordered_list_marker_requires_whitespace_or_eol checks that `1.` or `1)`
// immediately followed by text is rendered as a plain paragraph.
fn test_ordered_list_marker_requires_whitespace_or_eol() {
	for src in ['1.test', '1)test'] {
		assert to_html(src) == '<p>' + src + '</p>\n'
	}
}
93
// test_ordered_list_marker_allows_space_tab_or_eol checks that an ordered list
// marker followed by a space, a tab, or end of line starts a list.
fn test_ordered_list_marker_allows_space_tab_or_eol() {
	one_item := '<ol>\n<li>item</li>\n</ol>\n'
	assert to_html('1. item') == one_item
	assert to_html('1)\titem') == one_item
	// A bare marker produces an empty list item.
	assert to_html('1.') == '<ol>\n<li></li>\n</ol>\n'
}
99
// test_blockquote checks that a `>` line renders as <blockquote> with its text.
fn test_blockquote() {
	rendered := to_html('> quote')
	for needle in ['<blockquote>', 'quote'] {
		assert rendered.contains(needle)
	}
}
105
// test_list_multiple_items checks that two bullet items both appear in a <ul>.
fn test_list_multiple_items() {
	rendered := to_html('- item 1\n- item 2')
	for needle in ['<ul>', 'item 1', 'item 2'] {
		assert rendered.contains(needle)
	}
}
112
// test_invalid_link_ref_def_does_not_create_reference checks that a definition
// with an unterminated `<...` destination is not registered, so `[bad]`
// stays literal text.
fn test_invalid_link_ref_def_does_not_create_reference() {
	rendered := to_html('[bad]: <https://example.com\n\n[bad]')
	has_anchor := rendered.contains('<a href=')
	assert !has_anchor
	assert rendered.contains('[bad]')
}
119
// test_valid_link_ref_def_is_resolved checks that a well-formed definition with
// an angle-bracketed destination resolves the shortcut reference `[ok]`.
fn test_valid_link_ref_def_is_resolved() {
	rendered := to_html('[ok]: <https://example.com>\n\n[ok]')
	assert rendered.contains('<a href="https://example.com">ok</a>')
}
125
// test_full_reference_does_not_fallback_to_shortcut_when_label_is_undefined
// checks that `[text][missing]` stays literal even though `[text]` is defined.
fn test_full_reference_does_not_fallback_to_shortcut_when_label_is_undefined() {
	rendered := to_html('[text]: https://example.com/text\n\n[text][missing]')
	assert rendered == '<p>[text][missing]</p>\n'
}
131
// test_shortcut_reference_still_resolves_normally checks that a plain `[text]`
// shortcut reference resolves against its definition.
fn test_shortcut_reference_still_resolves_normally() {
	rendered := to_html('[text]: https://example.com/text\n\n[text]')
	assert rendered == '<p><a href="https://example.com/text">text</a></p>\n'
}
137
// test_gfm_table_header_uses_th_cells checks that, with the GFM extensions
// enabled, the table header row is emitted inside <thead> using <th> cells.
fn test_gfm_table_header_uses_th_cells() {
	table_src := '| a | b |\n| --- | --- |\n| 1 | 2 |'
	rendered := to_html(table_src, extensions: gfm())
	for needle in ['<thead>', '<th>a</th>', '<th>b</th>'] {
		assert rendered.contains(needle)
	}
}
145
// test_emphasis_underscore_intraword_does_not_emphasize checks that underscores
// adjacent to word characters are left as literal text.
fn test_emphasis_underscore_intraword_does_not_emphasize() {
	for src in ['foo_bar_baz', 'foo_bar_', '_foo_bar'] {
		// Each input must come back unchanged inside a paragraph.
		assert to_html(src) == '<p>' + src + '</p>\n'
	}
}
151
// test_emphasis_star_delimiters_still_emphasize checks that `*` opens and
// closes emphasis even in the middle of a word.
fn test_emphasis_star_delimiters_still_emphasize() {
	rendered := to_html('a*b*c')
	assert rendered == '<p>a<em>b</em>c</p>\n'
}
155
// test_emphasis_triple_delimiters checks that a run of three delimiters
// renders as <em> wrapping <strong>.
fn test_emphasis_triple_delimiters() {
	cases := [
		['***foo***', '<p><em><strong>foo</strong></em></p>\n'],
		['___foo___', '<p><em><strong>foo</strong></em></p>\n'],
		['foo***bar***baz', '<p>foo<em><strong>bar</strong></em>baz</p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
161
// test_emphasis_nested_mixed_runs checks nesting of <em> and <strong> when
// delimiter runs of different lengths open and close together.
fn test_emphasis_nested_mixed_runs() {
	cases := [
		['**foo *bar***', '<p><strong>foo <em>bar</em></strong></p>\n'],
		['*foo **bar***', '<p><em>foo <strong>bar</strong></em></p>\n'],
		['*foo**bar**baz*', '<p><em>foo<strong>bar</strong>baz</em></p>\n'],
		['*foo **bar** baz*', '<p><em>foo <strong>bar</strong> baz</em></p>\n'],
		['**foo *bar* baz**', '<p><strong>foo <em>bar</em> baz</strong></p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
169
// test_emphasis_multiple_of_three_resolution checks how a three-delimiter
// opener is split between shorter closers.
fn test_emphasis_multiple_of_three_resolution() {
	cases := [
		['***foo** bar*', '<p><em><strong>foo</strong> bar</em></p>\n'],
		['***foo* bar**', '<p><strong><em>foo</em> bar</strong></p>\n'],
		['***foo**bar*', '<p><em><strong>foo</strong>bar</em></p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
175
// test_emphasis_underscore_punctuation_flanking checks that `_` can open
// emphasis after punctuation, while intraword `__` runs stay literal.
fn test_emphasis_underscore_punctuation_flanking() {
	assert to_html('foo-_(bar)_') == '<p>foo-<em>(bar)</em></p>\n'
	// Double underscores touching word characters must not produce <strong>.
	for src in ['foo__bar__baz', 'foo__bar__', '__foo__bar'] {
		assert to_html(src) == '<p>' + src + '</p>\n'
	}
}
182
// test_setext_heading_leading_spaces checks that a setext underline is still
// recognized when indented by one, two, or three spaces.
fn test_setext_heading_leading_spaces() {
	// CommonMark allows 0-3 leading spaces on the setext underline.
	// Each assertion uses a different indent so the whole allowed range is covered.
	assert to_html('Foo\n ===') == '<h1>Foo</h1>\n'
	assert to_html('Foo\n  ---') == '<h2>Foo</h2>\n'
	assert to_html('Foo\n   ===') == '<h1>Foo</h1>\n'
}
189
// test_emphasis_leftover_delimiters_are_literal checks that delimiters with no
// matching partner are emitted as plain text.
fn test_emphasis_leftover_delimiters_are_literal() {
	cases := [
		['*a**b**', '<p>*a<strong>b</strong></p>\n'],
		['**a**b*', '<p><strong>a</strong>b*</p>\n'],
		['*foo bar', '<p>*foo bar</p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
196
// test_emphasis_mixed_star_underscore checks that `*` and `_` runs are matched
// only against delimiters of their own kind, so they nest independently.
fn test_emphasis_mixed_star_underscore() {
	star_outer := to_html('*foo _bar_ baz*')
	assert star_outer == '<p><em>foo <em>bar</em> baz</em></p>\n'
	underscore_outer := to_html('__foo *bar* baz__')
	assert underscore_outer == '<p><strong>foo <em>bar</em> baz</strong></p>\n'
}
202
// test_link_ref_def_with_leading_spaces checks that a link reference definition
// indented by one, two, or three spaces is still registered and resolved.
fn test_link_ref_def_with_leading_spaces() {
	// CommonMark allows 0-3 leading spaces before a link ref def.
	// One case per indent width so the whole allowed range is exercised.
	assert to_html(' [foo]: https://example.com\n\n[foo]') == '<p><a href="https://example.com">foo</a></p>\n'
	assert to_html('  [bar]: https://example.org\n\n[bar]') == '<p><a href="https://example.org">bar</a></p>\n'
	assert to_html('   [baz]: https://v-lang.io\n\n[baz]') == '<p><a href="https://v-lang.io">baz</a></p>\n'
}
209
// test_link_ref_def_with_four_leading_spaces_is_not_a_ref checks that a
// definition-looking line indented by four spaces is treated as code, so the
// later `[foo]` has nothing to resolve against.
fn test_link_ref_def_with_four_leading_spaces_is_not_a_ref() {
	// Four leading spaces start an indented code block, not a reference definition.
	// The literal must carry exactly four spaces, otherwise the line is a valid definition.
	src := '    [foo]: https://example.com\n\n[foo]'
	html := to_html(src)
	assert !html.contains('<a href=')
	assert html.contains('[foo]: https://example.com')
}
217
// test_setext_heading_multiline_text checks that a two-line setext heading
// keeps the newline between its lines in the output.
fn test_setext_heading_multiline_text() {
	// Multi-line setext heading text should preserve soft breaks.
	assert to_html('Foo\nbar\n===') == '<h1>Foo\nbar</h1>\n'
}
223
// test_task_list checks that, with `task_list` enabled, `[ ]`, `[x]` and `[X]`
// items render disabled checkboxes and keep their labels.
fn test_task_list() {
	rendered := to_html('- [ ] unchecked\n- [x] checked\n- [X] also checked',
		task_list: true
	)
	expected_fragments := [
		'<input type="checkbox" disabled="">',
		'<input type="checkbox" disabled="" checked="">',
		'unchecked',
		'checked',
	]
	for fragment in expected_fragments {
		assert rendered.contains(fragment)
	}
}
232
// test_task_list_not_applied_without_extension checks that `[ ]` stays plain
// text when the task list option is not passed.
fn test_task_list_not_applied_without_extension() {
	rendered := to_html('- [ ] item')
	has_checkbox := rendered.contains('<input')
	assert !has_checkbox
	assert rendered.contains('[ ] item')
}
239
// test_task_list_marker_requires_space_after_closing_bracket checks that a
// bracket marker directly followed by text is not turned into a checkbox.
fn test_task_list_marker_requires_space_after_closing_bracket() {
	// GFM task markers are [ ]/[x]/[X] followed by whitespace or end of item.
	rendered := to_html('- [x]ok\n- [ ]todo', task_list: true)
	assert !rendered.contains('<input')
	for literal in ['[x]ok', '[ ]todo'] {
		assert rendered.contains(literal)
	}
}
248
// test_task_list_xhtml_checkbox_self_closing checks that the task list checkbox
// is written with a self-closing ` />` when the renderer's `xhtml` option is set.
fn test_task_list_xhtml_checkbox_self_closing() {
	xhtml_opts := RendererOptions{
		xhtml: true
	}
	rendered := to_html('- [x] done', task_list: true, renderer_opts: xhtml_opts)
	assert rendered.contains('<input type="checkbox" disabled="" checked="" />')
}
258
// test_footnote_definition_inside_list_item_is_preserved checks that a footnote
// defined inside a list item is still collected and can be referenced from
// both inside and outside the list.
fn test_footnote_definition_inside_list_item_is_preserved() {
	// The definition is indented two spaces so it lines up with the content of
	// the `- ` item and therefore belongs to the list item.
	src := '- item[^note]\n\n  [^note]: footnote in list\n\noutside[^note]'
	html := to_html(src, footnotes: true)
	assert html.contains('item<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
	assert html.contains('outside<sup><a href="#fn-note" id="fnref-note">1</a></sup>')
	assert html.contains('<li id="fn-note">footnote in list')
	assert html.contains('<a href="#fnref-note">↩</a></li>')
}
267
// test_footnote_definition_inside_blockquote_is_preserved checks that a footnote
// defined inside a blockquote is collected and linked to its reference.
fn test_footnote_definition_inside_blockquote_is_preserved() {
	rendered := to_html('> quote[^q]\n>\n> [^q]: footnote in quote', footnotes: true)
	expected_fragments := [
		'quote<sup><a href="#fn-q" id="fnref-q">1</a></sup>',
		'<li id="fn-q">footnote in quote',
		'<a href="#fnref-q">↩</a></li>',
	]
	for fragment in expected_fragments {
		assert rendered.contains(fragment)
	}
}
275
// test_link_ref_def_multiline_title checks that a double-quoted title on the
// line after the destination is attached to the reference definition.
fn test_link_ref_def_multiline_title() {
	// CommonMark allows the title on the next line when the destination is alone.
	rendered := to_html('[foo]: /url\n"a title"\n\n[foo]')
	for fragment in ['<a href="/url"', 'title="a title"', '>foo</a>'] {
		assert rendered.contains(fragment)
	}
}
284
// test_link_ref_def_multiline_title_single_quotes checks that a single-quoted
// title on the following line is attached to the reference definition.
fn test_link_ref_def_multiline_title_single_quotes() {
	rendered := to_html("[bar]: /path\n'my title'\n\n[bar]")
	for fragment in ['<a href="/path"', 'title="my title"'] {
		assert rendered.contains(fragment)
	}
}
291
// test_link_ref_def_multiline_no_title_next_line_is_content checks that text
// following a title-less definition is rendered as ordinary content.
fn test_link_ref_def_multiline_no_title_next_line_is_content() {
	// If the next line is not a title, it becomes normal content.
	rendered := to_html('[baz]: /url\n\nsome text\n\n[baz]')
	for fragment in ['<a href="/url">baz</a>', 'some text'] {
		assert rendered.contains(fragment)
	}
}
299
// test_gfm_helper_sets_core_extension_flags checks that building a Markdown
// instance with `gfm()` turns on the tables, strikethrough, linkify and
// task_list options.
fn test_gfm_helper_sets_core_extension_flags() {
	parser := Markdown.new(extensions: gfm())
	tables_on := parser.opts.tables
	strike_on := parser.opts.strikethrough
	linkify_on := parser.opts.linkify
	tasks_on := parser.opts.task_list
	assert tables_on
	assert strike_on
	assert linkify_on
	assert tasks_on
}
307
// test_individual_extension_helpers_set_flags checks that each single-extension
// helper enables its own option on a freshly built Markdown instance.
fn test_individual_extension_helpers_set_flags() {
	// footnote() -> opts.footnotes
	with_footnotes := Markdown.new(extensions: [Extension(footnote())])
	assert with_footnotes.opts.footnotes

	// typographer() -> opts.typographer
	with_typographer := Markdown.new(extensions: [Extension(typographer())])
	assert with_typographer.opts.typographer

	// definition_list() -> opts.definition_list
	with_deflist := Markdown.new(extensions: [Extension(definition_list())])
	assert with_deflist.opts.definition_list
}
318
// test_emphasis_goldmark_parity_edge_cases pins the exact output for a set of
// irregular delimiter sequences.
fn test_emphasis_goldmark_parity_edge_cases() {
	cases := [
		['_a* __*_* b b', '<p><em>a* __*</em>* b b</p>\n'],
		['* bb _ *__*a* a_', '<ul>\n<li>bb _ *__<em>a</em> a_</li>\n</ul>\n'],
		['baa _ a*aba**_ba', '<p>baa _ a*aba**_ba</p>\n'],
		['_a_*_b**_aba*', '<p><em>a</em><em>_b**_aba</em></p>\n'],
		['x_ ***b*ab*bb_a*a a', '<p>x_ <em><em><em>b</em>ab</em>bb_a</em>a a</p>\n'],
	]
	for pair in cases {
		assert to_html(pair[0]) == pair[1]
	}
}
326
// test_to_plaintext_basic_blocks_and_inlines checks the plaintext output for a
// heading with a non-ASCII character and a paragraph with emphasis and a link.
fn test_to_plaintext_basic_blocks_and_inlines() {
	plain := to_plaintext('# Héllo\n\nA *b* [site](https://example.com)')
	for fragment in ['# Héllo', 'A *b* site (https://example.com)'] {
		assert plain.contains(fragment)
	}
}
332
// test_to_plaintext_task_list checks that task items are rendered in plaintext
// with the ☐ / ☑ box characters alongside their labels.
fn test_to_plaintext_task_list() {
	plain := to_plaintext('- [ ] todo\n- [x] done', task_list: true)
	for fragment in ['☐', '☑', 'todo', 'done'] {
		assert plain.contains(fragment)
	}
}
340
// test_to_plaintext_footnotes checks that a footnote reference becomes `[1]`
// and that its body is listed under a `Footnotes:` section.
fn test_to_plaintext_footnotes() {
	plain := to_plaintext('Text[^n]\n\n[^n]: note body', footnotes: true)
	for fragment in ['Text[1]', 'Footnotes:', '[1] note body'] {
		assert plain.contains(fragment)
	}
}
347
// test_to_plaintext_table_rows_are_separated checks that the header and body
// rows of a GFM table end up on separate lines in plaintext output.
fn test_to_plaintext_table_rows_are_separated() {
	plain := to_plaintext('| a | b |\n|---|---|\n| 1 | 2 |', extensions: gfm())
	for fragment in ['a | b |', '1 | 2 |', 'a | b | \n1 | 2 |'] {
		assert plain.contains(fragment)
	}
	// The two rows must not be joined on a single line.
	rows_merged := plain.contains('a | b | 1 | 2 |')
	assert !rows_merged
}
355
// test_to_plaintext_blockquote_footnotes_share_global_order checks that a
// footnote referenced from inside a blockquote is numbered `[1]` and listed
// in the `Footnotes:` section.
fn test_to_plaintext_blockquote_footnotes_share_global_order() {
	plain := to_plaintext('> quote[^q]\n\n[^q]: note body', footnotes: true)
	for fragment in ['> quote[1]', 'Footnotes:', '[1] note body'] {
		assert plain.contains(fragment)
	}
}
362