v2 / vlib / x / markdown / inline.v
909 lines · 863 sloc · 21.73 KB · 46c3d7f13d605a08603985fe4e6f82f2a8771775
Raw
1// Copyright 2026 The V Language. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module markdown
5
6import encoding.html as ehtml
7
// parse_inline runs a fresh InlineParser over `src` and returns the inline
// nodes it produces. `opts` and `ref_map` are handed to the parser unchanged.
pub fn parse_inline(src string, opts Options, ref_map map[string]LinkRef) []&Node {
	mut parser := InlineParser{
		opts:    opts
		ref_map: ref_map
		src:     src
	}
	nodes := parser.parse()
	return nodes
}
17
// InlineParser holds the state used while parsing one inline source string.
struct InlineParser {
	// opts is the option set given to parse_inline; this file reads its
	// strikethrough, footnotes and linkify flags.
	opts Options
	// ref_map holds link reference definitions, looked up by normalized label.
	ref_map map[string]LinkRef
mut:
	// src is the inline text being parsed.
	src string
	// pos is the byte offset in src of the next unread byte.
	pos int
}
26
// EmphDelim records one run of `*` or `_` characters that may open and/or
// close emphasis. Entries are created by InlineParser.parse and consumed by
// resolve_emphasis.
struct EmphDelim {
mut:
	// node_idx is the index, in the flat node list, of the text node that
	// holds this run's characters.
	node_idx int
	// ch is the delimiter byte (`*` or `_`).
	ch u8
	// length is the number of delimiter characters not yet consumed by a match.
	length int
	// orig_len is the run length when it was scanned; resolve_emphasis uses it
	// for its "sum is a multiple of 3" check.
	orig_len int
	// can_open / can_close are the results of can_open_emphasis and
	// can_close_emphasis for this run.
	can_open  bool
	can_close bool
	// active is cleared once the run must no longer take part in matching.
	active bool = true
}
37
// rune_at_or_space decodes the rune whose first byte is at `pos` in `s`.
// A position outside the string yields a space; a sequence that would run
// past the end of the string yields the lead byte converted to a rune.
@[inline]
fn rune_at_or_space(s string, pos int) rune {
	if pos < 0 || pos >= s.len {
		return ` `
	}
	lead := s[pos]
	width := utf8_char_len(lead)
	stop := pos + width
	if width > 0 && stop <= s.len {
		return s[pos..stop].runes()[0]
	}
	return rune(lead)
}
49
// prev_rune_or_space returns the rune that ends immediately before byte
// offset `pos`, or a space when `pos` is at (or before) the start of `s`.
@[inline]
fn prev_rune_or_space(s string, pos int) rune {
	if pos <= 0 {
		return ` `
	}
	// Walk back over UTF-8 continuation bytes (10xxxxxx) to the lead byte.
	mut lead := pos - 1
	for lead > 0 {
		if (s[lead] & u8(0xC0)) != u8(0x80) {
			break
		}
		lead--
	}
	return rune_at_or_space(s, lead)
}
61
// parse consumes the whole source and returns the resulting node list.
// Runs of `*` / `_` are first emitted as plain text nodes and recorded as
// delimiter candidates; resolve_emphasis then rewrites them into emphasis
// nodes. Everything else is delegated to parse_one.
fn (mut p InlineParser) parse() []&Node {
	mut nodes := []&Node{}
	mut stack := []EmphDelim{}
	for p.pos < p.src.len {
		marker := p.src[p.pos]
		if marker != `*` && marker != `_` {
			for parsed in p.parse_one() {
				nodes << parsed
			}
			continue
		}
		// Swallow the full run of identical delimiter characters.
		run_start := p.pos
		for p.pos < p.src.len && p.src[p.pos] == marker {
			p.pos++
		}
		run_text := p.src[run_start..p.pos]
		prev := prev_rune_or_space(p.src, run_start)
		next := rune_at_or_space(p.src, p.pos)
		opens := can_open_emphasis(marker, prev, next)
		closes := can_close_emphasis(marker, prev, next)
		nodes << text_node(run_text)
		if !opens && !closes {
			continue
		}
		stack << EmphDelim{
			node_idx:  nodes.len - 1
			ch:        marker
			length:    run_text.len
			orig_len:  run_text.len
			can_open:  opens
			can_close: closes
		}
	}
	if stack.len > 0 {
		resolve_emphasis(mut nodes, mut stack)
		nodes = compact_empty_text_nodes(nodes)
	}
	return merge_text_nodes(nodes)
}
102
// resolve_emphasis pairs closing delimiter runs in `delims` with earlier
// opening runs of the same character and rewrites `nodes` in place: the nodes
// between a matched pair are moved under a new .emphasis (one delimiter
// character consumed per side) or .strong (two per side) node.
// Delimiter text nodes that are fully consumed are left behind with an empty
// literal; the caller removes them afterwards.
fn resolve_emphasis(mut nodes []&Node, mut delims []EmphDelim) {
	mut i := 0
	for i < delims.len {
		// Only an active, non-exhausted run that can close is a closer candidate.
		if !delims[i].active || !delims[i].can_close || delims[i].length == 0 {
			i++
			continue
		}
		// Search backwards for the nearest usable opener.
		mut opener := i - 1
		for opener >= 0 {
			if !delims[opener].active || !delims[opener].can_open || delims[opener].length == 0 {
				opener--
				continue
			}
			// Opener and closer must use the same delimiter character.
			if delims[opener].ch != delims[i].ch {
				opener--
				continue
			}
			// When either run can both open and close, reject the pair if the
			// original lengths sum to a multiple of 3 unless both lengths are
			// themselves multiples of 3.
			if (delims[i].can_open || delims[opener].can_close)
				&& (delims[opener].orig_len + delims[i].orig_len) % 3 == 0
				&& (delims[opener].orig_len % 3 != 0 || delims[i].orig_len % 3 != 0) {
				opener--
				continue
			}
			// Skip openers with no node between them and the closer.
			if delims[opener].node_idx + 1 >= delims[i].node_idx {
				opener--
				continue
			}
			break
		}
		if opener < 0 {
			// No opener found; this closer's characters stay as literal text.
			i++
			continue
		}

		// Consume two characters per side (strong) when both runs have at least
		// two left, otherwise one (emphasis).
		use_len := if delims[opener].length >= 2 && delims[i].length >= 2 { 2 } else { 1 }
		opener_idx := delims[opener].node_idx
		closer_idx := delims[i].node_idx
		// Defensive checks: indices must be in range and correctly ordered.
		if opener_idx < 0 || closer_idx < 0 || opener_idx >= nodes.len || closer_idx >= nodes.len
			|| opener_idx >= closer_idx {
			delims[i].active = false
			i++
			continue
		}
		// Defensive check: both text nodes must still hold enough characters.
		if nodes[opener_idx].literal.len < use_len || nodes[closer_idx].literal.len < use_len {
			delims[i].active = false
			i++
			continue
		}

		// The opener gives up characters from its end, the closer from its start.
		nodes[opener_idx].literal = nodes[opener_idx].literal[..nodes[opener_idx].literal.len - use_len]
		nodes[closer_idx].literal = nodes[closer_idx].literal[use_len..]
		delims[opener].length -= use_len
		delims[i].length -= use_len

		mut emph := new_node(if use_len == 2 { .strong } else { .emphasis })
		for child in nodes[opener_idx + 1..closer_idx] {
			emph.append_child(child)
		}

		// Replace the wrapped nodes in the flat list with the single new node.
		n_inner := closer_idx - opener_idx - 1
		if n_inner > 0 {
			nodes.delete_many(opener_idx + 1, n_inner)
			nodes.insert(opener_idx + 1, emph)
		}

		// The list shrank by (n_inner - 1) entries; later node indices must be
		// shifted down by that amount.
		mut delta := n_inner - 1
		if delta < 0 {
			delta = 0
		}
		for j := 0; j < delims.len; j++ {
			if !delims[j].active {
				continue
			}
			// Delimiters strictly between the pair are now inside the new node
			// and take no further part in matching at this level.
			if delims[j].node_idx > opener_idx && delims[j].node_idx < closer_idx {
				delims[j].active = false
				continue
			}
			if delta > 0 && delims[j].node_idx >= closer_idx {
				delims[j].node_idx -= delta
			}
		}

		if delims[opener].length == 0 {
			delims[opener].can_open = false
		}
		if delims[i].length == 0 {
			delims[i].can_close = false
		}
		// Resume just after the opener so a closer with characters left over is
		// examined again.
		i = opener + 1
	}
}
194
// compact_empty_text_nodes returns a new list containing every node of
// `nodes` except text nodes whose literal is empty.
fn compact_empty_text_nodes(nodes []&Node) []&Node {
	mut kept := []&Node{}
	for candidate in nodes {
		is_blank_text := candidate.kind == .text && candidate.literal.len == 0
		if !is_blank_text {
			kept << candidate
		}
	}
	return kept
}
205
// parse_one parses the inline element that starts at the current position,
// advances `p.pos` past it and returns the node(s) produced. It returns an
// empty list only when the position is already at the end of the source.
//
// Each special character is offered to its dedicated parser first; when that
// parser declines, the character is emitted as literal text.
fn (mut p InlineParser) parse_one() []&Node {
	if p.pos >= p.src.len {
		return []
	}
	c := p.src[p.pos]
	match c {
		`\\` {
			return [p.parse_backslash()]
		}
		96 { // backtick
			if node := p.try_code_span() {
				return [node]
			}
			// No closing run of equal length exists. The whole backtick run is
			// literal text: emitting a single backtick here would let a shorter
			// suffix of the run open a code span (and would rescan the rest of
			// the source once per backtick).
			run_start := p.pos
			for p.pos < p.src.len && p.src[p.pos] == 96 {
				p.pos++
			}
			return [text_node(p.src[run_start..p.pos])]
		}
		`*`, `_` {
			// Emphasis is resolved by the caller; here a delimiter is plain text.
			p.pos++
			return [text_node(c.ascii_str())]
		}
		`~` {
			if p.opts.strikethrough {
				if node := p.try_strikethrough() {
					return [node]
				}
			}
			p.pos++
			return [text_node('~')]
		}
		`[` {
			if nodes := p.try_link_or_footnote() {
				return nodes
			}
			p.pos++
			return [text_node('[')]
		}
		`!` {
			if p.pos + 1 < p.src.len && p.src[p.pos + 1] == `[` {
				saved := p.pos
				p.pos += 2
				if nodes := p.try_image_after_bang() {
					return nodes
				}
				// Not an image: rewind and emit only the '!'; the '[' is
				// parsed on the next call.
				p.pos = saved + 1
				return [text_node('!')]
			}
			p.pos++
			return [text_node('!')]
		}
		`<` {
			if node := p.try_autolink_or_html() {
				return [node]
			}
			p.pos++
			return [text_node('<')]
		}
		`&` {
			if node := p.try_entity() {
				return [node]
			}
			p.pos++
			return [text_node('&')]
		}
		`\n` {
			return [p.parse_newline()]
		}
		else {
			// Read the rune and its width before try_linkify can move p.pos.
			r := rune_at_or_space(p.src, p.pos)
			step := utf8_char_len(p.src[p.pos])
			if p.opts.linkify {
				if node := p.try_linkify() {
					return [node]
				}
			}
			p.pos += step
			return [text_node(r.str())]
		}
	}
}
287
// text_node builds a .text node whose literal is `s`.
fn text_node(s string) &Node {
	mut leaf := new_node(.text)
	leaf.literal = s
	return leaf
}
294
// merge_text_nodes collapses each run of adjacent text nodes into the first
// node of the run by appending the later literals to it. Lists with fewer
// than two nodes are returned as-is.
fn merge_text_nodes(nodes []&Node) []&Node {
	if nodes.len < 2 {
		return nodes
	}
	mut merged := []&Node{}
	for current in nodes {
		last := merged.len - 1
		if last >= 0 && merged[last].kind == .text && current.kind == .text {
			merged[last].literal += current.literal
			continue
		}
		merged << current
	}
	return merged
}
310
// can_open_emphasis reports whether a run of `delim` characters, preceded by
// `before` and followed by `after`, may open emphasis.
// `*` opens when the run is left-flanking; `_` additionally requires that the
// run is not right-flanking or is preceded by punctuation. Any other
// delimiter never opens.
fn can_open_emphasis(delim u8, before rune, after rune) bool {
	if delim != `*` && delim != `_` {
		return false
	}
	prev_space := is_unicode_space(before)
	prev_punct := is_ascii_punct(before)
	next_space := is_unicode_space(after)
	next_punct := is_ascii_punct(after)
	opens_left := !next_space && (!next_punct || prev_space || prev_punct)
	if delim == `*` {
		return opens_left
	}
	closes_right := !prev_space && (!prev_punct || next_space || next_punct)
	return opens_left && (!closes_right || prev_punct)
}
326
// can_close_emphasis reports whether a run of `delim` characters, preceded by
// `before` and followed by `after`, may close emphasis.
// `*` closes when the run is right-flanking; `_` additionally requires that
// the run is not left-flanking or is followed by punctuation. Any other
// delimiter never closes.
fn can_close_emphasis(delim u8, before rune, after rune) bool {
	if delim != `*` && delim != `_` {
		return false
	}
	prev_space := is_unicode_space(before)
	prev_punct := is_ascii_punct(before)
	next_space := is_unicode_space(after)
	next_punct := is_ascii_punct(after)
	closes_right := !prev_space && (!prev_punct || next_space || next_punct)
	if delim == `*` {
		return closes_right
	}
	opens_left := !next_space && (!next_punct || prev_space || prev_punct)
	return closes_right && (!opens_left || next_punct)
}
342
// parse_backslash consumes the backslash at the current position.
// Backslash + newline yields a hard break; backslash + ASCII punctuation
// yields that punctuation as text. In every other case only the backslash is
// consumed and returned as literal text.
fn (mut p InlineParser) parse_backslash() &Node {
	p.pos++ // step over '\'
	if p.pos < p.src.len {
		next := p.src[p.pos]
		if next == `\n` {
			p.pos++
			return new_node(.hard_break)
		}
		if is_ascii_punct(next) {
			p.pos++
			return text_node(next.ascii_str())
		}
	}
	return text_node('\\')
}
360
// try_code_span tries to read a code span that opens with the backtick run at
// the current position. The span closes at the first later backtick run of
// exactly the same length. On success `p.pos` ends up after the closing run;
// on failure `p.pos` is left where it was and `none` is returned.
fn (mut p InlineParser) try_code_span() ?&Node {
	open_pos := p.pos
	mut cursor := open_pos
	for cursor < p.src.len && p.src[cursor] == 96 {
		cursor++
	}
	open_len := cursor - open_pos
	body_start := cursor
	for cursor < p.src.len {
		if p.src[cursor] != 96 {
			cursor++
			continue
		}
		run_start := cursor
		for cursor < p.src.len && p.src[cursor] == 96 {
			cursor++
		}
		if cursor - run_start != open_len {
			continue
		}
		// Line endings inside the span become spaces.
		mut text := p.src[body_start..run_start].replace('\n', ' ')
		// Drop one space from each side when both sides have one and the
		// content is not whitespace only.
		padded := text.len >= 2 && text[0] == ` ` && text[text.len - 1] == ` `
		if padded && text.trim_space().len > 0 {
			text = text[1..text.len - 1]
		}
		mut span := new_node(.code_span)
		span.literal = text
		p.pos = cursor
		return span
	}
	p.pos = open_pos
	return none
}
398
// try_emphasis attempts to parse *em*, **strong**, _em_, __strong__ starting
// at the delimiter run of character `c` at the current position.
// It returns the emphasis node and leaves `p.pos` after the closing
// delimiter, or restores `p.pos` to the start of the run and returns `none`.
// NOTE(review): no caller of this method is visible in this file; the main
// parse loop resolves emphasis through resolve_emphasis instead — confirm
// whether this method is still used elsewhere in the module.
fn (mut p InlineParser) try_emphasis(c u8) ?&Node {
	start := p.pos
	// Measure the opening delimiter run.
	mut run := 0
	for p.pos < p.src.len && p.src[p.pos] == c {
		run++
		p.pos++
	}

	// Prevent splitting an intraword __ run into a synthetic single-underscore opener.
	if c == `_` && run == 1 && start > 1 && p.src[start - 1] == `_` {
		before2 := prev_rune_or_space(p.src, start - 1)
		after1 := rune_at_or_space(p.src, start + run)
		if is_wordish(before2) && is_wordish(after1) {
			p.pos = start
			return none
		}
	}

	before := prev_rune_or_space(p.src, start)
	after := rune_at_or_space(p.src, start + run)
	opener_can_open := can_open_emphasis(c, before, after)
	opener_can_close := can_close_emphasis(c, before, after)

	if !opener_can_open {
		p.pos = start
		return none
	}

	// Prefer em first for odd runs (e.g. ***foo*** -> <em><strong>foo</strong></em>).
	if run % 2 == 1 {
		// Try a one-character opener, then (for runs of 3 or more) a
		// two-character opener.
		p.pos = start + 1
		if node := p.match_close_delim(c, 1, run, opener_can_close) {
			return node
		}
		if run >= 2 {
			p.pos = start + 2
			if node := p.match_close_delim(c, 2, run, opener_can_close) {
				return node
			}
		}
	} else {
		// Even runs: try strong first, then plain emphasis.
		if run >= 2 {
			p.pos = start + 2
			if node := p.match_close_delim(c, 2, run, opener_can_close) {
				return node
			}
		}
		p.pos = start + 1
		if node := p.match_close_delim(c, 1, run, opener_can_close) {
			return node
		}
	}

	// No closing delimiter matched; leave the run for the caller.
	p.pos = start
	return none
}
456
// is_wordish reports whether `c` counts as a word character for emphasis
// boundary checks, i.e. it is neither Unicode whitespace nor ASCII
// punctuation.
@[inline]
fn is_wordish(c rune) bool {
	if is_unicode_space(c) {
		return false
	}
	return !is_ascii_punct(c)
}
463
// match_close_delim parses content after the opening delimiter run and looks
// for a closing run of `c` that can end an emphasis span consuming `count`
// delimiter characters (1 = emphasis, 2 = strong).
//
// `p.pos` must point at the first content byte on entry. `opener_run` is the
// full length of the opening run and `opener_can_close` tells whether that run
// could also close emphasis; both feed the "multiple of 3" check below.
// On success the emphasis node is returned with `p.pos` after the consumed
// closing characters. On failure `p.pos` is reset to the content start and
// `none` is returned.
fn (mut p InlineParser) match_close_delim(c u8, count int, opener_run int, opener_can_close bool) ?&Node {
	content_start := p.pos
	mut content_nodes := []&Node{}

	for p.pos < p.src.len {
		loop_start_pos := p.pos
		ch := p.src[p.pos]
		// Check for closing delimiter.
		if ch == c {
			close_pos := p.pos
			// Measure the candidate closing run.
			mut close_run := 0
			for p.pos < p.src.len && p.src[p.pos] == c {
				close_run++
				p.pos++
			}
			if close_run >= count {
				// Verify right-flanking.
				before_close := prev_rune_or_space(p.src, close_pos)
				after_close := rune_at_or_space(p.src, p.pos)
				closer_can_close := can_close_emphasis(c, before_close, after_close)
				closer_can_open := can_open_emphasis(c, before_close, after_close)
				if closer_can_close {
					if count == 1 && opener_run == 1 && close_run > 1 && closer_can_open {
						// A longer run that can also open is not taken as the
						// closer of a single-character opener; rewind to it.
						p.pos = close_pos
					} else if count == 1 && opener_run > 1 && close_run > 1 && closer_can_open {
						// Keep extra delimiters inside the emphasis span so nested
						// strong parsing can consume them (e.g. foo***bar***baz).
						inner_end := close_pos + (close_run - count)
						if inner_end > content_start && inner_end <= p.src.len {
							inner_nodes := parse_inline(p.src[content_start..inner_end], p.opts,
								p.ref_map)
							mut node := new_node(.emphasis)
							for child in inner_nodes {
								node.append_child(child)
							}
							p.pos = close_pos + close_run
							return node
						}
					} else {
						// Reject the pair when both runs can open and close and
						// their lengths sum to a multiple of 3, unless both
						// lengths are multiples of 3.
						if opener_can_close && closer_can_open && (opener_run + close_run) % 3 == 0
							&& (opener_run % 3 != 0 || close_run % 3 != 0) {
							p.pos = close_pos
						} else {
							if content_nodes.len == 0 {
								// Nothing collected yet: keep one delimiter as
								// literal content and continue scanning.
								content_nodes << text_node(c.ascii_str())
								p.pos = close_pos + 1
								continue
							}
							// Rewind extra closing chars beyond `count`.
							p.pos = close_pos + count
							kind := if count == 2 { NodeKind.strong } else { NodeKind.emphasis }
							mut node := new_node(kind)
							for child in content_nodes {
								node.append_child(child)
							}
							return node
						}
					}
				}
			}
			// The run did not close the span: rewind to its first character.
			p.pos = close_pos
			if count == 1 && opener_run > 1 && opener_can_close {
				// Keep one delimiter as literal content and move on.
				content_nodes << text_node(c.ascii_str())
				p.pos++
				continue
			}
		}
		if ch == `\n` {
			// Newlines stop emphasis search.
			break
		}
		inner := p.parse_one()
		if p.pos <= loop_start_pos {
			// Safety net: force progress to avoid recursive delimiter stalls.
			fallback := rune_at_or_space(p.src, loop_start_pos)
			content_nodes << text_node(fallback.str())
			p.pos = loop_start_pos + utf8_char_len(p.src[loop_start_pos])
			continue
		}
		content_nodes << inner
	}

	// Not found; reset.
	p.pos = content_start
	return none
}
552
// try_strikethrough parses ~~text~~ starting at the '~' under the cursor.
// The text between the markers is parsed as inline content and becomes the
// children of a .strikethrough node. When the second '~' or the closing '~~'
// is missing, `p.pos` is left unchanged and `none` is returned.
fn (mut p InlineParser) try_strikethrough() ?&Node {
	if p.pos + 1 >= p.src.len || p.src[p.pos + 1] != `~` {
		return none
	}
	body_start := p.pos + 2
	body_end := p.src.index_after_('~~', body_start)
	if body_end < 0 {
		return none
	}
	children := parse_inline(p.src[body_start..body_end], p.opts, p.ref_map)
	mut strike := new_node(.strikethrough)
	for child in children {
		strike.append_child(child)
	}
	p.pos = body_end + 2
	return strike
}
573
// try_link_or_footnote handles the '[' under the cursor. In order it tries:
//   1. a footnote reference [^label] (only when the footnotes option is on),
//   2. an inline link [text](dest "title"),
//   3. a full or collapsed reference [text][label] / [text][],
//   4. a shortcut reference [label].
// On success the node is returned in a one-element list and `p.pos` is placed
// after the consumed syntax; otherwise `p.pos` is restored to the '[' and
// `none` is returned.
fn (mut p InlineParser) try_link_or_footnote() ?[]&Node {
	bracket_pos := p.pos
	p.pos++ // step over '['

	// 1. Footnote reference.
	if p.opts.footnotes && p.pos < p.src.len && p.src[p.pos] == `^` {
		label_start := p.pos + 1
		label_end := p.src.index_after_(']', label_start)
		if label_end > label_start {
			mut note := new_node(.footnote_ref)
			note.fn_label = p.src[label_start..label_end]
			p.pos = label_end + 1
			return [note]
		}
	}

	text_start := p.pos
	text_end := find_bracket_close(p.src, text_start)
	if text_end < 0 {
		p.pos = bracket_pos
		return none
	}
	link_text := p.src[text_start..text_end]
	p.pos = text_end + 1

	// 2. Inline link.
	if p.pos < p.src.len && p.src[p.pos] == `(` {
		dest, title, paren_end := parse_inline_link_dest_from(p.src, p.pos + 1)
		if paren_end >= 0 {
			children := parse_inline(link_text, p.opts, p.ref_map)
			mut link := new_node(.link)
			link.dest = unescape_string(dest)
			link.title = unescape_string(title)
			for child in children {
				link.append_child(child)
			}
			p.pos = paren_end + 1
			return [link]
		}
	}

	// 3. Full / collapsed reference.
	if p.pos < p.src.len && p.src[p.pos] == `[` {
		ref_start := p.pos + 1
		ref_end := p.src.index_after_(']', ref_start)
		if ref_end >= 0 {
			explicit := p.src[ref_start..ref_end]
			key := normalize_label(if explicit.len > 0 { explicit } else { link_text })
			if key in p.ref_map {
				target := p.ref_map[key]
				mut link := new_node(.link)
				link.dest = target.dest
				link.title = target.title
				link.label = key
				for child in parse_inline(link_text, p.opts, p.ref_map) {
					link.append_child(child)
				}
				p.pos = ref_end + 1
				return [link]
			}
			if explicit.len > 0 {
				// An explicit label that does not resolve must not fall back to
				// the shortcut form.
				p.pos = bracket_pos
				return none
			}
		}
	}

	// 4. Shortcut reference; p.pos already sits just after the closing ']'.
	shortcut := normalize_label(link_text)
	if shortcut in p.ref_map {
		target := p.ref_map[shortcut]
		mut link := new_node(.link)
		link.dest = target.dest
		link.title = target.title
		link.label = shortcut
		for child in parse_inline(link_text, p.opts, p.ref_map) {
			link.append_child(child)
		}
		return [link]
	}

	p.pos = bracket_pos
	return none
}
658
// try_image_after_bang parses the rest of an image after '![' was consumed;
// `p.pos` must point at the first byte of the alt text. Supported forms:
//   - inline:            ![alt](dest "title")
//   - full / collapsed:  ![alt][label] / ![alt][]
//   - shortcut:          ![alt]   (alt text is the reference label)
// On success the .image node is returned in a one-element list, with the alt
// text parsed as inline children, and `p.pos` is placed after the consumed
// syntax. On failure `none` is returned and `p.pos` may have moved; the
// caller is expected to restore it.
fn (mut p InlineParser) try_image_after_bang() ?[]&Node {
	text_start := p.pos
	close := find_bracket_close(p.src, p.pos)
	if close < 0 {
		return none
	}
	alt_text := p.src[text_start..close]
	p.pos = close + 1

	// Inline image.
	if p.pos < p.src.len && p.src[p.pos] == `(` {
		dest, title, end := parse_inline_link_dest_from(p.src, p.pos + 1)
		if end >= 0 {
			mut node := new_node(.image)
			node.dest = unescape_string(dest)
			node.title = unescape_string(title)
			inner_nodes := parse_inline(alt_text, p.opts, p.ref_map)
			for child in inner_nodes {
				node.append_child(child)
			}
			p.pos = end + 1
			return [node]
		}
	}
	// Full or collapsed reference.
	if p.pos < p.src.len && p.src[p.pos] == `[` {
		ref_start := p.pos + 1
		ref_close := p.src.index_after_(']', ref_start)
		if ref_close >= 0 {
			raw_label := p.src[ref_start..ref_close]
			label := normalize_label(if raw_label.len > 0 { raw_label } else { alt_text })
			if label in p.ref_map {
				ref := p.ref_map[label]
				mut node := new_node(.image)
				node.dest = ref.dest
				node.title = ref.title
				inner_nodes := parse_inline(alt_text, p.opts, p.ref_map)
				for child in inner_nodes {
					node.append_child(child)
				}
				p.pos = ref_close + 1
				return [node]
			}
			if raw_label.len > 0 {
				// Do not downgrade explicit ![alt][label] to shortcut ![alt].
				return none
			}
		}
	}
	// Shortcut reference ![alt]. Without this branch the input was rendered
	// as a literal '!' followed by a shortcut link.
	label := normalize_label(alt_text)
	if label in p.ref_map {
		ref := p.ref_map[label]
		mut node := new_node(.image)
		node.dest = ref.dest
		node.title = ref.title
		inner_nodes := parse_inline(alt_text, p.opts, p.ref_map)
		for child in inner_nodes {
			node.append_child(child)
		}
		// p.pos already sits just after the closing ']'.
		return [node]
	}
	return none
}
705
// find_bracket_close scans `s` from `start` (the byte after an opening '[')
// and returns the index of the ']' that balances it, or -1 when there is
// none. Nested brackets are counted, and a backslash followed by another
// byte skips both bytes.
fn find_bracket_close(s string, start int) int {
	mut open := 1
	mut idx := start
	for idx < s.len {
		cur := s[idx]
		if cur == `\\` && idx + 1 < s.len {
			idx += 2
			continue
		}
		match cur {
			`[` {
				open++
			}
			`]` {
				open--
				if open == 0 {
					return idx
				}
			}
			else {}
		}
		idx++
	}
	return -1
}
727
// parse_inline_link_dest_from parses `dest)` or `dest "title")` in `s`,
// beginning at `start` (the byte after the opening paren). Whitespace is
// skipped before the destination, before the title and before the closing
// paren. It returns (dest, title, index of ')') on success and ('', '', -1)
// otherwise.
fn parse_inline_link_dest_from(s string, start int) (string, string, int) {
	dest_at := skip_ws(s, start)
	if dest_at >= s.len {
		return '', '', -1
	}
	dest_src := s[dest_at..]
	dest, dest_rest := parse_link_dest(dest_src)
	// The helper returns the unparsed tail; its length gives the bytes consumed.
	after_dest := skip_ws(s, dest_at + (dest_src.len - dest_rest.len))
	if after_dest < s.len && s[after_dest] == `)` {
		return dest, '', after_dest
	}
	title_src := s[after_dest..]
	title, title_rest := parse_link_title(title_src)
	after_title := skip_ws(s, after_dest + (title_src.len - title_rest.len))
	if after_title < s.len && s[after_title] == `)` {
		return dest, title, after_title
	}
	return '', '', -1
}
749
// skip_ws returns the first byte offset at or after `start` whose rune is not
// listed in `unicode_space` (or `s.len` when only whitespace remains).
fn skip_ws(s string, start int) int {
	mut cursor := start
	for cursor < s.len && rune_at_or_space(s, cursor) in unicode_space {
		cursor += utf8_char_len(s[cursor])
	}
	return cursor
}
762
// try_autolink_or_html handles the '<' under the cursor. It first tries an
// autolink (<uri> or <email>), then a raw HTML construct. On success the node
// is returned and `p.pos` is moved past the closing '>'; otherwise `p.pos` is
// untouched and `none` is returned.
fn (mut p InlineParser) try_autolink_or_html() ?&Node {
	rest := p.src[p.pos..]
	auto_end := try_autolink(rest)
	if auto_end >= 0 {
		content := rest[1..auto_end]
		mut node := new_node(.autolink)
		node.literal = content
		// Only a bare e-mail address gets a 'mailto:' prefix. Content that
		// contains ':' already carries a scheme (e.g. <mailto:a@b.c> or
		// <xmpp:a@b.c>) and must be used verbatim; testing only for '://'
		// produced 'mailto:mailto:a@b.c'.
		if content.contains('@') && !content.contains(':') {
			node.dest = 'mailto:' + content
		} else {
			node.dest = content
		}
		p.pos += auto_end + 1
		return node
	}
	raw_end := try_raw_html_tag(rest)
	if raw_end >= 0 {
		mut node := new_node(.raw_html)
		// The literal keeps the surrounding '<' and '>'.
		node.literal = rest[..raw_end + 1]
		p.pos += raw_end + 1
		return node
	}
	return none
}
788
// try_autolink checks whether `s` begins with an <autolink> and returns the
// index of its closing '>' or -1. The text between the angle brackets must
// contain neither a space nor '<', and must either contain '://' or contain
// an '@' that is not its first character.
fn try_autolink(s string) int {
	if s.len < 3 || s[0] != `<` {
		return -1
	}
	gt := s.index_after_('>', 1)
	if gt < 0 {
		return -1
	}
	body := s[1..gt]
	if body.contains(' ') || body.contains('<') {
		return -1
	}
	looks_like_uri := body.contains('://')
	looks_like_email := body.contains('@') && !body.starts_with('@')
	return if looks_like_uri || looks_like_email { gt } else { -1 }
}
810
// try_raw_html_tag checks whether `s` begins with a raw HTML construct and
// returns the index of its final '>' or -1. Recognised forms:
//   - comments                <!-- ... -->
//   - processing instructions <? ... ?>
//   - CDATA sections          <![CDATA[ ... ]]>  (prefix matched case-insensitively)
//   - anything else up to the first '>' whose first inner byte is a letter,
//     '/' or '!'.
fn try_raw_html_tag(s string) int {
	if s.len < 3 || s[0] != `<` {
		return -1
	}
	if s.starts_with('<!--') {
		end := s.index_after_('-->', 4)
		if end >= 0 {
			return end + 2
		}
		return -1
	}
	if s.starts_with('<?') {
		end := s.index_after_('?>', 2)
		if end >= 0 {
			return end + 1
		}
		return -1
	}
	// Lower-case only the 9-byte prefix being compared. Lower-casing all of
	// `s` (the whole remaining source) on every '<' made parsing quadratic on
	// inputs with many '<' characters.
	if s.len >= 9 && s[..9].to_lower() == '<![cdata[' {
		end := s.index_after_(']]>', 9)
		if end >= 0 {
			return end + 2
		}
		return -1
	}
	end := s.index_after_('>', 1)
	if end < 0 {
		return -1
	}
	inner := s[1..end]
	if inner.len == 0 {
		return -1
	}
	if !is_alpha(inner[0]) && inner[0] != `/` && inner[0] != `!` {
		return -1
	}
	return end
}
851
// try_entity parses an HTML entity reference (&name; or &#n; or &#xn;) at the
// '&' under the cursor. The ';' must appear at offset 2..32 from the '&' and
// the candidate must be changed by `ehtml.unescape`; otherwise `none` is
// returned and `p.pos` is untouched. On success the decoded text is returned
// as a text node and `p.pos` is moved past the ';'.
fn (mut p InlineParser) try_entity() ?&Node {
	// A ';' beyond offset 32 is rejected anyway, so look only at the next 33
	// bytes instead of copying and scanning the whole remaining source.
	window_end := if p.src.len - p.pos > 33 { p.pos + 33 } else { p.src.len }
	window := p.src[p.pos..window_end]
	semi := window.index(';') or { return none }
	if semi < 2 {
		return none
	}
	candidate := window[..semi + 1]
	decoded := ehtml.unescape(candidate, all: true)
	if decoded == candidate {
		// Not a known entity: leave the '&' for the caller to emit literally.
		return none
	}
	p.pos += semi + 1
	return text_node(decoded)
}
867
// parse_newline consumes the newline under the cursor. It yields a hard break
// when the two bytes before the newline are both spaces, and a soft break
// otherwise.
fn (mut p InlineParser) parse_newline() &Node {
	at := p.pos
	p.pos++
	if at >= 2 && p.src[at - 1] == ` ` && p.src[at - 2] == ` ` {
		return new_node(.hard_break)
	}
	return new_node(.soft_break)
}
878
// try_linkify matches a bare URL (linkify extension) that starts at the
// current position with one of the schemes https://, http://, ftp:// or
// mailto:. The URL extends to the next space, tab, newline, '<', '>' or '"',
// and trailing '.', ',', ';', '!' and '?' are left out of it.
// Returns an .autolink node and advances `p.pos` past the URL, or returns
// `none` with `p.pos` untouched when nothing matches or when nothing follows
// the scheme (a bare "http://" is not a link).
fn (mut p InlineParser) try_linkify() ?&Node {
	for scheme in ['https://', 'http://', 'ftp://', 'mailto:'] {
		// Compare only scheme.len bytes in place; slicing the whole remaining
		// source for every ordinary character made parsing quadratic.
		scheme_end := p.pos + scheme.len
		if scheme_end > p.src.len || p.src[p.pos..scheme_end] != scheme {
			continue
		}
		mut end := scheme_end
		for end < p.src.len {
			ch := p.src[end]
			if ch == ` ` || ch == `<` || ch == `>` || ch == `"` || ch == `\n` || ch == `\t` {
				break
			}
			end++
		}
		// Trailing sentence punctuation is not part of the URL.
		for end > scheme_end {
			last := p.src[end - 1]
			if last == `.` || last == `,` || last == `;` || last == `!` || last == `?` {
				end--
			} else {
				break
			}
		}
		if end == scheme_end {
			// Scheme with no target: treat as ordinary text.
			return none
		}
		url := p.src[p.pos..end]
		mut node := new_node(.autolink)
		node.literal = url
		node.dest = url
		p.pos = end
		return node
	}
	return none
}
910