Gitly


1 /*
2  * MD4C: Markdown parser for C
3  * (http://github.com/mity/md4c)
4  *
5  * Copyright (c) 2016-2020 Martin Mitas
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "md4c.h"
27 
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 
34 /*****************************
35  ***  Miscellaneous Stuff  ***
36  *****************************/
37 
/* The "inline" keyword only became part of the language with C99
 * (__STDC_VERSION__ == 199901L). For anything older (C89/C90, C94, or
 * compilers which do not define __STDC_VERSION__ at all), map it to the
 * compiler-specific spelling or, as the last resort, to nothing. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
48 
/* Default to UTF-8 unless the build explicitly picked an encoding. */
#if !(defined MD4C_USE_ASCII || defined MD4C_USE_UTF8 || defined MD4C_USE_UTF16)
    #define MD4C_USE_UTF8
#endif
53 
/* _T() wraps character and string literals so that they become wide
 * literals in MD4C_USE_UTF16 builds and stay as they are otherwise.
 * Any earlier definition of _T is dropped first (#undef of a name which
 * is not defined is harmless). */
#undef _T
#ifdef MD4C_USE_UTF16
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif
63 
/* Misc. macros. */

/* Number of elements of a true array (not of a pointer). The argument is
 * parenthesized so that any array-valued expression can be passed in. */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Two-level stringification: STRINGIZE() macro-expands its argument before
 * turning it into a string literal (so STRINGIZE(__LINE__) yields the line
 * number rather than "__LINE__"). */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

/* Boolean constants, unless something else has provided them already. */
#ifndef TRUE
    #define TRUE            1
    #define FALSE           0
#endif
74 
/* Pass a diagnostic message to the debug_log() callback of the parser,
 * if the callback is set; otherwise do nothing.
 *
 * The macro expects a variable named "ctx" to be in scope at the point of
 * use; the callback also receives ctx->userdata. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log != NULL)                               \
            ctx->parser.debug_log((msg), ctx->userdata);                \
    } while(0)
80 
/* MD_ASSERT(cond) and MD_UNREACHABLE().
 *
 *  -- With DEBUG defined, a failed assertion is reported through MD_LOG()
 *     (hence "ctx" has to be in scope) and the process is terminated with
 *     exit(1).
 *  -- Without DEBUG, the condition is only used as an optimization hint
 *     (__builtin_unreachable() or __assume()) where the compiler provides
 *     one; otherwise both macros expand to an empty statement. */
#ifdef DEBUG
    #define MD_ASSERT(cond)                                             \
            do {                                                        \
                if(!(cond)) {                                           \
                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
                           "Assertion '" STRINGIZE(cond) "' failed.");  \
                    exit(1);                                            \
                }                                                       \
            } while(0)

    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
#else
    /* GCC-compatible compilers, with Tiny C Compiler explicitly excluded. */
    #if defined __GNUC__ && !defined __TINYC__
        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
    #elif defined _MSC_VER  &&  _MSC_VER > 120
        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
    #else
        /* No known hint available: the condition is not evaluated at all. */
        #define MD_ASSERT(cond)     do {} while(0)
        #define MD_UNREACHABLE()    do {} while(0)
    #endif
#endif
104 
/* Marks an intentional fall-through between case labels of a switch.
 * Expands to the fallthrough attribute on clang >= 12 and on GCC >= 7,
 * and to a no-op expression everywhere else. */
#if (defined __clang__ && __clang_major__ >= 12)  ||  (defined __GNUC__ && __GNUC__ >= 7)
    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
#else
    #define MD_FALLTHROUGH()        ((void)0)
#endif
113 
/* Suppress "unused parameter" warnings. The argument is parenthesized so
 * that the cast to void applies to the whole expression passed in. */
#define MD_UNUSED(x)                ((void)(x))
116 
117 
118 /************************
119  ***  Internal Types  ***
120  ************************/
121 
/* These are omnipresent so let's save some typing: short aliases for the
 * MD_CHAR, MD_SIZE and MD_OFFSET types. */
#define CHAR    MD_CHAR
#define SZ      MD_SIZE
#define OFF     MD_OFFSET

/* Forward declarations of structures which are only referenced through
 * pointers in this part of the file. */
typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_REF_DEF_tag MD_REF_DEF;
131 
132 
/* During the analysis of inline marks, we need to manage some "mark chains"
 * of (yet unresolved) openers. This structure holds the start and the end of
 * one such chain. The chain internals are then realized through
 * MD_MARK::prev and ::next.
 */
typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
struct MD_MARKCHAIN_tag {
    int head;   /* Index of first mark in the chain, or -1 if empty. */
    int tail;   /* Index of last mark in the chain, or -1 if empty. */
};
142 
/* Context propagated through all the parsing.
 *
 * Many helper macros in this file (CH(), MD_LOG(), MD_TEXT(), the chain
 * accessors below, ...) refer to a variable named "ctx" of type MD_CTX*,
 * so functions using them have to have one in scope. */
typedef struct MD_CTX_tag MD_CTX;
struct MD_CTX_tag {
    /* Immutable stuff (parameters of md_parse()). */
    const CHAR* text;
    SZ size;
    MD_PARSER parser;
    void* userdata;

    /* When this is true, it allows some optimizations. */
    int doc_ends_with_newline;

    /* Helper temporary growing buffer.
     * (Grown by MD_TEMP_BUFFER(); alloc_buffer is the size which has been
     * passed to realloc().) */
    CHAR* buffer;
    unsigned alloc_buffer;

    /* Reference definitions. */
    MD_REF_DEF* ref_defs;
    int n_ref_defs;
    int alloc_ref_defs;
    void** ref_def_hashtable;
    int ref_def_hashtable_size;

    /* Stack of inline/span markers.
     * This is only used for parsing a single block contents but by storing it
     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
     * (re)allocations. */
    MD_MARK* marks;
    int n_marks;
    int alloc_marks;

    /* Per-character lookup table; 128 entries in UTF-16 builds, 256 entries
     * otherwise. */
#if defined MD4C_USE_UTF16
    char mark_char_map[128];
#else
    char mark_char_map[256];
#endif

    /* For resolving of inline spans.
     * The macros below give names to the individual entries of
     * mark_chains[]; they expect "ctx" to be in scope. */
    MD_MARKCHAIN mark_chains[13];
#define PTR_CHAIN                               (ctx->mark_chains[0])
#define TABLECELLBOUNDARIES                     (ctx->mark_chains[1])
#define ASTERISK_OPENERS_extraword_mod3_0       (ctx->mark_chains[2])
#define ASTERISK_OPENERS_extraword_mod3_1       (ctx->mark_chains[3])
#define ASTERISK_OPENERS_extraword_mod3_2       (ctx->mark_chains[4])
#define ASTERISK_OPENERS_intraword_mod3_0       (ctx->mark_chains[5])
#define ASTERISK_OPENERS_intraword_mod3_1       (ctx->mark_chains[6])
#define ASTERISK_OPENERS_intraword_mod3_2       (ctx->mark_chains[7])
#define UNDERSCORE_OPENERS                      (ctx->mark_chains[8])
#define TILDE_OPENERS_1                         (ctx->mark_chains[9])
#define TILDE_OPENERS_2                         (ctx->mark_chains[10])
#define BRACKET_OPENERS                         (ctx->mark_chains[11])
#define DOLLAR_OPENERS                          (ctx->mark_chains[12])
    /* Index range 1..12 of mark_chains[]; PTR_CHAIN (index 0) lies outside
     * of this range. */
#define OPENERS_CHAIN_FIRST                     1
#define OPENERS_CHAIN_LAST                      12

    int n_table_cell_boundaries;

    /* For resolving links. */
    int unresolved_link_head;
    int unresolved_link_tail;

    /* For resolving raw HTML. */
    OFF html_comment_horizon;
    OFF html_proc_instr_horizon;
    OFF html_decl_horizon;
    OFF html_cdata_horizon;

    /* For block analysis.
     * Notes:
     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
     *      instead of MD_LINE(s).
     */
    void* block_bytes;
    MD_BLOCK* current_block;
    int n_block_bytes;
    int alloc_block_bytes;

    /* For container block analysis. */
    MD_CONTAINER* containers;
    int n_containers;
    int alloc_containers;

    /* Minimal indentation to call the block "indented code block". */
    unsigned code_indent_offset;

    /* Contextual info for line analysis. */
    SZ code_fence_length;   /* For checking closing fence length. */
    int html_block_type;    /* For checking closing raw HTML condition. */
    int last_line_has_list_loosening_effect;
    int last_list_item_starts_with_two_blank_lines;
};
236 
/* Line types; stored in MD_LINE_ANALYSIS::type. */
enum MD_LINETYPE_tag {
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
};
typedef enum MD_LINETYPE_tag MD_LINETYPE;
251 
/* Description of a single analyzed line: its type, a type-dependent
 * auxiliary value, the offsets delimiting it and its indentation.
 *
 * Note the type and data are packed together as two 16-bit bit-fields. */
typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
struct MD_LINE_ANALYSIS_tag {
    MD_LINETYPE type    : 16;
    unsigned data       : 16;
    OFF beg;
    OFF end;
    unsigned indent;        /* Indentation level. */
};
260 
/* A line as a pair of offsets. md_lookup_line() treats both beg and end
 * as belonging to the line. */
typedef struct MD_LINE_tag MD_LINE;
struct MD_LINE_tag {
    OFF beg;
    OFF end;
};
266 
/* Line variant used for MD_BLOCK_HTML and MD_BLOCK_CODE blocks; it carries
 * an indentation on top of what MD_LINE has. */
typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
struct MD_VERBATIMLINE_tag {
    OFF beg;
    OFF end;
    OFF indent;
};
273 
274 
275 /*****************
276  ***  Helpers  ***
277  *****************/
278 
/* Character accessors.
 * Both expect "ctx" to be in scope and take an offset into ctx->text. */
#define CH(off)                 (ctx->text[(off)])
#define STR(off)                (ctx->text + (off))

/* Character classification.
 * Note we assume ASCII compatibility of code points < 128 here.
 *
 * The macros with the trailing underscore take a character value. The
 * argument is cast to unsigned before any range check, so negative values
 * of a signed character type never match an ASCII range. Most of the macros
 * evaluate their argument more than once.
 *
 * ISANYOF_() rejects the zero character explicitly because the underlying
 * strchr()/wcschr() would otherwise match the palette's terminator. */
#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))

/* The same checks applied to the character at the given offset of the
 * document, i.e. to CH(off). */
#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
#define ISASCII(off)                    ISASCII_(CH(off))
#define ISBLANK(off)                    ISBLANK_(CH(off))
#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
#define ISCNTRL(off)                    ISCNTRL_(CH(off))
#define ISPUNCT(off)                    ISPUNCT_(CH(off))
#define ISUPPER(off)                    ISUPPER_(CH(off))
#define ISLOWER(off)                    ISLOWER_(CH(off))
#define ISALPHA(off)                    ISALPHA_(CH(off))
#define ISDIGIT(off)                    ISDIGIT_(CH(off))
#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
#define ISALNUM(off)                    ISALNUM_(CH(off))
317 
318 
/* strchr() flavor matching the character type in use: plain strchr()
 * unless this is a UTF-16 build, which needs the wide variant. */
#ifndef MD4C_USE_UTF16
    #define md_strchr strchr
#else
    #define md_strchr wcschr
#endif
324 
325 
326 /* Case insensitive check of string equality. */
327 static inline int
328 md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
329 {
330     OFF i;
331     for(i = 0; i < n; i++) {
332         CHAR ch1 = s1[i];
333         CHAR ch2 = s2[i];
334 
335         if(ISLOWER_(ch1))
336             ch1 += ('A'-'a');
337         if(ISLOWER_(ch2))
338             ch2 += ('A'-'a');
339         if(ch1 != ch2)
340             return FALSE;
341     }
342     return TRUE;
343 }
344 
345 static inline int
346 md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
347 {
348     return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
349 }
350 
351 static int
352 md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
353 {
354     OFF off = 0;
355     int ret = 0;
356 
357     while(1) {
358         while(off < size  &&  str[off] != _T('\0'))
359             off++;
360 
361         if(off > 0) {
362             ret = ctx->parser.text(type, str, off, ctx->userdata);
363             if(ret != 0)
364                 return ret;
365 
366             str += off;
367             size -= off;
368             off = 0;
369         }
370 
371         if(off >= size)
372             return 0;
373 
374         ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
375         if(ret != 0)
376             return ret;
377         off++;
378     }
379 }
380 
381 
/* Evaluate the expression, store its value into "ret" and jump to the
 * "abort" label if the value is negative. (Non-negative values, including
 * positive ones, let the execution continue.)
 *
 * The enclosing function has to provide the variable "ret" and the label
 * "abort". */
#define MD_CHECK(func)                                                      \
    do {                                                                    \
        ret = (func);                                                       \
        if(ret < 0)                                                         \
            goto abort;                                                     \
    } while(0)
388 
389 
/* Make sure ctx->buffer has room for at least sz; the value is passed to
 * realloc() as is, i.e. it is a size in bytes.
 *
 * When the buffer has to grow, the new size is 1.5 times the request,
 * rounded up to a multiple of 128. On allocation failure, the problem is
 * logged, "ret" is set to -1 and the control jumps to the "abort" label;
 * the old buffer stays untouched in ctx->buffer.
 *
 * The enclosing function has to provide "ctx", "ret" and the "abort" label.
 * Note the argument is evaluated more than once. */
#define MD_TEMP_BUFFER(sz)                                                  \
    do {                                                                    \
        if((sz) > ctx->alloc_buffer) {                                      \
            CHAR* new_buffer;                                               \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
                                                                            \
            new_buffer = realloc(ctx->buffer, new_size);                    \
            if(new_buffer == NULL) {                                        \
                MD_LOG("realloc() failed.");                                \
                ret = -1;                                                   \
                goto abort;                                                 \
            }                                                               \
                                                                            \
            ctx->buffer = new_buffer;                                       \
            ctx->alloc_buffer = new_size;                                   \
        }                                                                   \
    } while(0)
407 
408 
/* Wrappers for calling the block and span callbacks of the parser.
 *
 * Each of them calls the respective callback with the given type, the
 * given argument and ctx->userdata, and stores the result into "ret". If
 * the callback returns anything else than zero, the fact is logged and the
 * control jumps to the "abort" label.
 *
 * The enclosing function has to provide "ctx", "ret" and the "abort"
 * label. */
#define MD_ENTER_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)
444 
/* Call the text() callback for the given string, unless the string is
 * empty (size == 0), in which case nothing happens.
 *
 * If the callback returns anything else than zero, the fact is logged and
 * the control jumps to the "abort" label with the value in "ret". The
 * enclosing function has to provide "ctx", "ret" and the "abort" label.
 *
 * Note the size argument is evaluated twice. */
#define MD_TEXT(type, str, size)                                            \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)
455 
/* Variant of MD_TEXT() for text which may contain NUL characters: the
 * string is passed through md_text_with_null_replacement() instead of
 * being handed to the text() callback directly. An empty string
 * (size == 0) results in no call at all.
 *
 * On a non-zero result, the fact is logged and the control jumps to the
 * "abort" label with the value in "ret". The enclosing function has to
 * provide "ctx", "ret" and the "abort" label.
 *
 * Note the size argument is evaluated twice. */
#define MD_TEXT_INSECURE(type, str, size)                                   \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)
466 
467 
468 /* If the offset falls into a gap between line, we return the following
469  * line. */
470 static const MD_LINE*
471 md_lookup_line(OFF off, const MD_LINE* lines, int n_lines)
472 {
473     int lo, hi;
474     int pivot;
475     const MD_LINE* line;
476 
477     lo = 0;
478     hi = n_lines - 1;
479     while(lo <= hi) {
480         pivot = (lo + hi) / 2;
481         line = &lines[pivot];
482 
483         if(off < line->beg) {
484             hi = pivot - 1;
485             if(hi < 0  ||  lines[hi].end <= off)
486                 return line;
487         } else if(off > line->end) {
488             lo = pivot + 1;
489         } else {
490             return line;
491         }
492     }
493 
494     return NULL;
495 }
496 
497 
498 /*************************
499  ***  Unicode Support  ***
500  *************************/
501 
/* Output structure of md_get_unicode_fold_info(): room for up to three
 * code points together with their count.
 * NOTE(review): n_codepoints presumably tells how many leading entries of
 * codepoints[] are valid -- confirm against md_get_unicode_fold_info(). */
typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
struct MD_UNICODE_FOLD_INFO_tag {
    unsigned codepoints[3];
    unsigned n_codepoints;
};
507 
508 
509 #if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
/* Look a code point up in a sorted "map" of code points.
 *
 * An entry of the map is either a single code point, or two consecutive
 * entries (MIN_CODEPOINT | 0x40000000), (MAX_CODEPOINT | 0x80000000) which
 * together describe an inclusive range of code points.
 *
 * Returns the index of the matching record (for a range, the index of its
 * MIN entry), or -1 if the code point is not present in the map. */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    const unsigned range_min_flag = 0x40000000;
    const unsigned range_max_flag = 0x80000000;
    const unsigned value_mask = 0x00ffffff;
    int lo = 0;
    int hi = (int) map_size - 1;

    while(lo <= hi) {
        const int mid = (lo + hi) / 2;
        int first = mid;
        int last = mid;

        /* The probed entry may be one half of a range; widen the probe
         * to cover the whole range then. */
        if(map[mid] & range_min_flag)
            last = mid + 1;
        if(map[mid] & range_max_flag)
            first = mid - 1;

        if(codepoint < (map[first] & value_mask)) {
            hi = first - 1;
        } else if(codepoint > (map[last] & value_mask)) {
            lo = last + 1;
        } else {
            return first;
        }
    }

    return -1;
}
542 
/* Check whether the code point is a whitespace.
 *
 * For the ASCII range (<= 0x7f), the answer is given by ISWHITESPACE_(),
 * i.e. space, tab, vertical tab and form feed. Anything above is looked up
 * in the table of the Unicode "Zs" category.
 *
 * Returns non-zero for whitespace, zero otherwise. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
#define S(cp)               (cp)
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static const unsigned WHITESPACE_MAP[] = {
        S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
    };
#undef S
#undef R

    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);

    /* The ASCII ones are the most frequently used ones, and the CommonMark
     * specification requests a few more in this range than the table has. */
    return ISWHITESPACE_(codepoint);
}
563 
/* Check whether the code point is a punctuation character.
 *
 * For the ASCII range (<= 0x7f), the answer is given by ISPUNCT_(), which
 * accepts more characters than the table does (e.g. 0x24 and 0x2b are not
 * in the table). Anything above is looked up in the table of the Unicode
 * punctuation categories.
 *
 * Returns non-zero for punctuation, zero otherwise. */
static int
md_is_unicode_punct__(unsigned codepoint)
{
#define S(cp)               (cp)
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
    /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
     * (generated by scripts/build_punct_map.py) */
    static const unsigned PUNCT_MAP[] = {
        R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
        R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
        S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
        S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
        R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
        R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
        R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
        R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
        R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
        R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
        R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
        R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
        R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
        R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
        R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
        S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
        R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
        S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
        S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
        R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
        R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
        S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
        R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
        R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
        R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
        R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
        R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
        R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
        S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
        R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
    };
#undef S
#undef R

    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);

    /* The ASCII ones are the most frequently used ones, and the CommonMark
     * specification requests a few more in this range than the table has. */
    return ISPUNCT_(codepoint);
}
613 
614     static void
615     md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
616     {
617 #define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
618 #define S(cp)               (cp)
        /* Unicode case folding maps.
         * (generated by scripts/build_folding_map.py) */
621         static const unsigned FOLD_MAP_1[] = {
622             R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
623             R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
624             S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
625             S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
626             R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
627             S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
628             S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
629             R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
630             S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
631             S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
632             S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
633             S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
634             R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
635             R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
636             S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
637             R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
638             R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
639             R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
640             S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
641             S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
642             R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
643             S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
644             S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
645             S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
646             R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
647             S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
648             R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
649             R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
650         };
651         static const unsigned FOLD_MAP_1_DATA[] = {
652             0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
653             0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
654             0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
655             0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
656             0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
657             0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
658             0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
659             0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
660             0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
661             0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
662             0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
663             0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
664             0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
665             0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
666             0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
667             0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
668             0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
669             0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
670             0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
671             0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
672             0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
673             0x1e943
674         };
675         static const unsigned FOLD_MAP_2[] = {
676             S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
677             S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
678             R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
679             S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
680             S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
681             S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
682         };
683         static const unsigned FOLD_MAP_2_DATA[] = {
684             0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
685             0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
686             0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
687             0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
688             0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
689             0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
690             0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
691             0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
692         };
693         static const unsigned FOLD_MAP_3[] = {
694             S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
695             S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
696         };
697         static const unsigned FOLD_MAP_3_DATA[] = {
698             0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
699             0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
700             0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
701             0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
702         };
703 #undef R
704 #undef S
705         static const struct {
706             const unsigned* map;
707             const unsigned* data;
708             size_t map_size;
709             unsigned n_codepoints;
710         } FOLD_MAP_LIST[] = {
711             { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
712             { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
713             { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
714         };
715 
716         int i;
717 
718         /* Fast path for ASCII characters. */
719         if(codepoint <= 0x7f) {
720             info->codepoints[0] = codepoint;
721             if(ISUPPER_(codepoint))
722                 info->codepoints[0] += 'a' - 'A';
723             info->n_codepoints = 1;
724             return;
725         }
726 
727         /* Try to locate the codepoint in any of the maps. */
728         for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
729             int index;
730 
731             index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
732             if(index >= 0) {
733                 /* Found the mapping. */
734                 unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
735                 const unsigned* map = FOLD_MAP_LIST[i].map;
736                 const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
737 
738                 memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
739                 info->n_codepoints = n_codepoints;
740 
741                 if(FOLD_MAP_LIST[i].map[index] != codepoint) {
742                     /* The found mapping maps whole range of codepoints,
743                      * i.e. we have to offset info->codepoints[0] accordingly. */
744                     if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
745                         /* Alternating type of the range. */
746                         info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
747                     } else {
748                         /* Range to range kind of mapping. */
749                         info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
750                     }
751                 }
752 
753                 return;
754             }
755         }
756 
757         /* No mapping found. Map the codepoint to itself. */
758         info->codepoints[0] = codepoint;
759         info->n_codepoints = 1;
760     }
761 #endif
762 
763 
764 #if defined MD4C_USE_UTF16
    /* UTF-16 surrogate helpers. A code unit is classified by its top six
     * bits: 0xd800..0xdbff is a high (leading) surrogate, 0xdc00..0xdfff is
     * a low (trailing) surrogate. */
    #define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
    #define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
    /* Combine the low 10 bits of both surrogates and add 0x10000 to get the
     * codepoint encoded by the pair. */
    #define UTF16_DECODE_SURROGATE(hi, lo)  (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
768 
769     static unsigned
770     md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
771     {
772         if(IS_UTF16_SURROGATE_HI(str[0])) {
773             if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
774                 if(p_size != NULL)
775                     *p_size = 2;
776                 return UTF16_DECODE_SURROGATE(str[0], str[1]);
777             }
778         }
779 
780         if(p_size != NULL)
781             *p_size = 1;
782         return str[0];
783     }
784 
785     static unsigned
786     md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
787     {
788         if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
789             return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
790 
791         return CH(off);
792     }
793 
794     /* No whitespace uses surrogates, so no decoding needed here. */
    /* Unicode character class tests for the UTF-16 build. The variants taking
     * 'off' inspect the document text and expect a variable 'ctx' in scope. */

    /* No whitespace uses surrogates, so no decoding needed here. */
    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))

    /* Punctuation tests decode a (possibly surrogate-encoded) codepoint first. */
    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
801 
802     static inline int
803     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
804     {
805         return md_decode_utf16le__(str+off, str_size-off, p_char_size);
806     }
807 #elif defined MD4C_USE_UTF8
    /* Classification of a single UTF-8 byte by its leading bits:
     * LEAD1 is 0xxxxxxx (a complete one-byte character), LEAD2/LEAD3/LEAD4
     * are 110xxxxx/1110xxxx/11110xxx (first byte of a 2/3/4-byte sequence)
     * and TAIL is 10xxxxxx (a continuation byte). */
    #define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
    #define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
    #define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
    #define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
    #define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
813 
814     static unsigned
815     md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
816     {
817         if(!IS_UTF8_LEAD1(str[0])) {
818             if(IS_UTF8_LEAD2(str[0])) {
819                 if(1 < str_size && IS_UTF8_TAIL(str[1])) {
820                     if(p_size != NULL)
821                         *p_size = 2;
822 
823                     return (((unsigned int)str[0] & 0x1f) << 6) |
824                            (((unsigned int)str[1] & 0x3f) << 0);
825                 }
826             } else if(IS_UTF8_LEAD3(str[0])) {
827                 if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
828                     if(p_size != NULL)
829                         *p_size = 3;
830 
831                     return (((unsigned int)str[0] & 0x0f) << 12) |
832                            (((unsigned int)str[1] & 0x3f) << 6) |
833                            (((unsigned int)str[2] & 0x3f) << 0);
834                 }
835             } else if(IS_UTF8_LEAD4(str[0])) {
836                 if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
837                     if(p_size != NULL)
838                         *p_size = 4;
839 
840                     return (((unsigned int)str[0] & 0x07) << 18) |
841                            (((unsigned int)str[1] & 0x3f) << 12) |
842                            (((unsigned int)str[2] & 0x3f) << 6) |
843                            (((unsigned int)str[3] & 0x3f) << 0);
844                 }
845             }
846         }
847 
848         if(p_size != NULL)
849             *p_size = 1;
850         return (unsigned) str[0];
851     }
852 
853     static unsigned
854     md_decode_utf8_before__(MD_CTX* ctx, OFF off)
855     {
856         if(!IS_UTF8_LEAD1(CH(off-1))) {
857             if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
858                 return (((unsigned int)CH(off-2) & 0x1f) << 6) |
859                        (((unsigned int)CH(off-1) & 0x3f) << 0);
860 
861             if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
862                 return (((unsigned int)CH(off-3) & 0x0f) << 12) |
863                        (((unsigned int)CH(off-2) & 0x3f) << 6) |
864                        (((unsigned int)CH(off-1) & 0x3f) << 0);
865 
866             if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
867                 return (((unsigned int)CH(off-4) & 0x07) << 18) |
868                        (((unsigned int)CH(off-3) & 0x3f) << 12) |
869                        (((unsigned int)CH(off-2) & 0x3f) << 6) |
870                        (((unsigned int)CH(off-1) & 0x3f) << 0);
871         }
872 
873         return (unsigned) CH(off-1);
874     }
875 
    /* Unicode character class tests for the UTF-8 build. The variants taking
     * 'off' decode the codepoint at (or right before) that offset of the
     * document text and expect a variable 'ctx' in scope. */
    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))

    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
882 
883     static inline unsigned
884     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
885     {
886         return md_decode_utf8__(str+off, str_size-off, p_char_size);
887     }
888 #else
    /* Build without Unicode support: the Unicode character class tests are
     * just aliases of their plain counterparts. */
    #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
    #define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
    #define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)

    #define ISUNICODEPUNCT(off)             ISPUNCT(off)
    #define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
895 
896     static inline void
897     md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
898     {
899         info->codepoints[0] = codepoint;
900         if(ISUPPER_(codepoint))
901             info->codepoints[0] += 'a' - 'A';
902         info->n_codepoints = 1;
903     }
904 
905     static inline unsigned
906     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
907     {
908         *p_size = 1;
909         return (unsigned) str[off];
910     }
911 #endif
912 
913 
914 /*************************************
915  ***  Helper string manipulations  ***
916  *************************************/
917 
918 /* Fill buffer with copy of the string between 'beg' and 'end' but replace any
919  * line breaks with given replacement character.
920  *
921  * NOTE: Caller is responsible to make sure the buffer is large enough.
922  * (Given the output is always shorter then input, (end - beg) is good idea
923  * what the caller should allocate.)
924  */
925 static void
926 md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
927                CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
928 {
929     CHAR* ptr = buffer;
930     int line_index = 0;
931     OFF off = beg;
932 
933     MD_UNUSED(n_lines);
934 
935     while(1) {
936         const MD_LINE* line = &lines[line_index];
937         OFF line_end = line->end;
938         if(end < line_end)
939             line_end = end;
940 
941         while(off < line_end) {
942             *ptr = CH(off);
943             ptr++;
944             off++;
945         }
946 
947         if(off >= end) {
948             *p_size = (MD_SIZE)(ptr - buffer);
949             return;
950         }
951 
952         *ptr = line_break_replacement_char;
953         ptr++;
954 
955         line_index++;
956         off = lines[line_index].beg;
957     }
958 }
959 
960 /* Wrapper of md_merge_lines() which allocates new buffer for the output string.
961  */
962 static int
963 md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
964                     CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
965 {
966     CHAR* buffer;
967 
968     buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
969     if(buffer == NULL) {
970         MD_LOG("malloc() failed.");
971         return -1;
972     }
973 
974     md_merge_lines(ctx, beg, end, lines, n_lines,
975                 line_break_replacement_char, buffer, p_size);
976 
977     *p_str = buffer;
978     return 0;
979 }
980 
981 static OFF
982 md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
983 {
984     SZ char_size;
985     unsigned codepoint;
986 
987     while(off < size) {
988         codepoint = md_decode_unicode(label, off, size, &char_size);
989         if(!ISUNICODEWHITESPACE_(codepoint)  &&  !ISNEWLINE_(label[off]))
990             break;
991         off += char_size;
992     }
993 
994     return off;
995 }
996 
997 
998 /******************************
999  ***  Recognizing raw HTML  ***
1000  ******************************/
1001 
/* Check whether an HTML tag (opening, self-closing or closing one) starts
 * at the offset 'beg', where the '<' character has to be.
 *
 * md_is_html_tag() may be called when processing inlines (inline raw HTML)
 * or when breaking document to blocks (checking for start of HTML block type 7).
 *
 * When breaking document to blocks, we do not yet know line boundaries, but
 * in that case the whole tag has to live on a single line. We distinguish this
 * by n_lines == 0.
 *
 * On success, TRUE is returned and '*p_end' is set to the offset just after
 * the final '>'. The final '>' has to be located before 'max_end', otherwise
 * FALSE is returned.
 */
static int
md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    int attr_state;
    OFF off = beg;
    OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
    int i = 0;      /* Index of the line currently being scanned. */

    MD_ASSERT(CH(beg) == _T('<'));

    /* At least one more character has to follow the '<' on the line. */
    if(off + 1 >= line_end)
        return FALSE;
    off++;

    /* For parsing attributes, we need a little state automaton below.
     * State -1: no attributes are allowed.
     * State 0: attribute could follow after some whitespace.
     * State 1: after a whitespace (attribute name may follow).
     * State 2: after attribute name ('=' MAY follow).
     * State 3: after '=' (value specification MUST follow).
     * State 41: in middle of unquoted attribute value.
     * State 42: in middle of single-quoted attribute value.
     * State 43: in middle of double-quoted attribute value.
     */
    attr_state = 0;

    if(CH(off) == _T('/')) {
        /* Closer tag "</ ... >". No attributes may be present. */
        attr_state = -1;
        off++;
    }

    /* Tag name */
    if(off >= line_end  ||  !ISALPHA(off))
        return FALSE;
    off++;
    while(off < line_end  &&  (ISALNUM(off)  ||  CH(off) == _T('-')))
        off++;

    /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
     * and final '>'. */
    while(1) {
        /* Process the rest of the current line. */
        while(off < line_end  &&  !ISNEWLINE(off)) {
            if(attr_state > 40) {
                /* Inside an attribute value: look only for its terminator. */
                if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
                    attr_state = 0;
                    off--;  /* Put the char back for re-inspection in the new state. */
                } else if(attr_state == 42 && CH(off) == _T('\'')) {
                    attr_state = 0;
                } else if(attr_state == 43 && CH(off) == _T('"')) {
                    attr_state = 0;
                }
                off++;
            } else if(ISWHITESPACE(off)) {
                if(attr_state == 0)
                    attr_state = 1;
                off++;
            } else if(attr_state <= 2 && CH(off) == _T('>')) {
                /* End. */
                goto done;
            } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
                /* End with digraph '/>' */
                off++;
                goto done;
            } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
                off++;
                /* Attribute name */
                while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
                    off++;
                attr_state = 2;
            } else if(attr_state == 2 && CH(off) == _T('=')) {
                /* Attribute assignment sign */
                off++;
                attr_state = 3;
            } else if(attr_state == 3) {
                /* Expecting start of attribute value. */
                if(CH(off) == _T('"'))
                    attr_state = 43;
                else if(CH(off) == _T('\''))
                    attr_state = 42;
                else if(!ISANYOF(off, _T("\"'=<>`"))  &&  !ISNEWLINE(off))
                    attr_state = 41;
                else
                    return FALSE;
                off++;
            } else {
                /* Anything unexpected. */
                return FALSE;
            }
        }

        /* We have to be on a single line. See definition of start condition
         * of HTML block, type 7. */
        if(n_lines == 0)
            return FALSE;

        /* The tag is not complete yet; continue on the next line, if any. */
        i++;
        if(i >= n_lines)
            return FALSE;

        off = lines[i].beg;
        line_end = lines[i].end;

        /* The line break counts as a whitespace: it allows an attribute name
         * to follow and it terminates an unquoted attribute value. */
        if(attr_state == 0  ||  attr_state == 41)
            attr_state = 1;

        if(off >= max_end)
            return FALSE;
    }

done:
    /* Here 'off' points to the final '>'. */
    if(off >= max_end)
        return FALSE;

    *p_end = off+1;
    return TRUE;
}
1126 
1127 static int
1128 md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1129                         const MD_LINE* lines, int n_lines,
1130                         OFF beg, OFF max_end, OFF* p_end,
1131                         OFF* p_scan_horizon)
1132 {
1133     OFF off = beg;
1134     int i = 0;
1135 
1136     if(off < *p_scan_horizon  &&  *p_scan_horizon >= max_end - len) {
1137         /* We have already scanned the range up to the max_end so we know
1138          * there is nothing to see. */
1139         return FALSE;
1140     }
1141 
1142     while(TRUE) {
1143         while(off + len <= lines[i].end  &&  off + len <= max_end) {
1144             if(md_ascii_eq(STR(off), str, len)) {
1145                 /* Success. */
1146                 *p_end = off + len;
1147                 return TRUE;
1148             }
1149             off++;
1150         }
1151 
1152         i++;
1153         if(off >= max_end  ||  i >= n_lines) {
1154             /* Failure. */
1155             *p_scan_horizon = off;
1156             return FALSE;
1157         }
1158 
1159         off = lines[i].beg;
1160     }
1161 }
1162 
1163 static int
1164 md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1165 {
1166     OFF off = beg;
1167 
1168     MD_ASSERT(CH(beg) == _T('<'));
1169 
1170     if(off + 4 >= lines[0].end)
1171         return FALSE;
1172     if(CH(off+1) != _T('!')  ||  CH(off+2) != _T('-')  ||  CH(off+3) != _T('-'))
1173         return FALSE;
1174     off += 4;
1175 
1176     /* ">" and "->" must not follow the opening. */
1177     if(off < lines[0].end  &&  CH(off) == _T('>'))
1178         return FALSE;
1179     if(off+1 < lines[0].end  &&  CH(off) == _T('-')  &&  CH(off+1) == _T('>'))
1180         return FALSE;
1181 
1182     /* HTML comment must not contain "--", so we scan just for "--" instead
1183      * of "-->" and verify manually that '>' follows. */
1184     if(md_scan_for_html_closer(ctx, _T("--"), 2,
1185                 lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
1186     {
1187         if(*p_end < max_end  &&  CH(*p_end) == _T('>')) {
1188             *p_end = *p_end + 1;
1189             return TRUE;
1190         }
1191     }
1192 
1193     return FALSE;
1194 }
1195 
1196 static int
1197 md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1198 {
1199     OFF off = beg;
1200 
1201     if(off + 2 >= lines[0].end)
1202         return FALSE;
1203     if(CH(off+1) != _T('?'))
1204         return FALSE;
1205     off += 2;
1206 
1207     return md_scan_for_html_closer(ctx, _T("?>"), 2,
1208                 lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1209 }
1210 
1211 static int
1212 md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1213 {
1214     OFF off = beg;
1215 
1216     if(off + 2 >= lines[0].end)
1217         return FALSE;
1218     if(CH(off+1) != _T('!'))
1219         return FALSE;
1220     off += 2;
1221 
1222     /* Declaration name. */
1223     if(off >= lines[0].end  ||  !ISALPHA(off))
1224         return FALSE;
1225     off++;
1226     while(off < lines[0].end  &&  ISALPHA(off))
1227         off++;
1228     if(off < lines[0].end  &&  !ISWHITESPACE(off))
1229         return FALSE;
1230 
1231     return md_scan_for_html_closer(ctx, _T(">"), 1,
1232                 lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1233 }
1234 
1235 static int
1236 md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1237 {
1238     static const CHAR open_str[] = _T("<![CDATA[");
1239     static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1240 
1241     OFF off = beg;
1242 
1243     if(off + open_size >= lines[0].end)
1244         return FALSE;
1245     if(memcmp(STR(off), open_str, open_size) != 0)
1246         return FALSE;
1247     off += open_size;
1248 
1249     if(lines[n_lines-1].end < max_end)
1250         max_end = lines[n_lines-1].end - 2;
1251 
1252     return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1253                 lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1254 }
1255 
1256 static int
1257 md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1258 {
1259     MD_ASSERT(CH(beg) == _T('<'));
1260     return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end)  ||
1261             md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end)  ||
1262             md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end)  ||
1263             md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end)  ||
1264             md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1265 }
1266 
1267 
1268 /****************************
1269  ***  Recognizing Entity  ***
1270  ****************************/
1271 
1272 static int
1273 md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1274 {
1275     OFF off = beg;
1276     MD_UNUSED(ctx);
1277 
1278     while(off < max_end  &&  ISXDIGIT_(text[off])  &&  off - beg <= 8)
1279         off++;
1280 
1281     if(1 <= off - beg  &&  off - beg <= 6) {
1282         *p_end = off;
1283         return TRUE;
1284     } else {
1285         return FALSE;
1286     }
1287 }
1288 
1289 static int
1290 md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1291 {
1292     OFF off = beg;
1293     MD_UNUSED(ctx);
1294 
1295     while(off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8)
1296         off++;
1297 
1298     if(1 <= off - beg  &&  off - beg <= 7) {
1299         *p_end = off;
1300         return TRUE;
1301     } else {
1302         return FALSE;
1303     }
1304 }
1305 
1306 static int
1307 md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1308 {
1309     OFF off = beg;
1310     MD_UNUSED(ctx);
1311 
1312     if(off < max_end  &&  ISALPHA_(text[off]))
1313         off++;
1314     else
1315         return FALSE;
1316 
1317     while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
1318         off++;
1319 
1320     if(2 <= off - beg  &&  off - beg <= 48) {
1321         *p_end = off;
1322         return TRUE;
1323     } else {
1324         return FALSE;
1325     }
1326 }
1327 
1328 static int
1329 md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1330 {
1331     int is_contents;
1332     OFF off = beg;
1333 
1334     MD_ASSERT(text[off] == _T('&'));
1335     off++;
1336 
1337     if(off+2 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X')))
1338         is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1339     else if(off+1 < max_end  &&  text[off] == _T('#'))
1340         is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1341     else
1342         is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1343 
1344     if(is_contents  &&  off < max_end  &&  text[off] == _T(';')) {
1345         *p_end = off+1;
1346         return TRUE;
1347     } else {
1348         return FALSE;
1349     }
1350 }
1351 
1352 static inline int
1353 md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1354 {
1355     return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1356 }
1357 
1358 
1359 /******************************
1360  ***  Attribute Management  ***
1361  ******************************/
1362 
/* Working data used by md_build_attribute() while it builds an MD_ATTRIBUTE;
 * to be released with md_free_attribute().
 */
typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
struct MD_ATTRIBUTE_BUILD_tag {
    CHAR* text;                     /* Attribute text (a malloc()-ed copy, or the raw text itself in the trivial case). */
    MD_TEXTTYPE* substr_types;      /* Type of each substring. */
    OFF* substr_offsets;            /* Start offset of each substring, plus one final offset (the text size). */
    int substr_count;               /* Count of substrings recorded so far. */
    int substr_alloc;               /* Allocated capacity of the arrays; zero in the trivial case. */
    MD_TEXTTYPE trivial_types[1];   /* Storage used instead of heap in the trivial (single substring) case. */
    OFF trivial_offsets[2];         /* Ditto, for the offsets. */
};


/* Flag for md_build_attribute(): do not resolve backslash escapes. */
#define MD_BUILD_ATTR_NO_ESCAPES    0x0001
1376 
1377 static int
1378 md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1379                             MD_TEXTTYPE type, OFF off)
1380 {
1381     if(build->substr_count >= build->substr_alloc) {
1382         MD_TEXTTYPE* new_substr_types;
1383         OFF* new_substr_offsets;
1384 
1385         build->substr_alloc = (build->substr_alloc > 0
1386                 ? build->substr_alloc + build->substr_alloc / 2
1387                 : 8);
1388         new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1389                                     build->substr_alloc * sizeof(MD_TEXTTYPE));
1390         if(new_substr_types == NULL) {
1391             MD_LOG("realloc() failed.");
1392             return -1;
1393         }
1394         /* Note +1 to reserve space for final offset (== raw_size). */
1395         new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1396                                     (build->substr_alloc+1) * sizeof(OFF));
1397         if(new_substr_offsets == NULL) {
1398             MD_LOG("realloc() failed.");
1399             free(new_substr_types);
1400             return -1;
1401         }
1402 
1403         build->substr_types = new_substr_types;
1404         build->substr_offsets = new_substr_offsets;
1405     }
1406 
1407     build->substr_types[build->substr_count] = type;
1408     build->substr_offsets[build->substr_count] = off;
1409     build->substr_count++;
1410     return 0;
1411 }
1412 
1413 static void
1414 md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1415 {
1416     MD_UNUSED(ctx);
1417 
1418     if(build->substr_alloc > 0) {
1419         free(build->text);
1420         free(build->substr_types);
1421         free(build->substr_offsets);
1422     }
1423 }
1424 
1425 static int
1426 md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1427                    unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1428 {
1429     OFF raw_off, off;
1430     int is_trivial;
1431     int ret = 0;
1432 
1433     memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
1434 
1435     /* If there is no backslash and no ampersand, build trivial attribute
1436      * without any malloc(). */
1437     is_trivial = TRUE;
1438     for(raw_off = 0; raw_off < raw_size; raw_off++) {
1439         if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
1440             is_trivial = FALSE;
1441             break;
1442         }
1443     }
1444 
1445     if(is_trivial) {
1446         build->text = (CHAR*) (raw_size ? raw_text : NULL);
1447         build->substr_types = build->trivial_types;
1448         build->substr_offsets = build->trivial_offsets;
1449         build->substr_count = 1;
1450         build->substr_alloc = 0;
1451         build->trivial_types[0] = MD_TEXT_NORMAL;
1452         build->trivial_offsets[0] = 0;
1453         build->trivial_offsets[1] = raw_size;
1454         off = raw_size;
1455     } else {
1456         build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
1457         if(build->text == NULL) {
1458             MD_LOG("malloc() failed.");
1459             goto abort;
1460         }
1461 
1462         raw_off = 0;
1463         off = 0;
1464 
1465         while(raw_off < raw_size) {
1466             if(raw_text[raw_off] == _T('\0')) {
1467                 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1468                 memcpy(build->text + off, raw_text + raw_off, 1);
1469                 off++;
1470                 raw_off++;
1471                 continue;
1472             }
1473 
1474             if(raw_text[raw_off] == _T('&')) {
1475                 OFF ent_end;
1476 
1477                 if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1478                     MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1479                     memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1480                     off += ent_end - raw_off;
1481                     raw_off = ent_end;
1482                     continue;
1483                 }
1484             }
1485 
1486             if(build->substr_count == 0  ||  build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
1487                 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1488 
1489             if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
1490                raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
1491                (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
1492                 raw_off++;
1493 
1494             build->text[off++] = raw_text[raw_off++];
1495         }
1496         build->substr_offsets[build->substr_count] = off;
1497     }
1498 
1499     attr->text = build->text;
1500     attr->size = off;
1501     attr->substr_offsets = build->substr_offsets;
1502     attr->substr_types = build->substr_types;
1503     return 0;
1504 
1505 abort:
1506     md_free_attribute(ctx, build);
1507     return -1;
1508 }
1509 
1510 
1511 /*********************************************
1512  ***  Dictionary of Reference Definitions  ***
1513  *********************************************/
1514 
/* Parameters of the 32-bit FNV-1a hash: initial value and multiplier. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Fold n bytes at data into the running FNV-1a hash value base and return
 * the updated hash. Pass MD_FNV1A_BASE to start a new hash, or a previous
 * result to continue hashing more data. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* p = (const unsigned char*) data;
    unsigned h = base;

    while(n-- > 0) {
        h ^= *p++;
        h *= MD_FNV1A_PRIME;
    }

    return h;
}
1532 
1533 
/* One link reference definition remembered for resolving reference links. */
struct MD_REF_DEF_tag {
    /* Label text; either points into the document or is a heap buffer
     * (see label_needs_free). */
    CHAR* label;
    /* Title text; either points into the document or is a heap buffer
     * (see title_needs_free). */
    CHAR* title;
    /* Hash of the label, filled in when the hash table is built. */
    unsigned hash;
    SZ label_size;
    SZ title_size;
    /* Offsets delimiting the link destination in the document. */
    OFF dest_beg;
    OFF dest_end;
    /* Set when label/title were allocated and have to be free()d. */
    unsigned char label_needs_free : 1;
    unsigned char title_needs_free : 1;
};
1545 
1546 /* Label equivalence is quite complicated with regards to whitespace and case
1547  * folding. This complicates computing a hash of it as well as direct comparison
1548  * of two labels. */
1549 
1550 static unsigned
1551 md_link_label_hash(const CHAR* label, SZ size)
1552 {
1553     unsigned hash = MD_FNV1A_BASE;
1554     OFF off;
1555     unsigned codepoint;
1556     int is_whitespace = FALSE;
1557 
1558     off = md_skip_unicode_whitespace(label, 0, size);
1559     while(off < size) {
1560         SZ char_size;
1561 
1562         codepoint = md_decode_unicode(label, off, size, &char_size);
1563         is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1564 
1565         if(is_whitespace) {
1566             codepoint = ' ';
1567             hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1568             off = md_skip_unicode_whitespace(label, off, size);
1569         } else {
1570             MD_UNICODE_FOLD_INFO fold_info;
1571 
1572             md_get_unicode_fold_info(codepoint, &fold_info);
1573             hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1574             off += char_size;
1575         }
1576     }
1577 
1578     return hash;
1579 }
1580 
1581 static OFF
1582 md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1583                                  MD_UNICODE_FOLD_INFO* fold_info)
1584 {
1585     unsigned codepoint;
1586     SZ char_size;
1587 
1588     if(off >= size) {
1589         /* Treat end of a link label as a whitespace. */
1590         goto whitespace;
1591     }
1592 
1593     codepoint = md_decode_unicode(label, off, size, &char_size);
1594     off += char_size;
1595     if(ISUNICODEWHITESPACE_(codepoint)) {
1596         /* Treat all whitespace as equivalent */
1597         goto whitespace;
1598     }
1599 
1600     /* Get real folding info. */
1601     md_get_unicode_fold_info(codepoint, fold_info);
1602     return off;
1603 
1604 whitespace:
1605     fold_info->codepoints[0] = _T(' ');
1606     fold_info->n_codepoints = 1;
1607     return md_skip_unicode_whitespace(label, off, size);
1608 }
1609 
1610 static int
1611 md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1612 {
1613     OFF a_off;
1614     OFF b_off;
1615     MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1616     MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1617     OFF a_fi_off = 0;
1618     OFF b_fi_off = 0;
1619     int cmp;
1620 
1621     a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1622     b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1623     while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1624           b_off < b_size || b_fi_off < b_fi.n_codepoints)
1625     {
1626         /* If needed, load fold info for next char. */
1627         if(a_fi_off >= a_fi.n_codepoints) {
1628             a_fi_off = 0;
1629             a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1630         }
1631         if(b_fi_off >= b_fi.n_codepoints) {
1632             b_fi_off = 0;
1633             b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1634         }
1635 
1636         cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1637         if(cmp != 0)
1638             return cmp;
1639 
1640         a_fi_off++;
1641         b_fi_off++;
1642     }
1643 
1644     return 0;
1645 }
1646 
1647 typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1648 struct MD_REF_DEF_LIST_tag {
1649     int n_ref_defs;
1650     int alloc_ref_defs;
1651     MD_REF_DEF* ref_defs[];  /* Valid items always  point into ctx->ref_defs[] */
1652 };
1653 
1654 static int
1655 md_ref_def_cmp(const void* a, const void* b)
1656 {
1657     const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1658     const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1659 
1660     if(a_ref->hash < b_ref->hash)
1661         return -1;
1662     else if(a_ref->hash > b_ref->hash)
1663         return +1;
1664     else
1665         return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1666 }
1667 
1668 static int
1669 md_ref_def_cmp_for_sort(const void* a, const void* b)
1670 {
1671     int cmp;
1672 
1673     cmp = md_ref_def_cmp(a, b);
1674 
1675     /* Ensure stability of the sorting. */
1676     if(cmp == 0) {
1677         const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1678         const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1679 
1680         if(a_ref < b_ref)
1681             cmp = -1;
1682         else if(a_ref > b_ref)
1683             cmp = +1;
1684         else
1685             cmp = 0;
1686     }
1687 
1688     return cmp;
1689 }
1690 
1691 static int
1692 md_build_ref_def_hashtable(MD_CTX* ctx)
1693 {
1694     int i, j;
1695 
1696     if(ctx->n_ref_defs == 0)
1697         return 0;
1698 
1699     ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
1700     ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1701     if(ctx->ref_def_hashtable == NULL) {
1702         MD_LOG("malloc() failed.");
1703         goto abort;
1704     }
1705     memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
1706 
1707     /* Each member of ctx->ref_def_hashtable[] can be:
1708      *  -- NULL,
1709      *  -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1710      *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1711      *     such MD_REF_DEFs.
1712      */
1713     for(i = 0; i < ctx->n_ref_defs; i++) {
1714         MD_REF_DEF* def = &ctx->ref_defs[i];
1715         void* bucket;
1716         MD_REF_DEF_LIST* list;
1717 
1718         def->hash = md_link_label_hash(def->label, def->label_size);
1719         bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1720 
1721         if(bucket == NULL) {
1722             /* The bucket is empty. Make it just point to the def. */
1723             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1724             continue;
1725         }
1726 
1727         if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1728             /* The bucket already contains one ref. def. Lets see whether it
1729              * is the same label (ref. def. duplicate) or different one
1730              * (hash conflict). */
1731             MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1732 
1733             if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
1734                 /* Duplicate label: Ignore this ref. def. */
1735                 continue;
1736             }
1737 
1738             /* Make the bucket complex, i.e. able to hold more ref. defs. */
1739             list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
1740             if(list == NULL) {
1741                 MD_LOG("malloc() failed.");
1742                 goto abort;
1743             }
1744             list->ref_defs[0] = old_def;
1745             list->ref_defs[1] = def;
1746             list->n_ref_defs = 2;
1747             list->alloc_ref_defs = 2;
1748             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1749             continue;
1750         }
1751 
1752         /* Append the def to the complex bucket list.
1753          *
1754          * Note in this case we ignore potential duplicates to avoid expensive
1755          * iterating over the complex bucket. Below, we revisit all the complex
1756          * buckets and handle it more cheaply after the complex bucket contents
1757          * is sorted. */
1758         list = (MD_REF_DEF_LIST*) bucket;
1759         if(list->n_ref_defs >= list->alloc_ref_defs) {
1760             int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
1761             MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1762                         sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1763             if(list_tmp == NULL) {
1764                 MD_LOG("realloc() failed.");
1765                 goto abort;
1766             }
1767             list = list_tmp;
1768             list->alloc_ref_defs = alloc_ref_defs;
1769             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1770         }
1771 
1772         list->ref_defs[list->n_ref_defs] = def;
1773         list->n_ref_defs++;
1774     }
1775 
1776     /* Sort the complex buckets so we can use bsearch() with them. */
1777     for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1778         void* bucket = ctx->ref_def_hashtable[i];
1779         MD_REF_DEF_LIST* list;
1780 
1781         if(bucket == NULL)
1782             continue;
1783         if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1784             continue;
1785 
1786         list = (MD_REF_DEF_LIST*) bucket;
1787         qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1788 
1789         /* Disable all duplicates in the complex bucket by forcing all such
1790          * records to point to the 1st such ref. def. I.e. no matter which
1791          * record is found during the lookup, it will always point to the right
1792          * ref. def. in ctx->ref_defs[]. */
1793         for(j = 1; j < list->n_ref_defs; j++) {
1794             if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
1795                 list->ref_defs[j] = list->ref_defs[j-1];
1796         }
1797     }
1798 
1799     return 0;
1800 
1801 abort:
1802     return -1;
1803 }
1804 
1805 static void
1806 md_free_ref_def_hashtable(MD_CTX* ctx)
1807 {
1808     if(ctx->ref_def_hashtable != NULL) {
1809         int i;
1810 
1811         for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1812             void* bucket = ctx->ref_def_hashtable[i];
1813             if(bucket == NULL)
1814                 continue;
1815             if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1816                 continue;
1817             free(bucket);
1818         }
1819 
1820         free(ctx->ref_def_hashtable);
1821     }
1822 }
1823 
1824 static const MD_REF_DEF*
1825 md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1826 {
1827     unsigned hash;
1828     void* bucket;
1829 
1830     if(ctx->ref_def_hashtable_size == 0)
1831         return NULL;
1832 
1833     hash = md_link_label_hash(label, label_size);
1834     bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1835 
1836     if(bucket == NULL) {
1837         return NULL;
1838     } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1839         const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1840 
1841         if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1842             return def;
1843         else
1844             return NULL;
1845     } else {
1846         MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1847         MD_REF_DEF key_buf;
1848         const MD_REF_DEF* key = &key_buf;
1849         const MD_REF_DEF** ret;
1850 
1851         key_buf.label = (CHAR*) label;
1852         key_buf.label_size = label_size;
1853         key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1854 
1855         ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1856                     list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1857         if(ret != NULL)
1858             return *ret;
1859         else
1860             return NULL;
1861     }
1862 }
1863 
1864 
1865 /***************************
1866  ***  Recognizing Links  ***
1867  ***************************/
1868 
1869 /* Note this code is partially shared between processing inlines and blocks
1870  * as reference definitions and links share some helper parser functions.
1871  */
1872 
1873 typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1874 struct MD_LINK_ATTR_tag {
1875     OFF dest_beg;
1876     OFF dest_end;
1877 
1878     CHAR* title;
1879     SZ title_size;
1880     int title_needs_free;
1881 };
1882 
1883 
1884 static int
1885 md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1886                  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1887                  OFF* p_contents_beg, OFF* p_contents_end)
1888 {
1889     OFF off = beg;
1890     OFF contents_beg = 0;
1891     OFF contents_end = 0;
1892     int line_index = 0;
1893     int len = 0;
1894 
1895     if(CH(off) != _T('['))
1896         return FALSE;
1897     off++;
1898 
1899     while(1) {
1900         OFF line_end = lines[line_index].end;
1901 
1902         while(off < line_end) {
1903             if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1904                 if(contents_end == 0) {
1905                     contents_beg = off;
1906                     *p_beg_line_index = line_index;
1907                 }
1908                 contents_end = off + 2;
1909                 off += 2;
1910             } else if(CH(off) == _T('[')) {
1911                 return FALSE;
1912             } else if(CH(off) == _T(']')) {
1913                 if(contents_beg < contents_end) {
1914                     /* Success. */
1915                     *p_contents_beg = contents_beg;
1916                     *p_contents_end = contents_end;
1917                     *p_end = off+1;
1918                     *p_end_line_index = line_index;
1919                     return TRUE;
1920                 } else {
1921                     /* Link label must have some non-whitespace contents. */
1922                     return FALSE;
1923                 }
1924             } else {
1925                 unsigned codepoint;
1926                 SZ char_size;
1927 
1928                 codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1929                 if(!ISUNICODEWHITESPACE_(codepoint)) {
1930                     if(contents_end == 0) {
1931                         contents_beg = off;
1932                         *p_beg_line_index = line_index;
1933                     }
1934                     contents_end = off + char_size;
1935                 }
1936 
1937                 off += char_size;
1938             }
1939 
1940             len++;
1941             if(len > 999)
1942                 return FALSE;
1943         }
1944 
1945         line_index++;
1946         len++;
1947         if(line_index < n_lines)
1948             off = lines[line_index].beg;
1949         else
1950             break;
1951     }
1952 
1953     return FALSE;
1954 }
1955 
1956 static int
1957 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1958                          OFF* p_contents_beg, OFF* p_contents_end)
1959 {
1960     OFF off = beg;
1961 
1962     if(off >= max_end  ||  CH(off) != _T('<'))
1963         return FALSE;
1964     off++;
1965 
1966     while(off < max_end) {
1967         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1968             off += 2;
1969             continue;
1970         }
1971 
1972         if(ISNEWLINE(off)  ||  CH(off) == _T('<'))
1973             return FALSE;
1974 
1975         if(CH(off) == _T('>')) {
1976             /* Success. */
1977             *p_contents_beg = beg+1;
1978             *p_contents_end = off;
1979             *p_end = off+1;
1980             return TRUE;
1981         }
1982 
1983         off++;
1984     }
1985 
1986     return FALSE;
1987 }
1988 
1989 static int
1990 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1991                          OFF* p_contents_beg, OFF* p_contents_end)
1992 {
1993     OFF off = beg;
1994     int parenthesis_level = 0;
1995 
1996     while(off < max_end) {
1997         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1998             off += 2;
1999             continue;
2000         }
2001 
2002         if(ISWHITESPACE(off) || ISCNTRL(off))
2003             break;
2004 
2005         /* Link destination may include balanced pairs of unescaped '(' ')'.
2006          * Note we limit the maximal nesting level by 32 to protect us from
2007          * https://github.com/jgm/cmark/issues/214 */
2008         if(CH(off) == _T('(')) {
2009             parenthesis_level++;
2010             if(parenthesis_level > 32)
2011                 return FALSE;
2012         } else if(CH(off) == _T(')')) {
2013             if(parenthesis_level == 0)
2014                 break;
2015             parenthesis_level--;
2016         }
2017 
2018         off++;
2019     }
2020 
2021     if(parenthesis_level != 0  ||  off == beg)
2022         return FALSE;
2023 
2024     /* Success. */
2025     *p_contents_beg = beg;
2026     *p_contents_end = off;
2027     *p_end = off;
2028     return TRUE;
2029 }
2030 
2031 static inline int
2032 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2033                        OFF* p_contents_beg, OFF* p_contents_end)
2034 {
2035     if(CH(beg) == _T('<'))
2036         return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2037     else
2038         return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2039 }
2040 
2041 static int
2042 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2043                  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2044                  OFF* p_contents_beg, OFF* p_contents_end)
2045 {
2046     OFF off = beg;
2047     CHAR closer_char;
2048     int line_index = 0;
2049 
2050     /* White space with up to one line break. */
2051     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2052         off++;
2053     if(off >= lines[line_index].end) {
2054         line_index++;
2055         if(line_index >= n_lines)
2056             return FALSE;
2057         off = lines[line_index].beg;
2058     }
2059     if(off == beg)
2060         return FALSE;
2061 
2062     *p_beg_line_index = line_index;
2063 
2064     /* First char determines how to detect end of it. */
2065     switch(CH(off)) {
2066         case _T('"'):   closer_char = _T('"'); break;
2067         case _T('\''):  closer_char = _T('\''); break;
2068         case _T('('):   closer_char = _T(')'); break;
2069         default:        return FALSE;
2070     }
2071     off++;
2072 
2073     *p_contents_beg = off;
2074 
2075     while(line_index < n_lines) {
2076         OFF line_end = lines[line_index].end;
2077 
2078         while(off < line_end) {
2079             if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
2080                 off++;
2081             } else if(CH(off) == closer_char) {
2082                 /* Success. */
2083                 *p_contents_end = off;
2084                 *p_end = off+1;
2085                 *p_end_line_index = line_index;
2086                 return TRUE;
2087             } else if(closer_char == _T(')')  &&  CH(off) == _T('(')) {
2088                 /* ()-style title cannot contain (unescaped '(')) */
2089                 return FALSE;
2090             }
2091 
2092             off++;
2093         }
2094 
2095         line_index++;
2096     }
2097 
2098     return FALSE;
2099 }
2100 
2101 /* Returns 0 if it is not a reference definition.
2102  *
2103  * Returns N > 0 if it is a reference definition. N then corresponds to the
2104  * number of lines forming it). In this case the definition is stored for
2105  * resolving any links referring to it.
2106  *
2107  * Returns -1 in case of an error (out of memory).
2108  */
2109 static int
2110 md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2111 {
2112     OFF label_contents_beg;
2113     OFF label_contents_end;
2114     int label_contents_line_index = -1;
2115     int label_is_multiline = FALSE;
2116     OFF dest_contents_beg;
2117     OFF dest_contents_end;
2118     OFF title_contents_beg;
2119     OFF title_contents_end;
2120     int title_contents_line_index;
2121     int title_is_multiline = FALSE;
2122     OFF off;
2123     int line_index = 0;
2124     int tmp_line_index;
2125     MD_REF_DEF* def = NULL;
2126     int ret = 0;
2127 
2128     /* Link label. */
2129     if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
2130                 &off, &label_contents_line_index, &line_index,
2131                 &label_contents_beg, &label_contents_end))
2132         return FALSE;
2133     label_is_multiline = (label_contents_line_index != line_index);
2134 
2135     /* Colon. */
2136     if(off >= lines[line_index].end  ||  CH(off) != _T(':'))
2137         return FALSE;
2138     off++;
2139 
2140     /* Optional white space with up to one line break. */
2141     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2142         off++;
2143     if(off >= lines[line_index].end) {
2144         line_index++;
2145         if(line_index >= n_lines)
2146             return FALSE;
2147         off = lines[line_index].beg;
2148     }
2149 
2150     /* Link destination. */
2151     if(!md_is_link_destination(ctx, off, lines[line_index].end,
2152                 &off, &dest_contents_beg, &dest_contents_end))
2153         return FALSE;
2154 
2155     /* (Optional) title. Note we interpret it as an title only if nothing
2156      * more follows on its last line. */
2157     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2158                 &off, &title_contents_line_index, &tmp_line_index,
2159                 &title_contents_beg, &title_contents_end)
2160         &&  off >= lines[line_index + tmp_line_index].end)
2161     {
2162         title_is_multiline = (tmp_line_index != title_contents_line_index);
2163         title_contents_line_index += line_index;
2164         line_index += tmp_line_index;
2165     } else {
2166         /* Not a title. */
2167         title_is_multiline = FALSE;
2168         title_contents_beg = off;
2169         title_contents_end = off;
2170         title_contents_line_index = 0;
2171     }
2172 
2173     /* Nothing more can follow on the last line. */
2174     if(off < lines[line_index].end)
2175         return FALSE;
2176 
2177     /* So, it _is_ a reference definition. Remember it. */
2178     if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2179         MD_REF_DEF* new_defs;
2180 
2181         ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
2182                 ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
2183                 : 16);
2184         new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
2185         if(new_defs == NULL) {
2186             MD_LOG("realloc() failed.");
2187             goto abort;
2188         }
2189 
2190         ctx->ref_defs = new_defs;
2191     }
2192     def = &ctx->ref_defs[ctx->n_ref_defs];
2193     memset(def, 0, sizeof(MD_REF_DEF));
2194 
2195     if(label_is_multiline) {
2196         MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2197                     lines + label_contents_line_index, n_lines - label_contents_line_index,
2198                     _T(' '), &def->label, &def->label_size));
2199         def->label_needs_free = TRUE;
2200     } else {
2201         def->label = (CHAR*) STR(label_contents_beg);
2202         def->label_size = label_contents_end - label_contents_beg;
2203     }
2204 
2205     if(title_is_multiline) {
2206         MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2207                     lines + title_contents_line_index, n_lines - title_contents_line_index,
2208                     _T('\n'), &def->title, &def->title_size));
2209         def->title_needs_free = TRUE;
2210     } else {
2211         def->title = (CHAR*) STR(title_contents_beg);
2212         def->title_size = title_contents_end - title_contents_beg;
2213     }
2214 
2215     def->dest_beg = dest_contents_beg;
2216     def->dest_end = dest_contents_end;
2217 
2218     /* Success. */
2219     ctx->n_ref_defs++;
2220     return line_index + 1;
2221 
2222 abort:
2223     /* Failure. */
2224     if(def != NULL  &&  def->label_needs_free)
2225         free(def->label);
2226     if(def != NULL  &&  def->title_needs_free)
2227         free(def->title);
2228     return ret;
2229 }
2230 
2231 static int
2232 md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2233                      OFF beg, OFF end, MD_LINK_ATTR* attr)
2234 {
2235     const MD_REF_DEF* def;
2236     const MD_LINE* beg_line;
2237     int is_multiline;
2238     CHAR* label;
2239     SZ label_size;
2240     int ret;
2241 
2242     MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2243     MD_ASSERT(CH(end-1) == _T(']'));
2244 
2245     beg += (CH(beg) == _T('!') ? 2 : 1);
2246     end--;
2247 
2248     /* Find lines corresponding to the beg and end positions. */
2249     beg_line = md_lookup_line(beg, lines, n_lines);
2250     is_multiline = (end > beg_line->end);
2251 
2252     if(is_multiline) {
2253         MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2254                  (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
2255     } else {
2256         label = (CHAR*) STR(beg);
2257         label_size = end - beg;
2258     }
2259 
2260     def = md_lookup_ref_def(ctx, label, label_size);
2261     if(def != NULL) {
2262         attr->dest_beg = def->dest_beg;
2263         attr->dest_end = def->dest_end;
2264         attr->title = def->title;
2265         attr->title_size = def->title_size;
2266         attr->title_needs_free = FALSE;
2267     }
2268 
2269     if(is_multiline)
2270         free(label);
2271 
2272     ret = (def != NULL);
2273 
2274 abort:
2275     return ret;
2276 }
2277 
2278 static int
2279 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2280                        OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2281 {
2282     int line_index = 0;
2283     int tmp_line_index;
2284     OFF title_contents_beg;
2285     OFF title_contents_end;
2286     int title_contents_line_index;
2287     int title_is_multiline;
2288     OFF off = beg;
2289     int ret = FALSE;
2290 
2291     while(off >= lines[line_index].end)
2292         line_index++;
2293 
2294     MD_ASSERT(CH(off) == _T('('));
2295     off++;
2296 
2297     /* Optional white space with up to one line break. */
2298     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2299         off++;
2300     if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
2301         line_index++;
2302         if(line_index >= n_lines)
2303             return FALSE;
2304         off = lines[line_index].beg;
2305     }
2306 
2307     /* Link destination may be omitted, but only when not also having a title. */
2308     if(off < ctx->size  &&  CH(off) == _T(')')) {
2309         attr->dest_beg = off;
2310         attr->dest_end = off;
2311         attr->title = NULL;
2312         attr->title_size = 0;
2313         attr->title_needs_free = FALSE;
2314         off++;
2315         *p_end = off;
2316         return TRUE;
2317     }
2318 
2319     /* Link destination. */
2320     if(!md_is_link_destination(ctx, off, lines[line_index].end,
2321                         &off, &attr->dest_beg, &attr->dest_end))
2322         return FALSE;
2323 
2324     /* (Optional) title. */
2325     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2326                 &off, &title_contents_line_index, &tmp_line_index,
2327                 &title_contents_beg, &title_contents_end))
2328     {
2329         title_is_multiline = (tmp_line_index != title_contents_line_index);
2330         title_contents_line_index += line_index;
2331         line_index += tmp_line_index;
2332     } else {
2333         /* Not a title. */
2334         title_is_multiline = FALSE;
2335         title_contents_beg = off;
2336         title_contents_end = off;
2337         title_contents_line_index = 0;
2338     }
2339 
2340     /* Optional whitespace followed with final ')'. */
2341     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2342         off++;
2343     if (off >= lines[line_index].end  &&  (off >= ctx->size || ISNEWLINE(off))) {
2344         line_index++;
2345         if(line_index >= n_lines)
2346             return FALSE;
2347         off = lines[line_index].beg;
2348     }
2349     if(CH(off) != _T(')'))
2350         goto abort;
2351     off++;
2352 
2353     if(title_contents_beg >= title_contents_end) {
2354         attr->title = NULL;
2355         attr->title_size = 0;
2356         attr->title_needs_free = FALSE;
2357     } else if(!title_is_multiline) {
2358         attr->title = (CHAR*) STR(title_contents_beg);
2359         attr->title_size = title_contents_end - title_contents_beg;
2360         attr->title_needs_free = FALSE;
2361     } else {
2362         MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2363                     lines + title_contents_line_index, n_lines - title_contents_line_index,
2364                     _T('\n'), &attr->title, &attr->title_size));
2365         attr->title_needs_free = TRUE;
2366     }
2367 
2368     *p_end = off;
2369     ret = TRUE;
2370 
2371 abort:
2372     return ret;
2373 }
2374 
2375 static void
2376 md_free_ref_defs(MD_CTX* ctx)
2377 {
2378     int i;
2379 
2380     for(i = 0; i < ctx->n_ref_defs; i++) {
2381         MD_REF_DEF* def = &ctx->ref_defs[i];
2382 
2383         if(def->label_needs_free)
2384             free(def->label);
2385         if(def->title_needs_free)
2386             free(def->title);
2387     }
2388 
2389     free(ctx->ref_defs);
2390 }
2391 
2392 
2393 /******************************************
2394  ***  Processing Inlines (a.k.a Spans)  ***
2395  ******************************************/
2396 
/* We process inlines in a few phases:
2398  *
2399  * (1) We go through the block text and collect all significant characters
2400  *     which may start/end a span or some other significant position into
2401  *     ctx->marks[]. Core of this is what md_collect_marks() does.
2402  *
2403  *     We also do some very brief preliminary context-less analysis, whether
2404  *     it might be opener or closer (e.g. of an emphasis span).
2405  *
2406  *     This speeds the other steps as we do not need to re-iterate over all
2407  *     characters anymore.
2408  *
 * (2) We analyze each potential mark type, in order of precedence.
2410  *
2411  *     In each md_analyze_XXX() function, we re-iterate list of the marks,
2412  *     skipping already resolved regions (in preceding precedences) and try to
2413  *     resolve them.
2414  *
2415  * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
2416  *       them as resolved.
2417  *
2418  * (2.2) For range-type marks, we analyze whether the mark could be closer
2419  *       and, if yes, whether there is some preceding opener it could satisfy.
2420  *
2421  *       If not we check whether it could be really an opener and if yes, we
2422  *       remember it so subsequent closers may resolve it.
2423  *
 * (3) Finally, when all marks have been analyzed, we render the block contents
 *     by calling the MD_RENDERER::text() callback, interrupted by calls to
 *     ::enter_span() or ::leave_span() whenever we reach a resolved mark.
2427  */
2428 
2429 
/* The mark structure.
 *
 * The member 'ch' identifies the kind of the mark:
 *
 * '\\': Maybe escape sequence.
 * '\0': NULL char.
 *  '*': Maybe (strong) emphasis start/end.
 *  '_': Maybe (strong) emphasis start/end.
 *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
 *  '`': Maybe code span start/end.
 *  '&': Maybe start of entity.
 *  ';': Maybe end of entity.
 *  '<': Maybe start of raw HTML or autolink.
 *  '>': Maybe end of raw HTML or autolink.
 *  '[': Maybe start of link label or link text.
 *  '!': Equivalent of '[' for image.
 *  ']': Maybe end of link label or link text.
 *  '$': Collected only with MD_FLAG_LATEXMATHSPANS (presumably a LaTeX math
 *       span start/end).
 *  '|': Collected only with MD_FLAG_TABLES or MD_FLAG_WIKILINKS (e.g. a
 *       table cell boundary).
 *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
 *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
 *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
 *  'D': Dummy mark, it reserves a space for splitting a previous mark
 *       (e.g. emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * Note that not all instances of these chars in the text imply creation of the
 * structure. Only those which have (or may have, after we see more context)
 * the special meaning.
 *
 * (Keep this struct as small as possible to fit as many of them as possible
 * into a CPU cache line.)
 */
struct MD_MARK_tag {
    /* Offsets of the mark in the document. For dummy marks ('D') the leading
     * bytes of the structure may instead be overwritten with a pointer, see
     * md_mark_store_ptr(); hence these two members have to stay first. */
    OFF beg;
    OFF end;

    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
     * of given type 'ch'.
     *
     * During resolving, we disconnect from the chain and point to the
     * corresponding counterpart so opener points to its closer and vice versa.
     */
    int prev;
    int next;
    CHAR ch;                /* Kind of the mark, see the list above. */
    unsigned char flags;    /* Combination of the MD_MARK_xxxx flags. */
};
2474 
/* Mark flags (these apply to ALL mark types). */
#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits).
 *
 * Note the bit 0x20 is deliberately reused by several flags below; which of
 * the meanings applies is given by the type of the mark (MD_MARK::ch).
 *
 * The MD_MARK_EMPH_MOD3_x values encode the original length of an emphasis
 * mark modulo 3 in the two top bits; use MD_MARK_EMPH_MOD3_MASK to extract
 * them. (MD_MARK_EMPH_MOD3_2 has the same numeric value as the mask.) */
#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
#define MD_MARK_HASNESTEDBRACKETS           0x20  /* For '[' to rule out invalid link labels early */
2491 
2492 static MD_MARKCHAIN*
2493 md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2494 {
2495     switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
2496         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return &ASTERISK_OPENERS_intraword_mod3_0;
2497         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return &ASTERISK_OPENERS_intraword_mod3_1;
2498         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return &ASTERISK_OPENERS_intraword_mod3_2;
2499         case MD_MARK_EMPH_MOD3_0:                           return &ASTERISK_OPENERS_extraword_mod3_0;
2500         case MD_MARK_EMPH_MOD3_1:                           return &ASTERISK_OPENERS_extraword_mod3_1;
2501         case MD_MARK_EMPH_MOD3_2:                           return &ASTERISK_OPENERS_extraword_mod3_2;
2502         default:                                            MD_UNREACHABLE();
2503     }
2504     return NULL;
2505 }
2506 
2507 static MD_MARKCHAIN*
2508 md_mark_chain(MD_CTX* ctx, int mark_index)
2509 {
2510     MD_MARK* mark = &ctx->marks[mark_index];
2511 
2512     switch(mark->ch) {
2513         case _T('*'):   return md_asterisk_chain(ctx, mark->flags);
2514         case _T('_'):   return &UNDERSCORE_OPENERS;
2515         case _T('~'):   return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2516         case _T('!'):   MD_FALLTHROUGH();
2517         case _T('['):   return &BRACKET_OPENERS;
2518         case _T('|'):   return &TABLECELLBOUNDARIES;
2519         default:        return NULL;
2520     }
2521 }
2522 
2523 static MD_MARK*
2524 md_push_mark(MD_CTX* ctx)
2525 {
2526     if(ctx->n_marks >= ctx->alloc_marks) {
2527         MD_MARK* new_marks;
2528 
2529         ctx->alloc_marks = (ctx->alloc_marks > 0
2530                 ? ctx->alloc_marks + ctx->alloc_marks / 2
2531                 : 64);
2532         new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2533         if(new_marks == NULL) {
2534             MD_LOG("realloc() failed.");
2535             return NULL;
2536         }
2537 
2538         ctx->marks = new_marks;
2539     }
2540 
2541     return &ctx->marks[ctx->n_marks++];
2542 }
2543 
/* Helper macros for appending a new mark into ctx->marks[].
 *
 * Both macros expect the following in the scope of the caller:
 *  -- variable 'ctx' (the parsing context);
 *  -- variable 'mark' which receives the pointer to the new mark;
 *  -- variable 'ret' which is set to -1 on an allocation failure;
 *  -- label 'abort' where the control is transferred on such failure.
 *
 * PUSH_MARK_() only reserves the mark and leaves its members uninitialized.
 */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            mark = md_push_mark(ctx);                                   \
            if(mark == NULL) {                                          \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* PUSH_MARK() reserves the mark and initializes all its members; the new
 * mark is not linked into any chain (prev and next are set to -1). Note the
 * mark character is converted to char before it is stored. */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2563 
2564 
2565 static void
2566 md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2567 {
2568     if(chain->tail >= 0)
2569         ctx->marks[chain->tail].next = mark_index;
2570     else
2571         chain->head = mark_index;
2572 
2573     ctx->marks[mark_index].prev = chain->tail;
2574     ctx->marks[mark_index].next = -1;
2575     chain->tail = mark_index;
2576 }
2577 
2578 /* Sometimes, we need to store a pointer into the mark. It is quite rare
2579  * so we do not bother to make MD_MARK use union, and it can only happen
2580  * for dummy marks. */
2581 static inline void
2582 md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2583 {
2584     MD_MARK* mark = &ctx->marks[mark_index];
2585     MD_ASSERT(mark->ch == 'D');
2586 
2587     /* Check only members beg and end are misused for this. */
2588     MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2589     memcpy(mark, &ptr, sizeof(void*));
2590 }
2591 
2592 static inline void*
2593 md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2594 {
2595     void* ptr;
2596     MD_MARK* mark = &ctx->marks[mark_index];
2597     MD_ASSERT(mark->ch == 'D');
2598     memcpy(&ptr, mark, sizeof(void*));
2599     return ptr;
2600 }
2601 
2602 static void
2603 md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2604 {
2605     MD_MARK* opener = &ctx->marks[opener_index];
2606     MD_MARK* closer = &ctx->marks[closer_index];
2607 
2608     /* Remove opener from the list of openers. */
2609     if(chain != NULL) {
2610         if(opener->prev >= 0)
2611             ctx->marks[opener->prev].next = opener->next;
2612         else
2613             chain->head = opener->next;
2614 
2615         if(opener->next >= 0)
2616             ctx->marks[opener->next].prev = opener->prev;
2617         else
2618             chain->tail = opener->prev;
2619     }
2620 
2621     /* Interconnect opener and closer and mark both as resolved. */
2622     opener->next = closer_index;
2623     opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
2624     closer->prev = opener_index;
2625     closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
2626 }
2627 
2628 
/* Values for the parameter 'how' of md_rollback(). */
#define MD_ROLLBACK_ALL         0
#define MD_ROLLBACK_CROSSING    1

/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
 * resolvings accordingly to these rules:
 *
 * (1) All openers BEFORE the range corresponding to any closer inside the
 *     range are un-resolved and they are re-added to their respective chains
 *     of unresolved openers. This ensures we can reuse the opener for closers
 *     AFTER the range.
 *
 * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
 *     are discarded.
 *
 * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
 *     in (1) are discarded. I.e. pairs of openers and closers which are both
 *     inside the range are retained as well as any unpaired marks.
 *
 * Additionally, every chain of unresolved openers is truncated so that it
 * does not contain any mark located after the opener; the opener itself is
 * removed too if it is in the chain.
 *
 * The marks at opener_index and closer_index themselves are not modified
 * by the second step; only the marks strictly between them are visited.
 */
static void
md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
{
    int i;
    int mark_index;

    /* Cut all unresolved openers at the mark index. */
    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
        MD_MARKCHAIN* chain = &ctx->mark_chains[i];

        /* Drop tail marks until the tail precedes the opener. Once the
         * opener itself has been dropped, stop unconditionally, without
         * inspecting the index of its predecessor. */
        while(chain->tail >= opener_index) {
            int same = chain->tail == opener_index;
            chain->tail = ctx->marks[chain->tail].prev;
            if (same) break;
        }

        /* Terminate the shortened chain, or mark it as empty. */
        if(chain->tail >= 0)
            ctx->marks[chain->tail].next = -1;
        else
            chain->head = -1;
    }

    /* Go backwards so that unresolved openers are re-added into their
     * respective chains, in the right order. */
    mark_index = closer_index - 1;
    while(mark_index > opener_index) {
        MD_MARK* mark = &ctx->marks[mark_index];
        /* Snapshot of the flags, taken before they may get reset below. */
        int mark_flags = mark->flags;
        int discard_flag = (how == MD_ROLLBACK_ALL);

        if(mark->flags & MD_MARK_CLOSER) {
            int mark_opener_index = mark->prev;

            /* Undo opener BEFORE the range. */
            if(mark_opener_index < opener_index) {
                MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
                MD_MARKCHAIN* chain;

                mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
                /* NOTE(review): The chain is looked up by opener_index (the
                 * opener of the whole range), not by mark_opener_index (the
                 * mark being re-added). If the two marks can be of different
                 * types, the mark ends up in a chain of the other type.
                 * TODO confirm whether this is intended. */
                chain = md_mark_chain(ctx, opener_index);
                if(chain != NULL) {
                    md_mark_chain_append(ctx, chain, mark_opener_index);
                    discard_flag = 1;
                }
            }
        }

        /* And reset our flags. */
        if(discard_flag) {
            /* Make zero-length closer a dummy mark as that's how it was born */
            if((mark->flags & MD_MARK_CLOSER)  &&  mark->beg == mark->end)
                mark->ch = 'D';

            mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
        }

        /* Jump as far as we can over unresolved or non-interesting marks. */
        switch(how) {
            case MD_ROLLBACK_CROSSING:
                if((mark_flags & MD_MARK_CLOSER)  &&  mark->prev > opener_index) {
                    /* If we are closer with opener INSIDE the range, there may
                     * not be any other crosser inside the subrange. */
                    mark_index = mark->prev;
                    break;
                }
                MD_FALLTHROUGH();
            default:
                mark_index--;
                break;
        }
    }
}
2719 
2720 static void
2721 md_build_mark_char_map(MD_CTX* ctx)
2722 {
2723     memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2724 
2725     ctx->mark_char_map['\\'] = 1;
2726     ctx->mark_char_map['*'] = 1;
2727     ctx->mark_char_map['_'] = 1;
2728     ctx->mark_char_map['`'] = 1;
2729     ctx->mark_char_map['&'] = 1;
2730     ctx->mark_char_map[';'] = 1;
2731     ctx->mark_char_map['<'] = 1;
2732     ctx->mark_char_map['>'] = 1;
2733     ctx->mark_char_map['['] = 1;
2734     ctx->mark_char_map['!'] = 1;
2735     ctx->mark_char_map[']'] = 1;
2736     ctx->mark_char_map['\0'] = 1;
2737 
2738     if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2739         ctx->mark_char_map['~'] = 1;
2740 
2741     if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2742         ctx->mark_char_map['$'] = 1;
2743 
2744     if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2745         ctx->mark_char_map['@'] = 1;
2746 
2747     if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2748         ctx->mark_char_map[':'] = 1;
2749 
2750     if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2751         ctx->mark_char_map['.'] = 1;
2752 
2753     if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2754         ctx->mark_char_map['|'] = 1;
2755 
2756     if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2757         int i;
2758 
2759         for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2760             if(ISWHITESPACE_(i))
2761                 ctx->mark_char_map[i] = 1;
2762         }
2763     }
2764 }
2765 
2766 /* We limit code span marks to lower than 32 backticks. This solves the
2767  * pathologic case of too many openers, each of different length: Their
2768  * resolving would be then O(n^2). */
2769 #define CODESPAN_MARK_MAXLEN    32
2770 
2771 static int
2772 md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2773                 OFF* p_opener_beg, OFF* p_opener_end,
2774                 OFF* p_closer_beg, OFF* p_closer_end,
2775                 OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2776                 int* p_reached_paragraph_end)
2777 {
2778     OFF opener_beg = beg;
2779     OFF opener_end;
2780     OFF closer_beg;
2781     OFF closer_end;
2782     SZ mark_len;
2783     OFF line_end;
2784     int has_space_after_opener = FALSE;
2785     int has_eol_after_opener = FALSE;
2786     int has_space_before_closer = FALSE;
2787     int has_eol_before_closer = FALSE;
2788     int has_only_space = TRUE;
2789     int line_index = 0;
2790 
2791     line_end = lines[0].end;
2792     opener_end = opener_beg;
2793     while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
2794         opener_end++;
2795     has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
2796     has_eol_after_opener = (opener_end == line_end);
2797 
2798     /* The caller needs to know end of the opening mark even if we fail. */
2799     *p_opener_end = opener_end;
2800 
2801     mark_len = opener_end - opener_beg;
2802     if(mark_len > CODESPAN_MARK_MAXLEN)
2803         return FALSE;
2804 
2805     /* Check whether we already know there is no closer of this length.
2806      * If so, re-scan does no sense. This fixes issue #59. */
2807     if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
2808        (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
2809         return FALSE;
2810 
2811     closer_beg = opener_end;
2812     closer_end = opener_end;
2813 
2814     /* Find closer mark. */
2815     while(TRUE) {
2816         while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
2817             if(CH(closer_beg) != _T(' '))
2818                 has_only_space = FALSE;
2819             closer_beg++;
2820         }
2821         closer_end = closer_beg;
2822         while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
2823             closer_end++;
2824 
2825         if(closer_end - closer_beg == mark_len) {
2826             /* Success. */
2827             has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
2828             has_eol_before_closer = (closer_beg == lines[line_index].beg);
2829             break;
2830         }
2831 
2832         if(closer_end - closer_beg > 0) {
2833             /* We have found a back-tick which is not part of the closer. */
2834             has_only_space = FALSE;
2835 
2836             /* But if we eventually fail, remember it as a potential closer
2837              * of its own length for future attempts. This mitigates needs for
2838              * rescans. */
2839             if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2840                 if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
2841                     last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
2842             }
2843         }
2844 
2845         if(closer_end >= line_end) {
2846             line_index++;
2847             if(line_index >= n_lines) {
2848                 /* Reached end of the paragraph and still nothing. */
2849                 *p_reached_paragraph_end = TRUE;
2850                 return FALSE;
2851             }
2852             /* Try on the next line. */
2853             line_end = lines[line_index].end;
2854             closer_beg = lines[line_index].beg;
2855         } else {
2856             closer_beg = closer_end;
2857         }
2858     }
2859 
2860     /* If there is a space or a new line both after and before the opener
2861      * (and if the code span is not made of spaces only), consume one initial
2862      * and one trailing space as part of the marks. */
2863     if(!has_only_space  &&
2864        (has_space_after_opener || has_eol_after_opener)  &&
2865        (has_space_before_closer || has_eol_before_closer))
2866     {
2867         if(has_space_after_opener)
2868             opener_end++;
2869         else
2870             opener_end = lines[1].beg;
2871 
2872         if(has_space_before_closer)
2873             closer_beg--;
2874         else {
2875             closer_beg = lines[line_index-1].end;
2876             /* We need to eat the preceding "\r\n" but not any line trailing
2877              * spaces. */
2878             while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
2879                 closer_beg++;
2880         }
2881     }
2882 
2883     *p_opener_beg = opener_beg;
2884     *p_opener_end = opener_end;
2885     *p_closer_beg = closer_beg;
2886     *p_closer_end = closer_end;
2887     return TRUE;
2888 }
2889 
2890 static int
2891 md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2892 {
2893     OFF off = beg+1;
2894 
2895     MD_ASSERT(CH(beg) == _T('<'));
2896 
2897     /* Check for scheme. */
2898     if(off >= max_end  ||  !ISASCII(off))
2899         return FALSE;
2900     off++;
2901     while(1) {
2902         if(off >= max_end)
2903             return FALSE;
2904         if(off - beg > 32)
2905             return FALSE;
2906         if(CH(off) == _T(':')  &&  off - beg >= 3)
2907             break;
2908         if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2909             return FALSE;
2910         off++;
2911     }
2912 
2913     /* Check the path after the scheme. */
2914     while(off < max_end  &&  CH(off) != _T('>')) {
2915         if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2916             return FALSE;
2917         off++;
2918     }
2919 
2920     if(off >= max_end)
2921         return FALSE;
2922 
2923     MD_ASSERT(CH(off) == _T('>'));
2924     *p_end = off+1;
2925     return TRUE;
2926 }
2927 
2928 static int
2929 md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2930 {
2931     OFF off = beg + 1;
2932     int label_len;
2933 
2934     MD_ASSERT(CH(beg) == _T('<'));
2935 
2936     /* The code should correspond to this regexp:
2937             /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
2938             @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2939             (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
2940      */
2941 
2942     /* Username (before '@'). */
2943     while(off < max_end  &&  (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2944         off++;
2945     if(off <= beg+1)
2946         return FALSE;
2947 
2948     /* '@' */
2949     if(off >= max_end  ||  CH(off) != _T('@'))
2950         return FALSE;
2951     off++;
2952 
2953     /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
2954      * characters or '-', but '-' is not allowed as first or last char. */
2955     label_len = 0;
2956     while(off < max_end) {
2957         if(ISALNUM(off))
2958             label_len++;
2959         else if(CH(off) == _T('-')  &&  label_len > 0)
2960             label_len++;
2961         else if(CH(off) == _T('.')  &&  label_len > 0  &&  CH(off-1) != _T('-'))
2962             label_len = 0;
2963         else
2964             break;
2965 
2966         if(label_len > 63)
2967             return FALSE;
2968 
2969         off++;
2970     }
2971 
2972     if(label_len <= 0  || off >= max_end  ||  CH(off) != _T('>') ||  CH(off-1) == _T('-'))
2973         return FALSE;
2974 
2975     *p_end = off+1;
2976     return TRUE;
2977 }
2978 
2979 static int
2980 md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2981 {
2982     if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2983         *p_missing_mailto = FALSE;
2984         return TRUE;
2985     }
2986 
2987     if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2988         *p_missing_mailto = TRUE;
2989         return TRUE;
2990     }
2991 
2992     return FALSE;
2993 }
2994 
2995 static int
2996 md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2997 {
2998     const MD_LINE* line_term = lines + n_lines;
2999     const MD_LINE* line;
3000     int ret = 0;
3001     MD_MARK* mark;
3002     OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
3003     int codespan_scanned_till_paragraph_end = FALSE;
3004 
3005     for(line = lines; line < line_term; line++) {
3006         OFF off = line->beg;
3007         OFF line_end = line->end;
3008 
3009         while(TRUE) {
3010             CHAR ch;
3011 
3012 #ifdef MD4C_USE_UTF16
3013     /* For UTF-16, mark_char_map[] covers only ASCII. */
3014     #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
3015                                 (ctx->mark_char_map[(unsigned char) CH(off)]))
3016 #else
3017     /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
3018     #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
3019 #endif
3020 
3021             /* Optimization: Use some loop unrolling. */
3022             while(off + 3 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
3023                                       &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
3024                 off += 4;
3025             while(off < line_end  &&  !IS_MARK_CHAR(off+0))
3026                 off++;
3027 
3028             if(off >= line_end)
3029                 break;
3030 
3031             ch = CH(off);
3032 
3033             /* A backslash escape.
3034              * It can go beyond line->end as it may involve escaped new
3035              * line to form a hard break. */
3036             if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
3037                 /* Hard-break cannot be on the last line of the block. */
3038                 if(!ISNEWLINE(off+1)  ||  line+1 < line_term)
3039                     PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
3040                 off += 2;
3041                 continue;
3042             }
3043 
3044             /* A potential (string) emphasis start/end. */
3045             if(ch == _T('*')  ||  ch == _T('_')) {
3046                 OFF tmp = off+1;
3047                 int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
3048                 int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */
3049 
3050                 while(tmp < line_end  &&  CH(tmp) == ch)
3051                     tmp++;
3052 
3053                 if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
3054                     left_level = 0;
3055                 else if(ISUNICODEPUNCTBEFORE(off))
3056                     left_level = 1;
3057                 else
3058                     left_level = 2;
3059 
3060                 if(tmp == line_end  ||  ISUNICODEWHITESPACE(tmp))
3061                     right_level = 0;
3062                 else if(ISUNICODEPUNCT(tmp))
3063                     right_level = 1;
3064                 else
3065                     right_level = 2;
3066 
3067                 /* Intra-word underscore doesn't have special meaning. */
3068                 if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
3069                     left_level = 0;
3070                     right_level = 0;
3071                 }
3072 
3073                 if(left_level != 0  ||  right_level != 0) {
3074                     unsigned flags = 0;
3075 
3076                     if(left_level > 0  &&  left_level >= right_level)
3077                         flags |= MD_MARK_POTENTIAL_CLOSER;
3078                     if(right_level > 0  &&  right_level >= left_level)
3079                         flags |= MD_MARK_POTENTIAL_OPENER;
3080                     if(left_level == 2  &&  right_level == 2)
3081                         flags |= MD_MARK_EMPH_INTRAWORD;
3082 
3083                     /* For "the rule of three" we need to remember the original
3084                      * size of the mark (modulo three), before we potentially
3085                      * split the mark when being later resolved partially by some
3086                      * shorter closer. */
3087                     switch((tmp - off) % 3) {
3088                         case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
3089                         case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
3090                         case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
3091                     }
3092 
3093                     PUSH_MARK(ch, off, tmp, flags);
3094 
3095                     /* During resolving, multiple asterisks may have to be
3096                      * split into independent span start/ends. Consider e.g.
3097                      * "**foo* bar*". Therefore we push also some empty dummy
3098                      * marks to have enough space for that. */
3099                     off++;
3100                     while(off < tmp) {
3101                         PUSH_MARK('D', off, off, 0);
3102                         off++;
3103                     }
3104                     continue;
3105                 }
3106 
3107                 off = tmp;
3108                 continue;
3109             }
3110 
3111             /* A potential code span start/end. */
3112             if(ch == _T('`')) {
3113                 OFF opener_beg, opener_end;
3114                 OFF closer_beg, closer_end;
3115                 int is_code_span;
3116 
3117                 is_code_span = md_is_code_span(ctx, line, line_term - line, off,
3118                                     &opener_beg, &opener_end, &closer_beg, &closer_end,
3119                                     codespan_last_potential_closers,
3120                                     &codespan_scanned_till_paragraph_end);
3121                 if(is_code_span) {
3122                     PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
3123                     PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3124                     ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3125                     ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3126 
3127                     off = closer_end;
3128 
3129                     /* Advance the current line accordingly. */
3130                     if(off > line_end) {
3131                         line = md_lookup_line(off, line, line_term - line);
3132                         if(NULL == line) exit(15);
3133                         line_end = line->end;
3134                     }
3135                     continue;
3136                 }
3137 
3138                 off = opener_end;
3139                 continue;
3140             }
3141 
3142             /* A potential entity start. */
3143             if(ch == _T('&')) {
3144                 PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3145                 off++;
3146                 continue;
3147             }
3148 
3149             /* A potential entity end. */
3150             if(ch == _T(';')) {
3151                 /* We surely cannot be entity unless the previous mark is '&'. */
3152                 if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
3153                     PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3154 
3155                 off++;
3156                 continue;
3157             }
3158 
3159             /* A potential autolink or raw HTML start/end. */
3160             if(ch == _T('<')) {
3161                 int is_autolink;
3162                 OFF autolink_end;
3163                 int missing_mailto;
3164 
3165                 if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3166                     int is_html;
3167                     OFF html_end;
3168 
3169                     /* Given the nature of the raw HTML, we have to recognize
3170                      * it here. Doing so later in md_analyze_lt_gt() could
3171                      * open can of worms of quadratic complexity. */
3172                     is_html = md_is_html_any(ctx, line, line_term - line, off,
3173                                     lines[n_lines-1].end, &html_end);
3174                     if(is_html) {
3175                         PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
3176                         PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3177                         ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3178                         ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3179                         off = html_end;
3180 
3181                         /* Advance the current line accordingly. */
3182                         if(off > line_end) {
3183                             line = md_lookup_line(off, line, line_term - line);
3184                             line_end = line->end;
3185                         }
3186                         continue;
3187                     }
3188                 }
3189 
3190                 is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
3191                                     &autolink_end, &missing_mailto);
3192                 if(is_autolink) {
3193                     PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
3194                                 MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3195                     PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
3196                                 MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3197                     ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3198                     ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3199                     off = autolink_end;
3200                     continue;
3201                 }
3202 
3203                 off++;
3204                 continue;
3205             }
3206 
3207             /* A potential link or its part. */
3208             if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
3209                 OFF tmp = (ch == _T('[') ? off+1 : off+2);
3210                 PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3211                 off = tmp;
3212                 /* Two dummies to make enough place for data we need if it is
3213                  * a link. */
3214                 PUSH_MARK('D', off, off, 0);
3215                 PUSH_MARK('D', off, off, 0);
3216                 continue;
3217             }
3218             if(ch == _T(']')) {
3219                 PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3220                 off++;
3221                 continue;
3222             }
3223 
3224             /* A potential permissive e-mail autolink. */
3225             if(ch == _T('@')) {
3226                 if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
3227                     off + 3 < line->end  &&  ISALNUM(off+1))
3228                 {
3229                     PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3230                     /* Push a dummy as a reserve for a closer. */
3231                     PUSH_MARK('D', off, off, 0);
3232                 }
3233 
3234                 off++;
3235                 continue;
3236             }
3237 
3238             /* A potential permissive URL autolink. */
3239             if(ch == _T(':')) {
3240                 static struct {
3241                     const CHAR* scheme;
3242                     SZ scheme_size;
3243                     const CHAR* suffix;
3244                     SZ suffix_size;
3245                 } scheme_map[] = {
3246                     /* In the order from the most frequently used, arguably. */
3247                     { _T("http"), 4,    _T("//"), 2 },
3248                     { _T("https"), 5,   _T("//"), 2 },
3249                     { _T("ftp"), 3,     _T("//"), 2 }
3250                 };
3251                 int scheme_index;
3252 
3253                 for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3254                     const CHAR* scheme = scheme_map[scheme_index].scheme;
3255                     const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3256                     const CHAR* suffix = scheme_map[scheme_index].suffix;
3257                     const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3258 
3259                     if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
3260                         (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~([")))  &&
3261                         off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
3262                     {
3263                         PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
3264                         /* Push a dummy as a reserve for a closer. */
3265                         PUSH_MARK('D', off, off, 0);
3266                         off += 1 + suffix_size;
3267                         break;
3268                     }
3269                 }
3270 
3271                 off++;
3272                 continue;
3273             }
3274 
3275             /* A potential permissive WWW autolink. */
3276             if(ch == _T('.')) {
3277                 if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
3278                     (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~([")))  &&
3279                     off + 1 < line_end)
3280                 {
3281                     PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
3282                     /* Push a dummy as a reserve for a closer. */
3283                     PUSH_MARK('D', off, off, 0);
3284                     off++;
3285                     continue;
3286                 }
3287 
3288                 off++;
3289                 continue;
3290             }
3291 
3292             /* A potential table cell boundary or wiki link label delimiter. */
3293             if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
3294                 PUSH_MARK(ch, off, off+1, 0);
3295                 off++;
3296                 continue;
3297             }
3298 
3299             /* A potential strikethrough start/end. */
3300             if(ch == _T('~')) {
3301                 OFF tmp = off+1;
3302 
3303                 while(tmp < line_end  &&  CH(tmp) == _T('~'))
3304                     tmp++;
3305 
3306                 if(tmp - off < 3) {
3307                     unsigned flags = 0;
3308 
3309                     if(tmp < line_end  &&  !ISUNICODEWHITESPACE(tmp))
3310                         flags |= MD_MARK_POTENTIAL_OPENER;
3311                     if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off))
3312                         flags |= MD_MARK_POTENTIAL_CLOSER;
3313                     if(flags != 0)
3314                         PUSH_MARK(ch, off, tmp, flags);
3315                 }
3316 
3317                 off = tmp;
3318                 continue;
3319             }
3320 
3321             /* A potential equation start/end */
3322             if(ch == _T('$')) {
3323                 /* We can have at most two consecutive $ signs,
3324                  * where two dollar signs signify a display equation. */
3325                 OFF tmp = off+1;
3326 
3327                 while(tmp < line_end && CH(tmp) == _T('$'))
3328                     tmp++;
3329 
3330                 if (tmp - off <= 2)
3331                     PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
3332                 off = tmp;
3333                 continue;
3334             }
3335 
3336             /* Turn non-trivial whitespace into single space. */
3337             if(ISWHITESPACE_(ch)) {
3338                 OFF tmp = off+1;
3339 
3340                 while(tmp < line_end  &&  ISWHITESPACE(tmp))
3341                     tmp++;
3342 
3343                 if(tmp - off > 1  ||  ch != _T(' '))
3344                     PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3345 
3346                 off = tmp;
3347                 continue;
3348             }
3349 
3350             /* NULL character. */
3351             if(ch == _T('\0')) {
3352                 PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
3353                 off++;
3354                 continue;
3355             }
3356 
3357             off++;
3358         }
3359     }
3360 
3361     /* Add a dummy mark at the end of the mark vector to simplify
3362      * process_inlines(). */
3363     PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);
3364 
3365 abort:
3366     return ret;
3367 }
3368 
3369 static void
3370 md_analyze_bracket(MD_CTX* ctx, int mark_index)
3371 {
3372     /* Only pairs up '[' and ']' marks; it does not decide what is a link.
3373      *
3374      * That decision needs context not available here (a following pair of
3375      * brackets of a reference link, or an enclosing pair which cannot be a
3376      * link if the inner one is). Hence an opener is merely appended to
3377      * BRACKET_OPENERS, while a closer takes the last pending opener, ties
3378      * the two together and queues the pair for md_resolve_links(). Pairs are
3379      * thus queued by the position of their closer, inner before outer.
3380      */
3381     MD_MARK* const bracket = &ctx->marks[mark_index];
3382     MD_MARK* opening;
3383     int opening_index, outer_index;
3384 
3385     if(bracket->flags & MD_MARK_POTENTIAL_OPENER) {
3386         /* The last pending opener, if any, now contains a nested bracket. */
3387         if(BRACKET_OPENERS.head != -1)
3388             ctx->marks[BRACKET_OPENERS.tail].flags |= MD_MARK_HASNESTEDBRACKETS;
3389         md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
3390         return;
3391     }
3392 
3393     /* A closer without any pending opener is left as it is. */
3394     opening_index = BRACKET_OPENERS.tail;
3395     if(opening_index < 0)
3396         return;
3397 
3398     /* Detach that opener from the tail of the opener chain. */
3399     opening = &ctx->marks[opening_index];
3400     outer_index = opening->prev;
3401     if(outer_index < 0)
3402         BRACKET_OPENERS.head = -1;
3403     else
3404         ctx->marks[outer_index].next = -1;
3405     BRACKET_OPENERS.tail = outer_index;
3406 
3407     /* Interconnect the pair. */
3408     opening->next = mark_index;
3409     bracket->prev = opening_index;
3410 
3411     /* Queue the pair for md_resolve_links(). The queue is threaded through
3412      * opener->prev, because opener->next already points to the closer. */
3413     if(ctx->unresolved_link_tail < 0)
3414         ctx->unresolved_link_head = opening_index;
3415     else
3416         ctx->marks[ctx->unresolved_link_tail].prev = opening_index;
3417     ctx->unresolved_link_tail = opening_index;
3418     opening->prev = -1;
3419 }
3420 
3421 /* Forward declaration, so the function can be called ahead of its definition. */
3422 static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3423                                      int mark_beg, int mark_end);
3424 
3425 static int
3426 md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3427 {
3428     int opener_index = ctx->unresolved_link_head;
3429     OFF last_link_beg = 0;
3430     OFF last_link_end = 0;
3431     OFF last_img_beg = 0;
3432     OFF last_img_end = 0;
3433 
3434     while(opener_index >= 0) {
3435         MD_MARK* opener = &ctx->marks[opener_index];
3436         int closer_index = opener->next;
3437         MD_MARK* closer = &ctx->marks[closer_index];
3438         int next_index = opener->prev;
3439         MD_MARK* next_opener;
3440         MD_MARK* next_closer;
3441         MD_LINK_ATTR attr;
3442         int is_link = FALSE;
3443 
3444         if(next_index >= 0) {
3445             next_opener = &ctx->marks[next_index];
3446             next_closer = &ctx->marks[next_opener->next];
3447         } else {
3448             next_opener = NULL;
3449             next_closer = NULL;
3450         }
3451 
3452         /* If nested ("[ [ ] ]"), we need to make sure that:
3453          *   - The outer does not end inside of (...) belonging to the inner.
3454          *   - The outer cannot be link if the inner is link (i.e. not image).
3455          *
3456          * (Note we here analyze from inner to outer as the marks are ordered
3457          * by closer->beg.)
3458          */
3459         if((opener->beg < last_link_beg  &&  closer->end < last_link_end)  ||
3460            (opener->beg < last_img_beg  &&  closer->end < last_img_end)  ||
3461            (opener->beg < last_link_end  &&  opener->ch == '['))
3462         {
3463             opener_index = next_index;
3464             continue;
3465         }
3466 
3467         /* Recognize and resolve wiki links.
3468          * Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
3469          */
3470         if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3471             (opener->end - opener->beg == 1) &&         /* not image */
3472             next_opener != NULL &&                      /* double '[' opener */
3473             next_opener->ch == '[' &&
3474             (next_opener->beg == opener->beg - 1) &&
3475             (next_opener->end - next_opener->beg == 1) &&
3476             next_closer != NULL &&                      /* double ']' closer */
3477             next_closer->ch == ']' &&
3478             (next_closer->beg == closer->beg + 1) &&
3479             (next_closer->end - next_closer->beg == 1))
3480         {
3481             MD_MARK* delim = NULL;
3482             int delim_index;
3483             OFF dest_beg, dest_end;
3484 
3485             is_link = TRUE;
3486 
3487             /* We don't allow destination to be longer than 100 characters.