Gitly


1 /*
2  * MD4C: Markdown parser for C
3  * (http://github.com/mity/md4c)
4  *
5  * Copyright (c) 2016-2019 Martin Mitas
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include <stdio.h>
27 #include <string.h>
28 
29 #include "md4c-html.h"
30 #include "entity.h"
31 
32 
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    /* "inline" became a keyword only with C99 (__STDC_VERSION__ 199901L).
     * C89/C90/C95 or old compilers in general may not understand it, so map
     * it to the vendor spelling or drop it altogether. */
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif

#if defined _MSC_VER  &&  _MSC_VER < 1900
    /* MSVC older than Visual Studio 2015 only offers the non-standard
     * _snprintf(). Newer MSVC and the other Windows toolchains provide
     * a conforming snprintf() which must not be overridden. */
    #define snprintf _snprintf
#endif
47 
48 
49 
50 typedef struct MD_HTML_tag MD_HTML;
51 struct MD_HTML_tag {
52     void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
53     void* userdata;
54     unsigned flags;
55     int image_nesting_level;
56     char escape_map[256];
57 };
58 
59 #define NEED_HTML_ESC_FLAG   0x1
60 #define NEED_URL_ESC_FLAG    0x2
61 
62 
63 /*****************************************
64  ***  HTML rendering helper functions  ***
65  *****************************************/
66 
/* Character classification by plain comparison against ASCII ranges.
 * Note the argument is evaluated more than once. */
#define ISDIGIT(ch)     (((ch) >= '0') && ((ch) <= '9'))
#define ISLOWER(ch)     (((ch) >= 'a') && ((ch) <= 'z'))
#define ISUPPER(ch)     (((ch) >= 'A') && ((ch) <= 'Z'))
#define ISALNUM(ch)     (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
71 
72 
73 static inline void
74 render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
75 {
76     r->process_output(text, size, r->userdata);
77 }
78 
79 /* Keep this as a macro. Most compiler should then be smart enough to replace
80  * the strlen() call with a compile-time constant if the string is a C literal. */
81 #define RENDER_VERBATIM(r, verbatim)                                    \
82         render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
83 
84 
85 static void
86 render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
87 {
88     MD_OFFSET beg = 0;
89     MD_OFFSET off = 0;
90 
91     /* Some characters need to be escaped in normal HTML text. */
92     #define NEED_HTML_ESC(ch)   (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
93 
94     while(1) {
95         /* Optimization: Use some loop unrolling. */
96         while(off + 3 < size  &&  !NEED_HTML_ESC(data[off+0])  &&  !NEED_HTML_ESC(data[off+1])
97                               &&  !NEED_HTML_ESC(data[off+2])  &&  !NEED_HTML_ESC(data[off+3]))
98             off += 4;
99         while(off < size  &&  !NEED_HTML_ESC(data[off]))
100             off++;
101 
102         if(off > beg)
103             render_verbatim(r, data + beg, off - beg);
104 
105         if(off < size) {
106             switch(data[off]) {
107                 case '&':   RENDER_VERBATIM(r, "&"); break;
108                 case '<':   RENDER_VERBATIM(r, "<"); break;
109                 case '>':   RENDER_VERBATIM(r, ">"); break;
110                 case '"':   RENDER_VERBATIM(r, """); break;
111             }
112             off++;
113         } else {
114             break;
115         }
116         beg = off;
117     }
118 }
119 
120 static void
121 render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
122 {
123     static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
124     MD_OFFSET beg = 0;
125     MD_OFFSET off = 0;
126 
127     /* Some characters need to be escaped in URL attributes. */
128     #define NEED_URL_ESC(ch)    (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
129 
130     while(1) {
131         while(off < size  &&  !NEED_URL_ESC(data[off]))
132             off++;
133         if(off > beg)
134             render_verbatim(r, data + beg, off - beg);
135 
136         if(off < size) {
137             char hex[3];
138 
139             switch(data[off]) {
140                 case '&':   RENDER_VERBATIM(r, "&"); break;
141                 default:
142                     hex[0] = '%';
143                     hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
144                     hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
145                     render_verbatim(r, hex, 3);
146                     break;
147             }
148             off++;
149         } else {
150             break;
151         }
152 
153         beg = off;
154     }
155 }
156 
/* Return the numeric value (0..15) of the hexadecimal digit CH.
 *
 * Both upper-case and lower-case digits are accepted. Any character which
 * is not a hexadecimal digit yields 0. */
static unsigned
hex_val(char ch)
{
    if('0' <= ch && ch <= '9')
        return (unsigned) (ch - '0');
    if('A' <= ch && ch <= 'F')
        return (unsigned) (ch - 'A') + 10;
    if('a' <= ch && ch <= 'f')
        return (unsigned) (ch - 'a') + 10;
    return 0;
}
167 
168 static void
169 render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
170                       void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
171 {
172     static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
173 
174     unsigned char utf8[4];
175     size_t n;
176 
177     if(codepoint <= 0x7f) {
178         n = 1;
179         utf8[0] = codepoint;
180     } else if(codepoint <= 0x7ff) {
181         n = 2;
182         utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
183         utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
184     } else if(codepoint <= 0xffff) {
185         n = 3;
186         utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
187         utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
188         utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
189     } else {
190         n = 4;
191         utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
192         utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
193         utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
194         utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
195     }
196 
197     if(0 < codepoint  &&  codepoint <= 0x10ffff)
198         fn_append(r, (char*)utf8, (MD_SIZE)n);
199     else
200         fn_append(r, utf8_replacement_char, 3);
201 }
202 
203 /* Translate entity to its UTF-8 equivalent, or output the verbatim one
204  * if such entity is unknown (or if the translation is disabled). */
205 static void
206 render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
207               void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
208 {
209     if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
210         render_verbatim(r, text, size);
211         return;
212     }
213 
214     /* We assume UTF-8 output is what is desired. */
215     if(size > 3 && text[1] == '#') {
216         unsigned codepoint = 0;
217 
218         if(text[2] == 'x' || text[2] == 'X') {
219             /* Hexadecimal entity (e.g. "�")). */
220             MD_SIZE i;
221             for(i = 3; i < size-1; i++)
222                 codepoint = 16 * codepoint + hex_val(text[i]);
223         } else {
224             /* Decimal entity (e.g. "&1234;") */
225             MD_SIZE i;
226             for(i = 2; i < size-1; i++)
227                 codepoint = 10 * codepoint + (text[i] - '0');
228         }
229 
230         render_utf8_codepoint(r, codepoint, fn_append);
231         return;
232     } else {
233         /* Named entity (e.g. " "). */
234         const struct entity* ent;
235 
236         ent = entity_lookup(text, size);
237         if(ent != NULL) {
238             render_utf8_codepoint(r, ent->codepoints[0], fn_append);
239             if(ent->codepoints[1])
240                 render_utf8_codepoint(r, ent->codepoints[1], fn_append);
241             return;
242         }
243     }
244 
245     fn_append(r, text, size);
246 }
247 
248 static void
249 render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
250                  void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
251 {
252     int i;
253 
254     for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
255         MD_TEXTTYPE type = attr->substr_types[i];
256         MD_OFFSET off = attr->substr_offsets[i];
257         MD_SIZE size = attr->substr_offsets[i+1] - off;
258         const MD_CHAR* text = attr->text + off;
259 
260         switch(type) {
261             case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, render_verbatim); break;
262             case MD_TEXT_ENTITY:    render_entity(r, text, size, fn_append); break;
263             default:                fn_append(r, text, size); break;
264         }
265     }
266 }
267 
268 
269 static void
270 render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
271 {
272     char buf[64];
273 
274     if(det->start == 1) {
275         RENDER_VERBATIM(r, "<ol>\n");
276         return;
277     }
278 
279     snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
280     RENDER_VERBATIM(r, buf);
281 }
282 
283 static void
284 render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
285 {
286     if(det->is_task) {
287         RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
288                           "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
289         if(det->task_mark == 'x' || det->task_mark == 'X')
290             RENDER_VERBATIM(r, " checked");
291         RENDER_VERBATIM(r, ">");
292     } else {
293         RENDER_VERBATIM(r, "<li>");
294     }
295 }
296 
297 static void
298 render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
299 {
300     RENDER_VERBATIM(r, "<pre><code");
301 
302     /* If known, output the HTML 5 attribute class="language-LANGNAME". */
303     if(det->lang.text != NULL) {
304         RENDER_VERBATIM(r, " class=\"language-");
305         render_attribute(r, &det->lang, render_html_escaped);
306         RENDER_VERBATIM(r, "\"");
307     }
308 
309     RENDER_VERBATIM(r, ">");
310 }
311 
312 static void
313 render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
314 {
315     RENDER_VERBATIM(r, "<");
316     RENDER_VERBATIM(r, cell_type);
317 
318     switch(det->align) {
319         case MD_ALIGN_LEFT:     RENDER_VERBATIM(r, " align=\"left\">"); break;
320         case MD_ALIGN_CENTER:   RENDER_VERBATIM(r, " align=\"center\">"); break;
321         case MD_ALIGN_RIGHT:    RENDER_VERBATIM(r, " align=\"right\">"); break;
322         default:                RENDER_VERBATIM(r, ">"); break;
323     }
324 }
325 
326 static void
327 render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
328 {
329     RENDER_VERBATIM(r, "<a href=\"");
330     render_attribute(r, &det->href, render_url_escaped);
331 
332     if(det->title.text != NULL) {
333         RENDER_VERBATIM(r, "\" title=\"");
334         render_attribute(r, &det->title, render_html_escaped);
335     }
336 
337     RENDER_VERBATIM(r, "\">");
338 }
339 
340 static void
341 render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
342 {
343     RENDER_VERBATIM(r, "<img src=\"");
344     render_attribute(r, &det->src, render_url_escaped);
345 
346     RENDER_VERBATIM(r, "\" alt=\"");
347 
348     r->image_nesting_level++;
349 }
350 
351 static void
352 render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
353 {
354     if(det->title.text != NULL) {
355         RENDER_VERBATIM(r, "\" title=\"");
356         render_attribute(r, &det->title, render_html_escaped);
357     }
358 
359     RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
360 
361     r->image_nesting_level--;
362 }
363 
364 static void
365 render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
366 {
367     RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
368     render_attribute(r, &det->target, render_html_escaped);
369 
370     RENDER_VERBATIM(r, "\">");
371 }
372 
373 
374 /**************************************
375  ***  HTML renderer implementation  ***
376  **************************************/
377 
378 static int
379 enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
380 {
381     static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
382     MD_HTML* r = (MD_HTML*) userdata;
383 
384     switch(type) {
385         case MD_BLOCK_DOC:      /* noop */ break;
386         case MD_BLOCK_QUOTE:    RENDER_VERBATIM(r, "<blockquote>\n"); break;
387         case MD_BLOCK_UL:       RENDER_VERBATIM(r, "<ul>\n"); break;
388         case MD_BLOCK_OL:       render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
389         case MD_BLOCK_LI:       render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
390         case MD_BLOCK_HR:       RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"); break;
391         case MD_BLOCK_H:        RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
392         case MD_BLOCK_CODE:     render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break;
393         case MD_BLOCK_HTML:     /* noop */ break;
394         case MD_BLOCK_P:        RENDER_VERBATIM(r, "<p>"); break;
395         case MD_BLOCK_TABLE:    RENDER_VERBATIM(r, "<table>\n"); break;
396         case MD_BLOCK_THEAD:    RENDER_VERBATIM(r, "<thead>\n"); break;
397         case MD_BLOCK_TBODY:    RENDER_VERBATIM(r, "<tbody>\n"); break;
398         case MD_BLOCK_TR:       RENDER_VERBATIM(r, "<tr>\n"); break;
399         case MD_BLOCK_TH:       render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break;
400         case MD_BLOCK_TD:       render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break;
401     }
402 
403     return 0;
404 }
405 
406 static int
407 leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
408 {
409     static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
410     MD_HTML* r = (MD_HTML*) userdata;
411 
412     switch(type) {
413         case MD_BLOCK_DOC:      /*noop*/ break;
414         case MD_BLOCK_QUOTE:    RENDER_VERBATIM(r, "</blockquote>\n"); break;
415         case MD_BLOCK_UL:       RENDER_VERBATIM(r, "</ul>\n"); break;
416         case MD_BLOCK_OL:       RENDER_VERBATIM(r, "</ol>\n"); break;
417         case MD_BLOCK_LI:       RENDER_VERBATIM(r, "</li>\n"); break;
418         case MD_BLOCK_HR:       /*noop*/ break;
419         case MD_BLOCK_H:        RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
420         case MD_BLOCK_CODE:     RENDER_VERBATIM(r, "</code></pre>\n"); break;
421         case MD_BLOCK_HTML:     /* noop */ break;
422         case MD_BLOCK_P:        RENDER_VERBATIM(r, "</p>\n"); break;
423         case MD_BLOCK_TABLE:    RENDER_VERBATIM(r, "</table>\n"); break;
424         case MD_BLOCK_THEAD:    RENDER_VERBATIM(r, "</thead>\n"); break;
425         case MD_BLOCK_TBODY:    RENDER_VERBATIM(r, "</tbody>\n"); break;
426         case MD_BLOCK_TR:       RENDER_VERBATIM(r, "</tr>\n"); break;
427         case MD_BLOCK_TH:       RENDER_VERBATIM(r, "</th>\n"); break;
428         case MD_BLOCK_TD:       RENDER_VERBATIM(r, "</td>\n"); break;
429     }
430 
431     return 0;
432 }
433 
434 static int
435 enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
436 {
437     MD_HTML* r = (MD_HTML*) userdata;
438 
439     if(r->image_nesting_level > 0) {
440         /* We are inside a Markdown image label. Markdown allows to use any
441          * emphasis and other rich contents in that context similarly as in
442          * any link label.
443          *
444          * However, unlike in the case of links (where that contents becomes
445          * contents of the <a>...</a> tag), in the case of images the contents
446          * is supposed to fall into the attribute alt: <img alt="...">.
447          *
448          * In that context we naturally cannot output nested HTML tags. So lets
449          * suppress them and only output the plain text (i.e. what falls into
450          * text() callback).
451          *
452          * This make-it-a-plain-text approach is the recommended practice by
453          * CommonMark specification (for HTML output).
454          */
455         return 0;
456     }
457 
458     switch(type) {
459         case MD_SPAN_EM:                RENDER_VERBATIM(r, "<em>"); break;
460         case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "<strong>"); break;
461         case MD_SPAN_U:                 RENDER_VERBATIM(r, "<u>"); break;
462         case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
463         case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
464         case MD_SPAN_CODE:              RENDER_VERBATIM(r, "<code>"); break;
465         case MD_SPAN_DEL:               RENDER_VERBATIM(r, "<del>"); break;
466         case MD_SPAN_LATEXMATH:         RENDER_VERBATIM(r, "<x-equation>"); break;
467         case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
468         case MD_SPAN_WIKILINK:          render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
469     }
470 
471     return 0;
472 }
473 
474 static int
475 leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
476 {
477     MD_HTML* r = (MD_HTML*) userdata;
478 
479     if(r->image_nesting_level > 0) {
480         /* Ditto as in enter_span_callback(), except we have to allow the
481          * end of the <img> tag. */
482         if(r->image_nesting_level == 1  &&  type == MD_SPAN_IMG)
483             render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
484         return 0;
485     }
486 
487     switch(type) {
488         case MD_SPAN_EM:                RENDER_VERBATIM(r, "</em>"); break;
489         case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "</strong>"); break;
490         case MD_SPAN_U:                 RENDER_VERBATIM(r, "</u>"); break;
491         case MD_SPAN_A:                 RENDER_VERBATIM(r, "</a>"); break;
492         case MD_SPAN_IMG:               /*noop, handled above*/ break;
493         case MD_SPAN_CODE:              RENDER_VERBATIM(r, "</code>"); break;
494         case MD_SPAN_DEL:               RENDER_VERBATIM(r, "</del>"); break;
495         case MD_SPAN_LATEXMATH:         /*fall through*/
496         case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
497         case MD_SPAN_WIKILINK:          RENDER_VERBATIM(r, "</x-wikilink>"); break;
498     }
499 
500     return 0;
501 }
502 
503 static int
504 text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
505 {
506     MD_HTML* r = (MD_HTML*) userdata;
507 
508     switch(type) {
509         case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, render_verbatim); break;
510         case MD_TEXT_BR:        RENDER_VERBATIM(r, (r->image_nesting_level == 0
511                                         ? ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")
512                                         : " "));
513                                 break;
514         case MD_TEXT_SOFTBR:    RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); break;
515         case MD_TEXT_HTML:      render_verbatim(r, text, size); break;
516         case MD_TEXT_ENTITY:    render_entity(r, text, size, render_html_escaped); break;
517         default:                render_html_escaped(r, text, size); break;
518     }
519 
520     return 0;
521 }
522 
523 static void
524 debug_log_callback(const char* msg, void* userdata)
525 {
526     MD_HTML* r = (MD_HTML*) userdata;
527     if(r->flags & MD_HTML_FLAG_DEBUG)
528         fprintf(stderr, "MD4C: %s\n", msg);
529 }
530 
531 int
532 md_html(const MD_CHAR* input, MD_SIZE input_size,
533         void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
534         void* userdata, unsigned parser_flags, unsigned renderer_flags)
535 {
536     MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
537     int i;
538 
539     MD_PARSER parser = {
540         0,
541         parser_flags,
542         enter_block_callback,
543         leave_block_callback,
544         enter_span_callback,
545         leave_span_callback,
546         text_callback,
547         debug_log_callback,
548         NULL
549     };
550 
551     /* Build map of characters which need escaping. */
552     for(i = 0; i < 256; i++) {
553         unsigned char ch = (unsigned char) i;
554 
555         if(strchr("\"&<>", ch) != NULL)
556             render.escape_map[i] |= NEED_HTML_ESC_FLAG;
557 
558         if(!ISALNUM(ch)  &&  strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
559             render.escape_map[i] |= NEED_URL_ESC_FLAG;
560     }
561 
562     /* Consider skipping UTF-8 byte order mark (BOM). */
563     if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM  &&  sizeof(MD_CHAR) == 1) {
564         static const MD_CHAR bom[3] = { 0xef, 0xbb, 0xbf };
565         if(input_size >= sizeof(bom)  &&  memcmp(input, bom, sizeof(bom)) == 0) {
566             input += sizeof(bom);
567             input_size -= sizeof(bom);
568         }
569     }
570 
571     return md_parse(input, input_size, &parser, (void*) &render);
572 }
573 
574

1	/*
2	* MD4C: Markdown parser for C
3	* (http://github.com/mity/md4c)
4	*
5	* Copyright (c) 2016-2019 Martin Mitas
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a
8	* copy of this software and associated documentation files (the "Software"),
9	* to deal in the Software without restriction, including without limitation
10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
11	* and/or sell copies of the Software, and to permit persons to whom the
12	* Software is furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23	* IN THE SOFTWARE.
24	*/
25
26	#include <stdio.h>
27	#include <string.h>
28
29	#include "md4c-html.h"
30	#include "entity.h"
31
32
33	#if !defined(__STDC_VERSION__) \|\| __STDC_VERSION__ < 199409L
34	/ C89/90 or old compilers in general may not understand "inline". /
35	#if defined __GNUC__
36	#define inline __inline__
37	#elif defined _MSC_VER
38	#define inline __inline
39	#else
40	#define inline
41	#endif
42	#endif
43
44	#ifdef _WIN32
45	#define snprintf _snprintf
46	#endif
47
48
49
50	typedef struct MD_HTML_tag MD_HTML;
51	struct MD_HTML_tag {
52	void (process_output)(const* MD_CHAR, MD_SIZE, void**);
53	void* userdata;
54	unsigned flags;
55	int image_nesting_level;
56	char escape_map[256];
57	};
58
59	#define NEED_HTML_ESC_FLAG 0x1
60	#define NEED_URL_ESC_FLAG 0x2
61
62
63	/*****************************************
64	* HTML rendering helper functions *
65	*****************************************/
66
67	#define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9')
68	#define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z')
69	#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
70	#define ISALNUM(ch) (ISLOWER(ch) \|\| ISUPPER(ch) \|\| ISDIGIT(ch))
71
72
73	static inline void
74	render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
75	{
76	r->process_output(text, size, r->userdata);
77	}
78
79	/ Keep this as a macro. Most compiler should then be smart enough to replace*
80	* the strlen() call with a compile-time constant if the string is a C literal. */
81	#define RENDER_VERBATIM(r, verbatim) \
82	render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
83
84
85	static void
86	render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
87	{
88	MD_OFFSET beg = 0;
89	MD_OFFSET off = 0;
90
91	/ Some characters need to be escaped in normal HTML text. /
92	#define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
93
94	while(1) {
95	/ Optimization: Use some loop unrolling. /
96	while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1])
97	&& !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3]))
98	off += 4;
99	while(off < size && !NEED_HTML_ESC(data[off]))
100	off++;
101
102	if(off > beg)
103	render_verbatim(r, data + beg, off - beg);
104
105	if(off < size) {
106	switch(data[off]) {
107	case '&': RENDER_VERBATIM(r, "&"); break;
108	case '<': RENDER_VERBATIM(r, "<"); break;
109	case '>': RENDER_VERBATIM(r, ">"); break;
110	case '"': RENDER_VERBATIM(r, """); break;
111	}
112	off++;
113	} else {
114	break;
115	}
116	beg = off;
117	}
118	}
119
120	static void
121	render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
122	{
123	static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
124	MD_OFFSET beg = 0;
125	MD_OFFSET off = 0;
126
127	/ Some characters need to be escaped in URL attributes. /
128	#define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
129
130	while(1) {
131	while(off < size && !NEED_URL_ESC(data[off]))
132	off++;
133	if(off > beg)
134	render_verbatim(r, data + beg, off - beg);
135
136	if(off < size) {
137	char hex[3];
138
139	switch(data[off]) {
140	case '&': RENDER_VERBATIM(r, "&"); break;
141	default:
142	hex[0] = '%';
143	hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
144	hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
145	render_verbatim(r, hex, 3);
146	break;
147	}
148	off++;
149	} else {
150	break;
151	}
152
153	beg = off;
154	}
155	}
156
157	static unsigned
158	hex_val(char ch)
159	{
160	if('0' <= ch && ch <= '9')
161	return ch - '0';
162	if('A' <= ch && ch <= 'Z')
163	return ch - 'A' + 10;
164	else
165	return ch - 'a' + 10;
166	}
167
168	static void
169	render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
170	void (fn_append)(MD_HTML, const MD_CHAR*, MD_SIZE))
171	{
172	static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
173
174	unsigned char utf8[4];
175	size_t n;
176
177	if(codepoint <= 0x7f) {
178	n = 1;
179	utf8[0] = codepoint;
180	} else if(codepoint <= 0x7ff) {
181	n = 2;
182	utf8[0] = 0xc0 \| ((codepoint >> 6) & 0x1f);
183	utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
184	} else if(codepoint <= 0xffff) {
185	n = 3;
186	utf8[0] = 0xe0 \| ((codepoint >> 12) & 0xf);
187	utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
188	utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
189	} else {
190	n = 4;
191	utf8[0] = 0xf0 \| ((codepoint >> 18) & 0x7);
192	utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
193	utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
194	utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
195	}
196
197	if(0 < codepoint && codepoint <= 0x10ffff)
198	fn_append(r, (char*)utf8, (MD_SIZE)n);
199	else
200	fn_append(r, utf8_replacement_char, 3);
201	}
202
203	/ Translate entity to its UTF-8 equivalent, or output the verbatim one*
204	* if such entity is unknown (or if the translation is disabled). */
205	static void
206	render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
207	void (fn_append)(MD_HTML, const MD_CHAR*, MD_SIZE))
208	{
209	if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
210	render_verbatim(r, text, size);
211	return;
212	}
213
214	/ We assume UTF-8 output is what is desired. /
215	if(size > 3 && text[1] == '#') {
216	unsigned codepoint = 0;
217
218	if(text[2] == 'x' \|\| text[2] == 'X') {
219	/ Hexadecimal entity (e.g. "�")). /
220	MD_SIZE i;
221	for(i = 3; i < size-1; i++)
222	codepoint = 16 * codepoint + hex_val(text[i]);
223	} else {
224	/ Decimal entity (e.g. "&1234;") /
225	MD_SIZE i;
226	for(i = 2; i < size-1; i++)
227	codepoint = 10 * codepoint + (text[i] - '0');
228	}
229
230	render_utf8_codepoint(r, codepoint, fn_append);
231	return;
232	} else {
233	/ Named entity (e.g. " "). /
234	const struct entity* ent;
235
236	ent = entity_lookup(text, size);
237	if(ent != NULL) {
238	render_utf8_codepoint(r, ent->codepoints[0], fn_append);
239	if(ent->codepoints[1])
240	render_utf8_codepoint(r, ent->codepoints[1], fn_append);
241	return;
242	}
243	}
244
245	fn_append(r, text, size);
246	}
247
248	static void
249	render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
250	void (fn_append)(MD_HTML, const MD_CHAR*, MD_SIZE))
251	{
252	int i;
253
254	for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
255	MD_TEXTTYPE type = attr->substr_types[i];
256	MD_OFFSET off = attr->substr_offsets[i];
257	MD_SIZE size = attr->substr_offsets[i+1] - off;
258	const MD_CHAR* text = attr->text + off;
259
260	switch(type) {
261	case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
262	case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break;
263	default: fn_append(r, text, size); break;
264	}
265	}
266	}
267
268
269	static void
270	render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
271	{
272	char buf[64];
273
274	if(det->start == 1) {
275	RENDER_VERBATIM(r, "<ol>\n");
276	return;
277	}
278
279	snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
280	RENDER_VERBATIM(r, buf);
281	}
282
283	static void
284	render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
285	{
286	if(det->is_task) {
287	RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
288	"<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
289	if(det->task_mark == 'x' \|\| det->task_mark == 'X')
290	RENDER_VERBATIM(r, " checked");
291	RENDER_VERBATIM(r, ">");
292	} else {
293	RENDER_VERBATIM(r, "<li>");
294	}
295	}
296
297	static void
298	render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
299	{
300	RENDER_VERBATIM(r, "<pre><code");
301
302	/ If known, output the HTML 5 attribute class="language-LANGNAME". /
303	if(det->lang.text != NULL) {
304	RENDER_VERBATIM(r, " class=\"language-");
305	render_attribute(r, &det->lang, render_html_escaped);
306	RENDER_VERBATIM(r, "\"");
307	}
308
309	RENDER_VERBATIM(r, ">");
310	}
311
312	static void
313	render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
314	{
315	RENDER_VERBATIM(r, "<");
316	RENDER_VERBATIM(r, cell_type);
317
318	switch(det->align) {
319	case MD_ALIGN_LEFT: RENDER_VERBATIM(r, " align=\"left\">"); break;
320	case MD_ALIGN_CENTER: RENDER_VERBATIM(r, " align=\"center\">"); break;
321	case MD_ALIGN_RIGHT: RENDER_VERBATIM(r, " align=\"right\">"); break;
322	default: RENDER_VERBATIM(r, ">"); break;
323	}
324	}
325
326	static void
327	render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
328	{
329	RENDER_VERBATIM(r, "<a href=\"");
330	render_attribute(r, &det->href, render_url_escaped);
331
332	if(det->title.text != NULL) {
333	RENDER_VERBATIM(r, "\" title=\"");
334	render_attribute(r, &det->title, render_html_escaped);
335	}
336
337	RENDER_VERBATIM(r, "\">");
338	}
339
340	static void
341	render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
342	{
343	RENDER_VERBATIM(r, "<img src=\"");
344	render_attribute(r, &det->src, render_url_escaped);
345
346	RENDER_VERBATIM(r, "\" alt=\"");
347
348	r->image_nesting_level++;
349	}
350
351	static void
352	render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
353	{
354	if(det->title.text != NULL) {
355	RENDER_VERBATIM(r, "\" title=\"");
356	render_attribute(r, &det->title, render_html_escaped);
357	}
358
359	RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
360
361	r->image_nesting_level--;
362	}
363
364	static void
365	render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
366	{
367	RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
368	render_attribute(r, &det->target, render_html_escaped);
369
370	RENDER_VERBATIM(r, "\">");
371	}
372
373
/**************************************
 ***  HTML renderer implementation  ***
 **************************************/
377
378	static int
379	enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
380	{
381	static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
382	MD_HTML* r = (MD_HTML*) userdata;
383
384	switch(type) {
385	case MD_BLOCK_DOC: / noop / break;
386	case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "<blockquote>\n"); break;
387	case MD_BLOCK_UL: RENDER_VERBATIM(r, "<ul>\n"); break;
388	case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL)detail); break*;
389	case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL)detail); break*;
390	case MD_BLOCK_HR: RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"); break;
391	case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL)detail)->level - 1]); break*;
392	case MD_BLOCK_CODE: render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL) detail); break*;
393	case MD_BLOCK_HTML: / noop / break;
394	case MD_BLOCK_P: RENDER_VERBATIM(r, "<p>"); break;
395	case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "<table>\n"); break;
396	case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "<thead>\n"); break;
397	case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "<tbody>\n"); break;
398	case MD_BLOCK_TR: RENDER_VERBATIM(r, "<tr>\n"); break;
399	case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL)detail); break*;
400	case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL)detail); break*;
401	}
402
403	return 0;
404	}
405
406	static int
407	leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
408	{
409	static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
410	MD_HTML* r = (MD_HTML*) userdata;
411
412	switch(type) {
413	case MD_BLOCK_DOC: /noop/ break;
414	case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "</blockquote>\n"); break;
415	case MD_BLOCK_UL: RENDER_VERBATIM(r, "</ul>\n"); break;
416	case MD_BLOCK_OL: RENDER_VERBATIM(r, "</ol>\n"); break;
417	case MD_BLOCK_LI: RENDER_VERBATIM(r, "</li>\n"); break;
418	case MD_BLOCK_HR: /noop/ break;
419	case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL)detail)->level - 1]); break*;
420	case MD_BLOCK_CODE: RENDER_VERBATIM(r, "</code></pre>\n"); break;
421	case MD_BLOCK_HTML: / noop / break;
422	case MD_BLOCK_P: RENDER_VERBATIM(r, "</p>\n"); break;
423	case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "</table>\n"); break;
424	case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "</thead>\n"); break;
425	case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "</tbody>\n"); break;
426	case MD_BLOCK_TR: RENDER_VERBATIM(r, "</tr>\n"); break;
427	case MD_BLOCK_TH: RENDER_VERBATIM(r, "</th>\n"); break;
428	case MD_BLOCK_TD: RENDER_VERBATIM(r, "</td>\n"); break;
429	}
430
431	return 0;
432	}
433
434	static int
435	enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
436	{
437	MD_HTML* r = (MD_HTML*) userdata;
438
439	if(r->image_nesting_level > 0) {
440	/ We are inside a Markdown image label. Markdown allows to use any*
441	* emphasis and other rich contents in that context similarly as in
442	* any link label.
443	*
444	* However, unlike in the case of links (where that contents becomes
445	* contents of the <a>...</a> tag), in the case of images the contents
446	* is supposed to fall into the attribute alt: <img alt="...">.
447	*
448	* In that context we naturally cannot output nested HTML tags. So lets
449	* suppress them and only output the plain text (i.e. what falls into
450	* text() callback).
451	*
452	* This make-it-a-plain-text approach is the recommended practice by
453	* CommonMark specification (for HTML output).
454	*/
455	return 0;
456	}
457
458	switch(type) {
459	case MD_SPAN_EM: RENDER_VERBATIM(r, "<em>"); break;
460	case MD_SPAN_STRONG: RENDER_VERBATIM(r, "<strong>"); break;
461	case MD_SPAN_U: RENDER_VERBATIM(r, "<u>"); break;
462	case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL) detail); break*;
463	case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL) detail); break*;
464	case MD_SPAN_CODE: RENDER_VERBATIM(r, "<code>"); break;
465	case MD_SPAN_DEL: RENDER_VERBATIM(r, "<del>"); break;
466	case MD_SPAN_LATEXMATH: RENDER_VERBATIM(r, "<x-equation>"); break;
467	case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
468	case MD_SPAN_WIKILINK: render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL) detail); break*;
469	}
470
471	return 0;
472	}
473
474	static int
475	leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
476	{
477	MD_HTML* r = (MD_HTML*) userdata;
478
479	if(r->image_nesting_level > 0) {
480	/ Ditto as in enter_span_callback(), except we have to allow the*
481	* end of the <img> tag. */
482	if(r->image_nesting_level == 1 && type == MD_SPAN_IMG)
483	render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
484	return 0;
485	}
486
487	switch(type) {
488	case MD_SPAN_EM: RENDER_VERBATIM(r, "</em>"); break;
489	case MD_SPAN_STRONG: RENDER_VERBATIM(r, "</strong>"); break;
490	case MD_SPAN_U: RENDER_VERBATIM(r, "</u>"); break;
491	case MD_SPAN_A: RENDER_VERBATIM(r, "</a>"); break;
492	case MD_SPAN_IMG: /noop, handled above/ break;
493	case MD_SPAN_CODE: RENDER_VERBATIM(r, "</code>"); break;
494	case MD_SPAN_DEL: RENDER_VERBATIM(r, "</del>"); break;
495	case MD_SPAN_LATEXMATH: /fall through/
496	case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
497	case MD_SPAN_WIKILINK: RENDER_VERBATIM(r, "</x-wikilink>"); break;
498	}
499
500	return 0;
501	}
502
503	static int
504	text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
505	{
506	MD_HTML* r = (MD_HTML*) userdata;
507
508	switch(type) {
509	case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
510	case MD_TEXT_BR: RENDER_VERBATIM(r, (r->image_nesting_level == 0
511	? ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")
512	: " "));
513	break;
514	case MD_TEXT_SOFTBR: RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); break;
515	case MD_TEXT_HTML: render_verbatim(r, text, size); break;
516	case MD_TEXT_ENTITY: render_entity(r, text, size, render_html_escaped); break;
517	default: render_html_escaped(r, text, size); break;
518	}
519
520	return 0;
521	}
522
523	static void
524	debug_log_callback(const char* msg, void* userdata)
525	{
526	MD_HTML* r = (MD_HTML*) userdata;
527	if(r->flags & MD_HTML_FLAG_DEBUG)
528	fprintf(stderr, "MD4C: %s\n", msg);
529	}
530
531	int
532	md_html(const MD_CHAR* input, MD_SIZE input_size,
533	void (process_output)(const* MD_CHAR, MD_SIZE, void**),
534	void* userdata, unsigned parser_flags, unsigned renderer_flags)
535	{
536	MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
537	int i;
538
539	MD_PARSER parser = {
540	0,
541	parser_flags,
542	enter_block_callback,
543	leave_block_callback,
544	enter_span_callback,
545	leave_span_callback,
546	text_callback,
547	debug_log_callback,
548	NULL
549	};
550
551	/ Build map of characters which need escaping. /
552	for(i = 0; i < 256; i++) {
553	unsigned char ch = (unsigned char) i;
554
555	if(strchr("\"&<>", ch) != NULL)
556	render.escape_map[i] \|= NEED_HTML_ESC_FLAG;
557
558	if(!ISALNUM(ch) && strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
559	render.escape_map[i] \|= NEED_URL_ESC_FLAG;
560	}
561
562	/ Consider skipping UTF-8 byte order mark (BOM). /
563	if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) {
564	static const MD_CHAR bom[3] = { 0xef, 0xbb, 0xbf };
565	if(input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) {
566	input += sizeof(bom);
567	input_size -= sizeof(bom);
568	}
569	}
570
571	return md_parse(input, input_size, &parser, (void*) &render);
572	}
573
574