// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by a GPL license that can be found in the LICENSE file.
module highlight

// tab is the text emitted in place of every tab byte found in the input.
const tab = ' '

// highlight_text converts the source text `st` into highlighted markup.
// It returns three values: the generated markup, the number of lines counted,
// and the number of lines counted as source code (sloc).
//
// When `commit` is false, files whose extension is `md` are passed to
// convert_markdown_to_html and files whose extension is `txt` are returned
// unchanged; both report 0 lines and 0 sloc.
// When no language is registered for `file_path`, an empty `Lang{}` is used,
// so no keywords, strings or comments are recognised.
//
// NOTE(review): many literals appended below are empty strings in this copy of
// the file; presumably they carry the opening/closing markup - confirm against
// the upstream source before relying on the exact output.
pub fn highlight_text(st string, file_path string, commit bool) (string, int, int) {
	if !commit {
		file_extension := extract_extension_from_file_path(file_path)
		if file_extension == 'md' {
			return convert_markdown_to_html(st), 0, 0
		} else if file_extension == 'txt' {
			return st, 0, 0
		}
	}
	lang := extension_to_lang(file_path) or { Lang{} }
	// One extra byte is appended and the main loop stops one byte short of the
	// end, so the `runes[pos + 1]` lookahead below is always in range.
	text := '${st} '
	mut res := []u8{cap: text.len}
	mut lines := 0
	mut sloc := 0
	// ss remembers which quote byte opened the string currently being scanned.
	mut ss := u8(` `)
	lc := lang.line_comments
	mut mlc := ''
	mut mlc_end := ''
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
		mlc_end = lang.mline_comments[1]
	}
	res << ''.bytes()
	res << `\n`
	if !is_single_line(st) {
		res << '\n\n'.bytes()
		// The literal below contains a raw line break followed by the first line number.
		res << '
1'.bytes()
		lines++
	}
	mut in_comment := false
	mut in_line_comment := false
	mut in_string := false
	runes := text.bytes()
	for pos := 0; pos < runes.len - 1; pos++ {
		mut c := runes[pos]
		if c == `\n` {
			lines++
			if commit {
				// In commit (diff) mode the byte after the newline selects a class.
				// NOTE(review): `class` is not referenced by the literal below in
				// this copy of the file; presumably it was interpolated into
				// markup - confirm.
				mut class := ''
				if runes[pos + 1] == `+` {
					class = 'class="a"'
				} else if runes[pos + 1] == `-` {
					class = 'class="d"'
				}
				res << '
${lines}'.bytes()
			} else {
				res << '
${lines}'.bytes()
			}
			// A line comment ends at the newline.
			if in_line_comment {
				in_line_comment = false
				res << ''.bytes()
			}
			if in_comment {
				res << ''.bytes()
			}
			// A line counts as source code when we are not inside a block comment
			// and the next byte is not another newline.
			if !in_comment && !in_line_comment && runes[pos + 1] != `\n` {
				sloc++
			}
			continue
		}
		if c == `\t` {
			res << tab.bytes()
			continue
		}
		if in_comment {
			res << write(c)
			if mlc_end != '' && c == mlc_end[0] && is_line_comment(runes, pos, mlc_end) {
				in_comment = false
				// Consume the rest of the terminator, whatever its length.
				// is_line_comment has verified that these bytes exist.
				for _ in 1 .. mlc_end.len {
					pos++
					res << write(runes[pos])
				}
				res << ''.bytes()
			}
			continue
		}
		if in_line_comment {
			res << write(c)
			continue
		}
		if in_string {
			res << write(c)
			// Inside a double-quoted string, a byte preceded by a backslash never
			// terminates the string.
			if runes[pos - 1] == `\\` && ss == `"` {
				continue
			}
			if c == ss {
				in_string = false
				res << ''.bytes()
			}
			continue
		}
		if is_letter(c, lang) {
			word_start := pos
			// The appended trailing byte is not a letter, so this scan stops
			// before running off the end of `runes`.
			for is_letter(c, lang) {
				pos++
				c = runes[pos]
			}
			w := runes[word_start..pos].bytestr()
			// Step back so the loop increment lands on the first byte after the word.
			pos--
			if w in lang.keywords {
				res << '${w}'.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		if is_string_token(c, lang) {
			in_string = true
			ss = c
			res << ''.bytes()
		} else if mlc != '' && c == mlc[0] && is_line_comment(runes, pos, mlc) {
			in_comment = true
			res << ''.bytes()
		} else if lc != '' && c == lc[0] && is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << ''.bytes()
		}
		res << write(c)
	}
	res << '
'.bytes()
	return res.bytestr(), lines, sloc
}

// highlight_line returns HTML-escaped, syntax-highlighted markup for a
// single line of source code. It is stateless across calls (does not
// track multi-line strings or block comments), so it suits diff rendering
// where each line is colored independently.
// An empty `content` yields an empty string. When no language is registered
// for `file_path`, the result is `escape_html(content)`.
pub fn highlight_line(content string, file_path string) string {
	if content.len == 0 {
		return ''
	}
	lang := extension_to_lang(file_path) or { return escape_html(content) }
	lc := lang.line_comments
	mut mlc := ''
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
	}
	runes := content.bytes()
	mut res := []u8{cap: runes.len + 16}
	mut in_string := false
	mut ss := u8(` `)
	mut in_line_comment := false
	for pos := 0; pos < runes.len; pos++ {
		c := runes[pos]
		if in_line_comment {
			res << write(c)
			continue
		}
		if in_string {
			res << write(c)
			if pos > 0 && runes[pos - 1] == `\\` && ss == `"` {
				continue
			}
			if c == ss {
				in_string = false
				res << ''.bytes()
			}
			continue
		}
		if is_letter(c, lang) {
			word_start := pos
			for pos < runes.len && is_letter(runes[pos], lang) {
				pos++
			}
			w := runes[word_start..pos].bytestr()
			// Step back so the loop increment lands on the first byte after the word.
			pos--
			if w in lang.keywords {
				res << ''.bytes()
				res << w.bytes()
				res << ''.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		if is_string_token(c, lang) {
			in_string = true
			ss = c
			res << ''.bytes()
			res << write(c)
			continue
		}
		// A block-comment opener is treated like a line comment here: everything
		// up to the end of the line is emitted as comment text.
		// is_line_comment rejects tokens that would extend past the end of `runes`.
		if mlc != '' && c == mlc[0] && is_line_comment(runes, pos, mlc) {
			in_line_comment = true
			res << ''.bytes()
			res << write(c)
			continue
		}
		if lc != '' && c == lc[0] && is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << ''.bytes()
			res << write(c)
			continue
		}
		res << write(c)
	}
	// Close whatever is still open at the end of the line.
	if in_line_comment {
		res << ''.bytes()
	}
	if in_string {
		res << ''.bytes()
	}
	return res.bytestr()
}

// escape_html returns a copy of `s` in which the bytes `<`, `>` and `&` are
// replaced by the literals below; all other bytes are copied unchanged.
// NOTE(review): in this copy of the file each replacement literal is identical
// to the byte it replaces; presumably they were HTML entities - confirm.
fn escape_html(s string) string {
	mut res := []u8{cap: s.len}
	for i in 0 .. s.len {
		c := s[i]
		if c == `<` {
			res << '<'.bytes()
		} else if c == `>` {
			res << '>'.bytes()
		} else if c == `&` {
			res << '&'.bytes()
		} else {
			res << c
		}
	}
	return res.bytestr()
}

// write returns the bytes to emit for the single input byte `c`: a replacement
// literal for `<` and `>`, or the byte itself otherwise. Unlike escape_html it
// has no branch for `&`.
// NOTE(review): the replacement literals are identical to the input bytes in
// this copy of the file; presumably they were HTML entities - confirm.
fn write(c u8) []u8 {
	mut tmp := []u8{}
	if c == `<` {
		tmp << '<'.bytes()
	} else if c == `>` {
		tmp << '>'.bytes()
	} else {
		tmp << c
	}
	return tmp
}

// is_letter reports whether `c` can be part of a word (identifier or keyword):
// an ASCII letter or `_`, plus `#` when the language name (compared in lower
// case) is `cpp`, `c`, `d` or `swift`.
fn is_letter(c u8, lang Lang) bool {
	name := lang.name.to_lower()
	if (name == 'cpp' || name == 'c' || name == 'd' || name == 'swift') && c == `#` {
		return true
	}
	return c.is_letter() || c == `_`
}

// is_string_token reports whether `c` equals the first byte of any entry in
// `lang.string_start`. Empty entries are skipped instead of being indexed.
fn is_string_token(c u8, lang Lang) bool {
	for val in lang.string_start {
		if val.len > 0 && c == val[0] {
			return true
		}
	}
	return false
}

// is_line_comment reports whether the bytes of `s` starting at `pos` are exactly
// the bytes of `lc`. Despite its name it is used for line-comment markers as
// well as block-comment openers and terminators.
// It returns false, rather than indexing past the end of `s`, when `lc` does
// not fit into `s` at `pos`. An empty `lc` matches at any valid position.
fn is_line_comment(s []u8, pos int, lc string) bool {
	if pos < 0 || pos + lc.len > s.len {
		return false
	}
	for i, b in lc {
		if s[pos + i] != b {
			return false
		}
	}
	return true
}

// is_single_line reports whether `s` contains at most one newline byte.
fn is_single_line(s string) bool {
	mut cnt := 0
	for i in 0 .. s.len {
		if s[i] == `\n` {
			cnt++
			if cnt > 1 {
				return false
			}
		}
	}
	return true
}

// extract_extension_from_file_path returns the lower-cased text after the last
// `.` in `path`. When `path` contains no `.`, the whole lower-cased path is
// returned.
fn extract_extension_from_file_path(path string) string {
	return path.split('.').last().to_lower()
}