// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by a GPL license that can be found in the LICENSE file.
module highlight
// tab is the text appended to the output in place of every `\t` byte
// encountered by highlight_text.
const tab = ' '
// highlight_text converts the text `st` into highlighted markup.
// It returns three values: the generated markup, the number of lines counted,
// and the number of lines counted as source code (sloc).
// When `commit` is false, a file whose extension is `md` is handed to
// convert_markdown_to_html and a `txt` file is returned unchanged; both cases
// report 0 for the two counters.
// NOTE(review): most markup literals appended below are empty strings in this
// revision, so they emit nothing; presumably they are meant to hold opening and
// closing tags - confirm the intended markup.
pub fn highlight_text(st string, file_path string, commit bool) (string, int, int) {
if !commit {
file_extension := extract_extension_from_file_path(file_path)
if file_extension == 'md' {
return convert_markdown_to_html(st), 0, 0
} else if file_extension == 'txt' {
return st, 0, 0
}
}
// Fall back to an empty Lang when extension_to_lang cannot resolve the path.
lang := extension_to_lang(file_path) or { Lang{} }
// A trailing space is appended; the main loop stops one byte before the end,
// which keeps the `runes[pos + 1]` look-aheads below inside the buffer.
text := '${st} '
mut res := []u8{cap: text.len}
mut lines := 0
mut sloc := 0
// ss holds the quote byte that opened the string currently being scanned.
mut ss := u8(` `)
lc := lang.line_comments
// Block-comment delimiters are only used when the language defines both
// an opener and a closer.
mut mlc := ''
mut mlc_end := ''
if lang.mline_comments.len >= 2 {
mlc = lang.mline_comments[0]
mlc_end = lang.mline_comments[1]
}
// Leading literal of the output (spans two source lines), then a newline byte.
res << '
'.bytes()
res << `\n`
// When is_single_line reports false, emit the prefix for row 1 and count it.
if !is_single_line(st) {
res << '| 1 | '.bytes()
lines++
}
mut in_comment := false
mut in_line_comment := false
mut in_string := false
// NOTE: despite the name, `runes` holds raw bytes, not Unicode code points.
mut runes := text.bytes()
for pos := 0; pos < runes.len - 1; pos++ {
mut c := runes[pos]
if c == `\n` {
// Every newline starts a new numbered row.
lines++
if commit {
// In commit mode the first byte of the next line (`+` or `-`) selects a class.
// NOTE(review): `class` is assigned here but is not interpolated into the
// literal appended below, so it currently has no effect on the output.
mut class := ''
if runes[pos + 1] == `+` {
class = 'class="a"'
} else if runes[pos + 1] == `-` {
class = 'class="d"'
}
res << ' |
\n| ${lines} | '.bytes()
} else {
res << ' |
\n| ${lines} | '.bytes()
}
// A line comment always ends at the newline.
if in_line_comment {
in_line_comment = false
res << ''.bytes()
}
// A block comment stays open across the newline; only a literal is appended.
if in_comment {
res << ''.bytes()
}
// Count the upcoming line as source code unless a block comment is still
// open or the line is empty (the next byte is another newline).
if !in_comment && !in_line_comment && runes[pos + 1] != `\n` {
sloc++
}
continue
}
if c == `\t` {
// Tabs are replaced by the module-level `tab` constant.
res << tab.bytes()
continue
}
if in_comment {
res << write(c)
// Look for the block-comment terminator starting at this byte.
if c == mlc_end[0] && is_line_comment(runes, pos, mlc_end) {
in_comment = false
// Exactly one further byte is consumed here, so this assumes a two-byte
// terminator. NOTE(review): that byte is appended raw, not through write().
res << runes[pos + 1]
pos++
res << ''.bytes()
}
continue
}
if in_line_comment {
res << write(c)
continue
}
if in_string {
res << write(c)
// Inside a double-quoted string, a byte preceded by a backslash never
// terminates the string.
if runes[pos - 1] == `\\` && ss == `"` {
continue
}
// The string ends at the same quote byte that opened it.
if c == ss {
in_string = false
res << ''.bytes()
}
continue
}
if is_letter(c, lang) {
// Collect the whole run of word bytes starting at `pos`.
word_start := pos
for is_letter(c, lang) {
pos++
c = runes[pos]
}
delta := pos - word_start
mut data := []u8{}
for i in 0 .. delta {
data << runes[word_start + i]
}
w := data.bytestr()
// Step back so the loop's own pos++ lands on the first byte after the word.
pos--
if w in lang.keywords {
res << '${w}'.bytes()
} else {
res << w.bytes()
}
continue
}
// Detect the start of a string, block comment or line comment, in that order;
// the triggering byte itself is written after the (currently empty) literal.
if is_string_token(c, lang) {
in_string = true
ss = c
res << ''.bytes()
} else if mlc != '' && c == mlc.bytes()[0] && is_line_comment(runes, pos, mlc) {
in_comment = true
res << ''.bytes()
} else if lc != '' && c == lc.bytes()[0] && is_line_comment(runes, pos, lc) {
in_line_comment = true
res << ''.bytes()
}
res << write(c)
}
// Trailing literals that close the last row and the output.
res << ' |
'.bytes()
res << '
'.bytes()
return res.bytestr(), lines, sloc
}
// highlight_line returns highlighted markup for a single line of source
// code, with every emitted source byte passed through `write`.
//
// It keeps no state between calls: a string or block comment that is still
// open at the end of `content` is closed there, which suits diff rendering
// where each line is colored independently.
//
// - An empty `content` yields an empty string.
// - When extension_to_lang cannot resolve `file_path`, the line is returned
//   through escape_html without any highlighting.
// - Inside a double-quoted string a backslash escapes exactly the next byte,
//   so `"a\\"` ends at its final quote.
// - A block comment ends at its closing delimiter when that delimiter occurs
//   on the same line; otherwise it runs to the end of the line.
//
// NOTE(review): the opening/closing markup literals appended below are empty
// strings in this revision, so no tags are emitted - confirm the intended markup.
pub fn highlight_line(content string, file_path string) string {
	if content.len == 0 {
		return ''
	}
	lang := extension_to_lang(file_path) or { return escape_html(content) }
	lc := lang.line_comments
	// Block-comment delimiters are only used when both opener and closer exist.
	mut mlc := ''
	mut mlc_end := ''
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
		mlc_end = lang.mline_comments[1]
	}
	// Raw bytes of the line (not Unicode code points).
	runes := content.bytes()
	mut res := []u8{cap: runes.len + 16}
	mut in_string := false
	// escaped is true when the previous string byte was an unescaped backslash.
	mut escaped := false
	// ss holds the quote byte that opened the current string.
	mut ss := u8(` `)
	mut in_line_comment := false
	mut in_block_comment := false
	for pos := 0; pos < runes.len; pos++ {
		c := runes[pos]
		if in_line_comment {
			// A line comment swallows everything up to the end of the line.
			res << write(c)
			continue
		}
		if in_block_comment {
			if mlc_end != '' && c == mlc_end[0] && pos + mlc_end.len <= runes.len
				&& is_line_comment(runes, pos, mlc_end) {
				// Emit the whole closing delimiter, then leave comment mode.
				for i in 0 .. mlc_end.len {
					res << write(runes[pos + i])
				}
				pos += mlc_end.len - 1
				in_block_comment = false
				res << ''.bytes()
			} else {
				res << write(c)
			}
			continue
		}
		if in_string {
			res << write(c)
			if escaped {
				// This byte was escaped by the preceding backslash.
				escaped = false
				continue
			}
			// Backslash escapes are only honored in double-quoted strings.
			if c == `\\` && ss == `"` {
				escaped = true
				continue
			}
			if c == ss {
				in_string = false
				res << ''.bytes()
			}
			continue
		}
		if is_letter(c, lang) {
			// Consume the whole word, then step back so the loop's pos++ lands
			// on the first byte after it.
			word_start := pos
			for pos < runes.len && is_letter(runes[pos], lang) {
				pos++
			}
			w := runes[word_start..pos].bytestr()
			pos--
			if w in lang.keywords {
				res << ''.bytes()
				res << w.bytes()
				res << ''.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		if is_string_token(c, lang) {
			in_string = true
			escaped = false
			ss = c
			res << ''.bytes()
			res << write(c)
			continue
		}
		if mlc != '' && c == mlc[0] && pos + mlc.len <= runes.len
			&& is_line_comment(runes, pos, mlc) {
			in_block_comment = true
			res << ''.bytes()
			// Emit the whole opener at once so its bytes cannot be mistaken for
			// the start of the closing delimiter (e.g. `/*/`).
			for i in 0 .. mlc.len {
				res << write(runes[pos + i])
			}
			pos += mlc.len - 1
			continue
		}
		if lc != '' && c == lc[0] && pos + lc.len <= runes.len && is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << ''.bytes()
			res << write(c)
			continue
		}
		res << write(c)
	}
	// Close whatever is still open at the end of the line.
	if in_line_comment || in_block_comment {
		res << ''.bytes()
	}
	if in_string {
		res << ''.bytes()
	}
	return res.bytestr()
}
// escape_html returns a copy of `s` in which the HTML-significant bytes
// `<`, `>` and `&` are replaced by the entities `&lt;`, `&gt;` and `&amp;`.
// All other bytes are copied unchanged.
fn escape_html(s string) string {
	mut res := []u8{cap: s.len}
	for c in s {
		match c {
			`<` { res << '&lt;'.bytes() }
			`>` { res << '&gt;'.bytes() }
			`&` { res << '&amp;'.bytes() }
			else { res << c }
		}
	}
	return res.bytestr()
}
// write returns the bytes to emit for the single source byte `c`:
// the entity `&lt;`, `&gt;` or `&amp;` for `<`, `>` or `&`, and the byte
// itself otherwise. `&` is escaped as well so that highlighted output agrees
// with escape_html.
fn write(c u8) []u8 {
	return match c {
		`<` { '&lt;'.bytes() }
		`>` { '&gt;'.bytes() }
		`&` { '&amp;'.bytes() }
		else { [c] }
	}
}
// is_letter reports whether the byte `c` can be part of a word.
// Letters and `_` always qualify; `#` qualifies only when the lowercased
// language name is `cpp`, `c`, `d` or `swift`.
fn is_letter(c u8, lang Lang) bool {
	if c == `_` || c.is_letter() {
		return true
	}
	if c != `#` {
		return false
	}
	lowered := lang.name.to_lower()
	return lowered in ['cpp', 'c', 'd', 'swift']
}
// is_string_token reports whether the byte `c` equals the first byte of
// any entry in `lang.string_start`.
fn is_string_token(c u8, lang Lang) bool {
	mut found := false
	for opener in lang.string_start {
		if opener[0] == c {
			found = true
			break
		}
	}
	return found
}
// is_line_comment reports whether the bytes of `lc` occur in `s` starting
// at index `pos`. Despite the name it is a plain prefix match at `pos`; callers
// in this file use it for block-comment delimiters as well.
// An empty `lc` always matches. When `lc` does not fit inside `s` at `pos`
// the result is false instead of an out-of-range index.
fn is_line_comment(s []u8, pos int, lc string) bool {
	if lc.len == 0 {
		return true
	}
	if pos < 0 || pos + lc.len > s.len {
		return false
	}
	for i, b in lc {
		if s[pos + i] != b {
			return false
		}
	}
	return true
}
// is_single_line reports whether `s` contains at most one `\n` byte.
// Scanning stops as soon as a second newline is seen.
fn is_single_line(s string) bool {
	mut newlines := 0
	for ch in s {
		if ch != `\n` {
			continue
		}
		newlines++
		if newlines > 1 {
			return false
		}
	}
	return true
}
// extract_extension_from_file_path returns the lowercased text after the
// last `.` in the final `/`-separated component of `path`.
// It returns an empty string when that component contains no `.`, so a file
// literally named `md` or `txt`, or a dot in a directory name, is not mistaken
// for an extension.
fn extract_extension_from_file_path(path string) string {
	// all_after_last returns the input unchanged when the separator is absent.
	file_name := path.all_after_last('/')
	if !file_name.contains('.') {
		return ''
	}
	return file_name.all_after_last('.').to_lower()
}