| 1 | module validate |
| 2 | |
| 3 | // http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
| 4 | |
// vfmt off
// utf8d is the combined lookup table that drives the UTF-8 validation
// state machine used in this module. It has two parts:
//   * entries 0..255 map every possible input byte to a character class (0..11);
//   * entries 256..363 form the transition table, one row of 12 entries per
//     state: the entry at index `256 + state + class` is the next state.
// States are stored as multiples of 12 so that they can be used directly as
// row offsets; state 0 means "accept" and state 12 means "reject".
const utf8d = [
	// --- part 1: byte -> character class ---
	u8(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..0x0f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..0x1f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..0x2f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..0x3f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..0x4f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..0x5f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..0x6f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..0x7f
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..0x8f
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..0x9f
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xa0..0xaf
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xb0..0xbf
	8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0..0xcf
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0..0xdf
	10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xe0..0xef
	11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xf0..0xff
	// --- part 2: (state, class) -> next state; columns are classes 0..11 ---
	0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, // state 0 (accept)
	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 12 (reject)
	12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, // state 24
	12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, // state 36
	12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, // state 48
	12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, // state 60
	12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 72
	12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 84
	12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 96
]!
// vfmt on
| 26 | |
// utf8_string reports whether the whole string `s` is made up of valid UTF-8.
// It hands the string's byte pointer and byte length to `utf8_data` and
// returns that function's verdict unchanged.
pub fn utf8_string(s string) bool {
	bytes := s.str
	size := s.len
	return utf8_data(bytes, size)
}
| 31 | |
// utf8_data reports whether the `len` bytes starting at `data` form valid UTF-8.
// Every byte is fed through the state machine stored in `utf8d`: the byte is
// first mapped to its character class, then the class and the current state
// select the next state from the transition part of the table.
// Validation stops with `false` as soon as the reject state is reached.
// It also returns `false` when the input ends in the middle of a multi-byte
// sequence (the final state is not the accept state).
// When `len` is zero or negative no byte is read and the result is `true`.
@[direct_array_access]
pub fn utf8_data(data &u8, len int) bool {
	accept_state := 0
	reject_state := 12
	// The transition rows start right after the 256 byte-class entries.
	transitions_start := 256

	mut current := accept_state
	for pos in 0 .. len {
		octet := unsafe { data[pos] }
		byte_class := utf8d[octet]
		current = utf8d[transitions_start + current + byte_class]
		if current == reject_state {
			return false
		}
	}
	return current == accept_state
}
| 46 | |