| 1 | module term |
| 2 | |
// utf8_getchar reads one UTF-8 encoded code point from standard input,
// one byte at a time via input_character(), and returns it as a rune.
//
// Results, as implemented below:
//   - `none` when the first read yields -1
//     (presumably end of input; input_character is defined elsewhere - confirm);
//   - 0 when the first read yields any other negative value, or when a
//     continuation read yields -1;
//   - the byte itself when its top bit is clear (single byte / ASCII);
//   - -1 when the first byte is a continuation byte (10xxxxxx), or when a
//     following byte is not a continuation byte;
//   - otherwise the decoded code point.
// NOTE(review): lead bytes with more than four leading 1 bits (0xF8 and up)
// are not rejected here; the matching number of continuation bytes is still read.
pub fn utf8_getchar() ?rune {
	first := input_character()
	if first == -1 {
		return none
	}
	if first < 0 {
		return 0
	}
	// utf8_len counts leading zero bits, so feeding it the complement of the
	// lead byte yields the number of leading 1 bits, i.e. the sequence length.
	seq_len := utf8_len(u8(~first))
	if seq_len == 0 {
		return first
	}
	if seq_len == 1 {
		return -1
	}
	// Keep only the payload bits of the lead byte (the low 7 - seq_len bits).
	mut code := first & ((1 << (7 - seq_len)) - 1)
	// One continuation byte is expected for each remaining position.
	for _ in 1 .. seq_len {
		follow := input_character()
		if follow == -1 {
			return 0
		}
		// Continuation bytes must look like 10xxxxxx.
		if (follow >> 6) != 2 {
			return -1
		}
		// Append the six payload bits of the continuation byte.
		code = (code << 6) | (follow & 63)
	}
	return code
}
| 32 | |
// utf8_len returns the number of leading zero bits in `c`, capped at 7
// (so both 0x01 and 0x00 give 7).
// utf8_getchar calls it with the bitwise complement of a UTF-8 lead byte,
// in which case the result is the number of leading 1 bits of that byte:
// 0 for a single-byte (ASCII) character, 1 for a continuation byte, and
// 2..4 for the lead byte of a 2..4 byte sequence.
pub fn utf8_len(c u8) int {
	mut zeros := 0
	mut mask := u8(0x80)
	// Scan from the most significant bit down, stopping at the first set bit
	// or once seven zero bits have been counted.
	for zeros < 7 && (c & mask) == 0 {
		zeros++
		mask >>= 1
	}
	return zeros
}
| 55 | |