v / vlib / strings / textscanner / textscanner.v
186 lines · 167 sloc · 5.02 KB · 37255767290c243b71f0e78d77c3bd5e875748e6
Raw
1module textscanner
2
// TextScanner is a small helper for writing hand-rolled scanners/parsers.
// It pairs an input string with a cursor, and its methods let you read the
// text one character at a time, look ahead, look behind, and step backwards.
pub struct TextScanner {
pub:
	input string // the text that is being scanned
	ilen  int    // length of `input`; `new()` sets it to `input.len`
pub mut:
	pos int // current position (cursor); expected to stay in [0,ilen]
}
12
// new creates a TextScanner over `input` and returns it by value.
// `ilen` is taken from `input.len`; `pos` is not set explicitly, so the
// scanner starts with its zero default.
pub fn new(input string) TextScanner {
	return TextScanner{input: input, ilen: input.len}
}
20
// free releases the scanner's input string, by calling `.free()` on it.
// The method is tagged `@[unsafe]`.
@[unsafe]
pub fn (mut ss TextScanner) free() {
	unsafe { ss.input.free() }
}
28
// remaining reports how many characters are left between the current
// position and the end of the input.
@[inline]
pub fn (ss &TextScanner) remaining() int {
	left := ss.ilen - ss.pos
	return left
}
34
// next reads the character code at the current position, and then moves
// the scanner one character forward.
// next returns `-1`, and leaves the position unchanged, when the scanner
// is already at the end of the input.
@[direct_array_access; inline]
pub fn (mut ss TextScanner) next() int {
	if ss.pos >= ss.ilen {
		return -1
	}
	c := ss.input[ss.pos]
	ss.pos++
	return c
}
47
// skip moves the scanner one character ahead, without reading it.
// `skip()` returns nothing, and does nothing at the end of the input.
@[inline]
pub fn (mut ss TextScanner) skip() {
	if ss.pos >= ss.ilen {
		return
	}
	ss.pos++
}
56
// skip_n skips ahead `n` characters, stopping at the end of the input.
// The resulting position is always clamped to [0,ilen]: a negative `n`
// moves the scanner backwards, but never before the start of the input.
@[inline]
pub fn (mut ss TextScanner) skip_n(n int) {
	// The bounds are compared against `n` itself, instead of against
	// `ss.pos + n`, so that a very large `n` can not overflow the sum.
	if n >= ss.ilen - ss.pos {
		ss.pos = ss.ilen
		return
	}
	// Without this lower clamp, a negative `n` would leave `pos` below 0,
	// and later reads would index before the start of the input.
	if n <= -ss.pos {
		ss.pos = 0
		return
	}
	ss.pos += n
}
65
// peek returns the character code that the following `next()` call would
// return, i.e. the one at the current position.
// peek returns `-1` when the scanner is at the end of the input.
// unlike `next()`, `peek()` leaves the scanner position untouched.
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek() int {
	if ss.pos >= ss.ilen {
		return -1
	}
	return ss.input[ss.pos]
}
76
// peek_u8 is the byte/u8 flavour of `peek()`: it returns the character at
// the current position, and does not change the state of the scanner.
// Note: peek_u8 returns `0` when the scanner is at the end of the input.
// Note: that `0` can not be told apart from a real byte with value `0`;
// use `peek()` instead, if your input can legitimately contain such bytes.
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek_u8() u8 {
	if ss.pos >= ss.ilen {
		return 0
	}
	return ss.input[ss.pos]
}
89
// peek_n returns the character code from the input text at position + `n`.
// peek_n returns `-1` if position + `n` is outside of the input text, i.e.
// if it can't peek `n` characters ahead, or if a negative `n` would reach
// before the start of the input.
// ts.peek_n(0) == ts.peek() .
// ts.peek_n(-1) == ts.current() .
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek_n(n int) int {
	// `direct_array_access` asks the compiler to omit bounds checking, so
	// both ends of the valid range have to be verified here. Comparing `n`
	// against the limits (instead of computing `ss.pos + n` first) also
	// keeps the check itself safe from integer overflow.
	if n < -ss.pos || n >= ss.ilen - ss.pos {
		return -1
	}
	return ss.input[ss.pos + n]
}
101
// peek_n_u8 returns the character code from the input text, at position + `n`,
// as a byte/u8.
// Note: peek_n_u8 returns `0`, if position + `n` is outside of the input
// text (past its end, or, for a negative `n`, before its start).
// Note: use `peek_n()`, instead of `peek_n_u8()`, if your input itself can
// legitimately contain bytes with value `0`.
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek_n_u8(n int) u8 {
	// `direct_array_access` asks the compiler to omit bounds checking, so
	// both ends of the valid range have to be verified here. Comparing `n`
	// against the limits (instead of computing `ss.pos + n` first) also
	// keeps the check itself safe from integer overflow.
	if n < -ss.pos || n >= ss.ilen - ss.pos {
		return 0
	}
	return ss.input[ss.pos + n]
}
114
// back moves the scanner one character backwards.
// It does nothing when the scanner is already at the start of the input.
@[inline]
pub fn (mut ss TextScanner) back() {
	if ss.pos <= 0 {
		return
	}
	ss.pos--
}
122
// back_n moves the scanner `n` characters backwards.
// The new position is clamped: values below 0 become 0, and values
// above `ilen` (possible with a negative `n`) become `ilen`.
pub fn (mut ss TextScanner) back_n(n int) {
	mut target := ss.pos - n
	if target < 0 {
		target = 0
	}
	if target > ss.ilen {
		target = ss.ilen
	}
	ss.pos = target
}
133
// peek_back returns the *previous* character code from the input text.
// It is a shorthand for `peek_back_n(1)`, and it returns whatever that
// call returns, including `-1` when there is no previous character.
// unlike `back()`, `peek_back()` does not move the scanner.
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek_back() int {
	prev := ss.peek_back_n(1)
	return prev
}
141
// peek_back_n returns the character code from the input text, `n` characters
// before the current one, i.e. the one at index `pos - n - 1`.
// peek_back_n returns `-1` if that index is outside of the input text, i.e.
// if it can't peek `n` characters back, or if a negative `n` would reach
// past the end of the input.
// ts.peek_back_n(0) == ts.current()
// ts.peek_back_n(1) == ts.peek_back()
// ts.peek_back_n(-1) == ts.peek()
@[direct_array_access; inline]
pub fn (ss &TextScanner) peek_back_n(n int) int {
	// `direct_array_access` asks the compiler to omit bounds checking, so
	// both ends of the valid range have to be verified here:
	//   n >= pos        <=> pos - n - 1 < 0      (before the start)
	//   n < pos - ilen  <=> pos - n - 1 >= ilen  (past the end)
	// Comparing `n` against the limits keeps the check free of overflow.
	if n >= ss.pos || n < ss.pos - ss.ilen {
		return -1
	}
	return ss.input[ss.pos - n - 1]
}
154
// current returns the character code right before the scanner position,
// i.e. the one that the most recent `next()` call has consumed.
// current returns `-1` while the scanner is at the start of the input text.
// Note: after `c := ts.next()`, `ts.current()` will also return `c`.
@[direct_array_access; inline]
pub fn (mut ss TextScanner) current() int {
	if ss.pos <= 0 {
		return -1
	}
	return ss.input[ss.pos - 1]
}
165
// reset rewinds the scanner, by setting its position back to 0.
// After .reset(), the following .next() call reads the first
// character of the input text again.
pub fn (mut ss TextScanner) reset() {
	ss.pos = 0
}
172
// goto_end moves the scanner straight to the end of the input text.
// It has the same effect as `for ts.next() != -1 {}`, without visiting
// the characters in between. Once at the end, .next() keeps returning
// -1, until the scanner is moved back.
pub fn (mut ss TextScanner) goto_end() {
	ss.pos = ss.ilen
}
180
// skip_whitespace advances the scanner past any space characters in the input.
// It stops at the first character for which `u8.is_space()` is false, or at
// the end of the input, whichever comes first.
pub fn (mut ss TextScanner) skip_whitespace() {
	for {
		if ss.pos >= ss.ilen || !ss.peek_u8().is_space() {
			break
		}
		// pos < ilen is guaranteed here, so this moves exactly one character
		ss.skip()
	}
}
187