v2 / vlib / v / vmod / parser.v
281 lines · 261 sloc · 5.54 KB · 8e35f4d9848f7ad35d857a187dddbfd2eca5e19d
Raw
1module vmod
2
3import os
4
// err_label is the prefix put in front of the error messages produced
// by the tokenizer helpers and the parser in this module.
const err_label = 'vmod:'
6
// TokenKind enumerates the kinds of tokens the scanner can emit.
enum TokenKind {
	module_keyword // the literal identifier `Module`
	field_key // an identifier immediately followed by `:`
	lcbr // `{`
	rcbr // `}`
	labr // `[`
	rabr // `]`
	comma // `,`
	colon // a `:` that is not attached to an identifier
	eof // appended once after the whole text has been scanned
	str // a single- or double-quoted string
	ident // any other identifier
	unknown // any character the scanner does not recognise
}
21
// Manifest holds the values decoded from a v.mod file.
// Each named field is filled from the manifest key of the same name.
pub struct Manifest {
pub mut:
	name         string
	base_url     string
	description  string
	version      string
	license      string
	repo_url     string
	repo_branch  string = 'master' // used when the manifest has no `repo_branch` key
	author       string
	dependencies []string
	// unknown collects every key the parser has no dedicated field for,
	// keyed by field name. String values are stored as a one-element list.
	unknown map[string][]string
}
35
// Scanner turns the raw manifest text into a flat list of tokens.
struct Scanner {
mut:
	pos         int     // index of the next byte of `text` to look at
	line        int = 1 // current line number, bumped for each scanned `\n`
	text        string  // the complete input being scanned
	inside_text bool    // NOTE(review): not read or written by the code in this file section
	tokens      []Token // tokens produced so far, in input order
}
44
// Parser drives a Scanner and builds a Manifest from its tokens.
struct Parser {
mut:
	file_path string  // NOTE(review): never assigned by the code in this file section
	scanner   Scanner // tokenizer holding the manifest text
}
50
// Token is a single lexical unit produced by the Scanner.
struct Token {
	typ  TokenKind // what kind of token this is
	val  string    // the token text (for strings: the content without the quotes)
	line int       // value of the scanner's line counter when the token was emitted
}
56
// from_file reads the v.mod file at `vmod_path` and decodes it into a Manifest.
// It returns an error when the file does not exist, when it cannot be read,
// or when its content fails to parse.
pub fn from_file(vmod_path string) !Manifest {
	if !os.exists(vmod_path) {
		return error('v.mod: v.mod file not found.')
	}
	// Report the real read failure instead of substituting an empty string,
	// which would surface later as a misleading "no content" error.
	contents := os.read_file(vmod_path) or {
		return error('v.mod: could not read "${vmod_path}": ${err.msg()}')
	}
	return decode(contents)
}
64
// decode parses the text of a v.mod file and returns the resulting Manifest,
// or the error reported by the parser.
pub fn decode(contents string) !Manifest {
	// `pos` starts at its zero default, `line` at its declared default of 1.
	scanner := Scanner{
		text: contents
	}
	mut parser := Parser{
		scanner: scanner
	}
	return parser.parse()
}
74
// tokenize appends a token of kind `t_type` with text `val` to the token list,
// stamped with the scanner's current line number.
fn (mut s Scanner) tokenize(t_type TokenKind, val string) {
	tok := Token{
		typ:  t_type
		val:  val
		line: s.line
	}
	s.tokens << tok
}
78
// skip_whitespace advances the scan position past any run of whitespace.
// It does not touch the line counter.
fn (mut s Scanner) skip_whitespace() {
	for {
		if s.pos >= s.text.len || !s.text[s.pos].is_space() {
			break
		}
		s.pos++
	}
}
84
// is_name_alpha reports whether `chr` may appear in an identifier:
// a letter or an underscore.
fn is_name_alpha(chr u8) bool {
	if chr == `_` {
		return true
	}
	return chr.is_letter()
}
88
// create_string collects the body of a quoted string. It expects the scan
// position to be just after the opening quote and consumes characters until
// the closing quote `q` or the end of the text. The closing quote itself is
// left unconsumed. A backslash directly followed by `q` is an escaped quote:
// the quote is added to the result and the backslash is dropped.
fn (mut s Scanner) create_string(q u8) string {
	mut str := ''
	for s.pos < s.text.len && s.text[s.pos] != q {
		// The `s.pos + 1 < s.text.len` guard keeps a backslash that is the very
		// last byte of the input from indexing past the end of the text.
		if s.text[s.pos] == `\\` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == q {
			// Emit the escaped quote itself; previously the backslash was kept
			// and the quote was lost.
			str += q.ascii_str()
			s.pos += 2
		} else {
			str += s.text[s.pos].ascii_str()
			s.pos++
		}
	}
	return str
}
102
// create_ident consumes the longest run of identifier characters (letters and
// underscores) starting at the current position and returns it. The scan
// position is left on the first character that is not part of the identifier.
fn (mut s Scanner) create_ident() string {
	start := s.pos
	for s.pos < s.text.len && is_name_alpha(s.text[s.pos]) {
		s.pos++
	}
	return s.text[start..s.pos]
}
111
// peek_char reports whether the character immediately *before* the current
// scan position equals `c`. It returns false when the scanner is at the very
// start of the text, where there is no previous character.
fn (s &Scanner) peek_char(c u8) bool {
	prev := s.pos - 1
	// Without the lower-bound check, `pos == 0` would index `s.text[-1]`.
	return prev >= 0 && prev < s.text.len && s.text[prev] == c
}
115
// scan_all tokenizes the whole text into `s.tokens` and finishes with a
// single `.eof` token.
//
// Whitespace and stray backslashes are skipped; every `\n` skipped this way
// bumps the line counter. Identifiers become `.module_keyword` (for `Module`),
// `.field_key` (when directly followed by `:`) or `.ident`. Quoted text becomes
// a `.str` token, punctuation gets its own token kind and anything else is
// emitted as `.unknown`.
fn (mut s Scanner) scan_all() {
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c.is_space() || c == `\\` {
			s.pos++
			if c == `\n` {
				s.line++
			}
			continue
		}
		if is_name_alpha(c) {
			// create_ident leaves `s.pos` on the first character after the name.
			// That character must go through the main loop like any other, so
			// that a brace, quote or newline right after a name is not swallowed.
			name := s.create_ident()
			if name == 'Module' {
				s.tokenize(.module_keyword, name)
			} else if s.pos < s.text.len && s.text[s.pos] == `:` {
				s.tokenize(.field_key, name + ':')
				// consume only the colon that belongs to the key
				s.pos++
			} else {
				s.tokenize(.ident, name)
			}
			continue
		}
		// A quote preceded by a backslash does not open a string.
		if c in [`'`, `\"`] && !s.peek_char(`\\`) {
			s.pos++ // step over the opening quote
			str := s.create_string(c)
			s.tokenize(.str, str)
			s.pos++ // step over the closing quote
			continue
		}
		match c {
			`{` { s.tokenize(.lcbr, c.ascii_str()) }
			`}` { s.tokenize(.rcbr, c.ascii_str()) }
			`[` { s.tokenize(.labr, c.ascii_str()) }
			`]` { s.tokenize(.rabr, c.ascii_str()) }
			`:` { s.tokenize(.colon, c.ascii_str()) }
			`,` { s.tokenize(.comma, c.ascii_str()) }
			else { s.tokenize(.unknown, c.ascii_str()) }
		}

		s.pos++
	}
	s.tokenize(.eof, 'eof')
}
163
// get_array_content reads an array of strings that starts at `tokens[st_idx]`,
// which must be an opening `[`. It returns the string values together with the
// index of the token right after the closing `]`.
//
// Elements are separated by commas; a comma directly before the closing `]`
// is accepted. An error is returned when the first token is not `[`, when an
// element is not a string, or when a string is followed by anything other
// than `,` or `]`.
fn get_array_content(tokens []Token, st_idx int) !([]string, int) {
	if tokens[st_idx].typ != .labr {
		return error('${err_label} not a valid array, at line ${tokens[st_idx].line}')
	}
	mut items := []string{}
	mut cursor := st_idx + 1
	for {
		current := tokens[cursor]
		if current.typ == .rabr {
			// step past the closing bracket and stop
			cursor++
			break
		}
		if current.typ != .str {
			return error('${err_label} invalid token "${current.val}", at line ${current.line}')
		}
		items << current.val
		following := tokens[cursor + 1]
		if following.typ == .comma {
			// skip the string and its comma
			cursor += 2
		} else if following.typ == .rabr {
			// skip the string; the bracket is handled on the next iteration
			cursor++
		} else {
			return error('${err_label} invalid separator "${following.val}", at line ${current.line}')
		}
	}
	return items, cursor
}
192
// parse tokenizes the scanner's text and builds a Manifest from the tokens.
//
// The token stream must start with `Module`. Each `key:` must be followed by
// a string or an array of strings. Keys with a dedicated Manifest field are
// stored there; every other key goes into `Manifest.unknown`. Parsing stops
// at the first closing brace; anything after it is ignored.
//
// Errors are returned for empty input, a missing `Module` keyword, a value
// of the wrong kind, a misplaced comma, or any unexpected token (including
// reaching the end of input before a closing brace).
fn (mut p Parser) parse() !Manifest {
	if p.scanner.text.len == 0 {
		return error('${err_label} no content.')
	}
	p.scanner.scan_all()
	tokens := p.scanner.tokens
	mut mn := Manifest{}
	if tokens[0].typ != .module_keyword {
		return error('${err_label} v.mod files should start with Module, at line ${tokens[0].line}')
	}
	// Fields whose Manifest member is a plain string and therefore cannot
	// take an array value.
	scalar_fields := ['name', 'base_url', 'version', 'license', 'repo_url', 'repo_branch',
		'description', 'author']
	mut i := 1
	for i < tokens.len {
		tok := tokens[i]
		match tok.typ {
			.lcbr {
				if tokens[i + 1].typ !in [.field_key, .rcbr] {
					return error('${err_label} invalid content after opening brace, at line ${tok.line}')
				}
				i++
				continue
			}
			.rcbr {
				break
			}
			.field_key {
				field_name := tok.val.trim_right(':')
				// `tokens[i + 1]` always exists: the scanner ends the list with `.eof`.
				value_tok := tokens[i + 1]
				if value_tok.typ !in [.str, .labr] {
					return error('${err_label} value of field "${field_name}" must be either string or an array of strings, at line ${tok.line}')
				}
				// Reject an array for a string-only field up front. Otherwise the
				// literal text `[` would be stored as the field's value.
				if value_tok.typ == .labr && field_name in scalar_fields {
					return error('${err_label} value of field "${field_name}" must be a string, at line ${tok.line}')
				}
				field_value := value_tok.val
				match field_name {
					'name' {
						mn.name = field_value
					}
					'base_url' {
						mn.base_url = field_value
					}
					'version' {
						mn.version = field_value
					}
					'license' {
						mn.license = field_value
					}
					'repo_url' {
						mn.repo_url = field_value
					}
					'repo_branch' {
						mn.repo_branch = field_value
					}
					'description' {
						mn.description = field_value
					}
					'author' {
						mn.author = field_value
					}
					'dependencies' {
						// get_array_content reports the error when the value is not an array
						deps, idx := get_array_content(tokens, i + 1)!
						mn.dependencies = deps
						i = idx
						continue
					}
					else {
						if value_tok.typ == .labr {
							vals, idx := get_array_content(tokens, i + 1)!
							mn.unknown[field_name] = vals
							i = idx
							continue
						}
						mn.unknown[field_name] = [field_value]
					}
				}

				// step over the key and its string value
				i += 2
				continue
			}
			.comma {
				// a comma is only valid between a finished value and the next key
				if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
					return error('${err_label} invalid comma placement, at line ${tok.line}')
				}
				i++
				continue
			}
			else {
				return error('${err_label} invalid token "${tok.val}", at line ${tok.line}')
			}
		}
	}
	return mn
}
282