v / vlib / picohttpparser / picohttpparser.v
487 lines · 443 sloc · 10.37 KB · efc8f7fb3ef7e90a088db64ad2c68dd0698b90d5
Raw
1module picohttpparser
2
3// NOTE: picohttpparser is designed for speed. Please do some benchmarks when
4// you change something in this file
5
// token_char_map is a 256-entry lookup table indexed by byte value: an entry
// is `\1` when that byte is accepted inside an HTTP header name and `\0` when
// it is not. Control characters, space, separators such as `:` and every byte
// above 127 are rejected.
const token_char_map = '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' +
	'\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0' +
	'\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1' +
	'\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0' +
	'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' +
	'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' +
	'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' +
	'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'
15
// phr_parse_request_path parses the method and the path at the start of a
// request line and stores them in `r.method` and `r.path`.
//
// On success `pret.ret` is the offset, relative to `buf_start`, of the first
// non-space byte after the path. On failure `pret.ret` is negative:
// -1 (with `pret.err` set) for an invalid character or an empty method/path,
// -2 when the buffer ends before both tokens and the spaces following them
// have been read.
fn (mut r Request) phr_parse_request_path(buf_start &u8, buf_end &u8, mut pret Pret) {
	mut buf := unsafe { buf_start + 0 }

	// ADVANCE_TOKEN
	method := advance_token(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return
	}
	unsafe {
		buf += pret.ret
	}
	$if trace_parse ? {
		eprintln('method: ${method}')
	}
	// skip spaces
	for {
		unsafe { buf++ }
		// CHECK_EOF: the space may have been the last byte received so far
		if buf == buf_end {
			pret.ret = -2
			return
		}
		if *buf != ` ` {
			break
		}
	}

	path := advance_token(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return
	}
	$if trace_parse ? {
		eprintln('path: ${path}')
	}
	unsafe {
		buf += pret.ret
	}
	// skip spaces
	for {
		unsafe { buf++ }
		// CHECK_EOF: never look at a byte that is not in the buffer
		if buf == buf_end {
			pret.ret = -2
			return
		}
		if *buf != ` ` {
			break
		}
	}
	// validate
	if method == '' || path == '' {
		pret.ret = -1
		pret.err = 'error parsing request: invalid method or path'
		return
	}
	r.method = method
	r.path = path

	pret.ret = unsafe { buf - buf_start }
}
66
// phr_parse_request_path_pipeline extracts the method and the path of the
// request at `buf_start`, then skips ahead to the end of that request without
// parsing its headers.
//
// On success `r.method` and `r.path` are set and `pret.ret` is the number of
// bytes from `buf_start` up to and including the terminating "\r\n\r\n".
// `pret.ret` is -1 (with `pret.err` set) when the method or path is empty or
// when no terminator is found inside the buffer.
fn (mut r Request) phr_parse_request_path_pipeline(buf_start &u8, buf_end &u8, mut pret Pret) {
	mut buf := unsafe { buf_start }
	method := advance_token2(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return
	}
	unsafe {
		buf += pret.ret
	}
	path := advance_token2(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return
	}
	unsafe {
		buf += pret.ret
	}
	// validate
	if method == '' || path == '' {
		pret.ret = -1
		pret.err = 'error parsing request: invalid method or path'
		return
	}
	r.method = method
	r.path = path

	// Only advance while a full 4-byte window still fits in front of
	// `buf_end`, so the u32 load below never touches bytes outside the buffer.
	for unsafe { buf_end - buf > 4 } {
		unsafe { buf++ }
		// check if following 4 characters are '\r\n\r\n' indicating a new request line
		// (0x0a0d0a0d is that byte sequence loaded as a little-endian u32)
		if unsafe { *(&u32(buf)) == 0x0a0d0a0d } {
			unsafe {
				buf += 4
			}
			pret.ret = unsafe { buf - buf_start }
			return
		}
	}

	pret.ret = -1
	pret.err = 'error parsing request: no request found'
}
107
// phr_parse_request parses a complete request head: an optional leading empty
// line, the request line (method, path, HTTP version) and the headers.
//
// The method, path and headers are stored in `r`. The return value is what
// `parse_headers` returns (the position after the header section), or `nil`
// when parsing stops early. In that case `pret.ret` is -2 if the buffer ended
// before the request head was complete, or -1 with `pret.err` set if the
// input is malformed.
fn (mut r Request) phr_parse_request(buf_start &u8, buf_end &u8, mut pret Pret) &u8 {
	// make copy of `buf_start` that can be mutated
	mut buf := unsafe { buf_start }

	// skip first empty line (some clients add CRLF after POST content)
	// CHECK_EOF
	if buf == buf_end {
		pret.ret = -2
		return unsafe { nil }
	}
	if *buf == `\r` {
		unsafe { buf++ }
		// EXPECT_CHAR
		if buf == buf_end {
			pret.ret = -2
			return unsafe { nil }
		}
		if *buf != `\n` {
			pret.ret = -1
			pret.err = 'error parsing request: expected "\n" after "\r"'
			return unsafe { nil }
		}
		// consume the "\n" too, so the request line starts on the method
		unsafe { buf++ }
	} else if *buf == `\n` {
		// a bare "\n" is accepted as a line ending elsewhere in this parser
		unsafe { buf++ }
	}
	// CHECK_EOF: the empty line may be all that has been received so far
	if buf == buf_end {
		pret.ret = -2
		return unsafe { nil }
	}

	// parse request line
	r.phr_parse_request_path(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return unsafe { nil }
	}
	unsafe {
		buf += pret.ret
	}
	minor_version := parse_http_version(buf, buf_end, mut pret)
	if pret.ret < 0 {
		return unsafe { nil }
	}
	$if trace_parse ? {
		eprintln('minor_version: ${minor_version}')
	}
	unsafe {
		buf += pret.ret
	}
	// CHECK_EOF
	if buf == buf_end {
		pret.ret = -2
		return unsafe { nil }
	}
	if *buf == `\r` {
		unsafe { buf++ }
		// EXPECT_CHAR
		if buf == buf_end {
			pret.ret = -2
			return unsafe { nil }
		}
		if *buf != `\n` {
			pret.ret = -1
			pret.err = 'error parsing request: expected "\n" after "\r"'
			return unsafe { nil }
		}
		unsafe { buf++ }
	} else if *buf == `\n` {
		unsafe { buf++ }
	} else {
		pret.ret = -1
		pret.err = 'error parsing request: expecting "\r\n" after HTTP version'
		return unsafe { nil }
	}

	return r.parse_headers(buf, buf_end, mut pret)
}
178
// parse_headers reads header lines starting at `buf_start` into `r.headers`,
// filling slots from `r.num_headers` onwards, until it reaches the empty line
// that ends the header section.
//
// A line that starts with a space or a tab is stored with an empty name.
// Trailing spaces and tabs are removed from every value. Names and values
// are built with `tos` directly over the input bytes.
//
// On success the returned pointer is the position right after the empty line
// and `pret.ret` is its offset from `buf_start`. On failure `nil` is returned
// and `pret.ret` is -2 if the buffer ended before the header section was
// complete, or -1 with `pret.err` set if the input is malformed or more than
// `max_headers` headers are present.
@[direct_array_access]
fn (mut r Request) parse_headers(buf_start &u8, buf_end &u8, mut pret Pret) &u8 {
	mut buf := unsafe { buf_start }

	mut i := 0

	// The bound is inclusive so that the end of the header section is still
	// looked for once `max_headers` headers have been stored; the explicit
	// check below keeps `i` from ever indexing past the last slot.
	for i = r.num_headers; i <= max_headers; i++ {
		// CHECK_EOF
		if buf == buf_end {
			pret.ret = -2
			return unsafe { nil }
		}
		if *buf == `\r` {
			unsafe { buf++ }
			// EXPECT_CHAR
			if buf == buf_end {
				pret.ret = -2
				return unsafe { nil }
			}
			if *buf != `\n` {
				pret.ret = -1
				pret.err = 'error parsing request: expected "\n" after "\r"'
				return unsafe { nil }
			}
			unsafe { buf++ }

			break
		} else if *buf == `\n` {
			unsafe { buf++ }
			break
		}

		if i == max_headers {
			// too many headers: another header line follows a full table
			eprintln('Too many headers!')
			pret.ret = -1
			pret.err = 'error parsing request: too many headers!'
			return unsafe { nil }
		}

		if !(*buf == ` ` || *buf == `\t`) {
			name_start := buf
			// parsing name, but do not discard SP before colon, see
			// http://www.mozilla.org/security/announce/2006/mfsa2006-33.html
			for *buf != `:` {
				// check if the current character is allowed in an HTTP header
				if token_char_map[*buf] == 0 {
					$if trace_parse ? {
						eprintln('invalid character! ${*buf}')
					}
					pret.ret = -1
					pret.err = 'error parsing request: invalid character in header "${*buf}"'
					return unsafe { nil }
				}
				unsafe { buf++ }

				// CHECK_EOF
				if buf == buf_end {
					pret.ret = -2
					return unsafe { nil }
				}
			}

			name_len := unsafe { buf - name_start }
			if name_len == 0 {
				pret.ret = -1
				pret.err = 'error parsing request: invalid header name'
				return unsafe { nil }
			}
			r.headers[i].name = unsafe { tos(name_start, name_len) }

			// step over the colon, then over the spaces/tabs in front of the value
			unsafe { buf++ }
			for { // CHECK_EOF
				if buf == buf_end {
					pret.ret = -2
					return unsafe { nil }
				}
				if !(*buf == ` ` || *buf == `\t`) {
					break
				}
				unsafe { buf++ }
			}
		} else {
			r.headers[i].name = ''
		}

		value_len := get_token_length_to_eol(buf, buf_end, mut pret)
		if pret.ret < 0 {
			return unsafe { nil }
		}

		// strip trailing spaces and tabs from the value
		mut value_end := unsafe { buf + value_len }
		for value_end != buf {
			c := unsafe { *(value_end - 1) }
			if !(c == ` ` || c == `\t`) {
				break
			}
			unsafe { value_end-- }
		}

		r.headers[i].value = unsafe { tos(buf, value_end - buf) }
		r.num_headers++

		// `pret.ret` still holds what get_token_length_to_eol reported
		unsafe {
			buf += pret.ret
		}
	}

	pret.ret = unsafe { buf - buf_start }
	return buf
}
291
// is_complete looks for the empty line that ends a request head, i.e. two
// line endings ("\r\n" or a bare "\n") in a row.
//
// Scanning starts at `buf_start`, or at `buf_start + last_len - 3` when
// `last_len` is at least 3. The result is the position right after the second
// line ending. `nil` is returned with `pret.ret = -2` when the buffer ends
// first, and with `pret.ret = -1` (plus `pret.err`) when a "\r" is not
// followed by "\n".
fn is_complete(buf_start &u8, buf_end &u8, last_len int, mut pret Pret) &u8 {
	mut cur := unsafe { buf_start }
	if last_len >= 3 {
		unsafe {
			cur += last_len - 3
		}
	}

	// number of line endings seen back to back
	mut eol_run := 0
	for eol_run < 2 {
		// CHECK_EOF
		if cur == buf_end {
			pret.ret = -2
			return unsafe { nil }
		}
		c := *cur
		unsafe { cur++ }
		if c == `\r` {
			// CHECK_EOF
			if cur == buf_end {
				pret.ret = -2
				return unsafe { nil }
			}
			// a line of an http request is expected to end with '\r\n'
			if *cur != `\n` {
				pret.ret = -1
				pret.err = 'error parsing request: expected "\n" after "\r"'
				return unsafe { nil }
			}
			unsafe { cur++ }
			eol_run++
		} else if c == `\n` {
			eol_run++
		} else {
			// any other character starts a fresh line
			eol_run = 0
		}
	}
	return cur
}
338
// parse_http_version reads an "HTTP/1.<digit>" token at `buf_start` and
// returns the digit as an int.
//
// On success `pret.ret` is 8, the length of the token. When fewer than 9
// bytes are available `pret.ret` is -2. When the prefix is not "HTTP/1." or
// the next byte is not a decimal digit, `pret.ret` is -1 and `pret.err`
// explains why. 0 is returned in every failure case.
fn parse_http_version(buf_start &u8, buf_end &u8, mut pret Pret) int {
	// the token itself is 8 bytes; one more byte must follow it
	available := unsafe { buf_end - buf_start }
	if available < 9 {
		pret.ret = -2
		return 0
	}

	prefix := unsafe { tos(buf_start, 7) }
	if prefix != 'HTTP/1.' {
		pret.ret = -1
		pret.err = 'error parsing request: picohttpparser only supports HTTP/1.x'
		return 0
	}

	// PARSE_INT
	digit := unsafe { *(buf_start + 7) }
	if !(digit >= `0` && digit <= `9`) {
		pret.ret = -1
		pret.err = 'error parsing request: invalid HTTP version'
		return 0
	}

	pret.ret = 8
	return int(digit - `0`)
}
361
// get_token_length_to_eol measures the text between `buf_start` and the next
// line ending ("\r\n" or a bare "\n").
//
// It returns the number of bytes in front of the line ending and sets
// `pret.ret` to the number of bytes up to and including the line ending, so
// the caller can step to the next line even when the text is empty.
// On failure 0 is returned and `pret.ret` is -2 if the buffer ended before a
// line ending was found, or -1 with `pret.err` set if a control character
// other than tab, "\r" or "\n" is met or a "\r" is not followed by "\n".
fn get_token_length_to_eol(buf_start &u8, buf_end &u8, mut pret Pret) int {
	mut buf := unsafe { buf_start }
	mut token_len := 0

	// find non-printable char within the next 8 bytes
	// HOT code: (TODO: should be manually inlined)
	for _likely_(unsafe { buf_end - buf >= 8 }) {
		for _ in 0 .. 8 {
			if _unlikely_(!is_printable_ascii(*buf)) {
				// non printable
				unsafe {
					goto non_printable
				}
			}
			unsafe { buf++ }
			continue

			non_printable:
			// allow space and horizontal tab
			if _likely_(*buf < ` ` && *buf != 9) || _unlikely_(*buf == 127) {
				// found a control character (CTL)
				unsafe {
					goto found_ctl
				}
			}
			unsafe { buf++ }
		}
	}
	// remaining characters
	for {
		// CHECK_EOF
		if buf == buf_end {
			pret.ret = -2
			return 0
		}
		if _likely_(*buf < ` ` && *buf != 9) || _unlikely_(*buf == 127) {
			// found a control character (CTL)
			unsafe {
				goto found_ctl
			}
		}
		unsafe { buf++ }
	}

	found_ctl:
	if _likely_(*buf == `\r`) {
		unsafe { buf++ }
		// EXPECT_CHAR
		if buf == buf_end {
			pret.ret = -2
			return 0
		}
		if *buf != `\n` {
			// no '\n' after '\r' indicates a parse error
			pret.ret = -1
			pret.err = 'error parsing request: expected "\n" after "\r"'
			return 0
		}
		unsafe { buf++ }
		token_len = unsafe { buf - 2 - buf_start }
	} else if *buf == `\n` {
		token_len = unsafe { buf - buf_start }
		unsafe { buf++ }
	} else {
		pret.ret = -1
		pret.err = 'error parsing request: expecting "\r\n" after header'
		return 0
	}

	// The consumed length is reported for empty text as well: reporting 0
	// would leave the caller on the line ending, which it would then take
	// for the empty line that closes the header section.
	pret.ret = unsafe { buf - buf_start }
	return token_len
}
439
440// following functions are #define in the C version, but inline here for better readability
441
// advance_token reads the token that starts at `tok_start` and runs up to,
// but not including, the next space.
//
// On success the token is returned (built with `tos` over the input bytes)
// and `pret.ret` is its length; the byte at that offset is the space.
// On failure an empty string is returned and `pret.ret` is -2 if `tok_end`
// was reached before a space, or -1 with `pret.err` set if a control
// character (below 32, or 127) is met.
@[inline]
fn advance_token(tok_start &u8, tok_end &u8, mut pret Pret) string {
	mut buf := unsafe { tok_start }
	// CHECK_EOF: an empty range has no first byte to look at
	if buf == tok_end {
		pret.ret = -2
		return ''
	}
	for *buf != ` ` {
		if _unlikely_(!is_printable_ascii(*buf)) {
			// bytes of 128 and above are tolerated, control characters are not
			if *buf < ` ` || *buf == 127 {
				pret.ret = -1
				pret.err = 'error parsing request: invalid character "${*buf}"'
				return ''
			}
		}
		unsafe { buf++ }
		// CHECK_EOF
		if buf == tok_end {
			pret.ret = -2
			return ''
		}
	}

	pret.ret = unsafe { buf - tok_start }
	return unsafe { tos(tok_start, pret.ret) }
}
464
// advance_token2 is a less safe version of advance_token: it validates no
// characters and never reads `_tok_end`, so it keeps scanning until it meets
// a space.
//
// The returned string is the text in front of the first space. `pret.ret` is
// the length of that text plus the length of the run of spaces following it.
@[inline]
fn advance_token2(tok_start &u8, _tok_end &u8, mut pret Pret) string {
	// everything in front of the first space is the token
	mut token_len := 0
	for unsafe { *(tok_start + token_len) != ` ` } {
		token_len++
	}

	// swallow the whole run of spaces behind the token
	mut consumed := token_len
	for unsafe { *(tok_start + consumed) == ` ` } {
		consumed++
	}

	pret.ret = consumed
	return unsafe { tos(tok_start, token_len) }
}
483
// is_printable_ascii reports whether `c` lies in the range 32 (space) to 126
// (`~`), both included.
@[inline]
fn is_printable_ascii(c u8) bool {
	return c >= 32 && c <= 126
}
488