v / vlib / fasthttp / request_parser.v
409 lines · 381 sloc · 9.63 KB · b7113f8425b17fc689a617834a7e6083e3193572
Raw
1module fasthttp
2
// Delimiter bytes used while scanning raw HTTP/1.x requests.
const empty_space = u8(0x20) // SP
const cr_char = u8(`\r`) // CR
const lf_char = u8(`\n`) // LF
6
// find_byte returns the offset of the first byte equal to `c` within the
// first `len` bytes starting at `buf`, or -1 when no such byte exists.
// The search is delegated to libc memchr (AVX2-accelerated via glibc IFUNC).
@[inline]
fn find_byte(buf &u8, len int, c u8) int {
	unsafe {
		hit := C.memchr(buf, c, len)
		if hit != voidptr(nil) {
			// Pointer difference between the match and the start of the buffer.
			return int(&u8(hit) - buf)
		}
	}
	return -1
}
18
// parse_http1_request_line parses the request line of an HTTP/1.x request held
// in `req.buffer` and stores the method, path and version in `req` as slices
// (start offset + length) into that buffer.
// spec: https://datatracker.ietf.org/doc/rfc9112/
// request-line is the start-line for requests.
// According to RFC 9112, the request line is structured as:
// `request-line = method SP request-target SP HTTP-version`
// and is terminated by CRLF, where:
// METHOD is the HTTP method (e.g., GET, POST)
// SP is a single space character
// REQUEST-TARGET is the path or resource being requested
// HTTP-VERSION is the version of HTTP being used (e.g., HTTP/1.1)
// CRLF is a carriage return followed by a line feed
// Leniencies of this parser:
// - several spaces between the method and the request target are skipped
// - the HTTP version may be absent (target directly followed by CRLF); the
//   version slice then has length 0
// Returns the position after the CRLF on success.
// Errors: 'Too short' (buffer shorter than 12 bytes), 'Invalid method',
// 'Missing path', 'Invalid request line',
// 'Invalid HTTP request line: Missing CR', 'Invalid CRLF'.
@[direct_array_access]
pub fn parse_http1_request_line(mut req HttpRequest) !int {
	unsafe {
		len := req.buffer.len

		if len < 12 {
			return error('Too short')
		}

		// Only take the address of the first byte once the buffer is known to be non-empty.
		buf := &req.buffer[0]

		// METHOD
		pos1 := find_byte(buf, len, empty_space)
		if pos1 <= 0 {
			return error('Invalid method')
		}
		// The search above covers the whole buffer. If the first space lies
		// beyond the request line (e.g. "GET\r\nHost: a b"), the candidate
		// method would contain a line break, so reject it.
		if find_byte(buf, pos1, cr_char) >= 0 || find_byte(buf, pos1, lf_char) >= 0 {
			return error('Invalid method')
		}
		req.method = Slice{0, pos1}

		// PATH - skip any extra spaces
		mut pos2 := pos1 + 1
		for pos2 < len && buf[pos2] == empty_space {
			pos2++
		}
		if pos2 >= len {
			return error('Missing path')
		}

		path_start := pos2
		// Both offsets are relative to pos2.
		space_pos := find_byte(buf + pos2, len - pos2, empty_space)
		cr_pos := find_byte(buf + pos2, len - pos2, cr_char)

		if space_pos < 0 && cr_pos < 0 {
			return error('Invalid request line')
		}

		// pick earliest delimiter
		mut path_len := 0
		mut delim_pos := 0
		if space_pos >= 0 && (cr_pos < 0 || space_pos < cr_pos) {
			path_len = space_pos
			delim_pos = pos2 + space_pos
		} else {
			path_len = cr_pos
			delim_pos = pos2 + cr_pos
		}

		req.path = Slice{path_start, path_len}

		// VERSION
		if buf[delim_pos] == cr_char {
			// No HTTP version specified
			req.version = Slice{delim_pos, 0}
		} else {
			version_start := delim_pos + 1
			cr := find_byte(buf + version_start, len - version_start, cr_char)
			if cr < 0 {
				return error('Invalid HTTP request line: Missing CR')
			}
			req.version = Slice{version_start, cr}
			// delim_pos now points at the CR that ends the request line.
			delim_pos = version_start + cr
		}

		// Validate CRLF
		if delim_pos + 1 >= len || buf[delim_pos + 1] != lf_char {
			return error('Invalid CRLF')
		}

		return delim_pos + 2 // Return position after CRLF
	}
}
100
// decode_http_request parses a raw HTTP request from the given byte buffer.
// It parses the request line, then locates the end of the header section and
// records the header block and the body as slices into `buffer`:
// - `header_fields` covers the bytes after the request line up to (but not
//   including) the CRLF CRLF that ends the header section
// - `body` covers everything after that terminator
// When no terminator is present, the rest of the buffer (minus the last two
// bytes, never less than zero bytes) is treated as headers and the body is empty.
// Errors from parse_http1_request_line are propagated.
pub fn decode_http_request(buffer []u8) !HttpRequest {
	mut req := HttpRequest{
		buffer: buffer
	}

	// header_start is the byte index immediately after the request line's \r\n
	header_start := parse_http1_request_line(mut req)!

	// Empty header section: the blank line follows the request line directly.
	// The \r\n\r\n scan below starts after the request line's CRLF and would
	// therefore never see this terminator.
	if header_start + 1 < buffer.len && buffer[header_start] == cr_char
		&& buffer[header_start + 1] == lf_char {
		body_start := header_start + 2
		req.header_fields = Slice{
			start: header_start
			len:   0
		}
		req.body = Slice{
			start: body_start
			len:   buffer.len - body_start
		}
		return req
	}

	// Find the end of the header block (\r\n\r\n)
	for i := header_start; i <= buffer.len - 4; i++ {
		if buffer[i] == cr_char && buffer[i + 1] == lf_char && buffer[i + 2] == cr_char
			&& buffer[i + 3] == lf_char {
			body_start := i + 4
			// The header fields slice covers everything from header_start
			// up to (but not including) the final double CRLF
			req.header_fields = Slice{
				start: header_start
				len:   i - header_start
			}
			req.body = Slice{
				start: body_start
				len:   buffer.len - body_start
			}
			return req
		}
	}

	// If no body delimiter found, assume headers go to end or body is missing.
	// The buffer may end right after the request line, so clamp the length
	// instead of producing a negative one.
	mut header_len := buffer.len - header_start - 2
	if header_len < 0 {
		header_len = 0
	}
	req.header_fields = Slice{header_start, header_len}
	req.body = Slice{0, 0}

	return req
}
140
// to_string copies the bytes that `slice` designates inside `buffer` into a
// string. Intended for debugging. A slice with a non-positive length yields ''.
fn (slice Slice) to_string(buffer []u8) string {
	if slice.len > 0 {
		end := slice.start + slice.len
		return buffer[slice.start..end].bytestr()
	}
	return ''
}
148
// find_header_end_in_buf looks for the blank line that terminates the header
// section within the first `buf_len` bytes of `buf`. The accepted terminators
// are "\n\n", "\n\r\n" and "\r\n\r\n". Returns the index of the first byte
// after the terminator, or -1 when none is present.
@[direct_array_access]
fn find_header_end_in_buf(buf &u8, buf_len int) int {
	unsafe {
		mut idx := 0
		// A terminator needs at least two bytes, so the last index is never a start.
		for idx + 1 < buf_len {
			cur := buf[idx]
			if cur == `\n` {
				next := buf[idx + 1]
				if next == `\n` {
					return idx + 2
				}
				if next == `\r` && idx + 2 < buf_len && buf[idx + 2] == `\n` {
					return idx + 3
				}
			} else if cur == `\r` && idx + 3 < buf_len && buf[idx + 1] == `\n`
				&& buf[idx + 2] == `\r` && buf[idx + 3] == `\n` {
				return idx + 4
			}
			idx++
		}
	}
	return -1
}
169
// has_complete_body reports whether the raw request in `buf` (`buf_len` bytes)
// already holds the whole body announced by its headers.
// It returns false while the header section itself is still incomplete.
// For a chunked request the answer comes from has_complete_chunked_body.
// Otherwise the body counts as complete when no positive Content-Length is
// present, or when at least Content-Length bytes follow the header section.
@[direct_array_access]
fn has_complete_body(buf &u8, buf_len int) bool {
	headers_len := find_header_end_in_buf(buf, buf_len)
	if headers_len < 0 {
		// The blank line ending the headers has not arrived yet.
		return false
	}
	// Transfer-Encoding: chunked is checked before Content-Length.
	if has_chunked_transfer_encoding_in_buf(buf, headers_len) {
		return has_complete_chunked_body(buf, buf_len, headers_len)
	}
	expected := parse_content_length_from_buf(buf, headers_len)
	// A missing (-1) or zero Content-Length means no body bytes are awaited.
	return expected <= 0 || buf_len - headers_len >= expected
}
194
// has_complete_chunked_body walks the chunked body that begins at `body_start`
// and reports whether buffering can stop.
// Returns false when more bytes are needed: an unfinished chunk-size line, or
// chunk data / its trailing CRLF not fully received.
// Returns true when the framing is malformed (empty or non-hex size, size
// overflow, chunk data not followed by CRLF).
// After the zero-size chunk the result is that of has_complete_chunked_trailers.
@[direct_array_access]
fn has_complete_chunked_body(buf &u8, buf_len int, body_start int) bool {
	mut cursor := body_start
	for {
		newline_at := find_line_lf_in_buf(buf, buf_len, cursor)
		if newline_at < 0 {
			return false
		}
		// The chunk-size line ends before an optional CR that precedes the LF.
		ends_with_cr := unsafe { newline_at > cursor && buf[newline_at - 1] == `\r` }
		eol := if ends_with_cr { newline_at - 1 } else { newline_at }

		// Anything from the first ';' onwards is a chunk extension and is ignored.
		mut hex_to := eol
		for k := cursor; k < eol; k++ {
			is_semicolon := unsafe { buf[k] == `;` }
			if is_semicolon {
				hex_to = k
				break
			}
		}

		// Trim spaces and tabs around the size field.
		mut hex_from := cursor
		for hex_from < hex_to {
			lead := unsafe { buf[hex_from] }
			if lead != ` ` && lead != `\t` {
				break
			}
			hex_from++
		}
		for hex_to > hex_from {
			tail := unsafe { buf[hex_to - 1] }
			if tail != ` ` && tail != `\t` {
				break
			}
			hex_to--
		}
		if hex_from == hex_to {
			return true
		}

		// Accumulate the hexadecimal size; stop on a bad digit or overflow.
		mut size := 0
		for k := hex_from; k < hex_to; k++ {
			nibble := chunked_hex_digit_value(unsafe { buf[k] })
			if nibble < 0 {
				return true
			}
			if size > (max_int - nibble) / 16 {
				return true
			}
			size = size * 16 + nibble
		}

		cursor = newline_at + 1
		if size == 0 {
			return has_complete_chunked_trailers(buf, buf_len, cursor)
		}
		// Check the size against the remaining bytes before adding it to cursor.
		if size > buf_len - cursor {
			return false
		}
		payload_end := cursor + size
		if payload_end + 2 > buf_len {
			return false
		}
		crlf_follows := unsafe { buf[payload_end] == `\r` && buf[payload_end + 1] == `\n` }
		if !crlf_follows {
			return true
		}
		cursor = payload_end + 2
	}
	return false
}
269
// has_complete_chunked_trailers scans the lines starting at `start` and returns
// true once it meets an empty line (LF alone, or CR LF). It returns false when
// the buffer runs out before such a line is found.
@[direct_array_access]
fn has_complete_chunked_trailers(buf &u8, buf_len int, start int) bool {
	mut line_start := start
	for {
		newline_at := find_line_lf_in_buf(buf, buf_len, line_start)
		if newline_at < 0 {
			return false
		}
		// Ignore a CR directly in front of the LF when measuring the line.
		has_cr := unsafe { newline_at > line_start && buf[newline_at - 1] == `\r` }
		content_end := if has_cr { newline_at - 1 } else { newline_at }
		if content_end == line_start {
			return true
		}
		line_start = newline_at + 1
	}
	return false
}
291
// find_line_lf_in_buf returns the index of the first LF at or after `start`
// within the first `buf_len` bytes of `buf`, or -1 when there is none.
@[direct_array_access]
fn find_line_lf_in_buf(buf &u8, buf_len int, start int) int {
	unsafe {
		mut idx := start
		for idx < buf_len {
			if buf[idx] == `\n` {
				return idx
			}
			idx++
		}
	}
	return -1
}
303
// chunked_hex_digit_value maps an ASCII hexadecimal digit (0-9, a-f, A-F) to
// its numeric value 0..15. Any other byte yields -1.
fn chunked_hex_digit_value(ch u8) int {
	return match ch {
		`0`...`9` { int(ch - `0`) }
		`a`...`f` { int(ch - `a` + 10) }
		`A`...`F` { int(ch - `A` + 10) }
		else { -1 }
	}
}
316
// has_chunked_transfer_encoding_in_buf reports whether the header bytes
// `buf[0..header_end]` contain a line that starts with "Transfer-Encoding:"
// and whose value, up to the end of that line, contains "chunked".
// Both comparisons fold ASCII upper-case letters to lower case.
@[direct_array_access]
fn has_chunked_transfer_encoding_in_buf(buf &u8, header_end int) bool {
	header_name := 'transfer-encoding:'
	token := 'chunked'
	last_line_break := header_end - header_name.len
	last_token_start := header_end - token.len
	unsafe {
		for nl := 0; nl < last_line_break; nl++ {
			// A header name can only begin right after a line feed.
			name_at := nl + 1
			if buf[nl] != `\n` || name_at + header_name.len > header_end {
				continue
			}

			// Count how many leading bytes agree with the header name.
			mut name_hits := 0
			for name_hits < header_name.len {
				mut folded := buf[name_at + name_hits]
				if folded >= `A` && folded <= `Z` {
					folded += 32
				}
				if folded != header_name[name_hits] {
					break
				}
				name_hits++
			}
			if name_hits != header_name.len {
				continue
			}

			// Slide over the value until the line ends, looking for the token.
			for at := name_at + header_name.len; at < last_token_start; at++ {
				if buf[at] == `\r` || buf[at] == `\n` {
					break
				}
				mut token_hits := 0
				for token_hits < token.len {
					mut folded_tok := buf[at + token_hits]
					if folded_tok >= `A` && folded_tok <= `Z` {
						folded_tok += 32
					}
					if folded_tok != token[token_hits] {
						break
					}
					token_hits++
				}
				if token_hits == token.len {
					return true
				}
			}
		}
	}
	return false
}
368
// parse_content_length_from_buf scans the header bytes `buf[0..header_end]`
// for a line starting with "Content-Length:" (ASCII case-insensitive) and
// returns the decimal value that follows.
// Returns:
// - -1 when no Content-Length header is found
// - 0 when the header is present but no digit follows the optional whitespace
// - max_int when the value does not fit in an int, so that callers comparing
//   against the number of received bytes never see a wrapped (negative) length
// Only the first matching header is considered.
@[direct_array_access]
fn parse_content_length_from_buf(buf &u8, header_end int) int {
	cl_lower := 'content-length:'
	for i := 0; i < header_end - cl_lower.len; i++ {
		unsafe {
			// A header name can only begin right after a line feed.
			if buf[i] != `\n` {
				continue
			}
			pos := i + 1
			if pos + cl_lower.len > header_end {
				continue
			}
			mut matched := true
			for j := 0; j < cl_lower.len; j++ {
				mut ch := buf[pos + j]
				if ch >= `A` && ch <= `Z` {
					ch = ch + 32
				}
				if ch != cl_lower[j] {
					matched = false
					break
				}
			}
			if matched {
				mut start := pos + cl_lower.len
				// Optional whitespace before the value may be SP or HTAB.
				for start < header_end && (buf[start] == ` ` || buf[start] == `\t`) {
					start++
				}
				mut val := 0
				for start < header_end && buf[start] >= `0` && buf[start] <= `9` {
					digit := int(buf[start] - `0`)
					// Saturate instead of overflowing on oversized values.
					if val > (max_int - digit) / 10 {
						return max_int
					}
					val = val * 10 + digit
					start++
				}
				return val
			}
		}
	}
	return -1
}
410