v / vlib / time / parse.c.v
604 lines · 558 sloc · 17.77 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module time
5
6import strconv
7
// date_format_buffer is the `YYYY-MM-DD` template used by the date checker:
// a `0` marks a position that must hold a digit, any other byte must match literally.
const date_format_buffer = [
	u8(`0`), `0`, `0`, `0`,
	`-`,
	`0`, `0`,
	`-`,
	`0`, `0`
]!

// time_format_buffer is the `HH:MM:SS` template used by the time checker:
// a `0` marks a position that must hold a digit, any other byte must match literally.
const time_format_buffer = [
	u8(`0`), `0`,
	`:`,
	`0`, `0`,
	`:`,
	`0`, `0`
]!
10
// validate_time_bounds returns an error when one of the given time components
// is outside its valid range:
// hour 0..23, minute 0..59, second 0..59, nanosecond 0..999_999_999.
fn validate_time_bounds(hour int, minute int, second int, nanosecond int) ! {
	if hour < 0 || hour > 23 {
		return error('invalid hour: ${hour}')
	}
	if minute < 0 || minute > 59 {
		return error('invalid minute: ${minute}')
	}
	if second < 0 || second > 59 {
		return error('invalid second: ${second}')
	}
	// 1_000_000_000 ns is already a full second, so it is out of range too.
	if nanosecond < 0 || nanosecond > 999_999_999 {
		return error('invalid nanosecond: ${nanosecond}')
	}
}
25
// check_and_extract_time parses the time part of an RFC 3339 timestamp: `HH:MM:SS`,
// optionally followed by `.` and fractional-second digits, then the timezone designator.
// It returns hour, minute, second and nanosecond.
// Only the first 9 fractional digits are used (further digits are ignored), and fewer
// than 9 digits are scaled up to nanoseconds.
// A `+`/`-` only terminates the fractional part here; the offset that follows it is
// not inspected by this function.
fn check_and_extract_time(s string) !(int, int, int, int) {
	// Every index below assumes that a complete `HH:MM:SS` prefix is present.
	if s.len < time_format_buffer.len {
		return error('datetime string is too short')
	}

	mut hour_ := 0
	mut minute_ := 0
	mut second_ := 0
	mut nanosecond_ := 0

	// Check that the string starts in the format "HH:MM:SS"
	for i := 0; i < time_format_buffer.len; i++ {
		if time_format_buffer[i] == u8(`0`) {
			if s[i] < u8(`0`) || s[i] > u8(`9`) {
				// ascii_str() shows the offending character instead of its byte value
				return error('`HH:MM:SS` match error: expected digit, not `${s[i].ascii_str()}` in position ${i}')
			}
			if i < 2 {
				hour_ = hour_ * 10 + (s[i] - u8(`0`))
			} else if i < 5 {
				minute_ = minute_ * 10 + (s[i] - u8(`0`))
			} else {
				second_ = second_ * 10 + (s[i] - u8(`0`))
			}
		} else if time_format_buffer[i] != s[i] {
			return error('time separator error: expected `:`, not `${[s[i]].bytestr()}` in position ${i}')
		}
	}

	// Exactly one trailing byte: it can only be the UTC designator.
	if s.len == time_format_buffer.len + 1 {
		if s[time_format_buffer.len] !in [u8(`Z`), `z`] {
			return error('timezone error: expected "Z" or "z" at the end of the string')
		}
		validate_time_bounds(hour_, minute_, second_, nanosecond_)!
		return hour_, minute_, second_, nanosecond_
	}

	// Nothing after `HH:MM:SS`: the mandatory timezone is missing.
	if s.len < time_format_buffer.len + 1 {
		return error('datetime string is too short')
	}

	if s[time_format_buffer.len] == u8(`.`) {
		// Fractional seconds: ".N" up to ".NNNNNNNNN" (extra digits are dropped)
		mut nanosecond_digits := 0
		for i := time_format_buffer.len + 1; i < s.len; i++ {
			if s[i] < u8(`0`) || s[i] > u8(`9`) {
				if s[i] in [u8(`Z`), `z`] {
					if i != s.len - 1 {
						return error('timezone error: "Z" or "z" can only be at the end of the string')
					}
					break
				} else if s[i] in [u8(`+`), `-`] {
					break
				}
				return error('nanoseconds error: expected digit, not `${s[i].ascii_str()}` in position ${i}')
			}
			if nanosecond_digits < 9 {
				// nanoseconds limit is 9 digits
				nanosecond_ = nanosecond_ * 10 + (s[i] - u8(`0`))
				nanosecond_digits++
			}
		}
		// RFC 3339: time-secfrac = "." 1*DIGIT
		if nanosecond_digits == 0 {
			return error('nanoseconds error: expected at least one digit after "."')
		}
		// Scale e.g. ".5" (1 digit) up to 500_000_000 ns.
		for nanosecond_digits < 9 {
			nanosecond_ *= 10
			nanosecond_digits++
		}
	}
	validate_time_bounds(hour_, minute_, second_, nanosecond_)!
	return hour_, minute_, second_, nanosecond_
}
97
// check_and_extract_date parses the leading `YYYY-MM-DD` of `s` and returns
// year, month and day. Anything after the 10th byte is ignored.
// It returns an error when `s` is shorter than 10 bytes, when a digit or a `-`
// separator is not where the template expects it, when the month is not in 1..12,
// or when the day is not in 1..31 (the day is not checked against the month length).
fn check_and_extract_date(s string) !(int, int, int) {
	// Every index below assumes that a complete `YYYY-MM-DD` prefix is present.
	if s.len < date_format_buffer.len {
		return error('date string is too short')
	}
	mut year := 0
	mut month := 0
	mut day := 0
	// Check that the string starts in the format "YYYY-MM-DD"
	for i := 0; i < date_format_buffer.len; i++ {
		if date_format_buffer[i] == u8(`0`) {
			if s[i] < u8(`0`) || s[i] > u8(`9`) {
				// ascii_str() shows the offending character instead of its byte value
				return error('`YYYY-MM-DD` match error: expected digit, not `${s[i].ascii_str()}` in position ${i}')
			}
			if i < 4 {
				year = year * 10 + (s[i] - u8(`0`))
			} else if i < 7 {
				month = month * 10 + (s[i] - u8(`0`))
			} else {
				day = day * 10 + (s[i] - u8(`0`))
			}
		} else if date_format_buffer[i] != s[i] {
			return error('date separator error:expected "${date_format_buffer[i].ascii_str()}", not `${s[i].ascii_str()}` in position ${i}')
		}
	}
	if month < 1 || month > 12 {
		return error('date error: invalid month ${month}')
	}
	if day < 1 || day > 31 {
		return error('date error: invalid day ${day}')
	}
	return year, month, day
}
128
// parse_http_header_string converts an HTTP header date string, formatted as
// described by RFC 2616, to a Time. It forwards to `parse_rfc2616` and propagates
// any error that function returns.
pub fn parse_http_header_string(s string) !Time {
	parsed := parse_rfc2616(s)!
	return parsed
}
133
// parse_rfc2616 returns the time from a date string in one of the HTTP-date
// layouts listed below:
// Wed, 06 Nov 2024 08:49:37 GMT ; RFC 822, updated by RFC 1123
// Wednesday, 06-Nov-24 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
// Wed Nov 6 08:49:37 2024 ; ANSI C's asctime() format
// Weekday names, `GMT` and commas are dropped and `-` becomes a space before
// the remaining text is matched against the three layouts in turn.
pub fn parse_rfc2616(s string) !Time {
	if s.len == 0 {
		return error_invalid_time(0, 'datetime string is empty')
	}

	// (pattern, replacement) pairs; full weekday names come before their
	// abbreviations so that e.g. `Monday` is not left behind as `day`.
	replacements := [
		'GMT', '',
		'Monday', '',
		'Tuesday', '',
		'Wednesday', '',
		'Thursday', '',
		'Friday', '',
		'Saturday', '',
		'Sunday', '',
		'Mon', '',
		'Tue', '',
		'Wed', '',
		'Thu', '',
		'Fri', '',
		'Sat', '',
		'Sun', '',
		'-', ' ',
		',', ''
	]
	cleaned := remove_consecutive_spaces(s.replace_each(replacements))

	// RFC 822 / RFC 1123 layout
	if t := parse_format(cleaned, 'DD MMM YYYY HH:mm:ss') {
		return t
	}

	// RFC 850 layout. parse_format maps YY to this century (94 maps to 2094, 20 to 2020),
	// so a year that lies in the future is moved back to the previous century.
	if t := parse_format(cleaned, 'DD MMM YY HH:mm:ss') {
		if t.year > now().year {
			return t.add_days(-(days_per_100_years + 1))
		}
		return t
	}

	// asctime() layout
	if t := parse_format(cleaned, 'MMM D HH:mm:ss YYYY') {
		return t
	}
	return error('unable to parse date: "${cleaned}"')
}
169
// remove_consecutive_spaces trims leading/trailing whitespace from `s` and
// collapses every run of consecutive ` ` bytes into a single space.
// Other whitespace bytes inside the string (tabs, newlines) are kept as they are.
fn remove_consecutive_spaces(s string) string {
	trimmed := s.trim_space()
	// Collect into a byte buffer instead of growing a string with `+=`,
	// which would copy the whole result on every appended byte.
	mut out := []u8{cap: trimmed.len}
	mut prev_was_space := false
	for c in trimmed {
		is_space := c == u8(` `)
		if is_space && prev_was_space {
			continue
		}
		out << c
		prev_was_space = is_space
	}
	return out.bytestr()
}
189
// parse_rfc3339 returns the time from a date string in RFC 3339 datetime format.
// See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of
// the differences between ISO-8601 and RFC 3339.
//
// The accepted shape is `YYYY-MM-DD`, one of `T`, `t` or a space, `HH:MM:SS`,
// an optional `.` with fractional seconds, and a mandatory timezone: `Z`/`z`
// or a numeric offset `+HH:MM` / `-HH:MM`.
// With a non-zero offset, the offset is applied to the parsed wall clock value
// (subtracted for `+`, added for `-`) before the Time is returned.
// An error is returned when any of these parts is missing or malformed.
pub fn parse_rfc3339(s string) !Time {
	if s == '' {
		return error_invalid_time(0, 'datetime string is empty')
	}
	if s.len < time_format_buffer.len {
		return error('string is too short to parse')
	}

	// A value that starts with `HH:MM:SS` has no date part at all.
	if s[2] == u8(`:`) && s[5] == u8(`:`) {
		return error('missing date part of RFC 3339')
	}
	if s.len < date_format_buffer.len {
		return error('date-time too short to parse')
	}

	// The date is always validated: a string without a proper `YYYY-MM-DD`
	// prefix must be rejected instead of silently producing a zero date.
	year, month, day := check_and_extract_date(s)!

	if s.len == date_format_buffer.len {
		return error('date-time too short to parse')
	}
	if s[date_format_buffer.len] !in [u8(`T`), `t`, ` `] {
		return error('invalid date-time separator:${s[date_format_buffer.len].ascii_str()}')
	}

	time_start := date_format_buffer.len + 1
	time_end := time_start + time_format_buffer.len
	if s.len <= time_end {
		return error('timezone error: datetime string is too short')
	}
	hour_, minute_, second_, nanosecond_ := check_and_extract_time(s[time_start..])!

	if s[time_end] !in [u8(`Z`), `z`, `+`, `-`, `.`] {
		// RFC 3339 needs a timezone
		return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${time_end}, not "${[
			s[time_end],
		].bytestr()}"')
	}

	// Locate the timezone designator; it follows the fractional seconds, if any.
	mut tz_pos := time_end
	if s[tz_pos] == u8(`.`) {
		tz_pos++
		for tz_pos < s.len && s[tz_pos] !in [u8(`Z`), `z`, `+`, `-`] {
			tz_pos++
		}
		if tz_pos == s.len {
			return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${tz_pos}, not "${[
				s[s.len - 1],
			].bytestr()}"')
		}
	}

	parsed := new(Time{
		year:       year
		month:      month
		day:        day
		hour:       hour_
		minute:     minute_
		second:     second_
		nanosecond: nanosecond_
		is_local:   false
	})

	if s[tz_pos] in [u8(`Z`), `z`] {
		if tz_pos != s.len - 1 {
			return error('timezone error: "Z" or "z" can only be at the end of the string')
		}
		return parsed
	}

	// From here on s[tz_pos] is `+` or `-`, and exactly "+HH:MM" must follow.
	offset_len := 6
	if s.len < tz_pos + offset_len {
		return error('datetime string is too short')
	}
	if s.len > tz_pos + offset_len {
		return error('timezone error: unexpected characters after the UTC offset')
	}
	if s[tz_pos + 3] != u8(`:`) {
		return error('timezone separator error: expected ":", not `${[
			s[tz_pos + 3],
		].bytestr()}` in position ${tz_pos + 3}')
	}
	for i in [1, 2, 4, 5] {
		if !s[tz_pos + i].is_digit() {
			return error('timezone error: expected digit, not `${[
				s[tz_pos + i],
			].bytestr()}` in position ${tz_pos + i}')
		}
	}
	offset_hours := int(s[tz_pos + 1] - u8(`0`)) * 10 + int(s[tz_pos + 2] - u8(`0`))
	offset_mins := int(s[tz_pos + 4] - u8(`0`)) * 10 + int(s[tz_pos + 5] - u8(`0`))
	if offset_hours > 23 || offset_mins > 59 {
		return error('timezone error: invalid UTC offset ${s[tz_pos..]}')
	}

	mut offset_in_minutes := offset_hours * 60 + offset_mins
	if offset_in_minutes == 0 {
		// "+00:00" / "-00:00": the parsed value is already UTC
		return parsed
	}
	// A positive offset means the wall clock is ahead of UTC, so it is subtracted.
	if s[tz_pos] == u8(`+`) {
		offset_in_minutes = -offset_in_minutes
	}
	return parsed.add_seconds(offset_in_minutes * 60)
}
346
// parse returns the time from a date string in "YYYY-MM-DD HH:mm:ss" format.
// The date and the time are separated at the first space of `s`; the date must
// split into 3 fields on `-` and the time into 3 fields on `:`.
// Each field is converted with `strconv.atoi` and then range checked; the first
// failing step decides which error is returned.
pub fn parse(s string) !Time {
	if s.len == 0 {
		return error_invalid_time(0, 'datetime string is empty')
	}
	space_idx := s.index(' ') or {
		return error_invalid_time(1, 'string has no space between date and time')
	}

	date_fields := s[..space_idx].split('-')
	if date_fields.len != 3 {
		return error_invalid_time(2, 'date must be in the form of y-m-d')
	}
	// The time starts right after the separating space.
	time_fields := s[space_idx + 1..].split(':')
	if time_fields.len != 3 {
		return error_invalid_time(9, 'time must be in the form of H:i:s')
	}

	year_str, month_str, day_str := date_fields[0], date_fields[1], date_fields[2]
	hour_str, minute_str, second_str := time_fields[0], time_fields[1], time_fields[2]

	iyear := strconv.atoi(year_str) or {
		return error_invalid_time(0, 'invalid year format: ${year_str}')
	}
	imonth := strconv.atoi(month_str) or {
		return error_invalid_time(0, 'invalid month format: ${month_str}')
	}
	iday := strconv.atoi(day_str) or {
		return error_invalid_time(0, 'invalid day format: ${day_str}')
	}
	ihour := strconv.atoi(hour_str) or {
		return error_invalid_time(0, 'invalid hour format: ${hour_str}')
	}
	iminute := strconv.atoi(minute_str) or {
		return error_invalid_time(0, 'invalid minute format: ${minute_str}')
	}
	isecond := strconv.atoi(second_str) or {
		return error_invalid_time(0, 'invalid second format: ${second_str}')
	}

	if iyear < -9999 || iyear > 9999 {
		return error_invalid_time(3, 'year must be between -10000 and 10000')
	}
	if imonth < 1 || imonth > 12 {
		return error_invalid_time(4, 'month must be between 1 and 12')
	}
	if iday < 1 || iday > 31 {
		return error_invalid_time(5, 'day must be between 1 and 31')
	}
	if ihour < 0 || ihour > 23 {
		return error_invalid_time(6, 'hours must be between 0 and 24')
	}
	if iminute < 0 || iminute > 59 {
		return error_invalid_time(7, 'minutes must be between 0 and 60')
	}
	if isecond < 0 || isecond > 59 {
		return error_invalid_time(8, 'seconds must be between 0 and 60')
	}

	return new(Time{
		year:   iyear
		month:  imonth
		day:    iday
		hour:   ihour
		minute: iminute
		second: isecond
	})
}
417
418// parse_format parses the string `s`, as a custom `format`, containing the following specifiers:
419//
420// |Category| Format | Description |
421// |:----- | :----- | :---------- |
422// |Year | YYYY | 4 digit year, 0000..9999 |
423// | | YY | 2 digit year, 00..99 |
424// |Month | M | month, 1..12 |
425// | | MM | month, 2 digits, 01..12 |
426// | | MMM | month, three letters, Jan..Dec |
427// | | MMMM | name of month |
428// |Day | D | day of the month, 1..31 |
429// | | DD | day of the month, 01..31 |
430// | | d | day of week, 0..6 |
431// | | c | day of week, 1..7 |
432// | | dd | day of week, Su..Sa |
433// | | ddd | day of week, Sun..Sat |
434// | | dddd | day of week, Sunday..Saturday |
435// |Hour | H | hour, 0..23 |
436// | | HH | hour, 00..23 |
437// | | h | hour, 0..23 |
438// | | hh | hour, 0..23 |
439// | | k | hour, 0..23 |
440// | | kk | hour, 0..23 |
441// |Minute | m | minute, 0..59 |
442// | | mm | minute, 0..59 |
443// |Second | s | second, 0..59 |
444// | | ss | second, 0..59 |
pub fn parse_format(s string, format string) !Time {
	// An empty input can never match a format; report it before building a parser.
	if s.len == 0 {
		return error_invalid_time(0, 'datetime string is empty')
	}
	// The actual matching of `s` against `format` is done by DateTimeParser.
	mut parser := DateTimeParser{
		datetime: s
		format:   format
	}
	return parser.parse()
}
455
// parse_iso8601 parses the ISO 8601 time format yyyy-MM-ddTHH:mm:ss.dddddd+dd:dd as local time.
// The date and the time are separated by `T`, or by a space when there is no `T`;
// the time part is optional. The trailing part is the offset from UTC, as +/- HH:mm .
// When the time part carries no timezone, the value is returned as it was written;
// otherwise the parsed offset is applied to its unix timestamp.
// See https://en.wikipedia.org/wiki/ISO_8601 .
// Remarks: not all of ISO 8601 is supported; checks and support for leapseconds should be added.
pub fn parse_iso8601(s string) !Time {
	if s.len == 0 {
		return error_invalid_time(0, 'datetime string is empty')
	}

	// Split into [date] or [date, time].
	t_pos := s.index_('T')
	mut parts := []string{}
	if t_pos == -1 {
		parts = s.split(' ')
	} else {
		parts = [s[..t_pos], s[t_pos + 1..]]
	}
	if parts.len != 1 && parts.len != 2 {
		return error_invalid_time(12, 'malformed date')
	}

	year, month, day := parse_iso8601_date(parts[0])!

	// Defaults for a date-only input: midnight, no offset, treated as local time.
	mut hour_, mut minute_, mut second_, mut microsecond_, mut nanosecond_ := 0, 0, 0, 0, 0
	mut unix_offset := i64(0)
	mut is_local_time := true
	if parts.len == 2 {
		hour_, minute_, second_, microsecond_, nanosecond_, unix_offset, is_local_time =
			parse_iso8601_time(parts[1])!
	}

	parsed := new(Time{
		year:       year
		month:      month
		day:        day
		hour:       hour_
		minute:     minute_
		second:     second_
		nanosecond: nanosecond_
	})
	if is_local_time {
		return parsed // Time already local time
	}

	// Shift the timestamp by the signed offset and rebuild the Time from it.
	shifted_unix := parsed.unix + unix_offset
	return unix_nanosecond(i64(shifted_unix), parsed.nanosecond)
}
497
// parse_rfc2822 returns the time from a date string in RFC 2822 datetime format.
// `s` is split on single spaces and needs at least 5 fields, which are read as:
// [1] day, [2] three letter month name, [3] year, [4] `HH:mm:ss`.
// Field [0] and everything after field [4] are ignored.
// The fields are reassembled as "YYYY-MM-DD HH:mm:ss" and handed to `parse`,
// so its errors are returned too.
pub fn parse_rfc2822(s string) !Time {
	if s == '' {
		return error_invalid_time(0, 'datetime string is empty')
	}
	fields := s.split(' ')
	if fields.len < 5 {
		return error_invalid_time(1, 'datetime string must have 5 components, has: ${fields.len}')
	}
	month_name := fields[2]
	pos := months_string.index(month_name) or {
		return error_invalid_time(2, 'invalid month format')
	}
	// NOTE(review): `pos / 3 + 1` implies that months_string is a run of 3 letter
	// month names - confirm against its definition. Under that layout only a whole
	// name that starts on a multiple of 3 is a month; a match such as "anF" is not.
	if month_name.len != 3 || pos % 3 != 0 {
		return error_invalid_time(2, 'invalid month format')
	}
	mm := pos / 3 + 1
	// Plain string interpolation replaces the former unsafe malloc + snprintf buffer.
	return parse('${fields[3]}-${mm:02d}-${fields[1]} ${fields[4]}')
}
518
519// ----- iso8601 -----
// parse_iso8601_date reads `s` as `YYYY-MM-DD` with sscanf and returns year, month and day.
// It returns an error when sscanf does not convert exactly 3 fields (too few
// components, or extra text after the day), or when month/day are out of range.
// The day is only checked against 1..31, not against the length of the month.
fn parse_iso8601_date(s string) !(int, int, int) {
	// `dummy` only exists to detect trailing text: if `%c` matches, count becomes 4.
	year, month, day, dummy := 0, 0, 0, u8(0)
	count := unsafe { C.sscanf(&char(s.str), c'%4d-%2d-%2d%c', &year, &month, &day, &dummy) }
	if count != 3 {
		return error_invalid_time(10, 'datetime string must have 3 components, but has ${count}')
	}
	if year > 9999 {
		return error_invalid_time(13, 'year must be smaller than 10000')
	}
	// `%2d` also accepts `0` and a signed value such as `-1`, so check both bounds.
	if month < 1 || month > 12 {
		return error_invalid_time(14, 'month must be between 1 and 12')
	}
	if day < 1 || day > 31 {
		return error_invalid_time(15, 'day must be between 1 and 31')
	}
	return year, month, day
}
537
// iso8601_fraction_to_nanoseconds scales `fraction`, the integer that sscanf read
// from the digits following the first `.` in `s`, up to nanoseconds. The scale is
// chosen from the number of digits written, so ".5" becomes 500_000_000 and
// ".015959" becomes 15_959_000.
fn iso8601_fraction_to_nanoseconds(s string, fraction int) int {
	mut ndigits := 0
	if dot := s.index('.') {
		for pos := dot + 1; pos < s.len && s[pos].is_digit(); pos++ {
			ndigits++
		}
	}
	mut nanoseconds := fraction
	for ndigits < 9 {
		nanoseconds *= 10
		ndigits++
	}
	return nanoseconds
}

// parse_iso8601_time parses the time part of an ISO 8601 timestamp:
// `HH:mm:ss`, an optional `.` fraction, and an optional `Z` or `+HH:mm` / `-HH:mm`.
// It returns hour, minute, second, microsecond, nanosecond, the offset in seconds
// that has to be added to the unix time to apply the timezone (negative for `+`),
// and whether the value had no timezone at all (local time).
fn parse_iso8601_time(s string) !(int, int, int, int, int, i64, bool) {
	hour_ := 0
	minute_ := 0
	second_ := 0
	mut microsecond_ := 0
	mut nanosecond_ := 0
	// `a` is a sentinel: it stays untouched when sscanf finds no timezone character.
	plus_min_z := `a`
	offset_hour := 0
	offset_minute := 0
	mut count := 0
	count = unsafe {
		C.sscanf(&char(s.str), c'%2d:%2d:%2d.%9d%c', &hour_, &minute_, &second_, &nanosecond_,
			&char(&plus_min_z))
	}
	if count == 5 && plus_min_z == `Z` {
		// normalise the nanoseconds:
		nanosecond_ = iso8601_fraction_to_nanoseconds(s, nanosecond_)
		microsecond_ = nanosecond_ / 1000
	} else {
		count = unsafe {
			C.sscanf(&char(s.str), c'%2d:%2d:%2d.%9d%c%2d:%2d', &hour_, &minute_, &second_,
				&microsecond_, &char(&plus_min_z), &offset_hour, &offset_minute)
		}
		// Misread fraction ([Sec Hour Minute].len == 3 < 4)
		if count < 4 {
			count = unsafe {
				C.sscanf(&char(s.str), c'%2d:%2d:%2d%c%2d:%2d', &hour_, &minute_, &second_,
					&char(&plus_min_z), &offset_hour, &offset_minute)
			}
			count++ // Increment count because skipped microsecond
		}
		if count < 4 {
			return error_invalid_time(10, 'malformed date')
		}
		// sscanf stored the raw fraction digits in microsecond_; scale them by the
		// number of digits instead of assuming that exactly 6 were written.
		nanosecond_ = iso8601_fraction_to_nanoseconds(s, microsecond_)
		microsecond_ = nanosecond_ / 1000
	}
	is_local_time := plus_min_z == `a` && count == 4
	is_utc := plus_min_z == `Z` && count == 5
	if !(count == 7 || is_local_time || is_utc) {
		return error_invalid_time(11, 'malformed date')
	}
	if plus_min_z != `+` && plus_min_z != `-` && !is_utc && !is_local_time {
		return error_invalid_time(12, 'missing timezone')
	}
	mut unix_offset := 0
	if offset_hour > 0 {
		unix_offset += 3600 * offset_hour
	}
	if offset_minute > 0 {
		unix_offset += 60 * offset_minute
	}
	// A `+` offset means the written time is ahead of UTC, so it must be subtracted.
	if plus_min_z == `+` {
		unix_offset *= -1
	}
	// eprintln('parse_iso8601_time s: ${s} | hour_: ${hour_} | minute_: ${minute_} | second_: ${second_} | microsecond_: ${microsecond_} | nanosecond_: ${nanosecond_} | unix_offset: ${unix_offset} | is_local: ${is_local_time}')
	return hour_, minute_, second_, microsecond_, nanosecond_, unix_offset, is_local_time
}
605