diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v
index 7d6315fef5..5dacf5b4ec 100644
--- a/vlib/time/parse.c.v
+++ b/vlib/time/parse.c.v
@@ -5,6 +5,143 @@ module time
 
 import strconv
 
+// date_format_buffer is the `YYYY-MM-DD` template: a `0` marks a position that must hold a
+// digit, every other byte is a separator that has to match literally.
+const date_format_buffer = [u8(`0`), `0`, `0`, `0`, `-`, `0`, `0`, `-`, `0`, `0`]!
+// time_format_buffer is the `HH:MM:SS` template, with the same convention as date_format_buffer.
+const time_format_buffer = [u8(`0`), `0`, `:`, `0`, `0`, `:`, `0`, `0`]!
+
+// validate_time_bounds returns an error when one of the given time components is outside
+// of its valid range.
+fn validate_time_bounds(hour int, minute int, second int, nanosecond int) ! {
+	if hour < 0 || hour > 23 {
+		return error('invalid hour: ${hour}')
+	}
+	if minute < 0 || minute > 59 {
+		return error('invalid minute: ${minute}')
+	}
+	if second < 0 || second > 59 {
+		return error('invalid second: ${second}')
+	}
+	// one second is 1_000_000_000 ns, so the largest valid value is 999_999_999
+	if nanosecond < 0 || nanosecond >= 1_000_000_000 {
+		return error('invalid nanosecond: ${nanosecond}')
+	}
+}
+
+// check_and_extract_time validates that `s` starts with `HH:MM:SS`, optionally followed by a
+// fraction of a second (`.` and at least one digit), and returns the hour, minute, second and
+// nanosecond. After a fraction only a final `Z`/`z` or a `+`/`-` is accepted; the timezone
+// offset itself, and anything that follows `HH:MM:SS` without a fraction, is left to the caller.
+fn check_and_extract_time(s string) !(int, int, int, int) {
+	// `HH:MM:SS` plus at least one byte for the timezone designator is required
+	if s.len < time_format_buffer.len + 1 {
+		return error('datetime string is too short')
+	}
+
+	mut hour_ := 0
+	mut minute_ := 0
+	mut second_ := 0
+	mut nanosecond_ := 0
+
+	// Check that the string starts in the format "HH:MM:SS"
+	for i := 0; i < time_format_buffer.len; i++ {
+		if time_format_buffer[i] == u8(`0`) {
+			if s[i] < u8(`0`) || s[i] > u8(`9`) {
+				return error('`HH:MM:SS` match error: expected digit, not `${[s[i]].bytestr()}` in position ${i}')
+			} else {
+				if i < 2 {
+					hour_ = hour_ * 10 + (s[i] - u8(`0`))
+				} else if i < 5 {
+					minute_ = minute_ * 10 + (s[i] - u8(`0`))
+				} else {
+					second_ = second_ * 10 + (s[i] - u8(`0`))
+				}
+			}
+		} else if time_format_buffer[i] != s[i] {
+			return error('time separator error: expected `:`, not `${[s[i]].bytestr()}` in position ${i}')
+		}
+	}
+
+	if s.len == time_format_buffer.len + 1 {
+		if s[time_format_buffer.len] !in [u8(`Z`), `z`] {
+			return error('timezone error: expected "Z" or "z" at the end of the string')
+		}
+		validate_time_bounds(hour_, minute_, second_, nanosecond_)!
+		return hour_, minute_, second_, nanosecond_
+	}
+
+	if s[time_format_buffer.len] == u8(`.`) {
+		// Check that the string continues in the format ".NNNNNNNNN"
+		mut nanosecond_digits := 0
+		for i := time_format_buffer.len + 1; i < s.len; i++ {
+			if s[i] < u8(`0`) || s[i] > u8(`9`) {
+				if s[i] in [u8(`Z`), `z`] {
+					if i != s.len - 1 {
+						return error('timezone error: "Z" or "z" can only be at the end of the string')
+					}
+					break
+				} else if s[i] in [u8(`+`), `-`] {
+					break
+				}
+				return error('nanoseconds error: expected digit, not `${[s[i]].bytestr()}` in position ${i}')
+			}
+			if !(i >= time_format_buffer.len + 1 + 9) {
+				// nanoseconds limit is 9 digits
+				nanosecond_ = nanosecond_ * 10 + (s[i] - u8(`0`))
+				nanosecond_digits++
+			}
+		}
+		// RFC 3339 requires at least one digit after the `.`
+		if nanosecond_digits == 0 {
+			return error('nanoseconds error: expected at least one digit after `.`')
+		}
+		if nanosecond_digits < 9 {
+			for i := 0; i < 9 - nanosecond_digits; i++ {
+				nanosecond_ *= 10
+			}
+		}
+	}
+	validate_time_bounds(hour_, minute_, second_, nanosecond_)!
+	return hour_, minute_, second_, nanosecond_
+}
+
+// check_and_extract_date validates that `s` starts with `YYYY-MM-DD` and returns the year,
+// month and day. The day is only checked against 1..31, not against the length of the month.
+fn check_and_extract_date(s string) !(int, int, int) {
+	if s.len < date_format_buffer.len {
+		return error('date string is too short')
+	}
+	mut year := 0
+	mut month := 0
+	mut day := 0
+	// Check that the string starts in the format "YYYY-MM-DD"
+	for i := 0; i < date_format_buffer.len; i++ {
+		if date_format_buffer[i] == u8(`0`) {
+			if s[i] < u8(`0`) || s[i] > u8(`9`) {
+				return error('`YYYY-MM-DD` match error: expected digit, not `${[s[i]].bytestr()}` in position ${i}')
+			} else {
+				if i < 4 {
+					year = year * 10 + (s[i] - u8(`0`))
+				} else if i < 7 {
+					month = month * 10 + (s[i] - u8(`0`))
+				} else {
+					day = day * 10 + (s[i] - u8(`0`))
+				}
+			}
+		} else if date_format_buffer[i] != s[i] {
+			return error('date separator error: expected `-`, not `${[s[i]].bytestr()}` in position ${i}')
+		}
+	}
+	if month < 1 || month > 12 {
+		return error('date error: invalid month ${month}')
+	}
+	if day < 1 || day > 31 {
+		return error('date error: invalid day ${day}')
+	}
+	return year, month, day
+}
+
 // parse_rfc3339 returns the time from a date string in RFC 3339 datetime format.
 // See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of
 // the differences between ISO-8601 and RFC 3339.
@@ -12,48 +149,166 @@ pub fn parse_rfc3339(s string) !Time {
 	if s == '' {
 		return error_invalid_time(0, 'datetime string is empty')
 	}
-	// Normalize the input before parsing. Good since iso8601 doesn't permit lower case `t` and `z`.
-	sn := s.replace_each(['t', 'T', 'z', 'Z'])
-	mut t := parse_iso8601(sn) or { Time{} }
-	// If parse_iso8601 DID NOT result in default values (i.e. date was parsed correctly)
-	if t != Time{} {
-		return t
+
+	if s.len < time_format_buffer.len {
+		return error('string is too short to parse')
 	}
 
-	t_i := sn.index('T') or { -1 }
-	parts := if t_i != -1 { [sn[..t_i], sn[t_i + 1..]] } else { sn.split(' ') }
+	mut year, mut month, mut day := 0, 0, 0
+	mut hour_, mut minute_, mut second_, mut nanosecond_ := 0, 0, 0, 0
 
-	// Check if sn is date only
-	if !parts[0].contains_any(' Z') && parts[0].contains('-') {
-		year, month, day := parse_iso8601_date(sn)!
-		t = new(Time{
-			year: year
-			month: month
-			day: day
-		})
-		return t
+	is_time := if s.len >= time_format_buffer.len {
+		s[2] == u8(`:`) && s[5] == u8(`:`)
+	} else {
+		false
 	}
-	// Check if sn is time only
-	if !parts[0].contains('-') && parts[0].contains(':') {
-		mut hour_, mut minute_, mut second_, mut microsecond_, mut nanosecond_, mut unix_offset, mut is_local_time := 0, 0, 0, 0, 0, i64(0), true
-		hour_, minute_, second_, microsecond_, nanosecond_, unix_offset, is_local_time = parse_iso8601_time(parts[0])!
-		t = new(Time{
-			hour: hour_
-			minute: minute_
-			second: second_
-			nanosecond: nanosecond_
-		})
-		if is_local_time {
-			return t // Time is already local time
+	if is_time {
+		return error('missing date part of RFC 3339')
+	}
+
+	is_date := if s.len >= date_format_buffer.len {
+		s[4] == u8(`-`) && s[7] == u8(`-`)
+	} else {
+		false
+	}
+
+	if is_date {
+		year, month, day = check_and_extract_date(s)!
+		if s.len == date_format_buffer.len {
+			return new(Time{
+				year: year
+				month: month
+				day: day
+				is_local: false
+			})
 		}
-		mut unix_time := t.unix
-		if unix_offset < 0 {
-			unix_time -= (-unix_offset)
-		} else if unix_offset > 0 {
-			unix_time += unix_offset
+	}
+
+	// RFC 3339 puts `T` between date and time, also permits the lower case `t`,
+	// and lets applications use a space instead, for the sake of readability.
+	is_datetime := if s.len >= date_format_buffer.len + 1 + time_format_buffer.len + 1 {
+		is_date && s[date_format_buffer.len] in [u8(`T`), `t`, ` `]
+	} else {
+		false
+	}
+	if is_datetime {
+		hour_, minute_, second_, nanosecond_ = check_and_extract_time(s[date_format_buffer.len + 1..])!
+	}
+
+	mut timezone_start_position := 0
+
+	if is_datetime || is_time {
+		timezone_start_position = date_format_buffer.len + 1 + time_format_buffer.len
+		if s[timezone_start_position] == u8(`.`) {
+			timezone_start_position++
+
+			for s[timezone_start_position] !in [u8(`Z`), `z`, `+`, `-`] {
+				timezone_start_position++
+				if timezone_start_position == s.len {
+					// the whole string was consumed, so there is no byte left to report
+					return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${timezone_start_position}, not the end of the string')
+				}
+			}
+		}
+	}
+
+	pos := date_format_buffer.len + time_format_buffer.len + 1
+	if pos >= s.len {
+		return error('timezone error: datetime string is too short')
+	}
+	if !is_datetime {
+		// no `YYYY-MM-DD` followed by a date/time separator was found, so no time was extracted
+		return error_invalid_time(9, 'malformed date')
+	}
+	if s[pos] !in [u8(`Z`), `z`, `+`, `-`, `.`] {
+		// RFC 3339 needs a timezone
+		return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${pos}, not "${[s[pos]].bytestr()}"')
+	} else {
+		if s[s.len - 1] in [u8(`Z`), `z`] {
+			if timezone_start_position != s.len - 1 {
+				return error('timezone error: "Z" or "z" must directly follow the time part')
+			}
+			return new(Time{
+				year: year
+				month: month
+				day: day
+				hour: hour_
+				minute: minute_
+				second: second_
+				nanosecond: nanosecond_
+				is_local: false
+			})
+		} else {
+			// Check if the string contains the timezone part after the time part +00:00
+			if s.len < date_format_buffer.len + 1 + time_format_buffer.len + 6 {
+				return error('datetime string is too short')
+			}
+			if s[s.len - 3] != u8(`:`) {
+				return error('timezone separator error: expected ":", not `${[s[s.len - 3]].bytestr()}` in position ${s.len - 3}')
+			}
+			// The offset has to start right after the time part, with its sign
+			if timezone_start_position != s.len - 6 || s[s.len - 6] !in [u8(`+`), `-`] {
+				return error('timezone error: expected "+HH:MM" or "-HH:MM" in position ${timezone_start_position}')
+			}
+			for i in [s.len - 5, s.len - 4, s.len - 2, s.len - 1] {
+				if s[i] < u8(`0`) || s[i] > u8(`9`) {
+					return error('timezone error: expected digit, not `${[s[i]].bytestr()}` in position ${i}')
+				}
+			}
+
+			// Check if it is UTC time
+			if unsafe { vmemcmp(s.str + s.len - 5, '00:00'.str, 5) == 0 } {
+				return new(Time{
+					year: year
+					month: month
+					day: day
+					hour: hour_
+					minute: minute_
+					second: second_
+					nanosecond: nanosecond_
+					is_local: false
+				})
+			}
+
+			is_negative := s[s.len - 6] == u8(`-`)
+
+			// Convert to UTC: a `+` offset is subtracted from the parsed time, a `-` offset is added
+			mut offset_in_minutes := 0
+			mut offset_in_hours := 0
+			// offset hours
+			for i := 0; i < 2; i++ {
+				offset_in_hours = offset_in_hours * 10 + (s[s.len - 5 + i] - u8(`0`))
+			}
+
+			// offset minutes
+			for i := 0; i < 2; i++ {
+				offset_in_minutes = offset_in_minutes * 10 + (s[s.len - 2 + i] - u8(`0`))
+			}
+			if offset_in_hours > 23 || offset_in_minutes > 59 {
+				return error('timezone error: invalid offset `${s[s.len - 6..]}`')
+			}
+
+			offset_in_minutes += offset_in_hours * 60
+
+			if !is_negative {
+				offset_in_minutes *= -1
+			}
+
+			mut time_to_be_returned := new(Time{
+				year: year
+				month: month
+				day: day
+				hour: hour_
+				minute: minute_
+				second: second_
+				nanosecond: nanosecond_
+				is_local: false
+			})
+
+			time_to_be_returned = time_to_be_returned.add_seconds(offset_in_minutes * 60)
+
+			return time_to_be_returned
 		}
-		t = unix_nanosecond(i64(unix_time), t.nanosecond)
-		return t
 	}
 
 	return error_invalid_time(9, 'malformed date')
@@ -310,6 +565,6 @@ fn parse_iso8601_time(s string) !(int, int, int, int, int, i64, bool) {
 	if plus_min_z == `+` {
 		unix_offset *= -1
 	}
-	// eprintln('parse_iso8601_time s: $s | hour_: $hour_ | minute_: $minute_ | second_: $second_ | microsecond_: $microsecond_ | nanosecond_: $nanosecond_ | unix_offset: $unix_offset | is_local_time: $is_local_time')
+	// eprintln('parse_iso8601_time s: $s | hour_: $hour_ | minute_: $minute_ | second_: $second_ | microsecond_: $microsecond_ | nanosecond_: $nanosecond_ | unix_offset: $unix_offset | is_local: $is_local_time')
 	return hour_, minute_, second_, microsecond_, nanosecond_, unix_offset, is_local_time
 }
diff --git a/vlib/time/parse_test.v b/vlib/time/parse_test.v
index 1e76384e9f..26e8480ee3 100644
--- a/vlib/time/parse_test.v
+++ b/vlib/time/parse_test.v
@@ -3,8 +3,7 @@ import time
 fn test_parse() {
 	s := '2018-01-27 12:48:34'
 	t := time.parse(s) or {
-		eprintln('> failing format: ${s} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s} | err: ${err}'
 		return
 	}
 	assert t.year == 2018 && t.month == 1 && t.day == 27 && t.hour == 12 && t.minute == 48
@@ -27,8 +26,7 @@ fn test_parse_invalid() {
 fn test_parse_rfc2822() {
 	s1 := 'Thu, 12 Dec 2019 06:07:45 GMT'
 	t1 := time.parse_rfc2822(s1) or {
-		eprintln('> failing format: ${s1} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s1} | err: ${err}'
 		return
 	}
 	assert t1.year == 2019 && t1.month == 12 && t1.day == 12 && t1.hour == 6 && t1.minute == 7
@@ -36,8 +34,7 @@ fn test_parse_rfc2822() {
 	assert t1.unix() == 1576130865
 	s2 := 'Thu 12 Dec 2019 06:07:45 +0800'
 	t2 := time.parse_rfc2822(s2) or {
-		eprintln('> failing format: ${s2} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s2} | err: ${err}'
 		return
 	}
 	assert t2.year == 2019 && t2.month == 12 && t2.day == 12 && t2.hour == 6 && t2.minute == 7
@@ -73,8 +70,7 @@ fn test_parse_iso8601() {
 	]
 	for i, format in formats {
 		t := time.parse_iso8601(format) or {
-			eprintln('>>> failing format: ${format} | err: ${err}')
-			assert false
+			assert false, '>>> failing format: ${format} | err: ${err}'
 			continue
 		}
 		year := times[i][0]
@@ -97,8 +93,7 @@ fn test_parse_iso8601() {
 fn test_parse_iso8601_local() {
 	format := '2020-06-05T15:38:06.015959'
 	t := time.parse_iso8601(format) or {
-		eprintln('> failing format: ${format} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${format} | err: ${err}'
 		return
 	}
 	assert t.year == 2020
@@ -135,8 +130,7 @@ fn test_parse_iso8601_invalid() {
 fn test_parse_iso8601_date_only() {
 	format := '2020-06-05'
 	t := time.parse_iso8601(format) or {
-		eprintln('> failing format: ${format} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${format} | err: ${err}'
 		return
 	}
 	assert t.year == 2020
@@ -150,12 +144,21 @@ fn test_parse_iso8601_date_only() {
 
 fn check_invalid_date(s string) {
 	if date := time.parse(s) {
-		eprintln('invalid date: "${s}" => "${date}"')
-		assert false
+		assert false, 'invalid date: "${s}" => "${date}"'
 	}
 	assert true
 }
 
+fn invalid_rfc3339(s string) string {
+	if date := time.parse_rfc3339(s) {
+		assert false, 'invalid date: "${s}" => "${date}"'
+	} else {
+		assert true
+		return err.str()
+	}
+	return ''
+}
+
 fn test_invalid_dates_should_error_during_parse() {
 	check_invalid_date('-99999-12-20 00:00:00')
 	check_invalid_date('99999-12-20 00:00:00')
@@ -175,17 +178,53 @@ fn test_parse_rfc3339() {
 	pairs := [
 		['2015-01-06T15:47:32.080254511Z', '2015-01-06 15:47:32.080254'],
 		['2015-01-06T15:47:32.072697474Z', '2015-01-06 15:47:32.072697'],
+		['2015-01-06T15:47:32.1234Z', '2015-01-06 15:47:32.123400'],
+		['2015-01-06T15:47:32.001234Z', '2015-01-06 15:47:32.001234'],
+		['2015-01-06T15:47:32Z', '2015-01-06 15:47:32.000000'],
+		['2015-01-06T15:47:32+00:00', '2015-01-06 15:47:32.000000'],
+		['2015-01-06T15:47:32-00:00', '2015-01-06 15:47:32.000000'],
+		['2015-01-06T15:47:32-01:00', '2015-01-06 16:47:32.000000'],
+		['2015-01-06T15:47:32+01:00', '2015-01-06 14:47:32.000000'],
+		['2015-01-06T15:47:32-01:10', '2015-01-06 16:57:32.000000'],
+		['2015-01-06T15:47:32+01:10', '2015-01-06 14:37:32.000000'],
+		['2015-01-06T15:47:32.1234-00:00', '2015-01-06 15:47:32.123400'],
+		['2015-01-06T15:47:32.1234+01:00', '2015-01-06 14:47:32.123400'],
+		['2015-01-06T15:47:32.1234-01:00', '2015-01-06 16:47:32.123400'],
+		['2015-01-06T22:59:59-00:10', '2015-01-06 23:09:59.000000'],
+		['1979-05-27T07:32:00-08:00', '1979-05-27 15:32:00.000000'],
+		['2024-10-19T22:47:08-00:00', '2024-10-19 22:47:08.000000'],
+		['2024-10-19T22:47:08.9+00:00', '2024-10-19 22:47:08.900000'],
+		['2024-10-20T01:47:08+03:00', '2024-10-19 22:47:08.000000'],
+		['2024-10-20T01:47:08.981+03:00', '2024-10-19 22:47:08.981000'],
+		['2015-01-06T15:47:32+10:00', '2015-01-06 05:47:32.000000'],
+		['2015-01-06T15:47:32-12:30', '2015-01-07 04:17:32.000000'],
+		['2015-01-06t15:47:32z', '2015-01-06 15:47:32.000000'],
 	]
 	for pair in pairs {
 		input, expected := pair[0], pair[1]
 		res := time.parse_rfc3339(input) or {
-			eprintln('>>> failing input: ${input} | err: ${err}')
-			assert false
+			assert false, '>>> failing input: ${input} | err: ${err}'
 			return
 		}
 		output := res.format_ss_micro()
 		assert expected == output
 	}
+	assert invalid_rfc3339('22:47:08Z') == 'missing date part of RFC 3339'
+	assert invalid_rfc3339('01:47:08.981+03:00') == 'missing date part of RFC 3339'
+	assert invalid_rfc3339('2006-01-00') == 'date error: invalid day 0'
+	assert invalid_rfc3339('2006-01-32') == 'date error: invalid day 32'
+	assert invalid_rfc3339('2006-01-88') == 'date error: invalid day 88'
+	assert invalid_rfc3339('2006-00-01') == 'date error: invalid month 0'
+	assert invalid_rfc3339('2006-13-01') == 'date error: invalid month 13'
+	assert invalid_rfc3339('2006-77-01') == 'date error: invalid month 77'
+	assert invalid_rfc3339('2006-01-01T24:47:08Z') == 'invalid hour: 24'
+	assert invalid_rfc3339('2006-01-01T99:47:08Z') == 'invalid hour: 99'
+	assert invalid_rfc3339('2006-01-01T23:60:08Z') == 'invalid minute: 60'
+	assert invalid_rfc3339('2006-01-01T23:99:08Z') == 'invalid minute: 99'
+	assert invalid_rfc3339('2006-01-01T23:59:60Z') == 'invalid second: 60'
+	assert invalid_rfc3339('2006-01-01T23:59:99Z') == 'invalid second: 99'
+	assert invalid_rfc3339('2015-01-06T15:47:32.123') == 'timezone error: expected "Z" or "z" or "+" or "-" in position 23, not the end of the string'
+	assert invalid_rfc3339('2006-0a-01') == '`YYYY-MM-DD` match error: expected digit, not `a` in position 6'
 }
 
 fn test_ad_second_to_parse_result_in_2001() {
@@ -205,8 +244,7 @@ fn test_ad_second_to_parse_result_pre_2001() {
 fn test_parse_format() {
 	mut s := '2018-01-27 12:48:34'
 	mut t := time.parse_format(s, 'YYYY-MM-DD HH:mm:ss') or {
-		eprintln('> failing format: ${s} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s} | err: ${err}'
 		return
 	}
 	assert t.year == 2018 && t.month == 1 && t.day == 27 && t.hour == 12 && t.minute == 48
@@ -214,8 +252,7 @@ fn test_parse_format() {
 
 	s = '2018-November-27 12:48:20'
 	t = time.parse_format(s, 'YYYY-MMMM-DD HH:mm:ss') or {
-		eprintln('> failing format: ${s} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s} | err: ${err}'
 		return
 	}
 	assert t.year == 2018 && t.month == 11 && t.day == 27 && t.hour == 12 && t.minute == 48
@@ -223,8 +260,7 @@ fn test_parse_format() {
 
 	s = '18-1-2 0:8:2'
 	t = time.parse_format(s, 'YY-M-D H:m:s') or {
-		eprintln('> failing format: ${s} | err: ${err}')
-		assert false
+		assert false, '> failing format: ${s} | err: ${err}'
 		return
 	}
 	assert t.year == 2018 && t.month == 1 && t.day == 2 && t.hour == 0 && t.minute == 8
@@ -233,6 +269,6 @@ fn test_parse_format() {
 	// This should always fail, because we test if M and D allow for a 01 value which they shouldn't
 	s = '2018-01-02 1:8:2'
 	t = time.parse_format(s, 'YYYY-M-D H:m:s') or { return }
-	eprintln('> failing for datetime: ${s}, the datetime string should not have passed the format "YYYY-M-D H:m:s"')
-	assert false
+
+	assert false, '> failing for datetime: ${s}, the datetime string should not have passed the format "YYYY-M-D H:m:s"'
 }
diff --git a/vlib/toml/checker/checker.v b/vlib/toml/checker/checker.v
index 305200b722..15ae7ec675 100644
--- a/vlib/toml/checker/checker.v
+++ b/vlib/toml/checker/checker.v
@@ -17,6 +17,16 @@ pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`,
 // utf8_max is the largest inclusive value of the Unicodes scalar value ranges.
 const utf8_max = 0x10FFFF
 
+// toml_parse_time parses `s` with `time.parse_rfc3339`. A value that looks like a bare time
+// (`HH:...`) is prefixed with an arbitrary date first, since parse_rfc3339 rejects time-only input.
+fn toml_parse_time(s string) !time.Time {
+	if s.len > 3 && s[2] == `:` {
+		// complete the partial time, with an arbitrary date:
+		return time.parse_rfc3339('0001-01-01T' + s)
+	}
+	return time.parse_rfc3339(s)!
+}
+
 // Checker checks a tree of TOML `ast.Value`'s for common errors.
 pub struct Checker {
 pub:
@@ -318,8 +328,21 @@ fn (c &Checker) check_date_time(dt ast.DateTime) ! {
 			col: dt.pos.col + split[0].len
 		}
 	})!
-	// Use V's builtin functionality to validate the string
-	time.parse_rfc3339(lit) or {
+	// TOML supports local date-times without an offset, while RFC 3339 does not: append `Z` when the offset is missing, so that the value can still be checked.
+	mut has_time_offset := false
+	for ch in lit#[19..] {
+		if ch in [u8(`-`), `+`, `Z`, `z`] {
+			has_time_offset = true
+			break
+		}
+	}
+
+	mut lit_with_offset := lit
+	if !has_time_offset {
+		lit_with_offset += 'Z'
+	}
+
+	toml_parse_time(lit_with_offset) or {
 		return error(@MOD + '.' + @STRUCT + '.' + @FN +
 			' "${lit}" is not a valid RFC 3339 Date-Time format string "${err}". In ...${c.excerpt(dt.pos)}...')
 	}
@@ -352,8 +375,7 @@ fn (c &Checker) check_date(date ast.Date) ! {
 		return error(@MOD + '.' + @STRUCT + '.' + @FN +
 			' "${lit}" does not have a valid RFC 3339 day indication in ...${c.excerpt(date.pos)}...')
 	}
-	// Use V's builtin functionality to validate the string
-	time.parse_rfc3339(lit) or {
+	toml_parse_time(lit) or {
 		return error(@MOD + '.' + @STRUCT + '.' + @FN +
 			' "${lit}" is not a valid RFC 3339 Date format string "${err}". In ...${c.excerpt(date.pos)}...')
 	}
@@ -380,8 +402,22 @@ fn (c &Checker) check_time(t ast.Time) ! {
 		return error(@MOD + '.' + @STRUCT + '.' + @FN +
 			' "${lit}" is not a valid RFC 3339 Time format string in ...${c.excerpt(t.pos)}...')
 	}
-	// Use V's builtin functionality to validate the time string
-	time.parse_rfc3339(parts[0]) or {
+
+	// TOML supports local times without an offset, while RFC 3339 does not: append `Z` when the offset is missing, so that the value can still be checked.
+	mut has_time_offset := false
+	for ch in parts[0]#[8..] {
+		if ch in [u8(`-`), `+`, `Z`, `z`] {
+			has_time_offset = true
+			break
+		}
+	}
+
+	mut part_with_offset := parts[0]
+	if !has_time_offset {
+		part_with_offset += 'Z'
+	}
+
+	toml_parse_time(part_with_offset) or {
 		return error(@MOD + '.' + @STRUCT + '.' + @FN +
 			' "${lit}" is not a valid RFC 3339 Time format string "${err}". In ...${c.excerpt(t.pos)}...')
 	}