diff --git a/vlib/strconv/atoi.v b/vlib/strconv/atoi.v index 9fc9dae27a..3073446dda 100644 --- a/vlib/strconv/atoi.v +++ b/vlib/strconv/atoi.v @@ -216,10 +216,12 @@ pub fn parse_int(_s string, base int, _bit_size int) !i64 { return common_parse_int(_s, base, _bit_size, true, true) } -// atoi is equivalent to parse_int(s, 10, 0), converted to type int. -// It follows V scanner as much as observed. +// atoi_common_check perform basics check on string to parse: +// Test emptiness, + or - sign presence, presence of digit after signs and no +// underscore as first character. +// returns +1 or -1 depending on sign, and s first digit index or an error. @[direct_array_access] -pub fn atoi(s string) !int { +fn atoi_common_check(s string) !(i64, int) { if s == '' { return error('strconv.atoi: parsing "": empty string') } @@ -241,7 +243,15 @@ pub fn atoi(s string) !int { if s[start_idx] == `_` || s[s.len - 1] == `_` { return error('strconv.atoi: parsing "${s}": values cannot start or end with underscores') } + return sign, start_idx +} +// atoi_common performs computation for all i8, i16 and i32 type, excluding i64. +// Parse values, and returns consistent error message over differents types. +// s is string to parse, type_min/max are respective types min/max values. +@[direct_array_access] +fn atoi_common(s string, type_min i64, type_max i64) !i64 { + mut sign, mut start_idx := atoi_common_check(s)! mut x := i64(0) mut underscored := false for i in start_idx .. s.len { @@ -258,16 +268,94 @@ pub fn atoi(s string) !int { } underscored = false x = (x * 10) + (c * sign) - if sign == 1 && x > i64_max_int32 { + if sign == 1 && x > type_max { return error('strconv.atoi: parsing "${s}": integer overflow') } else { - if x < i64_min_int32 { + if x < type_min { return error('strconv.atoi: parsing "${s}": integer underflow') } } } } - return int(x) + return x +} + +// atoi is equivalent to parse_int(s, 10, 0), converted to type int. +// It follows V scanner as much as observed. 
+pub fn atoi(s string) !int {
+	return int(atoi_common(s, i64_min_int32, i64_max_int32)!)
+}
+
+// atoi8 parses the radix 10 string `s` like atoi(s), but bounded to the i8 range.
+// It returns an i8 in [-128 .. 127], or an error.
+pub fn atoi8(s string) !i8 {
+	return i8(atoi_common(s, min_i8, max_i8)!)
+}
+
+// atoi16 parses the radix 10 string `s` like atoi(s), but bounded to the i16 range.
+// It returns an i16 in [-32768 .. 32767], or an error.
+pub fn atoi16(s string) !i16 {
+	return i16(atoi_common(s, min_i16, max_i16)!)
+}
+
+// atoi32 parses the radix 10 string `s` like atoi(s), but bounded to the i32 range.
+// It returns an i32 in [-2147483648 .. 2147483647], or an error.
+pub fn atoi32(s string) !i32 {
+	return i32(atoi_common(s, min_i32, max_i32)!)
+}
+
+// atoi64 converts the radix 10 string `s` to an i64 in [-9223372036854775808 .. 9223372036854775807].
+// A single `_` between digits is skipped. The sign is applied to every digit while accumulating.
+// It returns an error if atoi_common_check rejects `s`, on `__`, on a non digit, or on i64 overflow/underflow.
+@[direct_array_access]
+pub fn atoi64(s string) !i64 {
+	sign, start_idx := atoi_common_check(s)!
+	mut x := i64(0)
+	mut underscored := false
+	for i in start_idx .. s.len {
+		c := s[i] - `0`
+		if c == 47 { // 47 = ascii(`_`) - ascii(`0`) = 95 - 48.
+			if underscored { // Two consecutive underscores.
+				return error('strconv.atoi64: parsing "${s}": consecutives underscores are not allowed')
+			}
+			underscored = true
+			continue // Skip underscore
+		}
+		if c > 9 {
+			return error('strconv.atoi64: parsing "${s}": invalid radix 10 character')
+		}
+		underscored = false
+		x = safe_mul10_64bits(x) or { return error('strconv.atoi64: parsing "${s}": ${err}') }
+		x = safe_add_64bits(x, i64(c) * sign) or {
+			return error('strconv.atoi64: parsing "${s}": ${err}')
+		}
+	}
+	return x
+}
+
+// safe_add_64bits performs a signed 64 bits addition and returns an error
+// in case of overflow or underflow.
+@[inline] +fn safe_add_64bits(a i64, b i64) !i64 { + if a > 0 && b > (max_i64 - a) { + return error('integer overflow') + } else if a < 0 && b < (min_i64 - a) { + return error('integer underflow') + } + return a + b +} + +// safe_mul10 performs a * 10 multiplication and returns an error +// in case of overflow or underflow. +@[inline] +fn safe_mul10_64bits(a i64) !i64 { + if a > 0 && a > (max_i64 / 10) { + return error('integer overflow') + } + if a < 0 && a < (min_i64 / 10) { + return error('integer underflow') + } + return a * 10 } const i64_min_int32 = i64(-2147483647) - 1 // msvc has a bug that treats just i64(min_int) as 2147483648 :-(; this is a workaround for it diff --git a/vlib/strconv/atoi_test.v b/vlib/strconv/atoi_test.v index 1ec62c349d..6ee9babfce 100644 --- a/vlib/strconv/atoi_test.v +++ b/vlib/strconv/atoi_test.v @@ -1,30 +1,90 @@ import strconv -fn test_atoi() { - struct StrVal { // Inner test struct - str_value string - int_value int +struct StrInt { // Inner test struct + str_value string + int_value int +} + +// test what should be catch by atoi_common_check +fn test_common_check() { + // Parsing of these strings should fail on all types. + ko := [ + '', // Empty string + '-', // Only sign + '+', // Only sign + '_', // Only Underscore + '_10', // Start with underscore + '+_10', // Start with underscore after sign. + '-_16', // Start with underscore after sign. + '123_', // End with underscore + ] + + for v in ko { + if r := strconv.atoi(v) { + // These conversions should fail so force assertion ! + assert false, 'The string "${v}" should not succeed or be considered as valid ${r}).' + } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + +// Test things accepted, and rejected in atoi_common function. +fn test_atoi_common() { + // Parsing of theses value should succeed on all types. 
+ ok := [ + StrInt{'1', 1}, + StrInt{'-1', -1}, + StrInt{'0', 0}, + StrInt{'+0', 0}, + StrInt{'-0', 0}, + StrInt{'-0_00', 0}, + StrInt{'+0_00', 0}, + StrInt{'+1', 1}, + StrInt{'+123', 123}, + StrInt{'-1_2_1', -121}, + StrInt{'00000006', 6}, + StrInt{'0_0_0_0_0_0_0_6', 6}, + ] + + // Check that extracted int value matches its string. + for v in ok { + // println('Parsing ${v.str_value} should equals ${v.int_value}') + assert strconv.atoi(v.str_value)! == v.int_value } - // Parsing of theses value should succeed. + ko := [// Parsing of these strings should fail on all types. + '-3__1', // Two consecutives underscore. + '-3_1A', // Non radix 10 char. + 'A42', // Non radix 10 char. + ] + + for v in ko { + if r := strconv.atoi(v) { + // These conversions should fail so force assertion ! + assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).' + } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + +// performs numeric (bounds) tests over int type. +fn test_atoi() { ok := [ - StrVal{'1', 1}, - StrVal{'-1', -1}, - StrVal{'0', 0}, - StrVal{'+0', 0}, - StrVal{'-0', 0}, - StrVal{'-0_00', 0}, - StrVal{'+0_00', 0}, - StrVal{'+1', 1}, - StrVal{'+1024', 1024}, - StrVal{'+3_14159', 314159}, - StrVal{'-1_00_1', -1001}, - StrVal{'-1_024', -1024}, - StrVal{'123_456_789', 123456789}, - StrVal{'00000006', 6}, - StrVal{'0_0_0_0_0_0_0_6', 6}, - StrVal{'2147483647', 2147483647}, // Signed 32bits max. - StrVal{'-2147483648', -2147483648}, // Signed 32bits min. + StrInt{'1', 1}, + StrInt{'-1', -1}, + StrInt{'0', 0}, + StrInt{'+3_14159', 314159}, + StrInt{'-1_00_1', -1001}, + StrInt{'-1_024', -1024}, + StrInt{'123_456_789', 123456789}, + StrInt{'00000006', 6}, + StrInt{'0_0_0_0_0_0_0_6', 6}, + StrInt{'2147483647', max_int}, + StrInt{'-2147483648', min_int}, ] // Check that extracted int value matches its string. @@ -35,17 +95,6 @@ fn test_atoi() { // Parsing of these values should fail ! 
ko := [ - '', // Empty string - '-', // Only sign - '+', // Only sign - '_', // Only Underscore - '_10', // Start with underscore - '+_10', // Start with underscore after sign. - '-_16', // Start with underscore after sign. - '123_', // End with underscore - '-3__14159', // Two consecutives underscore. - '-3_14159A', // Non radix 10 char. - 'A42', // Non radix 10 char. '-2147483649', // 32bits underflow by 1. '+2147483648', // 32 bit overflow by 1. '+3147483648', // 32 bit overflow by a lot. @@ -64,6 +113,201 @@ fn test_atoi() { } } +// performs numeric (bounds) tests over i8 type. +fn test_atoi8() { + struct StrI8 { // Inner test struct + str_value string + int_value i8 + } + + ok := [ + StrI8{'0', 0}, // All kind of zeroes + StrI8{'+0', 0}, + StrI8{'-0', 0}, + StrI8{'-0_00', 0}, + StrI8{'+0_00', 0}, + StrI8{'1', 1}, + StrI8{'+1', 1}, + StrI8{'-1', -1}, + StrI8{'+123', 123}, + StrI8{'-1_2_1', -121}, + StrI8{'0_0_0_0_0_0_0_6', 6}, + StrI8{'127', max_i8}, + StrI8{'-128', min_i8}, + ] + + // Check that extracted int value matches its string. + for v in ok { + // println('Parsing ${v.str_value} should equals ${v.int_value}') + assert strconv.atoi8(v.str_value)! == v.int_value + } + + // Parsing of these values should fail ! + ko := [ + '-129', // i8 bits underflow by 1. + '+128', // i8 bit overflow by 1. + '+256', // i8 overflow with value equal to max u8. + '+3147483648', // i8 bit overflow by a lot. + '-4836470', // Large i8 underflow. + ] + + for v in ko { + if r := strconv.atoi8(v) { + // These conversions should fail so force assertion ! + assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).' + } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + +// performs numeric (bounds) tests over i16 type. 
+fn test_atoi16() { + struct StrI16 { // Inner test struct + str_value string + int_value i16 + } + + ok := [ + StrI16{'0', 0}, // All kind of zeroes + StrI16{'+0', 0}, + StrI16{'-0', 0}, + StrI16{'-0_00', 0}, + StrI16{'+0_00', 0}, + StrI16{'1', 1}, + StrI16{'+1', 1}, + StrI16{'-1', -1}, + StrI16{'+123', 123}, + StrI16{'-1_2_1', -121}, + StrI16{'0_0_0_0_0_0_0_6', 6}, + StrI16{'32767', max_i16}, + StrI16{'-32768', min_i16}, + ] + + // Check that extracted int value matches its string. + for v in ok { + // println('Parsing ${v.str_value} should equals ${v.int_value}') + assert strconv.atoi16(v.str_value)! == v.int_value + } + + // Parsing of these values should fail ! + ko := [ + '-32769', // i16 bits underflow by 1. + '+32768', // i16 bit overflow by 1. + '+45_000', // i16 bit overflow by a lot. + '65536', // i16 overflow with value equal to u16 max. + '-483_647_909', // Large i16 underflow. + ] + + for v in ko { + if r := strconv.atoi16(v) { + // These conversions should fail so force assertion ! + assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).' + } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + +// performs numeric (bounds) tests over i32 type. This test is redundant with atoi +// which performs same on int (actually 32bits). In the future, int COULD be mapped +// on arch with (e.g 64bits). That's why this test exists. +fn test_atoi32() { + struct StrI32 { // Inner test struct + str_value string + int_value i32 + } + + ok := [ + StrI32{'0', 0}, // All kind of zeroes + StrI32{'+0', 0}, + StrI32{'-0', 0}, + StrI32{'-0_00', 0}, + StrI32{'+0_00', 0}, + StrI32{'1', 1}, + StrI32{'+1', 1}, + StrI32{'-1', -1}, + StrI32{'+123', 123}, + StrI32{'-1_2_1', -121}, + StrI32{'0_0_0_0_0_0_0_6', 6}, + StrI32{'2147483647', max_i32}, + StrI32{'-2147483648', min_i32}, + ] + + // Check that extracted int value matches its string. 
+ for v in ok { + // println('Parsing ${v.str_value} should equals ${v.int_value}') + assert strconv.atoi32(v.str_value)! == v.int_value + } + + // Parsing of these values should fail ! + ko := [ + '-2147483649', // i32 bits underflow by 1. + '+2147483648', // i32 bit overflow by 1. + '+4294967295', // Large Overflow but equal to u32 max. + '-483_647_909_912_754', // Large i32 underflow. + ] + + for v in ko { + if r := strconv.atoi32(v) { + // These conversions should fail so force assertion ! + assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).' + } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + +fn test_atoi64() { + struct StrI64 { // Inner test struct + str_value string + int_value i64 + } + + ok := [ + StrI64{'0', 0}, // All kind of zeroes + StrI64{'+0', 0}, + StrI64{'-0', 0}, + StrI64{'-0_00', 0}, + StrI64{'+0_00', 0}, + StrI64{'1', 1}, + StrI64{'+1', 1}, + StrI64{'-1', -1}, + StrI64{'+123', 123}, + StrI64{'-1_2_1', -121}, + StrI64{'0_0_0_0_0_0_0_6', 6}, + StrI64{'9223372036854775807', max_i64}, + StrI64{'-9223372036854775808', min_i64}, + ] + + // Check that extracted int value matches its string. + for v in ok { + // println('Parsing ${v.str_value} should equals ${v.int_value}') + assert strconv.atoi64(v.str_value)! == v.int_value + } + + // Parsing of these values should fail ! + ko := [ + '-9223372036854775809', // i64 bits underflow by 1. + '+9223372036854775808', // i64 bit overflow by 1. + '+18446744073709551615', // Large Overflow but equal to u64 max. + '-483647909912754123456789', // Large i64 underflow. + ] + + for v in ko { + if r := strconv.atoi64(v) { + // These conversions should fail so force assertion ! + assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).' 
+ } else { + // println('Parsing fails as it should for : "${v}') + assert true + } + } +} + fn test_parse_int() { // symbols coverage assert strconv.parse_int('1234567890', 10, 32)! == 1234567890