strconv : add atoi8/16/32/64 helper functions with their tests (#23757)

This commit is contained in:
Bruno-Vdr 2025-02-19 01:32:19 +01:00 committed by GitHub
parent 1274f46150
commit a694918f4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 371 additions and 39 deletions

View File

@ -216,10 +216,12 @@ pub fn parse_int(_s string, base int, _bit_size int) !i64 {
return common_parse_int(_s, base, _bit_size, true, true)
}
// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
// It follows V scanner as much as observed.
// atoi_common_check performs basic checks on the string to parse:
// it tests for emptiness, an optional + or - sign, the presence of a digit after
// the sign, and rejects an underscore as the first or last character.
// It returns +1 or -1 depending on the sign, and the index of the first digit in s, or an error.
@[direct_array_access]
pub fn atoi(s string) !int {
fn atoi_common_check(s string) !(i64, int) {
if s == '' {
return error('strconv.atoi: parsing "": empty string')
}
@ -241,7 +243,15 @@ pub fn atoi(s string) !int {
if s[start_idx] == `_` || s[s.len - 1] == `_` {
return error('strconv.atoi: parsing "${s}": values cannot start or end with underscores')
}
return sign, start_idx
}
// atoi_common performs the computation for the i8, i16 and i32 types, excluding i64.
// It parses the value and returns consistent error messages across the different types.
// s is the string to parse, type_min/max are the respective type's min/max values.
@[direct_array_access]
fn atoi_common(s string, type_min i64, type_max i64) !i64 {
mut sign, mut start_idx := atoi_common_check(s)!
mut x := i64(0)
mut underscored := false
for i in start_idx .. s.len {
@ -258,16 +268,94 @@ pub fn atoi(s string) !int {
}
underscored = false
x = (x * 10) + (c * sign)
if sign == 1 && x > i64_max_int32 {
if sign == 1 && x > type_max {
return error('strconv.atoi: parsing "${s}": integer overflow')
} else {
if x < i64_min_int32 {
if x < type_min {
return error('strconv.atoi: parsing "${s}": integer underflow')
}
}
}
}
return int(x)
return x
}
// atoi parses s as a radix 10 signed integer and returns it as an int.
// It is equivalent to parse_int(s, 10, 0), converted to type int,
// and follows V scanner as much as observed.
pub fn atoi(s string) !int {
	parsed := atoi_common(s, i64_min_int32, i64_max_int32)!
	return int(parsed)
}
// atoi8 behaves like atoi(s), but bounds the result to the i8 type.
// returns an i8 [-128 .. 127] or an error.
pub fn atoi8(s string) !i8 {
	parsed := atoi_common(s, min_i8, max_i8)!
	return i8(parsed)
}
// atoi16 behaves like atoi(s), but bounds the result to the i16 type.
// returns an i16 [-32768 .. 32767] or an error.
pub fn atoi16(s string) !i16 {
	parsed := atoi_common(s, min_i16, max_i16)!
	return i16(parsed)
}
// atoi32 behaves like atoi(s), but bounds the result to the i32 type.
// returns an i32 [-2147483648 .. 2147483647] or an error.
pub fn atoi32(s string) !i32 {
	parsed := atoi_common(s, min_i32, max_i32)!
	return i32(parsed)
}
// atoi64 parses a radix 10 string into an i64.
// A single underscore is accepted between digits; two in a row are rejected.
// Overflow and underflow are detected by safe_mul10_64bits and safe_add_64bits
// before the accumulator is updated.
// returns an i64 [-9223372036854775808 .. 9223372036854775807] or an error.
@[direct_array_access]
pub fn atoi64(s string) !i64 {
	sign, first_digit := atoi_common_check(s)!
	mut acc := i64(0)
	mut prev_is_underscore := false
	for pos in first_digit .. s.len {
		c := s[pos] - `0`
		// 47 = Ascii(`_`) - ascii(`0`) = 95 - 48.
		if c == 47 {
			if prev_is_underscore {
				// Two underscores in a row.
				return error('strconv.atoi64: parsing "${s}": consecutives underscores are not allowed')
			}
			prev_is_underscore = true
			continue
		}
		// Anything that is not in 0 .. 9 at this point is not a decimal digit.
		if c > 9 {
			return error('strconv.atoi64: parsing "${s}": invalid radix 10 character')
		}
		prev_is_underscore = false
		acc = safe_mul10_64bits(acc) or { return error('strconv.atoi64: parsing "${s}": ${err}') }
		// The digit carries the sign, so the accumulator grows directly
		// towards the negative side for negative numbers.
		acc = safe_add_64bits(acc, int(c * sign)) or {
			return error('strconv.atoi64: parsing "${s}": ${err}')
		}
	}
	return acc
}
// safe_add_64bits performs a signed 64 bits addition of a and b.
// It returns the sum, or an 'integer overflow' / 'integer underflow' error
// when the result does not fit in an i64.
@[inline]
fn safe_add_64bits(a i64, b i64) !i64 {
	// b is compared against the room left on a's side of the range,
	// so the check is done before the addition itself is performed.
	if a > 0 {
		if b > max_i64 - a {
			return error('integer overflow')
		}
	} else if a < 0 {
		if b < min_i64 - a {
			return error('integer underflow')
		}
	}
	return a + b
}
// safe_mul10_64bits multiplies a by 10.
// It returns the product, or an 'integer overflow' / 'integer underflow' error
// when the result does not fit in an i64.
@[inline]
fn safe_mul10_64bits(a i64) !i64 {
	// Largest and smallest values that can still be multiplied by 10.
	upper := max_i64 / 10
	lower := min_i64 / 10
	if a > upper {
		return error('integer overflow')
	}
	if a < lower {
		return error('integer underflow')
	}
	return a * 10
}
// i64_min_int32 is the lower bound passed to atoi_common for the int type.
// It is written as `-2147483647 - 1` on purpose:
// msvc has a bug that treats just i64(min_int) as 2147483648 :-(; this is a workaround for it
const i64_min_int32 = i64(-2147483647) - 1

View File

@ -1,30 +1,90 @@
import strconv
fn test_atoi() {
struct StrVal { // Inner test struct
str_value string
int_value int
struct StrInt { // Inner test struct
str_value string
int_value int
}
// test_common_check covers the inputs that atoi_common_check is expected to reject.
fn test_common_check() {
	// None of these strings is a valid number, whatever the target type is.
	rejected := [
		'', // Empty string
		'-', // Only sign
		'+', // Only sign
		'_', // Only Underscore
		'_10', // Start with underscore
		'+_10', // Start with underscore after sign.
		'-_16', // Start with underscore after sign.
		'123_', // End with underscore
	]
	for v in rejected {
		r := strconv.atoi(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string "${v}" should not succeed or be considered as valid ${r}).'
	}
}
// test_atoi_common checks what the atoi_common function accepts and rejects.
fn test_atoi_common() {
	// Small values that should be parsed successfully on all types.
	accepted := [
		StrInt{'1', 1},
		StrInt{'-1', -1},
		StrInt{'0', 0},
		StrInt{'+0', 0},
		StrInt{'-0', 0},
		StrInt{'-0_00', 0},
		StrInt{'+0_00', 0},
		StrInt{'+1', 1},
		StrInt{'+123', 123},
		StrInt{'-1_2_1', -121},
		StrInt{'00000006', 6},
		StrInt{'0_0_0_0_0_0_0_6', 6},
	]
	// Each string must be converted to the int value it is paired with.
	for tc in accepted {
		got := strconv.atoi(tc.str_value)!
		assert got == tc.int_value
	}

	// Parsing of these strings should fail on all types.
	rejected := [
		'-3__1', // Two consecutive underscores.
		'-3_1A', // Non radix 10 char.
		'A42', // Non radix 10 char.
	]
	for v in rejected {
		r := strconv.atoi(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
	}
}
// performs numeric (bounds) tests over int type.
fn test_atoi() {
ok := [
StrVal{'1', 1},
StrVal{'-1', -1},
StrVal{'0', 0},
StrVal{'+0', 0},
StrVal{'-0', 0},
StrVal{'-0_00', 0},
StrVal{'+0_00', 0},
StrVal{'+1', 1},
StrVal{'+1024', 1024},
StrVal{'+3_14159', 314159},
StrVal{'-1_00_1', -1001},
StrVal{'-1_024', -1024},
StrVal{'123_456_789', 123456789},
StrVal{'00000006', 6},
StrVal{'0_0_0_0_0_0_0_6', 6},
StrVal{'2147483647', 2147483647}, // Signed 32bits max.
StrVal{'-2147483648', -2147483648}, // Signed 32bits min.
StrInt{'1', 1},
StrInt{'-1', -1},
StrInt{'0', 0},
StrInt{'+3_14159', 314159},
StrInt{'-1_00_1', -1001},
StrInt{'-1_024', -1024},
StrInt{'123_456_789', 123456789},
StrInt{'00000006', 6},
StrInt{'0_0_0_0_0_0_0_6', 6},
StrInt{'2147483647', max_int},
StrInt{'-2147483648', min_int},
]
// Check that extracted int value matches its string.
@ -35,17 +95,6 @@ fn test_atoi() {
// Parsing of these values should fail !
ko := [
'', // Empty string
'-', // Only sign
'+', // Only sign
'_', // Only Underscore
'_10', // Start with underscore
'+_10', // Start with underscore after sign.
'-_16', // Start with underscore after sign.
'123_', // End with underscore
'-3__14159', // Two consecutives underscore.
'-3_14159A', // Non radix 10 char.
'A42', // Non radix 10 char.
'-2147483649', // 32bits underflow by 1.
'+2147483648', // 32 bit overflow by 1.
'+3147483648', // 32 bit overflow by a lot.
@ -64,6 +113,201 @@ fn test_atoi() {
}
}
// test_atoi8 performs numeric (bounds) tests over the i8 type.
fn test_atoi8() {
	// Pairs a string with the i8 value expected from its conversion.
	struct StrI8 {
		str_value string
		int_value i8
	}

	accepted := [
		StrI8{'0', 0}, // All kind of zeroes
		StrI8{'+0', 0},
		StrI8{'-0', 0},
		StrI8{'-0_00', 0},
		StrI8{'+0_00', 0},
		StrI8{'1', 1},
		StrI8{'+1', 1},
		StrI8{'-1', -1},
		StrI8{'+123', 123},
		StrI8{'-1_2_1', -121},
		StrI8{'0_0_0_0_0_0_0_6', 6},
		StrI8{'127', max_i8},
		StrI8{'-128', min_i8},
	]
	// Each string must be converted to the i8 value it is paired with.
	for tc in accepted {
		got := strconv.atoi8(tc.str_value)!
		assert got == tc.int_value
	}

	// Values outside of [-128 .. 127] must be rejected.
	rejected := [
		'-129', // i8 underflow by 1.
		'+128', // i8 overflow by 1.
		'+256', // i8 overflow with value equal to max u8 + 1.
		'+3147483648', // i8 overflow by a lot.
		'-4836470', // Large i8 underflow.
	]
	for v in rejected {
		r := strconv.atoi8(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
	}
}
// test_atoi16 performs numeric (bounds) tests over the i16 type.
fn test_atoi16() {
	// Pairs a string with the i16 value expected from its conversion.
	struct StrI16 {
		str_value string
		int_value i16
	}

	accepted := [
		StrI16{'0', 0}, // All kind of zeroes
		StrI16{'+0', 0},
		StrI16{'-0', 0},
		StrI16{'-0_00', 0},
		StrI16{'+0_00', 0},
		StrI16{'1', 1},
		StrI16{'+1', 1},
		StrI16{'-1', -1},
		StrI16{'+123', 123},
		StrI16{'-1_2_1', -121},
		StrI16{'0_0_0_0_0_0_0_6', 6},
		StrI16{'32767', max_i16},
		StrI16{'-32768', min_i16},
	]
	// Each string must be converted to the i16 value it is paired with.
	for tc in accepted {
		got := strconv.atoi16(tc.str_value)!
		assert got == tc.int_value
	}

	// Values outside of [-32768 .. 32767] must be rejected.
	rejected := [
		'-32769', // i16 underflow by 1.
		'+32768', // i16 overflow by 1.
		'+45_000', // i16 overflow by a lot.
		'65536', // i16 overflow with value equal to u16 max + 1.
		'-483_647_909', // Large i16 underflow.
	]
	for v in rejected {
		r := strconv.atoi16(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
	}
}
// test_atoi32 performs numeric (bounds) tests over the i32 type.
// It is redundant with the atoi test as long as int is 32 bits wide, but int COULD
// be mapped to another width (e.g. 64 bits) in the future, which is why this test exists.
fn test_atoi32() {
	// Pairs a string with the i32 value expected from its conversion.
	struct StrI32 {
		str_value string
		int_value i32
	}

	accepted := [
		StrI32{'0', 0}, // All kind of zeroes
		StrI32{'+0', 0},
		StrI32{'-0', 0},
		StrI32{'-0_00', 0},
		StrI32{'+0_00', 0},
		StrI32{'1', 1},
		StrI32{'+1', 1},
		StrI32{'-1', -1},
		StrI32{'+123', 123},
		StrI32{'-1_2_1', -121},
		StrI32{'0_0_0_0_0_0_0_6', 6},
		StrI32{'2147483647', max_i32},
		StrI32{'-2147483648', min_i32},
	]
	// Each string must be converted to the i32 value it is paired with.
	for tc in accepted {
		got := strconv.atoi32(tc.str_value)!
		assert got == tc.int_value
	}

	// Values outside of [-2147483648 .. 2147483647] must be rejected.
	rejected := [
		'-2147483649', // i32 underflow by 1.
		'+2147483648', // i32 overflow by 1.
		'+4294967295', // Large overflow, equal to u32 max.
		'-483_647_909_912_754', // Large i32 underflow.
	]
	for v in rejected {
		r := strconv.atoi32(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
	}
}
// test_atoi64 performs numeric (bounds) tests over the i64 type.
fn test_atoi64() {
	// Pairs a string with the i64 value expected from its conversion.
	struct StrI64 {
		str_value string
		int_value i64
	}

	accepted := [
		StrI64{'0', 0}, // All kind of zeroes
		StrI64{'+0', 0},
		StrI64{'-0', 0},
		StrI64{'-0_00', 0},
		StrI64{'+0_00', 0},
		StrI64{'1', 1},
		StrI64{'+1', 1},
		StrI64{'-1', -1},
		StrI64{'+123', 123},
		StrI64{'-1_2_1', -121},
		StrI64{'0_0_0_0_0_0_0_6', 6},
		StrI64{'9223372036854775807', max_i64},
		StrI64{'-9223372036854775808', min_i64},
	]
	// Each string must be converted to the i64 value it is paired with.
	for tc in accepted {
		got := strconv.atoi64(tc.str_value)!
		assert got == tc.int_value
	}

	// Values outside of the i64 range must be rejected.
	rejected := [
		'-9223372036854775809', // i64 underflow by 1.
		'+9223372036854775808', // i64 overflow by 1.
		'+18446744073709551615', // Large overflow, equal to u64 max.
		'-483647909912754123456789', // Large i64 underflow.
	]
	for v in rejected {
		r := strconv.atoi64(v) or {
			// Parsing failed, which is the expected outcome.
			continue
		}
		// Reaching this point means the conversion succeeded: force a failure.
		assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
	}
}
fn test_parse_int() {
// symbols coverage
assert strconv.parse_int('1234567890', 10, 32)! == 1234567890