// v/vlib/strconv/atoi.v

module strconv

// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
// TODO: use options, or some way to return default with error.
// int_size is the size in bits of an int or uint value.
// It is the bit size the parsing functions in this file fall back to when
// they are called with a bit size of 0.
// int_size = 32 << (~u32(0) >> 63)
// max_u64 = u64(u64(1 << 63) - 1)
// NOTE(review): max_u64 is not defined in this file and is used below as the
// largest u64 value, which the formula above does not describe - confirm.
const int_size = 32

// byte_to_lower sets bit 5 (0x20) of `c`, which maps the ASCII letters
// `A`..`Z` to `a`..`z` and leaves `a`..`z` and the digits unchanged.
// No range check is done, so other bytes that have bit 5 clear are changed too.
@[inline]
pub fn byte_to_lower(c u8) u8 {
	return c | 0x20
}

// common_parse_uint parses `s` with common_parse_uint2 and turns a non-zero
// error code into a V error.
// An error is only returned when at least one of `error_on_non_digit` or
// `error_on_high_digit` is true; when both are false the value produced by
// common_parse_uint2 is returned even if parsing stopped early.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, code := common_parse_uint2(s, _base, _bit_size)
	// TODO: error_on_non_digit and error_on_high_digit have no difference
	report_errors := error_on_non_digit || error_on_high_digit
	if code == 0 || !report_errors {
		return value
	}
	if code == -1 {
		return error('common_parse_uint: wrong base ${_base} for ${s}')
	}
	if code == -2 {
		return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
	}
	if code == -3 {
		return error('common_parse_uint: integer overflow ${s}')
	}
	// every remaining (positive) code marks the position of a bad character
	return error('common_parse_uint: syntax error ${s}')
}

// common_parse_uint2 parses `s` as an unsigned integer and reports problems
// through an integer code instead of a V error.
//
// `_base` must be 0 or in the range 2..36. With 0 the base is taken from the
// start of `s`: `0b` selects 2, `0o` selects 8 and `0x` selects 16 (the prefix
// letter is case insensitive and needs at least one following character),
// a lone leading `0` selects 8, and everything else is parsed as base 10.
// `_bit_size` must be in 0..64; 0 means `int_size` bits.
// A single `_` is accepted between two digits and skipped.
//
// the first returned value contains the parsed value,
// the second returned value contains the error code:
//   0  = OK
//   >0 = index of first non-parseable character + 1 (the value parsed so far
//        is returned); an empty string or a misplaced `_` returns 0 and code 1
//   -1 = wrong base
//   -2 = wrong bit size
//   -3 = overflow (the largest value that fits in the bit size is returned)
@[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s == '' {
		return u64(0), 1
	}

	// Only base 0 (auto detect) and 2..36 can be expressed with the digits
	// 0-9 and a-z. Without this check base 1 and negative bases produced bogus
	// overflow/syntax codes, and bases above 36 accepted non-alphanumeric
	// characters as digits.
	if _base != 0 && (_base < 2 || _base > 36) {
		return u64(0), -1
	}

	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0

	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			// `| 32` folds an upper case prefix letter to lower case
			ch := if s.len > 1 { s[1] | 32 } else { `0` }
			if s.len >= 3 {
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}

				// check for underscore after the base prefix
				if s[start_index] == `_` {
					start_index++
					// a prefix followed only by `_` (e.g. `0x_`) has no digits;
					// reject it like any other trailing underscore
					if start_index >= s.len {
						return u64(0), 1
					}
				}
			}
			// manage leading zeros in decimal base's numbers
			// otherwise it is an octal for C compatibility
			// TODO: Check if this behaviour is logically right
			else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				base = 10
				start_index++
			} else {
				base = 8
				start_index++
			}
		}
	}

	if bit_size == 0 {
		bit_size = int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}
	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	cutoff := max_u64 / u64(base) + u64(1)
	// largest value representable in bit_size bits
	max_val := if bit_size == 64 { max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	basem1 := base - 1

	mut n := u64(0)
	for i in start_index .. s.len {
		mut c := s[i]

		// manage underscore inside the number
		if c == `_` {
			// an underscore may not be the first or the last character
			if i == start_index || i >= (s.len - 1) {
				return u64(0), 1
			}
			// two underscores in a row are not allowed
			if s[i - 1] == `_` || s[i + 1] == `_` {
				return u64(0), 1
			}

			continue
		}

		// counts how many letter ranges the character was mapped through
		mut sub_count := 0

		// get the 0-9 digit
		c -= 48 // subtract the rune `0`

		// check if we are in the superior base rune interval [A..Z]
		if c >= 17 { // (65 - 48)
			sub_count++
			c -= 7 // subtract the `A` - `0` rune to obtain the value of the digit

			// check if we are in the superior base rune interval [a..z]
			if c >= 42 { // (97 - 7 - 48)
				sub_count++
				c -= 32 // subtract the `a` - `0` rune to obtain the value of the digit
			}
		}

		// check for digit over base; the second test rejects the characters
		// between `9` and `A`, which would otherwise look like digits 10..16
		if c > basem1 || (sub_count == 0 && c > 9) {
			return n, i + 1
		}

		// check if we are in the cutoff zone
		if n >= cutoff {
			// n*base overflows
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(c)
		if n1 < n || n1 > max_val {
			// n+v overflows u64 or does not fit in bit_size bits
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}

// parse_uint parses `s` as an unsigned integer in base `_base` that has to
// fit in `_bit_size` bits.
// It calls common_parse_uint with both error flags enabled, so every failure
// reported by the underlying parser is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	value := common_parse_uint(s, _base, _bit_size, true, true)!
	return value
}

// common_parse_int parses `_s` as a signed integer: an optional leading `+`
// or `-` is stripped and the rest is handed to common_parse_uint together
// with the two error flags.
// An empty string yields 0 without an error. A `_bit_size` of 0 means
// `int_size` bits. A magnitude accepted by common_parse_uint that does not fit
// in the signed range of `_bit_size` bits is clamped to the nearest limit
// instead of raising an error.
@[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s == '' {
		return i64(0)
	}
	bits := if _bit_size == 0 { int_size } else { _bit_size }

	// Pick off leading sign.
	first := _s[0]
	neg := first == `-`
	mut digits := _s
	if neg || first == `+` {
		// same as _s[1..], built directly on the original buffer
		digits = unsafe { tos(_s.str + 1, _s.len - 1) }
	}

	// Convert unsigned and check range.
	magnitude := common_parse_uint(digits, base, bits, error_on_non_digit, error_on_high_digit)!
	if magnitude == 0 {
		return i64(0)
	}
	// TODO: check should u64(bit_size-1) be size of int (32)?
	// limit is 2^(bits-1): the magnitude of the smallest negative value
	limit := u64(1) << u64(bits - 1)
	if neg {
		if magnitude > limit {
			return -i64(limit)
		}
		return -i64(magnitude)
	}
	if magnitude >= limit {
		return i64(limit - u64(1))
	}
	return i64(magnitude)
}

// parse_int interprets the string `_s` in the given `base` (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value.
//
// If `base` is 0, the true base is implied by the string's prefix:
// 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
// Underscore characters are permitted as digit separators, following the
// Go syntax for integer literals.
//
// `_bit_size` specifies the integer type that the result must fit into.
// Bit sizes 0, 8, 16, 32, and 64 correspond to int, i8, i16, i32, and i64.
// If `_bit_size` is below 0 or above 64, an error is returned.
// An empty string returns 0 without an error, and a magnitude that fits in
// `_bit_size` unsigned bits but not in the signed range is clamped to the
// nearest signed limit (see common_parse_int).
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	parsed := common_parse_int(_s, base, _bit_size, true, true)!
	return parsed
}

// atoi_common_check performs the basic checks shared by the atoi functions
// on the string to parse: it must not be empty, an optional leading `+` or
// `-` must be followed by at least one character, and the number may neither
// start nor end with an underscore.
// It returns the sign (+1 or -1) and the index of the first character after
// the sign, or an error.
@[direct_array_access]
fn atoi_common_check(s string) !(i64, int) {
	if s.len == 0 {
		return error('strconv.atoi: parsing "": empty string')
	}

	first := s[0]
	sign := if first == `-` { i64(-1) } else { i64(1) }
	start_idx := if first == `-` || first == `+` { 1 } else { 0 }

	if start_idx >= s.len {
		return error('strconv.atoi: parsing "${s}": no number after sign')
	}

	if s[start_idx] == `_` || s[s.len - 1] == `_` {
		return error('strconv.atoi: parsing "${s}": values cannot start or end with underscores')
	}
	return sign, start_idx
}

// atoi_common does the parsing for the i8, i16 and i32 sized atoi functions
// (atoi64 has its own loop) and produces the same error messages for all of
// them.
// `s` is the radix 10 string to parse; `type_min` and `type_max` are the
// smallest and largest values the target type can hold.
// Single underscores between digits are skipped; two in a row are an error.
@[direct_array_access]
fn atoi_common(s string, type_min i64, type_max i64) !i64 {
	sign, first_digit := atoi_common_check(s)!
	mut acc := i64(0)
	mut prev_was_underscore := false
	for pos in first_digit .. s.len {
		ch := s[pos]
		if ch == `_` {
			if prev_was_underscore {
				return error('strconv.atoi: parsing "${s}": consecutives underscores are not allowed')
			}
			prev_was_underscore = true
			continue
		}
		// bytes below `0` wrap around to large values, so one test covers both sides
		digit := ch - `0`
		if digit > 9 {
			return error('strconv.atoi: parsing "${s}": invalid radix 10 character')
		}
		prev_was_underscore = false
		// the sign is applied per digit, so negative numbers accumulate downwards
		acc = (acc * 10) + (digit * sign)
		if sign == 1 && acc > type_max {
			return error('strconv.atoi: parsing "${s}": integer overflow')
		}
		if acc < type_min {
			return error('strconv.atoi: parsing "${s}": integer underflow')
		}
	}
	return acc
}

// atoi converts a radix 10 string to an int.
// It accepts an optional leading sign and single underscores between digits,
// and returns an error for an empty string, an invalid character or a value
// outside of [-2147483648 .. 2147483647].
// It follows V scanner as much as observed.
pub fn atoi(s string) !int {
	value := atoi_common(s, i64_min_int32, i64_max_int32)!
	return int(value)
}

// atoi8 works like atoi(s), with the result range of type i8.
// returns an i8 [-128 .. 127] or an error.
pub fn atoi8(s string) !i8 {
	value := atoi_common(s, min_i8, max_i8)!
	return i8(value)
}

// atoi16 works like atoi(s), with the result range of type i16.
// returns an i16 [-32768 .. 32767] or an error.
pub fn atoi16(s string) !i16 {
	value := atoi_common(s, min_i16, max_i16)!
	return i16(value)
}

// atoi32 works like atoi(s), with the result range of type i32.
// returns an i32 [-2147483648 .. 2147483647] or an error.
pub fn atoi32(s string) !i32 {
	value := atoi_common(s, min_i32, max_i32)!
	return i32(value)
}

// atoi64 converts a radix 10 string to the i64 type.
// It accepts an optional leading sign and single underscores between digits.
// returns an i64 [-9223372036854775808 .. 9223372036854775807] or an error.
@[direct_array_access]
pub fn atoi64(s string) !i64 {
	sign, first_digit := atoi_common_check(s)!
	mut acc := i64(0)
	mut prev_was_underscore := false
	for pos in first_digit .. s.len {
		ch := s[pos]
		if ch == `_` {
			if prev_was_underscore {
				return error('strconv.atoi64: parsing "${s}": consecutives underscores are not allowed')
			}
			prev_was_underscore = true
			continue
		}
		// bytes below `0` wrap around to large values, so one test covers both sides
		digit := ch - `0`
		if digit > 9 {
			return error('strconv.atoi64: parsing "${s}": invalid radix 10 character')
		}
		prev_was_underscore = false
		// both steps are range checked, because i64 has no wider type to spill into
		scaled := safe_mul10_64bits(acc) or {
			return error('strconv.atoi64: parsing "${s}": ${err}')
		}
		acc = safe_add_64bits(scaled, int(digit * sign)) or {
			return error('strconv.atoi64: parsing "${s}": ${err}')
		}
	}
	return acc
}

// safe_add_64bits performs a signed 64 bits addition and returns an error
// instead of wrapping around: 'integer overflow' when `a + b` would exceed
// max_i64, 'integer underflow' when it would fall below min_i64.
@[inline]
fn safe_add_64bits(a i64, b i64) !i64 {
	if a > 0 {
		// max_i64 - a cannot overflow here, because a is positive
		if b > max_i64 - a {
			return error('integer overflow')
		}
	} else if a < 0 {
		// min_i64 - a cannot overflow here, because a is negative
		if b < min_i64 - a {
			return error('integer underflow')
		}
	}
	return a + b
}

// safe_mul10_64bits multiplies the signed 64 bits value `a` by 10 and
// returns an error instead of wrapping around: 'integer overflow' when the
// product would exceed max_i64, 'integer underflow' when it would fall below
// min_i64.
@[inline]
fn safe_mul10_64bits(a i64) !i64 {
	// max_i64 / 10 is positive, so only positive values can pass this test
	if a > max_i64 / 10 {
		return error('integer overflow')
	}
	// min_i64 / 10 is negative, so only negative values can pass this test
	if a < min_i64 / 10 {
		return error('integer underflow')
	}
	return a * 10
}

// i64_min_int32 and i64_max_int32 are the limits that atoi passes to
// atoi_common: -2147483648 and 2147483647, stored as i64 values.
const i64_min_int32 = i64(-2147483647) - 1 // msvc has a bug that treats just i64(min_int) as 2147483648 :-(; this is a workaround for it
// written as `literal + 1`, mirroring the `literal - 1` form of i64_min_int32 above
const i64_max_int32 = i64(2147483646) + 1