module decoder2 import strconv import time // Node represents a node in a linked list to store ValueInfo. struct Node { value ValueInfo mut: next &Node = unsafe { nil } // next is the next node in the linked list. } // ValueInfo represents the position and length of a value, such as string, number, array, object key, and object value in a JSON string. struct ValueInfo { position int // The position of the value in the JSON string. value_kind ValueKind // The kind of the value. mut: length int // The length of the value in the JSON string. } // Decoder represents a JSON decoder. struct Decoder { json string // json is the JSON data to be decoded. mut: values_info LinkedList // A linked list to store ValueInfo. checker_idx int // checker_idx is the current index of the decoder. current_node &Node = unsafe { nil } // The current node in the linked list. } // LinkedList represents a linked list to store ValueInfo. struct LinkedList { mut: head &Node = unsafe { nil } // head is the first node in the linked list. tail &Node = unsafe { nil } // tail is the last node in the linked list. len int // len is the length of the linked list. } // push adds a new element to the linked list. fn (mut list LinkedList) push(value ValueInfo) { new_node := &Node{ value: value } if list.head == unsafe { nil } { list.head = new_node list.tail = new_node } else { list.tail.next = new_node list.tail = new_node } list.len++ } // last returns the last element added to the linked list. fn (list LinkedList) last() &ValueInfo { return &list.tail.value } // str returns a string representation of the linked list. fn (list LinkedList) str() string { mut result_buffer := []u8{} mut current := list.head for current != unsafe { nil } { value_kind_as_string := current.value.value_kind.str() unsafe { result_buffer.push_many(value_kind_as_string.str, value_kind_as_string.len) } result_buffer << u8(` `) current = current.next } return result_buffer.bytestr() } @[unsafe] fn (list &LinkedList) free() { mut current := list.head for current != unsafe { nil } { mut next := current.next current.next = unsafe { nil } unsafe { free(current) } current = next } list.head = unsafe { nil } list.tail = unsafe { nil } list.len = 0 } // ValueKind represents the kind of a JSON value. pub enum ValueKind { unknown array object string_ number boolean null } // check_if_json_match checks if the JSON string matches the expected type T. fn check_if_json_match[T](val string) ! { // check if the JSON string is empty if val == '' { return error('empty string') } // check if generic type matches the JSON type value_kind := get_value_kind(val[0]) $if T is $option { // TODO } $else $if T is $sumtype { // TODO } $else $if T is $alias { // TODO } $else $if T is $string { if value_kind != .string_ { return error('Expected string, but got ${value_kind}') } } $else $if T is time.Time { if value_kind != .string_ { return error('Expected string, but got ${value_kind}') } } $else $if T is $map { if value_kind != .object { return error('Expected object, but got ${value_kind}') } } $else $if T is $array { if value_kind != .array { return error('Expected array, but got ${value_kind}') } } $else $if T is $struct { if value_kind != .object { return error('Expected object, but got ${value_kind}') } } $else $if T in [$enum, $int, $float] { if value_kind != .number { return error('Expected number, but got ${value_kind}') } } $else $if T is bool { if value_kind != .boolean { return error('Expected boolean, but got ${value_kind}') } } $else { return error('cannot encode value with ${value_kind} type') } } // error generates an error message with context from the JSON string. fn (mut checker Decoder) error(message string) ! { json := if checker.json.len < checker.checker_idx + 5 { checker.json } else { checker.json[0..checker.checker_idx + 5] } mut error_message := '\n' last_new_line := json.last_index_u8(`\n`) if last_new_line != -1 { error_message += json[last_new_line..checker.checker_idx] } else { error_message += json[0..checker.checker_idx] } error_message += [json[checker.checker_idx]].bytestr() error_message += '\n' if last_new_line != -1 { error_message += ' '.repeat(checker.checker_idx - last_new_line) } else { error_message += ' '.repeat(checker.checker_idx) } error_message += '^ ${message}' return error(error_message) } // check_json_format checks if the JSON string is valid and updates the decoder state. fn (mut checker Decoder) check_json_format(val string) ! { checker_end := checker.json.len // check if the JSON string is empty if val == '' { return checker.error('empty string') } // check if generic type matches the JSON type value_kind := get_value_kind(val[checker.checker_idx]) start_idx_position := checker.checker_idx checker.values_info.push(ValueInfo{ position: start_idx_position value_kind: value_kind }) mut actual_value_info_pointer := checker.values_info.last() match value_kind { .unknown { return checker.error('unknown value kind') } .null { // check if the JSON string is a null value if checker_end - checker.checker_idx <= 3 { return checker.error('EOF error: expecting `null`') } is_not_ok := unsafe { vmemcmp(checker.json.str + checker.checker_idx, 'null'.str, 4) } if is_not_ok != 0 { return checker.error('invalid null value. Got `${checker.json[checker.checker_idx.. checker.checker_idx + 4]}` instead of `null`') } checker.checker_idx += 3 } .object { checker.checker_idx++ for val[checker.checker_idx] != `}` { // check if the JSON string is an empty object if checker_end - checker.checker_idx <= 2 { continue } // skip whitespace for val[checker.checker_idx] in [` `, `\t`, `\n`] { if checker.checker_idx >= checker_end - 1 { break } checker.checker_idx++ } if val[checker.checker_idx] == `}` { continue } if val[checker.checker_idx] != `"` { return checker.error('Expecting object key') } // Object key checker.check_json_format(val)! for val[checker.checker_idx] != `:` { if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: key colon not found') } if val[checker.checker_idx] !in [` `, `\t`, `\n`] { return checker.error('invalid value after object key') } checker.checker_idx++ } if val[checker.checker_idx] != `:` { return checker.error('Expecting `:` after object key') } // skip `:` checker.checker_idx++ // skip whitespace for val[checker.checker_idx] in [` `, `\t`, `\n`] { checker.checker_idx++ } match val[checker.checker_idx] { `"`, `[`, `{`, `0`...`9`, `-`, `n`, `t`, `f` { checker.check_json_format(val)! // whitespace for val[checker.checker_idx] in [` `, `\t`, `\n`] { checker.checker_idx++ } if val[checker.checker_idx] == `}` { break } if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: braces are not closed') } if val[checker.checker_idx] == `,` { checker.checker_idx++ for val[checker.checker_idx] in [` `, `\t`, `\n`] { checker.checker_idx++ } if val[checker.checker_idx] != `"` { return checker.error('Expecting object key') } } else { if val[checker.checker_idx] == `}` { break } else { return checker.error('invalid object value') } } } else { return checker.error('invalid object value') } } } } .array { // check if the JSON string is an empty array if checker_end >= checker.checker_idx + 2 { checker.checker_idx++ if val[checker.checker_idx] == `]` { return } } else { return checker.error('EOF error: There are not enough length for an array') } for val[checker.checker_idx] != `]` { // skip whitespace for val[checker.checker_idx] in [` `, `\t`, `\n`] { if checker.checker_idx >= checker_end - 1 { break } checker.checker_idx++ } if val[checker.checker_idx] == `]` { return } if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: array not closed') } checker.check_json_format(val)! // whitespace for val[checker.checker_idx] in [` `, `\t`, `\n`] { checker.checker_idx++ } if val[checker.checker_idx] == `]` { break } if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: braces are not closed') } if val[checker.checker_idx] == `,` { checker.checker_idx++ for val[checker.checker_idx] in [` `, `\t`, `\n`] { checker.checker_idx++ } if val[checker.checker_idx] == `]` { return checker.error('Cannot use `,`, before `]`') } continue } else { if val[checker.checker_idx] == `]` { break } else { return checker.error('`]` after value') } } } } .string_ { // check if the JSON string is a valid string if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: string not closed') } checker.checker_idx++ // check if the JSON string is a valid escape sequence for val[checker.checker_idx] != `"` && val[checker.checker_idx - 1] != `\\` { if val[checker.checker_idx] == `\\` { if checker.checker_idx + 1 >= checker_end - 1 { return checker.error('invalid escape sequence') } escaped_char := val[checker.checker_idx + 1] match escaped_char { `/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` {} `u` { // check if the JSON string is a valid unicode escape sequence escaped_char_last_index := checker.checker_idx + 5 if escaped_char_last_index < checker_end - 1 { // 2 bytes for the unicode escape sequence `\u` checker.checker_idx += 2 for checker.checker_idx < escaped_char_last_index { match val[checker.checker_idx] { `0`...`9`, `a`...`f`, `A`...`F` { checker.checker_idx++ } else { return checker.error('invalid unicode escape sequence') } } } // REVIEW: Should we increment the index here? continue } else { return checker.error('short unicode escape sequence ${checker.json[checker.checker_idx.. escaped_char_last_index + 1]}') } } else { return checker.error('unknown escape sequence') } } } checker.checker_idx++ } } .number { // check if the JSON string is a valid float or integer mut is_negative := val[0] == `-` mut has_dot := false mut digits_count := 1 if is_negative { checker.checker_idx++ } for checker.checker_idx < checker_end - 1 && val[checker.checker_idx + 1] !in [`,`, `}`, `]`, ` `, `\t`, `\n`] && checker.checker_idx < checker_end - 1 { if val[checker.checker_idx] == `.` { if has_dot { return checker.error('invalid float. Multiple dots') } has_dot = true checker.checker_idx++ continue } else if val[checker.checker_idx] == `-` { if is_negative { return checker.error('invalid float. Multiple negative signs') } checker.checker_idx++ continue } else { if val[checker.checker_idx] < `0` || val[checker.checker_idx] > `9` { return checker.error('invalid number') } } if digits_count >= 64 { return checker.error('number exceeds 64 digits') } digits_count++ checker.checker_idx++ } } .boolean { // check if the JSON string is a valid boolean match val[checker.checker_idx] { `t` { if checker_end - checker.checker_idx <= 3 { return checker.error('EOF error: expecting `true`') } is_not_ok := unsafe { vmemcmp(checker.json.str + checker.checker_idx, 'true'.str, 4) } if is_not_ok != 0 { return checker.error('invalid boolean value. Got `${checker.json[checker.checker_idx.. checker.checker_idx + 4]}` instead of `true`') } checker.checker_idx += 3 } `f` { if checker_end - checker.checker_idx <= 4 { return checker.error('EOF error: expecting `false`') } is_not_ok := unsafe { vmemcmp(checker.json.str + checker.checker_idx, 'false'.str, 5) } if is_not_ok != 0 { return checker.error('invalid boolean value. Got `${checker.json[checker.checker_idx.. checker.checker_idx + 5]}` instead of `false`') } checker.checker_idx += 4 } else { return checker.error('invalid boolean') } } } } actual_value_info_pointer.length = checker.checker_idx + 1 - start_idx_position if checker.checker_idx < checker_end - 1 { checker.checker_idx++ } for checker.checker_idx < checker_end - 1 && val[checker.checker_idx] !in [`,`, `:`, `}`, `]`] { // get trash characters after the value if val[checker.checker_idx] !in [` `, `\t`, `\n`] { checker.error('invalid value. Unexpected character after ${value_kind} end')! } else { // whitespace } checker.checker_idx++ } } // decode decodes a JSON string into a specified type. pub fn decode[T](val string) !T { mut decoder := Decoder{ json: val } decoder.check_json_format(val)! check_if_json_match[T](val)! mut result := T{} decoder.current_node = decoder.values_info.head decoder.decode_value(mut &result)! return result } // decode_value decodes a value from the JSON nodes. fn (mut decoder Decoder) decode_value[T](mut val T) ! { $if T is $option { mut unwrapped_val := create_value_from_optional(val.$(field.name)) decoder.decode_value(mut unwrapped_val)! val.$(field.name) = unwrapped_val } $else $if T.unaliased_typ is string { string_info := decoder.current_node.value if string_info.value_kind == .string_ { buffer_lenght, escape_positions := decoder.calculate_string_space_and_escapes()! string_buffer := []u8{cap: buffer_lenght} if escape_positions.len == 0 { if string_info.length != 0 { unsafe { string_buffer.push_many(decoder.json.str + string_info.position + 1, buffer_lenght) } } } else { for i := 0; i < escape_positions.len; i++ { escape_position := escape_positions[i] if i == 0 { // Pushes a substring from the JSON string into the string buffer. // The substring starts at the position of the value in the JSON string plus one, // and ends at the escape position minus one. // This is used to handle escaped characters within the JSON string. unsafe { string_buffer.push_many(decoder.json.str + string_info.position + 1, escape_position - string_info.position - 1) } } else { // Pushes a substring from the JSON string into the string buffer, starting after the previous escape position // and ending just before the current escape position. This handles the characters between escape sequences. unsafe { string_buffer.push_many(decoder.json.str + escape_positions[i - 1] + 6, escape_position - escape_positions[i - 1] - 6) } } unescaped_buffer := generate_unicode_escape_sequence(unsafe { (decoder.json.str + escape_positions[i] + 2).vbytes(4) })! unsafe { string_buffer.push_many(&unescaped_buffer[0], unescaped_buffer.len) } } end_of_last_escape_position := escape_positions[escape_positions.len - 1] + 6 unsafe { string_buffer.push_many(decoder.json.str + end_of_last_escape_position, string_info.length - end_of_last_escape_position - 1) } } val = string_buffer.bytestr() } } $else $if T.unaliased_typ is $sumtype { decoder.decode_sumtype(mut val)! return } $else $if T.unaliased_typ is time.Time { time_info := decoder.current_node.value if time_info.value_kind == .string_ { string_time := decoder.json.substr_unsafe(time_info.position + 1, time_info.position + time_info.length - 1) val = time.parse_rfc3339(string_time) or { time.Time{} } } } $else $if T.unaliased_typ is $map { decoder.decode_map(mut val)! return } $else $if T.unaliased_typ is $array { decoder.decode_array(mut val)! // return to avoid the next increment of the current node // this is because the current node is already incremented in the decode_array function // remove this line will cause the current node to be incremented twice // and bug recursive array decoding like `[][]int{}` return } $else $if T.unaliased_typ is $struct { struct_info := decoder.current_node.value if struct_info.value_kind == .object { struct_position := struct_info.position struct_end := struct_position + struct_info.length decoder.current_node = decoder.current_node.next for { if decoder.current_node == unsafe { nil } { break } key_info := decoder.current_node.value if key_info.position >= struct_end { break } decoder.current_node = decoder.current_node.next $for field in T.fields { if key_info.length - 2 == field.name.len { // This `vmemcmp` compares the name of a key in a JSON with a given struct field. if unsafe { vmemcmp(decoder.json.str + key_info.position + 1, field.name.str, field.name.len) == 0 } { $if field.typ is $option { mut unwrapped_val := create_value_from_optional(val.$(field.name)) decoder.decode_value(mut unwrapped_val)! val.$(field.name) = unwrapped_val } $else { decoder.decode_value(mut val.$(field.name))! } } } } } } return } $else $if T.unaliased_typ is bool { value_info := decoder.current_node.value unsafe { val = vmemcmp(decoder.json.str + value_info.position, 'true'.str, 4) == 0 } } $else $if T.unaliased_typ in [$float, $int, $enum] { value_info := decoder.current_node.value if value_info.value_kind == .number { bytes := unsafe { (decoder.json.str + value_info.position).vbytes(value_info.length) } unsafe { string_buffer_to_generic_number(val, bytes) } } } $else { return error('cannot encode value with ${typeof(val).name} type') } if decoder.current_node != unsafe { nil } { decoder.current_node = decoder.current_node.next } } fn (mut decoder Decoder) decode_array[T](mut val []T) ! { array_info := decoder.current_node.value if array_info.value_kind == .array { decoder.current_node = decoder.current_node.next array_position := array_info.position array_end := array_position + array_info.length for { if decoder.current_node == unsafe { nil } || decoder.current_node.value.position >= array_end { break } mut array_element := T{} decoder.decode_value(mut array_element)! val << array_element } } } fn (mut decoder Decoder) decode_map[K, V](mut val map[K]V) ! { map_info := decoder.current_node.value if map_info.value_kind == .object { map_position := map_info.position map_end := map_position + map_info.length decoder.current_node = decoder.current_node.next for { if decoder.current_node == unsafe { nil } || decoder.current_node.value.position >= map_end { break } key_info := decoder.current_node.value if key_info.position >= map_end { break } key := decoder.json[key_info.position + 1..key_info.position + key_info.length - 1] decoder.current_node = decoder.current_node.next value_info := decoder.current_node.value if value_info.position + value_info.length >= map_end { break } mut map_value := V{} $if V is $map { val[key] = map_value.move() } $else { val[key] = map_value } decoder.decode_value(mut val[key])! } } } // get_value_kind returns the kind of a JSON value. fn get_value_kind(value u8) ValueKind { if value == u8(`"`) { return .string_ } else if value == u8(`t`) || value == u8(`f`) { return .boolean } else if value == u8(`{`) { return .object } else if value == u8(`[`) { return .array } else if (value >= u8(48) && value <= u8(57)) || value == u8(`-`) { return .number } else if value == u8(`n`) { return .null } return .unknown } fn create_value_from_optional[T](val ?T) T { return T{} } fn utf8_byte_length(unicode_value u32) int { if unicode_value <= 0x7F { return 1 } else if unicode_value <= 0x7FF { return 2 } else if unicode_value <= 0xFFFF { return 3 } else { return 4 } } fn (mut decoder Decoder) calculate_string_space_and_escapes() !(int, []int) { value_info := decoder.current_node.value len := value_info.length if len < 2 || decoder.json[value_info.position] != `"` || decoder.json[value_info.position + len - 1] != `"` { return error('Invalid JSON string format') } mut space_required := 0 mut escape_positions := []int{} mut idx := 1 // Start after the opening quote for idx < len - 1 { current_byte := decoder.json[value_info.position + idx] if current_byte == `\\` { // Escape sequence, handle accordingly idx++ if idx >= len - 1 { return error('Invalid escape sequence at the end of string') } escaped_char := decoder.json[value_info.position + idx] match escaped_char { // All simple escapes take 1 byte of space `/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` { space_required++ } `u` { // Unicode escape sequence \uXXXX if idx + 4 >= len - 1 { return error('Invalid unicode escape sequence') } // Extract the hex value from the \uXXXX sequence hex_str := decoder.json[value_info.position + idx + 1..value_info.position + idx + 5] unicode_value := u32(strconv.parse_int(hex_str, 16, 32)!) // Determine the number of bytes needed for this Unicode character in UTF-8 space_required += utf8_byte_length(unicode_value) idx += 4 // Skip the next 4 hex digits // REVIEW: If the Unicode character is a surrogate pair, we need to skip the next \uXXXX sequence? // \\uXXXX is 6 bytes, so we need to skip 5 more bytes escape_positions << value_info.position + idx - 5 } else { return error('Unknown escape sequence') } } } else { // Regular character, just increment space required by 1 byte space_required++ } idx++ } return space_required, escape_positions } // \uXXXX to unicode with 4 hex digits fn generate_unicode_escape_sequence(escape_sequence_byte []u8) ![]u8 { if escape_sequence_byte.len != 4 { return error('Invalid unicode escape sequence') } unicode_value := u32(strconv.parse_int(escape_sequence_byte.bytestr(), 16, 32)!) mut utf8_bytes := []u8{cap: utf8_byte_length(unicode_value)} if unicode_value <= 0x7F { utf8_bytes << u8(unicode_value) } else if unicode_value <= 0x7FF { utf8_bytes << u8(0xC0 | (unicode_value >> 6)) utf8_bytes << u8(0x80 | (unicode_value & 0x3F)) } else if unicode_value <= 0xFFFF { utf8_bytes << u8(0xE0 | (unicode_value >> 12)) utf8_bytes << u8(0x80 | ((unicode_value >> 6) & 0x3F)) utf8_bytes << u8(0x80 | (unicode_value & 0x3F)) } else { utf8_bytes << u8(0xF0 | (unicode_value >> 18)) utf8_bytes << u8(0x80 | ((unicode_value >> 12) & 0x3F)) utf8_bytes << u8(0x80 | ((unicode_value >> 6) & 0x3F)) utf8_bytes << u8(0x80 | (unicode_value & 0x3F)) } return utf8_bytes } // string_buffer_to_generic_number converts a buffer of bytes (data) into a generic type T and // stores the result in the provided result pointer. // The function supports conversion to the following types: // - Signed integers: i8, i16, i32, i64 // - Unsigned integers: u8, u16, u32, u64 // - Floating-point numbers: f32, f64 // // For signed integers, the function handles negative numbers by checking for a '-' character. // For floating-point numbers, the function handles decimal points and adjusts the result // accordingly. // // If the type T is not supported, the function will panic with an appropriate error message. // // Parameters: // - data []u8: The buffer of bytes to be converted. // - result &T: A pointer to the variable where the converted result will be stored. // // NOTE: This aims works with not new memory allocated data, to more efficient use `vbytes` before @[direct_array_access; unsafe] pub fn string_buffer_to_generic_number[T](result &T, data []u8) { $if T.unaliased_typ is $int { mut is_negative := false for ch in data { if ch == `-` { is_negative = true continue } digit := T(ch - `0`) *result = T(*result * 10 + digit) } if is_negative { *result *= -1 } } $else $if T.unaliased_typ is $float { mut is_negative := false mut decimal_seen := false mut decimal_divider := T(1) for ch in data { if ch == `-` { is_negative = true continue } if ch == `.` { decimal_seen = true continue } digit := T(ch - u8(`0`)) if decimal_seen { decimal_divider *= 10 *result += T(digit / decimal_divider) } else { *result = T(*result * 10 + digit) } } if is_negative { *result *= -1 } } $else $if T.unaliased_typ is $enum { // Convert the string to an integer enumeration := 0 for ch in data { digit := int(ch - `0`) enumeration = enumeration * 10 + digit } *result = T(enumeration) } $else { panic('unsupported type ${typeof[T]().name}') } }