v/vlib/x/json2/encoder.v

608 lines
16 KiB
V

// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module json2
import time
import math
import strconv
// Encoder encodes an `Any` type into its JSON representation.
// Its fields are parameters that change how the result is formatted.
pub struct Encoder {
pub:
// newline is the byte written by `encode_newline` before every element and before
// each closing bracket; when it is 0, no newline or indentation is emitted.
newline u8
// newline_spaces_count is the number of spaces written per nesting level after `newline`.
newline_spaces_count int
// escape_unicode selects the escaping mode of `encode_string`; when false, the bytes of
// the string are copied between the quotes verbatim, without any escaping.
escape_unicode bool = true
}
// Most common tokens/chars, declared once so that the encoder can append them
// without building temporary values.
const null_in_bytes = 'null'
const true_in_string = 'true'
const false_in_string = 'false'
const empty_array = [u8(`[`), `]`]!
const comma_rune = `,`
const colon_rune = `:`
const quote_rune = `"`
// Escaped two-byte forms of `\`, `"` and `/`.
const back_slash = [u8(`\\`), `\\`]!
const quote = [u8(`\\`), `"`]!
const slash = [u8(`\\`), `/`]!
// Template of a `\uXXXX` escape; `encode_string` overwrites the four trailing digits.
const null_unicode = [u8(`\\`), `u`, `0`, `0`, `0`, `0`]!
// JSON escape sequences for the bytes 0x00..0x1f, indexed by the byte value.
// Only 0x08, 0x09, 0x0a, 0x0c and 0x0d have a short escape; every other control
// byte must map to its own `\u00XX` form, otherwise distinct bytes would be
// encoded as the same character.
const ascii_control_characters = ['\\u0000', '\\u0001', '\\u0002', '\\u0003', '\\u0004', '\\u0005',
	'\\u0006', '\\u0007', '\\b', '\\t', '\\n', '\\u000b', '\\f', '\\r', '\\u000e', '\\u000f',
	'\\u0010', '\\u0011', '\\u0012', '\\u0013', '\\u0014', '\\u0015', '\\u0016', '\\u0017',
	'\\u0018', '\\u0019', '\\u001a', '\\u001b', '\\u001c', '\\u001d', '\\u001e', '\\u001f']!
const curly_open_rune = `{`
const curly_close_rune = `}`
// Printable ASCII bytes that `encode_string` escapes with a backslash.
const ascii_especial_characters = [u8(`\\`), `"`, `/`]!
// encode is a generic function that serializes `val` into a JSON string.
// Arrays are delegated to `encode_array`. For any other type the output buffer is
// pre-sized with the total computed by `Count.count_chars` and filled by a default `Encoder`.
// When encoding fails, the error is printed and the string 'null' is encoded into the
// buffer instead.
@[manualfree]
pub fn encode[T](val T) string {
	$if T is $array {
		return encode_array(val)
	} $else {
		mut counter := Count{0}
		counter.count_chars(val)
		mut out := []u8{cap: counter.total}
		defer {
			unsafe { out.free() }
		}
		enc := Encoder{}
		enc.encode_value(val, mut out) or {
			println(err)
			enc.encode_value[string]('null', mut out) or {}
		}
		// `bytestr` copies the bytes, so the deferred free above is safe
		result := out.bytestr()
		return result
	}
}
// encode_array is a generic function that serializes the array `val` into a JSON string.
// An empty array short-circuits to '[]'. When encoding fails, the error is printed and
// the string 'null' is encoded into the buffer instead.
@[manualfree]
fn encode_array[T](val []T) string {
	if val.len == 0 {
		return '[]'
	}
	mut out := []u8{}
	defer {
		unsafe { out.free() }
	}
	enc := Encoder{}
	enc.encode_array(val, 1, mut out) or {
		println(err)
		enc.encode_value[string]('null', mut out) or {}
	}
	// `bytestr` copies the bytes, so the deferred free above is safe
	result := out.bytestr()
	return result
}
// encode_pretty encodes `typed_data` to JSON, parses that text back with `raw_decode`
// and returns the result of `prettify_json_str` on the parsed value.
// If the intermediate text cannot be decoded, the value 0 is prettified instead.
pub fn encode_pretty[T](typed_data T) string {
	compact := encode(typed_data)
	parsed := raw_decode(compact) or { 0 }
	return parsed.prettify_json_str()
}
// encode_value encodes `val` and appends the result to `buf`.
// It starts `encode_value_with_level` at nesting level 1 and propagates its error.
pub fn (e &Encoder) encode_value[T](val T, mut buf []u8) ! {
	root_level := 1
	e.encode_value_with_level[T](val, root_level, mut buf)!
}
// encode_newline appends the configured `newline` byte followed by
// `level * newline_spaces_count` spaces. It writes nothing when `newline` is 0.
fn (e &Encoder) encode_newline(level int, mut buf []u8) ! {
	if e.newline == 0 {
		return
	}
	buf << e.newline
	indent_width := level * e.newline_spaces_count
	for _ in 0 .. indent_width {
		buf << ` `
	}
}
// encode_map writes the map `value` as a JSON object into `buf`.
// Every key goes through `encode_string`; every value is encoded one nesting level deeper.
// In pretty mode (`newline != 0`) a space follows each colon.
fn (e &Encoder) encode_map[T](value T, level int, mut buf []u8) ! {
	buf << curly_open_rune
	mut written := 0
	for k, v in value {
		// separate this entry from the one written before it
		if written > 0 {
			buf << comma_rune
		}
		e.encode_newline(level, mut buf)!
		e.encode_string(k, mut buf)!
		buf << colon_rune
		if e.newline != 0 {
			buf << ` `
		}
		// workaround to avoid `cannot convert 'struct x__json2__Any' to 'struct string'`
		$if v is $sumtype {
			$for variant_value in v.variants {
				if v is variant_value {
					e.encode_value_with_level(v, level + 1, mut buf)!
				}
			}
		} $else {
			e.encode_value_with_level(v, level + 1, mut buf)!
		}
		written++
	}
	e.encode_newline(level - 1, mut buf)!
	buf << curly_close_rune
}
// encode_value_with_level appends the JSON encoding of `val` to `buf`.
// The encoding strategy is selected at compile time from the type of `val`;
// the first matching branch below wins. `level` is the nesting depth that is
// forwarded to the map, array and struct encoders.
// Returns an error when the type matches none of the branches.
fn (e &Encoder) encode_value_with_level[T](val T, level int, mut buf []u8) ! {
$if val is $option {
// option: encode the payload; nothing is written when the value is `none`
workaround := val
if workaround != none {
e.encode_value_with_level(val, level, mut buf)!
}
} $else $if T is string {
e.encode_string(val, mut buf)!
} $else $if T is $sumtype {
// sum type: re-dispatch on the variant currently held by `val`
$for v in val.variants {
if val is v {
e.encode_value_with_level(val, level, mut buf)!
}
}
} $else $if T is $alias {
// TODO
// aliases are not handled yet: nothing is written to `buf`
} $else $if T is time.Time {
// time values are written as a quoted RFC 3339 string
str_value := val.format_rfc3339()
buf << quote_rune
unsafe { buf.push_many(str_value.str, str_value.len) }
buf << quote_rune
} $else $if T is $map {
e.encode_map(val, level, mut buf)!
} $else $if T is $array {
e.encode_array(val, level, mut buf)!
} $else $if T is Encodable {
// the value supplies its own JSON text, which is appended as-is
str_value := val.json_str()
unsafe { buf.push_many(str_value.str, str_value.len) }
} $else $if T is Null {
unsafe { buf.push_many(null_in_bytes.str, null_in_bytes.len) }
} $else $if T is $struct {
e.encode_struct(val, level, mut buf)!
} $else $if T is $enum {
// enums are written as their integer value
str_int := int(val).str()
unsafe { buf.push_many(str_int.str, str_int.len) }
} $else $if T is $int || T is bool {
// integers and booleans are written using their `str()` form
str_int := val.str()
unsafe { buf.push_many(str_int.str, str_int.len) }
} $else $if T is $float {
str_float := encode_number(val)
unsafe { buf.push_many(str_float.str, str_float.len) }
} $else {
return error('cannot encode value with ${typeof(val).name} type')
}
}
// encode_struct writes the struct `val` as a JSON object into `buf`.
// The fields are visited through compile-time reflection in two passes: the first one
// counts the fields that will be emitted, so that the second one knows when a comma is
// needed; the second one writes each `"name": value` pair.
// A field is left out when one of its attributes contains `skip`, when its `json: `
// attribute is `-`, or when it is an option that holds `none`. Any other `json: name`
// attribute replaces the field name that is used as the key.
// `level` is the nesting depth used for indentation; nested values are encoded with
// `level + 1`. Returns an error for field types that no branch can encode.
fn (e &Encoder) encode_struct[U](val U, level int, mut buf []u8) ! {
	buf << curly_open_rune
	mut i := 0
	mut fields_len := 0
	// first pass: count the fields that are going to be written
	$for field in U.fields {
		mut @continue := false
		for attr in field.attrs {
			if attr.contains('skip') {
				@continue = true
			}
			if attr.contains('json: ') {
				if attr.replace('json: ', '') == '-' {
					@continue = true
				}
				break
			}
		}
		if !@continue {
			$if field.is_option {
				if val.$(field.name) != none {
					fields_len++
				}
			} $else {
				fields_len++
			}
		}
	}
	// second pass: write the key and the value of every field that is kept
	$for field in U.fields {
		mut ignore_field := false
		value := val.$(field.name)
		// nil pointers are detected through their string form; their key, value and comma are omitted
		is_nil := val.$(field.name).str() == '&nil'
		mut json_name := ''
		for attr in field.attrs {
			if attr.contains('skip') {
				ignore_field = true
			}
			if attr.contains('json: ') {
				json_name = attr.replace('json: ', '')
				if json_name == '-' {
					ignore_field = true
				}
				break
			}
		}
		if !ignore_field {
			$if value is $option {
				workaround := val.$(field.name)
				if workaround != none { // smartcast
					e.encode_newline(level, mut buf)!
					if json_name != '' {
						e.encode_string(json_name, mut buf)!
					} else {
						e.encode_string(field.name, mut buf)!
					}
					buf << colon_rune
					if e.newline != 0 {
						buf << ` `
					}
					e.encode_value_with_level(value, level, mut buf)!
				} else {
					// `none` options were not counted in the first pass either
					ignore_field = true
				}
			} $else {
				is_none := val.$(field.name).str() == 'unknown sum type value' // assert json.encode(StructType[SumTypes]{}) == '{}'
				if !is_none && !is_nil {
					e.encode_newline(level, mut buf)!
					if json_name != '' {
						e.encode_string(json_name, mut buf)!
					} else {
						e.encode_string(field.name, mut buf)!
					}
					buf << colon_rune
					if e.newline != 0 {
						buf << ` `
					}
				}
				$if field.indirections != 0 {
					// pointer fields: dereference up to three levels before encoding
					if val.$(field.name) != unsafe { nil } {
						$if field.indirections == 1 {
							e.encode_value_with_level(*val.$(field.name), level + 1, mut buf)!
						}
						$if field.indirections == 2 {
							e.encode_value_with_level(**val.$(field.name), level + 1, mut buf)!
						}
						$if field.indirections == 3 {
							e.encode_value_with_level(***val.$(field.name), level + 1, mut buf)!
						}
					}
				} $else $if field.typ is string {
					e.encode_string(val.$(field.name).str(), mut buf)!
				} $else $if field.typ is time.Time {
					str_value := val.$(field.name).format_rfc3339()
					buf << quote_rune
					unsafe { buf.push_many(str_value.str, str_value.len) }
					buf << quote_rune
				} $else $if field.typ is bool {
					if value {
						unsafe { buf.push_many(true_in_string.str, true_in_string.len) }
					} else {
						unsafe { buf.push_many(false_in_string.str, false_in_string.len) }
					}
				} $else $if field.typ is $int {
					str_value := val.$(field.name).str()
					unsafe { buf.push_many(str_value.str, str_value.len) }
				} $else $if field.typ is $float {
					str_value := encode_number(val.$(field.name))
					unsafe { buf.push_many(str_value.str, str_value.len) }
				} $else $if field.is_array {
					// TODO: replace for `field.typ is $array`
					e.encode_array(value, level + 1, mut buf)!
				} $else $if field.typ is $array {
					// e.encode_array(value, level + 1, mut buf)! // FIXME: error: could not infer generic type `U` in call to `encode_array`
				} $else $if field.typ is $struct {
					e.encode_struct(value, level + 1, mut buf)!
				} $else $if field.is_map {
					e.encode_map(value, level + 1, mut buf)!
				} $else $if field.is_enum {
					// TODO: replace for `field.typ is $enum`
					// str_value := int(val.$(field.name)).str()
					// unsafe { buf.push_many(str_value.str, str_value.len) }
					e.encode_value_with_level(val.$(field.name), level + 1, mut buf)!
				} $else $if field.typ is $enum {
				} $else $if field.typ is $sumtype {
					field_value := val.$(field.name)
					if field_value.str() != 'unknown sum type value' {
						$for v in field_value.variants {
							if field_value is v {
								e.encode_value_with_level(field_value, level, mut buf)!
							}
						}
					}
				} $else $if field.typ is $alias {
					// aliases are encoded according to the type they stand for
					$if field.unaliased_typ is string {
						e.encode_string(val.$(field.name).str(), mut buf)!
					} $else $if field.unaliased_typ is time.Time {
						parsed_time := time.parse(val.$(field.name).str()) or { time.Time{} }
						e.encode_string(parsed_time.format_rfc3339(), mut buf)!
					} $else $if field.unaliased_typ is bool {
						if val.$(field.name) {
							unsafe { buf.push_many(true_in_string.str, true_in_string.len) }
						} else {
							unsafe { buf.push_many(false_in_string.str, false_in_string.len) }
						}
					} $else $if field.unaliased_typ is $int {
						str_value := val.$(field.name).str()
						unsafe { buf.push_many(str_value.str, str_value.len) }
					} $else $if field.unaliased_typ is $float {
						// encode the field itself, not the enclosing struct
						str_value := encode_number(val.$(field.name))
						unsafe { buf.push_many(str_value.str, str_value.len) }
					} $else $if field.unaliased_typ is $array {
						// TODO
					} $else $if field.unaliased_typ is $struct {
						e.encode_struct(value, level + 1, mut buf)!
					} $else $if field.unaliased_typ is $enum {
						// TODO
					} $else $if field.unaliased_typ is $sumtype {
						// TODO
					} $else {
						return error('the alias ${typeof(val).name} cannot be encoded')
					}
				} $else {
					return error('type ${typeof(val).name} cannot be array encoded')
				}
			}
		}
		// a comma follows every written field except the last counted one
		if i < fields_len - 1 && !ignore_field {
			if !is_nil {
				buf << comma_rune
			}
		}
		if !ignore_field {
			i++
		}
	}
	e.encode_newline(level - 1, mut buf)!
	buf << curly_close_rune
	// b.measure('encode_struct')
}
// encode_array writes the elements of `val` as a JSON array into `buf`.
// An empty array is written as `[]`. Elements are encoded one nesting level deeper;
// element types that no branch handles make the function return an error.
fn (e &Encoder) encode_array[U](val []U, level int, mut buf []u8) ! {
	if val.len == 0 {
		unsafe { buf.push_many(&empty_array[0], empty_array.len) }
		return
	}
	buf << `[`
	for pos in 0 .. val.len {
		// separate this element from the one written before it
		if pos > 0 {
			buf << comma_rune
		}
		e.encode_newline(level, mut buf)!
		$if U is string || U is bool || U is $int || U is $float {
			e.encode_value_with_level(val[pos], level + 1, mut buf)!
		} $else $if U is $array {
			e.encode_array(val[pos], level + 1, mut buf)!
		} $else $if U is $struct {
			e.encode_struct(val[pos], level + 1, mut buf)!
		} $else $if U is $sumtype {
			e.encode_value_with_level(val[pos], level + 1, mut buf)!
		} $else $if U is $enum {
			// TODO: test
			e.encode_value_with_level(val[pos], level + 1, mut buf)!
		} $else {
			return error('type ${typeof(val).name} cannot be array encoded')
		}
	}
	e.encode_newline(level - 1, mut buf)!
	buf << `]`
}
// str returns the JSON string representation of the `map[string]Any` type.
// The map is wrapped in an `Any` and encoded with `json_str`.
pub fn (f map[string]Any) str() string {
	wrapped := Any(f)
	return wrapped.json_str()
}
// str returns the JSON string representation of the `[]Any` type.
// The array is wrapped in an `Any` and encoded with `json_str`.
pub fn (f []Any) str() string {
	wrapped := Any(f)
	return wrapped.json_str()
}
// str returns the string representation of the `Any` type: a string variant is
// returned unchanged (unquoted and unescaped), every other variant is returned
// in its JSON form. Use the `json_str` method if you want the escaped version
// of a string as well.
pub fn (f Any) str() string {
	return match f {
		string { f }
		else { f.json_str() }
	}
}
// json_str returns the JSON string representation of the `Any` type,
// as produced by `encode`.
pub fn (f Any) json_str() string {
	return encode[Any](f)
}
// prettify_json_str returns the pretty-formatted JSON string representation of the `Any` type.
// It uses an `Encoder` with `\n` as newline and two spaces per nesting level.
// Encoding errors are ignored; whatever was written before the error is returned.
@[manualfree]
pub fn (f Any) prettify_json_str() string {
	pretty := Encoder{
		newline:              `\n`
		newline_spaces_count: 2
	}
	mut out := []u8{}
	defer {
		unsafe { out.free() }
	}
	pretty.encode_value(f, mut out) or {}
	return out.bytestr()
}
// TODO: Need refactor. Is so slow. The longer the string, the lower the performance.
// encode_string appends `s` to `buf` as a double-quoted JSON string.
// With `escape_unicode` disabled, the bytes of `s` are copied between the quotes unchanged.
// Otherwise `s` is scanned byte by byte: runs of bytes that need no rewriting are copied
// in bulk, bytes below 32 are replaced using `ascii_control_characters`, the bytes
// `\`, `"` and `/` get a backslash prefix, and 3-byte UTF-8 sequences are written as a
// `\uXXXX` escape. 2-byte and 4-byte UTF-8 sequences are copied through as they are.
@[direct_array_access]
fn (e &Encoder) encode_string(s string, mut buf []u8) ! {
if s == '' {
// fast path: an empty string is just two quotes
empty := [u8(quote_rune), quote_rune]!
unsafe { buf.push_many(&empty[0], 2) }
return
}
// start offset of the pending run of bytes that has not been copied to `buf` yet
mut last_no_buffer_expansible_char_position_candidate := 0
buf << quote_rune
if !e.escape_unicode {
// no escaping requested: copy the raw bytes and close the string
unsafe {
buf.push_many(s.str, s.len)
buf << quote_rune
}
return
}
for idx := 0; idx < s.len; idx++ {
current_byte := s[idx]
// yields a value from 1 to 4, taken from the high bits of the byte; it is used below
// as the length in bytes of the UTF-8 sequence that starts at `idx`
mut current_utf8_len := ((0xe5000000 >> ((current_byte >> 3) & 0x1e)) & 3) + 1
// true when the current byte/sequence is not copied as-is: single bytes below 32 or
// above 127, the three special ASCII bytes, and every 3-byte sequence
current_value_cause_buffer_expansion :=
(current_utf8_len == 1 && ((current_byte < 32 || current_byte > 127)
|| current_byte in ascii_especial_characters)) || current_utf8_len == 3
if !current_value_cause_buffer_expansion {
// while it is not the last one
if idx < s.len - 1 {
if s.len > (idx + current_utf8_len) {
if current_utf8_len == 2 || current_utf8_len == 4 {
// runes like: ã, ü, etc.
// Emojis ranges
// (0x1F300, 0x1F5FF), # Miscellaneous Symbols and Pictographs
// (0x1F600, 0x1F64F), # Emoticons
// (0x1F680, 0x1F6FF), # Transport and Map Symbols
// skip the continuation bytes; they stay part of the pending run
idx += current_utf8_len - 1
continue
}
} else {
// the sequence reaches the end of `s`: copy the rest of the pending run and stop
unsafe {
buf.push_many(s.str + last_no_buffer_expansible_char_position_candidate,
s.len - last_no_buffer_expansible_char_position_candidate)
}
break
}
} else if idx == s.len - 1 {
// last byte of `s`: copy the rest of the pending run
unsafe {
buf.push_many(s.str + last_no_buffer_expansible_char_position_candidate,
s.len - last_no_buffer_expansible_char_position_candidate)
}
}
} else {
// copy the pending run that precedes the byte which is about to be rewritten
if idx > 0 {
length := idx - last_no_buffer_expansible_char_position_candidate
unsafe {
buf.push_many(s.str + last_no_buffer_expansible_char_position_candidate,
length)
}
last_no_buffer_expansible_char_position_candidate = idx + 1
}
}
if current_utf8_len == 1 {
if current_byte < 32 {
// ASCII Control Characters
unsafe {
buf.push_many(ascii_control_characters[current_byte].str, ascii_control_characters[current_byte].len)
}
last_no_buffer_expansible_char_position_candidate = idx + 1
} else if current_byte >= 32 && current_byte < 128 {
// ASCII especial characters
if current_byte == `\\` {
unsafe { buf.push_many(&back_slash[0], back_slash.len) }
last_no_buffer_expansible_char_position_candidate = idx + 1
continue
} else if current_byte == `"` {
unsafe { buf.push_many(&quote[0], quote.len) }
last_no_buffer_expansible_char_position_candidate = idx + 1
continue
} else if current_byte == `/` {
unsafe { buf.push_many(&slash[0], slash.len) }
last_no_buffer_expansible_char_position_candidate = idx + 1
continue
}
}
// NOTE(review): a single byte above 127 reaches this point without anything being
// written for it; for idx > 0 the pending run was already moved past it above, so the
// byte appears to be dropped from the output - confirm this is intended
continue
} else if current_utf8_len == 3 {
// runes like: ✔, ひらがな ...
// Handle multi-byte characters byte-by-byte
// start with the payload bits of the lead byte, then append 6 bits per continuation byte
mut codepoint := u32(current_byte & ((1 << (7 - current_utf8_len)) - 1))
for j in 1 .. current_utf8_len {
// NOTE(review): the `continue` statements below continue this inner `for j` loop,
// while `idx++` advances the index of the outer loop - confirm this is the
// intended handling of malformed input
if idx + j >= s.len {
// Incomplete UTF-8 sequence, TODO handle error
idx++
continue
}
mut b := s[idx + j]
if (b & 0xC0) != 0x80 {
// Invalid continuation byte, TODO handle error
idx++
continue
}
codepoint = u32((codepoint << 6) | (b & 0x3F))
}
// runes like: ✔, ひらがな ...
// append the `\u0000` template, then overwrite its four digits with the hex code point
unsafe { buf.push_many(&null_unicode[0], null_unicode.len) }
buf[buf.len - 1] = hex_digit(codepoint & 0xF)
buf[buf.len - 2] = hex_digit((codepoint >> 4) & 0xF)
buf[buf.len - 3] = hex_digit((codepoint >> 8) & 0xF)
buf[buf.len - 4] = hex_digit((codepoint >> 12) & 0xF)
idx += current_utf8_len - 1
last_no_buffer_expansible_char_position_candidate = idx + 1
}
}
buf << quote_rune
}
// hex_digit returns the lowercase hexadecimal character for `n`:
// `0`..`9` for values below 10, and `a` onwards for values from 10 up.
fn hex_digit(n u32) u8 {
	if n >= 10 {
		return `a` + (n - 10)
	}
	return `0` + n
}
// encode_number returns the JSON text for `value`.
// NaN and +/- infinity have no JSON representation and are returned as 'null'.
// Whole numbers that fit into an `int` are written without a fractional part;
// every other value is formatted by `strconv.f64_to_str_l`.
fn encode_number(value f64) string {
	if math.is_nan(value) || math.is_inf(value, 0) {
		return 'null'
	}
	// Cast to `int` only when the value is inside the `int` range: the conversion of
	// an out-of-range float to an integer is undefined in the generated C code.
	if value >= -2147483648.0 && value <= 2147483647.0 && value == f64(int(value)) {
		return int(value).str()
	}
	// TODO:cjson Try 15 decimal places of precision to avoid nonsignificant nonzero digits
	// If not, print with 17 decimal places of precision
	// strconv.f64_to_str_l try max 18 digits instead.
	return strconv.f64_to_str_l(value)
}