encoding.xml: make tag name and attribute parsing more robust and cleaner (#19828)

This commit is contained in:
Subhomoy Haldar 2023-11-10 09:31:36 +00:00 committed by GitHub
parent e59c194457
commit 00f2e4a8c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 15 deletions

View File

@@ -527,20 +527,12 @@ fn parse_children(name string, attributes map[string]string, mut reader io.Reade
} }
fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode { fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode {
mut local_buf := [u8(0)]
mut ch := next_char(mut reader, mut local_buf)!
mut contents := strings.new_builder(xml.default_string_builder_cap) mut contents := strings.new_builder(xml.default_string_builder_cap)
// We're expecting an opening tag contents.write_u8(first_char)
if ch == `/` {
return error('XML node cannot start with "</".')
}
if ch != `>` { mut local_buf := [u8(0)]
contents.write_u8(ch) for {
} mut ch := next_char(mut reader, mut local_buf)!
for ch != `>` {
ch = next_char(mut reader, mut local_buf)!
if ch == `>` { if ch == `>` {
break break
} }
@@ -550,7 +542,7 @@ fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode {
tag_contents := contents.str().trim_space() tag_contents := contents.str().trim_space()
parts := tag_contents.split_any(' \t\n') parts := tag_contents.split_any(' \t\n')
name := if parts.len > 0 { first_char.ascii_str() + parts[0] } else { first_char.ascii_str() } name := parts[0]
// Check if it is a self-closing tag // Check if it is a self-closing tag
if tag_contents.ends_with('/') { if tag_contents.ends_with('/') {
@@ -561,7 +553,7 @@ fn parse_single_node(first_char u8, mut reader io.Reader) !XMLNode {
} }
} }
attribute_string := tag_contents[name.len - 1..].trim_space() attribute_string := tag_contents[name.len..].trim_space()
attributes := parse_attributes(attribute_string)! attributes := parse_attributes(attribute_string)!
return parse_children(name, attributes, mut reader) return parse_children(name, attributes, mut reader)

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst count="5" uniqueCount="5"> <sst count="5" uniqueCount="5">
<si> <si>
<t>Item 1</t> <t a="1">Item 1</t>
</si> </si>
<si> <si>
<t>Item 2</t> <t>Item 2</t>

View File

@@ -19,6 +19,9 @@ fn test_valid_parsing() {
children: [ children: [
xml.XMLNode{ xml.XMLNode{
name: 't' name: 't'
attributes: {
'a': '1'
}
children: ['Item 1'] children: ['Item 1']
}, },
] ]