mirror of
https://github.com/vlang/v.git
synced 2025-09-23 04:18:48 -04:00
encoding.xml: update parser logic to skip BOM before prolog (#19858)
This commit is contained in:
parent
e0207b6830
commit
5f08d45c7c
@ -18,6 +18,9 @@ const (
|
|||||||
double_dash = '--'.bytes()
|
double_dash = '--'.bytes()
|
||||||
c_tag = '[C'.bytes()
|
c_tag = '[C'.bytes()
|
||||||
data_chars = 'DATA'.bytes()
|
data_chars = 'DATA'.bytes()
|
||||||
|
|
||||||
|
byte_order_marking_first = u8(0xEF)
|
||||||
|
byte_order_marking_bytes = [u8(0xBB), 0xBF]
|
||||||
)
|
)
|
||||||
|
|
||||||
// Helper types to assist in parsing
|
// Helper types to assist in parsing
|
||||||
@ -296,18 +299,30 @@ fn parse_doctype(mut reader io.Reader) !DocumentType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
|
fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
|
||||||
// Trim trailing whitespace
|
// Skip trailing whitespace and invalid characters
|
||||||
mut local_buf := [u8(0)]
|
mut local_buf := [u8(0)]
|
||||||
mut ch := next_char(mut reader, mut local_buf)!
|
mut ch := next_char(mut reader, mut local_buf)!
|
||||||
for {
|
for {
|
||||||
match ch {
|
match ch {
|
||||||
` `, `\t`, `\n` {
|
` `, `\t`, `\r`, `\n` {
|
||||||
ch = next_char(mut reader, mut local_buf)!
|
ch = next_char(mut reader, mut local_buf)!
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
`<` {
|
`<` {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
xml.byte_order_marking_first {
|
||||||
|
// UTF-8 BOM
|
||||||
|
mut bom_buf := [u8(0), 0]
|
||||||
|
if reader.read(mut bom_buf)! != 2 {
|
||||||
|
return error('Invalid UTF-8 BOM.')
|
||||||
|
}
|
||||||
|
if bom_buf != xml.byte_order_marking_bytes {
|
||||||
|
return error('Invalid UTF-8 BOM.')
|
||||||
|
}
|
||||||
|
ch = next_char(mut reader, mut local_buf)!
|
||||||
|
continue
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
return error('Expecting a prolog or root node starting with "<".')
|
return error('Expecting a prolog or root node starting with "<".')
|
||||||
}
|
}
|
||||||
|
17
vlib/encoding/xml/test/local/20_bom_file/bom_test.v
Normal file
17
vlib/encoding/xml/test/local/20_bom_file/bom_test.v
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
module main
|
||||||
|
|
||||||
|
import os
|
||||||
|
import encoding.xml
|
||||||
|
|
||||||
|
// test_valid_parsing loads the `workbook.bin` fixture that sits next to this
// test file and checks that the parsed document exposes exactly one `sheet`
// element.
fn test_valid_parsing() {
	// The fixture is stored with a .bin extension (rather than .xml) to avoid
	// the BOM being stripped from the XML file.
	fixture_dir := os.dir(@FILE)
	fixture_path := os.join_path(fixture_dir, 'workbook.bin')

	document := xml.XMLDocument.from_file(fixture_path) or {
		// Fail the test explicitly, then terminate so the `or` block never falls through.
		assert false, 'Failed to parse workbook.bin'
		exit(1)
	}

	sheet_count := document.get_elements_by_tag('sheet').len
	assert sheet_count == 1, 'Expected 1 sheet, got ${sheet_count}'
}
|
17
vlib/encoding/xml/test/local/20_bom_file/workbook.bin
Normal file
17
vlib/encoding/xml/test/local/20_bom_file/workbook.bin
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
|
||||||
|
<fileVersion appName="xl" lastEdited="6" lowestEdited="6" rupBuild="14420"/>
|
||||||
|
<workbookPr defaultThemeVersion="164011"/>
|
||||||
|
<bookViews>
|
||||||
|
<workbookView xWindow="0" yWindow="0" windowWidth="22260" windowHeight="12645"/>
|
||||||
|
</bookViews>
|
||||||
|
<sheets>
|
||||||
|
<sheet name="Sheet1" sheetId="1" r:id="rId1"/>
|
||||||
|
</sheets>
|
||||||
|
<calcPr calcId="162913"/>
|
||||||
|
<extLst>
|
||||||
|
<ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
|
||||||
|
<x15:workbookPr chartTrackingRefBase="1"/>
|
||||||
|
</ext>
|
||||||
|
</extLst>
|
||||||
|
</workbook>
|
Loading…
x
Reference in New Issue
Block a user