mirror of
https://github.com/vlang/v.git
synced 2025-09-22 11:57:33 -04:00
encoding.xml: update parser logic to skip BOM before prolog (#19858)
This commit is contained in:
parent
e0207b6830
commit
5f08d45c7c
@ -18,6 +18,9 @@ const (
|
||||
double_dash = '--'.bytes()
|
||||
c_tag = '[C'.bytes()
|
||||
data_chars = 'DATA'.bytes()
|
||||
|
||||
byte_order_marking_first = u8(0xEF)
|
||||
byte_order_marking_bytes = [u8(0xBB), 0xBF]
|
||||
)
|
||||
|
||||
// Helper types to assist in parsing
|
||||
@ -296,18 +299,30 @@ fn parse_doctype(mut reader io.Reader) !DocumentType {
|
||||
}
|
||||
|
||||
fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
|
||||
// Trim trailing whitespace
|
||||
// Skip trailing whitespace and invalid characters
|
||||
mut local_buf := [u8(0)]
|
||||
mut ch := next_char(mut reader, mut local_buf)!
|
||||
for {
|
||||
match ch {
|
||||
` `, `\t`, `\n` {
|
||||
` `, `\t`, `\r`, `\n` {
|
||||
ch = next_char(mut reader, mut local_buf)!
|
||||
continue
|
||||
}
|
||||
`<` {
|
||||
break
|
||||
}
|
||||
xml.byte_order_marking_first {
|
||||
// UTF-8 BOM
|
||||
mut bom_buf := [u8(0), 0]
|
||||
if reader.read(mut bom_buf)! != 2 {
|
||||
return error('Invalid UTF-8 BOM.')
|
||||
}
|
||||
if bom_buf != xml.byte_order_marking_bytes {
|
||||
return error('Invalid UTF-8 BOM.')
|
||||
}
|
||||
ch = next_char(mut reader, mut local_buf)!
|
||||
continue
|
||||
}
|
||||
else {
|
||||
return error('Expecting a prolog or root node starting with "<".')
|
||||
}
|
||||
|
17
vlib/encoding/xml/test/local/20_bom_file/bom_test.v
Normal file
17
vlib/encoding/xml/test/local/20_bom_file/bom_test.v
Normal file
@ -0,0 +1,17 @@
|
||||
module main
|
||||
|
||||
import os
|
||||
import encoding.xml
|
||||
|
||||
// test_valid_parsing loads the `workbook.bin` fixture that sits next to this
// test file and checks that the parsed document exposes exactly one `sheet`
// element.
fn test_valid_parsing() {
	// The fixture is stored with a .bin extension so that the BOM at the start
	// of the XML content is not stripped from the file.
	fixture_dir := os.dir(@FILE)
	fixture_path := os.join_path(fixture_dir, 'workbook.bin')

	// A parse failure is reported through the assertion message; exit(1)
	// terminates the `or` block, which must not fall through.
	parsed := xml.XMLDocument.from_file(fixture_path) or {
		assert false, 'Failed to parse workbook.bin'
		exit(1)
	}

	// The lookup is by tag name across the parsed document.
	sheets := parsed.get_elements_by_tag('sheet')
	assert sheets.len == 1, 'Expected 1 sheet, got ${sheets.len}'
}
|
17
vlib/encoding/xml/test/local/20_bom_file/workbook.bin
Normal file
17
vlib/encoding/xml/test/local/20_bom_file/workbook.bin
Normal file
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
|
||||
<fileVersion appName="xl" lastEdited="6" lowestEdited="6" rupBuild="14420"/>
|
||||
<workbookPr defaultThemeVersion="164011"/>
|
||||
<bookViews>
|
||||
<workbookView xWindow="0" yWindow="0" windowWidth="22260" windowHeight="12645"/>
|
||||
</bookViews>
|
||||
<sheets>
|
||||
<sheet name="Sheet1" sheetId="1" r:id="rId1"/>
|
||||
</sheets>
|
||||
<calcPr calcId="162913"/>
|
||||
<extLst>
|
||||
<ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
|
||||
<x15:workbookPr chartTrackingRefBase="1"/>
|
||||
</ext>
|
||||
</extLst>
|
||||
</workbook>
|
Loading…
x
Reference in New Issue
Block a user