Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ struct and can be applied at once. When `serde-types` feature is enabled, config

### Misc Changes

- [#924]: (breaking change) Split `SyntaxError::UnclosedPIOrXmlDecl` into `UnclosedPI` and
`UnclosedXmlDecl` for more precise error reporting.
- [#924]: (breaking change) `Parser::eof_error` now takes `&self` and a `content: &[u8]` parameter.
- [#908]: Increase minimal supported `serde` version from 1.0.139 to 1.0.180.
- [#913]: Deprecate `.prefixes()`, `.resolve()`, `.resolve_attribute()`, and `.resolve_element()`
of `NsReader`. Use `.resolver().bindings()` and `.resolver().resolve()` methods instead.
Expand All @@ -37,6 +40,7 @@ struct and can be applied at once. When `serde-types` feature is enabled, config
[#846]: https://github.com/tafia/quick-xml/issues/846
[#908]: https://github.com/tafia/quick-xml/pull/908
[#913]: https://github.com/tafia/quick-xml/pull/913
[#924]: https://github.com/tafia/quick-xml/pull/924


## 0.38.4 -- 2025-11-11
Expand Down
14 changes: 10 additions & 4 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@ pub enum SyntaxError {
/// The parser started to parse `<!`, but the input ended before it could recognize
/// anything.
InvalidBangMarkup,
/// The parser started to parse processing instruction or XML declaration (`<?`),
/// The parser started to parse processing instruction (`<?`),
/// but the input ended before the `?>` sequence was found.
UnclosedPIOrXmlDecl,
UnclosedPI,
/// The parser started to parse XML declaration (`<?xml`),
/// but the input ended before the `?>` sequence was found.
UnclosedXmlDecl,
/// The parser started to parse comment (`<!--`) content, but the input ended
/// before the `-->` sequence was found.
UnclosedComment,
Expand All @@ -38,8 +41,11 @@ impl fmt::Display for SyntaxError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::InvalidBangMarkup => f.write_str("unknown or missed symbol in markup"),
Self::UnclosedPIOrXmlDecl => {
f.write_str("processing instruction or xml declaration not closed: `?>` not found before end of input")
Self::UnclosedPI => {
f.write_str("processing instruction not closed: `?>` not found before end of input")
}
Self::UnclosedXmlDecl => {
f.write_str("XML declaration not closed: `?>` not found before end of input")
}
Self::UnclosedComment => {
f.write_str("comment not closed: `-->` not found before end of input")
Expand Down
2 changes: 1 addition & 1 deletion src/parser/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl Parser for ElementParser {
}

#[inline]
fn eof_error() -> SyntaxError {
fn eof_error(&self, _content: &[u8]) -> SyntaxError {
SyntaxError::UnclosedTag
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,9 @@ pub trait Parser {

/// Returns parse error produced by this parser in case of reaching end of
/// input without finding the end of a parsed thing.
fn eof_error() -> SyntaxError;
///
/// # Parameters
/// - `content`: the content that was read before EOF. Some parsers may use
/// this to provide more specific error messages.
fn eof_error(&self, content: &[u8]) -> SyntaxError;
}
16 changes: 14 additions & 2 deletions src/parser/pi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use crate::errors::SyntaxError;
use crate::parser::Parser;
use crate::utils::is_whitespace;

/// A parser that searches for a `?>` sequence in the slice.
///
Expand Down Expand Up @@ -72,8 +73,19 @@ impl Parser for PiParser {
}

#[inline]
fn eof_error() -> SyntaxError {
SyntaxError::UnclosedPIOrXmlDecl
fn eof_error(&self, content: &[u8]) -> SyntaxError {
// Check if content starts with "?xml" followed by whitespace, '?' or end.
// This determines whether to report an unclosed XML declaration or PI.
// FIXME: Add support for UTF-8/ASCII incompatible encodings (UTF-16)
let is_xml_decl = content.starts_with(b"?xml")
&& content
.get(4)
.map_or(true, |&b| is_whitespace(b) || b == b'?');
if is_xml_decl {
SyntaxError::UnclosedXmlDecl
} else {
SyntaxError::UnclosedPI
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ macro_rules! impl_buffered_source {
}

*position += read;
Err(Error::Syntax(P::eof_error()))
Err(Error::Syntax(parser.eof_error(&buf[start..])))
}

#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
}

*position += self.len() as u64;
Err(Error::Syntax(P::eof_error()))
Err(Error::Syntax(parser.eof_error(self)))
}

#[inline]
Expand Down
15 changes: 12 additions & 3 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,20 @@ impl ReaderState {
)))
}
} else {
// <?....EOF
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
// <?....>
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
// so we move offset to it (-2 for `<` and `>`)
self.last_error_offset = self.offset - len as u64 - 2;
Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl))

// Check if this is an XML declaration (starts with "?xml" followed by whitespace, "?",
// or the end of the buffer)
// FIXME: Add support for UTF-8/ASCII incompatible encodings (UTF-16)
let is_xml_decl = buf.starts_with(b"?xml")
&& buf.get(4).map_or(true, |&b| is_whitespace(b) || b == b'?');
if is_xml_decl {
Err(Error::Syntax(SyntaxError::UnclosedXmlDecl))
} else {
Err(Error::Syntax(SyntaxError::UnclosedPI))
}
}
}

Expand Down
95 changes: 81 additions & 14 deletions tests/reader-errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,16 @@ mod syntax {
mod pi {
use super::*;

err!(unclosed01(".<?") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed02(".<??") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed03(".<?>") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed04(".<?<") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed05(".<?&") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed06(".<?p") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed07(".<? ") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed01(".<?") => SyntaxError::UnclosedPI);
err!(unclosed02(".<??") => SyntaxError::UnclosedPI);
err!(unclosed03(".<?>") => SyntaxError::UnclosedPI);
err!(unclosed04(".<?<") => SyntaxError::UnclosedPI);
err!(unclosed05(".<?&") => SyntaxError::UnclosedPI);
err!(unclosed06(".<?p") => SyntaxError::UnclosedPI);
err!(unclosed07(".<? ") => SyntaxError::UnclosedPI);
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPI);
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPI);
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPI);

// According to the grammar, processing instruction MUST contain a non-empty
// target name, but we do not consider this as a _syntax_ error.
Expand All @@ -453,10 +453,16 @@ mod syntax {
mod decl {
use super::*;

err!(unclosed1(".<?x") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed3(".<?xml") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedPIOrXmlDecl);
err!(unclosed1(".<?x") => SyntaxError::UnclosedPI);
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPI);
err!(unclosed3(".<?xml") => SyntaxError::UnclosedXmlDecl);
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedXmlDecl);
err!(unclosed5(".<?xml ") => SyntaxError::UnclosedXmlDecl);
err!(unclosed6(".<?xml\t") => SyntaxError::UnclosedXmlDecl);
err!(unclosed7(".<?xml\r") => SyntaxError::UnclosedXmlDecl);
err!(unclosed8(".<?xml\n") => SyntaxError::UnclosedXmlDecl);
// "xmls" is a PI target, not an XML declaration
err!(unclosed9(".<?xmls") => SyntaxError::UnclosedPI);

// According to the grammar, XML declaration MUST contain at least one space
// and `version` attribute, but we do not consider this as a _syntax_ error.
Expand All @@ -467,6 +473,67 @@ mod syntax {
ok!(normal5("<?xml\n?>") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3))));
ok!(normal6("<?xml\n?>rest") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3))));
}

/// Tests for UTF-16 encoded XML declarations.
/// FIXME: Add support for UTF-8/ASCII incompatible encodings (UTF-16)
mod decl_utf16 {
use super::*;
use pretty_assertions::assert_eq;

/// UTF-16 LE encoded `<?xml ` (with BOM)
/// BOM (FF FE) + '<' (3C 00) + '?' (3F 00) + 'x' (78 00) + 'm' (6D 00) + 'l' (6C 00) + ' ' (20 00)
const UTF16_LE_XML_DECL: &[u8] = &[
0xFF, 0xFE, // BOM
0x3C, 0x00, // <
0x3F, 0x00, // ?
0x78, 0x00, // x
0x6D, 0x00, // m
0x6C, 0x00, // l
0x20, 0x00, // space
];

/// UTF-16 BE encoded `<?xml ` (with BOM)
/// BOM (FE FF) + '<' (00 3C) + '?' (00 3F) + 'x' (00 78) + 'm' (00 6D) + 'l' (00 6C) + ' ' (00 20)
const UTF16_BE_XML_DECL: &[u8] = &[
0xFE, 0xFF, // BOM
0x00, 0x3C, // <
0x00, 0x3F, // ?
0x00, 0x78, // x
0x00, 0x6D, // m
0x00, 0x6C, // l
0x00, 0x20, // space
];

#[test]
#[ignore = "UTF-16 support not yet implemented for XML declaration detection"]
fn utf16_le_unclosed_xml_decl() {
let mut reader = Reader::from_reader(UTF16_LE_XML_DECL);
match reader.read_event() {
Err(Error::Syntax(cause)) => {
assert_eq!(cause, SyntaxError::UnclosedXmlDecl);
}
x => panic!(
"Expected `Err(Syntax(UnclosedXmlDecl))`, but got {:?}",
x
),
}
}

#[test]
#[ignore = "UTF-16 support not yet implemented for XML declaration detection"]
fn utf16_be_unclosed_xml_decl() {
let mut reader = Reader::from_reader(UTF16_BE_XML_DECL);
match reader.read_event() {
Err(Error::Syntax(cause)) => {
assert_eq!(cause, SyntaxError::UnclosedXmlDecl);
}
x => panic!(
"Expected `Err(Syntax(UnclosedXmlDecl))`, but got {:?}",
x
),
}
}
}
}

mod ill_formed {
Expand Down