Skip to content

Commit cf62324

Browse files
Split SyntaxError::UnclosedPIOrXmlDecl into separate enum Variants
1 parent f1903a1 commit cf62324

File tree

9 files changed

+129
-27
lines changed

9 files changed

+129
-27
lines changed

Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ struct and can be applied at once. When `serde-types` feature is enabled, config
2929

3030
### Misc Changes
3131

32+
- [#924]: (breaking change) Split `SyntaxError::UnclosedPIOrXmlDecl` into `UnclosedPI` and
33+
`UnclosedXmlDecl` for more precise error reporting.
34+
- [#924]: (breaking change) `Parser::eof_error` now takes `&self` and a `content: &[u8]` parameter.
3235
- [#908]: Increase minimal supported `serde` version from 1.0.139 to 1.0.180.
3336
- [#913]: Deprecate `.prefixes()`, `.resolve()`, `.resolve_attribute()`, and `.resolve_element()`
3437
of `NsReader`. Use `.resolver().bindings()` and `.resolver().resolve()` methods instead.
@@ -37,6 +40,7 @@ struct and can be applied at once. When `serde-types` feature is enabled, config
3740
[#846]: https://github.com/tafia/quick-xml/issues/846
3841
[#908]: https://github.com/tafia/quick-xml/pull/908
3942
[#913]: https://github.com/tafia/quick-xml/pull/913
43+
[#924]: https://github.com/tafia/quick-xml/pull/924
4044

4145

4246
## 0.38.4 -- 2025-11-11

src/errors.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ pub enum SyntaxError {
1717
/// The parser started to parse `<!`, but the input ended before it can recognize
1818
/// anything.
1919
InvalidBangMarkup,
20-
/// The parser started to parse processing instruction or XML declaration (`<?`),
20+
/// The parser started to parse a processing instruction (`<?`),
2121
/// but the input ended before the `?>` sequence was found.
22-
UnclosedPIOrXmlDecl,
22+
UnclosedPI,
23+
/// The parser started to parse an XML declaration (`<?xml`),
24+
/// but the input ended before the `?>` sequence was found.
25+
UnclosedXmlDecl,
2326
/// The parser started to parse comment (`<!--`) content, but the input ended
2427
/// before the `-->` sequence was found.
2528
UnclosedComment,
@@ -38,8 +41,11 @@ impl fmt::Display for SyntaxError {
3841
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3942
match self {
4043
Self::InvalidBangMarkup => f.write_str("unknown or missed symbol in markup"),
41-
Self::UnclosedPIOrXmlDecl => {
42-
f.write_str("processing instruction or xml declaration not closed: `?>` not found before end of input")
44+
Self::UnclosedPI => {
45+
f.write_str("processing instruction not closed: `?>` not found before end of input")
46+
}
47+
Self::UnclosedXmlDecl => {
48+
f.write_str("XML declaration not closed: `?>` not found before end of input")
4349
}
4450
Self::UnclosedComment => {
4551
f.write_str("comment not closed: `-->` not found before end of input")

src/parser/element.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ impl Parser for ElementParser {
7373
}
7474

7575
#[inline]
76-
fn eof_error() -> SyntaxError {
76+
fn eof_error(&self, _content: &[u8]) -> SyntaxError {
7777
SyntaxError::UnclosedTag
7878
}
7979
}

src/parser/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,9 @@ pub trait Parser {
2525

2626
/// Returns the syntax error that this parser reports when the end of the
2727
/// input is reached before the end of the construct being parsed is found.
28-
fn eof_error() -> SyntaxError;
28+
///
29+
/// # Parameters
30+
/// - `content`: the content that was read before EOF. Some parsers may use
31+
/// this to provide more specific error messages.
32+
fn eof_error(&self, content: &[u8]) -> SyntaxError;
2933
}

src/parser/pi.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use crate::errors::SyntaxError;
44
use crate::parser::Parser;
5+
use crate::utils::is_whitespace;
56

67
/// A parser that searches for a `?>` sequence in the slice.
78
///
@@ -72,8 +73,19 @@ impl Parser for PiParser {
7273
}
7374

7475
#[inline]
75-
fn eof_error() -> SyntaxError {
76-
SyntaxError::UnclosedPIOrXmlDecl
76+
fn eof_error(&self, content: &[u8]) -> SyntaxError {
77+
// Check if content starts with "?xml" followed by whitespace, '?', or the end of the content.
78+
// This determines whether to report an unclosed XML declaration or PI.
79+
// FIXME: Add support for encodings that are not ASCII-compatible (e.g. UTF-16)
80+
let is_xml_decl = content.starts_with(b"?xml")
81+
&& content
82+
.get(4)
83+
.map_or(true, |&b| is_whitespace(b) || b == b'?');
84+
if is_xml_decl {
85+
SyntaxError::UnclosedXmlDecl
86+
} else {
87+
SyntaxError::UnclosedPI
88+
}
7789
}
7890
}
7991

src/reader/buffered_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ macro_rules! impl_buffered_source {
234234
}
235235

236236
*position += read;
237-
Err(Error::Syntax(P::eof_error()))
237+
Err(Error::Syntax(parser.eof_error(&buf[start..])))
238238
}
239239

240240
#[inline]

src/reader/slice_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
350350
}
351351

352352
*position += self.len() as u64;
353-
Err(Error::Syntax(P::eof_error()))
353+
Err(Error::Syntax(parser.eof_error(self)))
354354
}
355355

356356
#[inline]

src/reader/state.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,20 @@ impl ReaderState {
270270
)))
271271
}
272272
} else {
273-
// <?....EOF
274-
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
273+
// <?....>
274+
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
275275
// so we move offset to it (-2 for `<` and `>`)
276276
self.last_error_offset = self.offset - len as u64 - 2;
277-
Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl))
277+
278+
// Check if this is an XML declaration (starts with "?xml" followed by whitespace, "?", or the end of `buf`)
279+
// FIXME: Add support for encodings that are not ASCII-compatible (e.g. UTF-16)
280+
let is_xml_decl = buf.starts_with(b"?xml")
281+
&& buf.get(4).map_or(true, |&b| is_whitespace(b) || b == b'?');
282+
if is_xml_decl {
283+
Err(Error::Syntax(SyntaxError::UnclosedXmlDecl))
284+
} else {
285+
Err(Error::Syntax(SyntaxError::UnclosedPI))
286+
}
278287
}
279288
}
280289

tests/reader-errors.rs

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -430,16 +430,16 @@ mod syntax {
430430
mod pi {
431431
use super::*;
432432

433-
err!(unclosed01(".<?") => SyntaxError::UnclosedPIOrXmlDecl);
434-
err!(unclosed02(".<??") => SyntaxError::UnclosedPIOrXmlDecl);
435-
err!(unclosed03(".<?>") => SyntaxError::UnclosedPIOrXmlDecl);
436-
err!(unclosed04(".<?<") => SyntaxError::UnclosedPIOrXmlDecl);
437-
err!(unclosed05(".<?&") => SyntaxError::UnclosedPIOrXmlDecl);
438-
err!(unclosed06(".<?p") => SyntaxError::UnclosedPIOrXmlDecl);
439-
err!(unclosed07(".<? ") => SyntaxError::UnclosedPIOrXmlDecl);
440-
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPIOrXmlDecl);
441-
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPIOrXmlDecl);
442-
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPIOrXmlDecl);
433+
err!(unclosed01(".<?") => SyntaxError::UnclosedPI);
434+
err!(unclosed02(".<??") => SyntaxError::UnclosedPI);
435+
err!(unclosed03(".<?>") => SyntaxError::UnclosedPI);
436+
err!(unclosed04(".<?<") => SyntaxError::UnclosedPI);
437+
err!(unclosed05(".<?&") => SyntaxError::UnclosedPI);
438+
err!(unclosed06(".<?p") => SyntaxError::UnclosedPI);
439+
err!(unclosed07(".<? ") => SyntaxError::UnclosedPI);
440+
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPI);
441+
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPI);
442+
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPI);
443443

444444
// According to the grammar, processing instruction MUST contain a non-empty
445445
// target name, but we do not consider this as a _syntax_ error.
@@ -453,10 +453,16 @@ mod syntax {
453453
mod decl {
454454
use super::*;
455455

456-
err!(unclosed1(".<?x") => SyntaxError::UnclosedPIOrXmlDecl);
457-
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPIOrXmlDecl);
458-
err!(unclosed3(".<?xml") => SyntaxError::UnclosedPIOrXmlDecl);
459-
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedPIOrXmlDecl);
456+
err!(unclosed1(".<?x") => SyntaxError::UnclosedPI);
457+
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPI);
458+
err!(unclosed3(".<?xml") => SyntaxError::UnclosedXmlDecl);
459+
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedXmlDecl);
460+
err!(unclosed5(".<?xml ") => SyntaxError::UnclosedXmlDecl);
461+
err!(unclosed6(".<?xml\t") => SyntaxError::UnclosedXmlDecl);
462+
err!(unclosed7(".<?xml\r") => SyntaxError::UnclosedXmlDecl);
463+
err!(unclosed8(".<?xml\n") => SyntaxError::UnclosedXmlDecl);
464+
// "xmls" is a PI target, not an XML declaration
465+
err!(unclosed9(".<?xmls") => SyntaxError::UnclosedPI);
460466

461467
// According to the grammar, XML declaration MUST contain at least one space
462468
// and `version` attribute, but we do not consider this as a _syntax_ error.
@@ -467,6 +473,67 @@ mod syntax {
467473
ok!(normal5("<?xml\n?>") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3))));
468474
ok!(normal6("<?xml\n?>rest") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3))));
469475
}
476+
477+
/// Tests for UTF-16 encoded XML declarations.
478+
/// FIXME: Add support for encodings that are not ASCII-compatible (e.g. UTF-16)
479+
mod decl_utf16 {
480+
use super::*;
481+
use pretty_assertions::assert_eq;
482+
483+
/// UTF-16 LE encoded `<?xml ` (with BOM)
484+
/// BOM (FF FE) + '<' (3C 00) + '?' (3F 00) + 'x' (78 00) + 'm' (6D 00) + 'l' (6C 00) + ' ' (20 00)
485+
const UTF16_LE_XML_DECL: &[u8] = &[
486+
0xFF, 0xFE, // BOM
487+
0x3C, 0x00, // <
488+
0x3F, 0x00, // ?
489+
0x78, 0x00, // x
490+
0x6D, 0x00, // m
491+
0x6C, 0x00, // l
492+
0x20, 0x00, // space
493+
];
494+
495+
/// UTF-16 BE encoded `<?xml ` (with BOM)
496+
/// BOM (FE FF) + '<' (00 3C) + '?' (00 3F) + 'x' (00 78) + 'm' (00 6D) + 'l' (00 6C) + ' ' (00 20)
497+
const UTF16_BE_XML_DECL: &[u8] = &[
498+
0xFE, 0xFF, // BOM
499+
0x00, 0x3C, // <
500+
0x00, 0x3F, // ?
501+
0x00, 0x78, // x
502+
0x00, 0x6D, // m
503+
0x00, 0x6C, // l
504+
0x00, 0x20, // space
505+
];
506+
507+
#[test]
508+
#[ignore = "UTF-16 support not yet implemented for XML declaration detection"]
509+
fn utf16_le_unclosed_xml_decl() {
510+
let mut reader = Reader::from_reader(UTF16_LE_XML_DECL);
511+
match reader.read_event() {
512+
Err(Error::Syntax(cause)) => {
513+
assert_eq!(cause, SyntaxError::UnclosedXmlDecl);
514+
}
515+
x => panic!(
516+
"Expected `Err(Syntax(UnclosedXmlDecl))`, but got {:?}",
517+
x
518+
),
519+
}
520+
}
521+
522+
#[test]
523+
#[ignore = "UTF-16 support not yet implemented for XML declaration detection"]
524+
fn utf16_be_unclosed_xml_decl() {
525+
let mut reader = Reader::from_reader(UTF16_BE_XML_DECL);
526+
match reader.read_event() {
527+
Err(Error::Syntax(cause)) => {
528+
assert_eq!(cause, SyntaxError::UnclosedXmlDecl);
529+
}
530+
x => panic!(
531+
"Expected `Err(Syntax(UnclosedXmlDecl))`, but got {:?}",
532+
x
533+
),
534+
}
535+
}
536+
}
470537
}
471538

472539
mod ill_formed {

0 commit comments

Comments
 (0)