Oracle: Support for quote delimited strings (#2130)

xitep · web-flow · commit f84887d00491 · 2025-12-16T18:04:11.000Z
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
@@ -110,7 +110,7 @@ pub use self::trigger::{
 
 pub use self::value::{
     escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
-    NormalizationForm, TrimWhereField, Value, ValueWithSpan,
+    NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
 };
 
 use crate::ast::helpers::key_value_options::KeyValueOptions;
diff --git a/src/ast/value.rs b/src/ast/value.rs
@@ -167,6 +167,12 @@ pub enum Value {
     TripleDoubleQuotedRawStringLiteral(String),
     /// N'string value'
     NationalStringLiteral(String),
+    /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
+    /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
+    QuoteDelimitedStringLiteral(QuoteDelimitedString),
+    /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
+    /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
+    NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
     /// X'hex value'
     HexStringLiteral(String),
 
@@ -207,6 +213,8 @@ impl Value {
             | Value::NationalStringLiteral(s)
             | Value::HexStringLiteral(s) => Some(s),
             Value::DollarQuotedString(s) => Some(s.value),
+            Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
+            Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
             _ => None,
         }
     }
@@ -242,6 +250,8 @@ impl fmt::Display for Value {
             Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
             Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
             Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
+            Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
+            Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
             Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
             Value::Boolean(v) => write!(f, "{v}"),
             Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
@@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
     }
 }
 
+/// A quote delimited string literal, e.g. `Q'_abc_'`.
+///
+/// See [Value::QuoteDelimitedStringLiteral] and/or
+/// [Value::NationalQuoteDelimitedStringLiteral].
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct QuoteDelimitedString {
+    /// the quote start character; i.e. the character _after_ the opening `Q'`
+    pub start_quote: char,
+    /// the string literal value itself
+    pub value: String,
+    /// the quote end character; i.e. the character _before_ the closing `'`
+    pub end_quote: char,
+}
+
+impl fmt::Display for QuoteDelimitedString {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote)
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs
@@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
     fn supports_interval_options(&self) -> bool {
         true
     }
+
+    fn supports_quote_delimited_string(&self) -> bool {
+        true
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
@@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {
     fn supports_semantic_view_table_factor(&self) -> bool {
         false
     }
+
+    /// Support quote delimited string literals, e.g. `Q'{...}'`
+    ///
+    /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
+    fn supports_quote_delimited_string(&self) -> bool {
+        false
+    }
 }
 
 /// This represents the operators for which precedence must be defined
diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs
@@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
     fn supports_group_by_expr(&self) -> bool {
         true
     }
+
+    fn supports_quote_delimited_string(&self) -> bool {
+        true
+    }
 }
diff --git a/src/parser/merge.rs b/src/parser/merge.rs
@@ -13,7 +13,7 @@
 //! SQL Parser for a `MERGE` statement
 
 #[cfg(not(feature = "std"))]
-use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
+use alloc::{boxed::Box, format, vec, vec::Vec};
 
 use crate::{
     ast::{
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
             | Token::TripleSingleQuotedRawStringLiteral(_)
             | Token::TripleDoubleQuotedRawStringLiteral(_)
             | Token::NationalStringLiteral(_)
+            | Token::QuoteDelimitedStringLiteral(_)
+            | Token::NationalQuoteDelimitedStringLiteral(_)
             | Token::HexStringLiteral(_) => {
                 self.prev_token();
                 Ok(Expr::Value(self.parse_value()?))
@@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
                     | Token::EscapedStringLiteral(_)
                     | Token::UnicodeStringLiteral(_)
                     | Token::NationalStringLiteral(_)
+                    | Token::QuoteDelimitedStringLiteral(_)
+                    | Token::NationalQuoteDelimitedStringLiteral(_)
                     | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
                     _ => self.expected(
                         "either filler, WITH, or WITHOUT in LISTAGG",
@@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
             Token::NationalStringLiteral(ref s) => {
                 ok_value(Value::NationalStringLiteral(s.to_string()))
             }
+            Token::QuoteDelimitedStringLiteral(v) => {
+                ok_value(Value::QuoteDelimitedStringLiteral(v))
+            }
+            Token::NationalQuoteDelimitedStringLiteral(v) => {
+                ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
+            }
             Token::EscapedStringLiteral(ref s) => {
                 ok_value(Value::EscapedStringLiteral(s.to_string()))
             }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
@@ -29,10 +29,10 @@ use alloc::{
     vec,
     vec::Vec,
 };
-use core::iter::Peekable;
 use core::num::NonZeroU8;
 use core::str::Chars;
 use core::{cmp, fmt};
+use core::{iter::Peekable, str};
 
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -46,7 +46,10 @@ use crate::dialect::{
     SnowflakeDialect,
 };
 use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
-use crate::{ast::DollarQuotedString, dialect::HiveDialect};
+use crate::{
+    ast::{DollarQuotedString, QuoteDelimitedString},
+    dialect::HiveDialect,
+};
 
 /// SQL Token enumeration
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -98,6 +101,12 @@ pub enum Token {
     TripleDoubleQuotedRawStringLiteral(String),
     /// "National" string literal: i.e: N'string'
     NationalStringLiteral(String),
+    /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
+    /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
+    QuoteDelimitedStringLiteral(QuoteDelimitedString),
+    /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
+    /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
+    NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
     /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
     EscapedStringLiteral(String),
     /// Unicode string literal: i.e: U&'first \000A second'
@@ -292,6 +301,8 @@ impl fmt::Display for Token {
             Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
             Token::DollarQuotedString(ref s) => write!(f, "{s}"),
             Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
+            Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
+            Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
             Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
             Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
             Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@@ -1032,13 +1043,35 @@ impl<'a> Tokenizer<'a> {
                                 self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
                             Ok(Some(Token::NationalStringLiteral(s)))
                         }
+                        Some(&q @ 'q') | Some(&q @ 'Q')
+                            if self.dialect.supports_quote_delimited_string() =>
+                        {
+                            chars.next(); // consume and check the next char
+                            if let Some('\'') = chars.peek() {
+                                self.tokenize_quote_delimited_string(chars, &[n, q])
+                                    .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
+                            } else {
+                                let s = self.tokenize_word(String::from_iter([n, q]), chars);
+                                Ok(Some(Token::make_word(&s, None)))
+                            }
+                        }
                         _ => {
                             // regular identifier starting with an "N"
                             let s = self.tokenize_word(n, chars);
                             Ok(Some(Token::make_word(&s, None)))
                         }
                     }
                 }
+                q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
+                    chars.next(); // consume and check the next char
+                    if let Some('\'') = chars.peek() {
+                        self.tokenize_quote_delimited_string(chars, &[q])
+                            .map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
+                    } else {
+                        let s = self.tokenize_word(q, chars);
+                        Ok(Some(Token::make_word(&s, None)))
+                    }
+                }
                 // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
                 x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
                     let starting_loc = chars.location();
@@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
         )
     }
 
+    /// Reads a quote delimited string expecting `chars.next()` to deliver a quote.
+    ///
+    /// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
+    fn tokenize_quote_delimited_string(
+        &self,
+        chars: &mut State,
+        // the prefix that introduced the possible literal or word,
+        // e.g. "Q" or "nq"
+        literal_prefix: &[char],
+    ) -> Result<QuoteDelimitedString, TokenizerError> {
+        let literal_start_loc = chars.location();
+        chars.next();
+
+        let start_quote_loc = chars.location();
+        let (start_quote, end_quote) = match chars.next() {
+            None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
+                return self.tokenizer_error(
+                    start_quote_loc,
+                    format!(
+                        "Invalid space, tab, newline, or EOF after '{}''",
+                        String::from_iter(literal_prefix)
+                    ),
+                );
+            }
+            Some(c) => (
+                c,
+                match c {
+                    '[' => ']',
+                    '{' => '}',
+                    '<' => '>',
+                    '(' => ')',
+                    c => c,
+                },
+            ),
+        };
+
+        // read the string literal until the "quote character" followed by a literal quote
+        let mut value = String::new();
+        while let Some(ch) = chars.next() {
+            if ch == end_quote {
+                if let Some('\'') = chars.peek() {
+                    chars.next(); // ~ consume the quote
+                    return Ok(QuoteDelimitedString {
+                        start_quote,
+                        value,
+                        end_quote,
+                    });
+                }
+            }
+            value.push(ch);
+        }
+
+        self.tokenizer_error(literal_start_loc, "Unterminated string literal")
+    }
+
     /// Read a quoted string.
     fn tokenize_quoted_string(
         &self,
diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs

Original file line number	Diff line number	Diff line change
`@@ -195,4 +195,8 @@ impl Dialect for GenericDialect {`
`195`	`195`	`fn supports_interval_options(&self) -> bool {`
`196`	`196`	`true`
`197`	`197`	`}`
	`198`	`+`
	`199`	`+ fn supports_quote_delimited_string(&self) -> bool {`
	`200`	`+ true`
	`201`	`+ }`
`198`	`202`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {`
`1209`	`1209`	`fn supports_semantic_view_table_factor(&self) -> bool {`
`1210`	`1210`	`false`
`1211`	`1211`	`}`
	`1212`	`+`
	`1213`	+ /// Support quote delimited string literals, e.g. `Q'{...}'`
	`1214`	`+ ///`
	`1215`	`+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)`
	`1216`	`+ fn supports_quote_delimited_string(&self) -> bool {`
	`1217`	`+ false`
	`1218`	`+ }`
`1212`	`1219`	`}`
`1213`	`1220`
`1214`	`1221`	`/// This represents the operators for which precedence must be defined`
Original file line number	Diff line number	Diff line change
`@@ -95,4 +95,8 @@ impl Dialect for OracleDialect {`
`95`	`95`	`fn supports_group_by_expr(&self) -> bool {`
`96`	`96`	`true`
`97`	`97`	`}`
	`98`	`+`
	`99`	`+ fn supports_quote_delimited_string(&self) -> bool {`
	`100`	`+ true`
	`101`	`+ }`
`98`	`102`	`}`