Skip to content

Commit f84887d

Browse files
authored
Oracle: Support for quote delimited strings (#2130)
1 parent cdeed32 commit f84887d

File tree

9 files changed

+381
-6
lines changed

9 files changed

+381
-6
lines changed

src/ast/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ pub use self::trigger::{
110110

111111
pub use self::value::{
112112
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
113-
NormalizationForm, TrimWhereField, Value, ValueWithSpan,
113+
NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
114114
};
115115

116116
use crate::ast::helpers::key_value_options::KeyValueOptions;

src/ast/value.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ pub enum Value {
167167
TripleDoubleQuotedRawStringLiteral(String),
168168
/// N'string value'
169169
NationalStringLiteral(String),
170+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
171+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
172+
QuoteDelimitedStringLiteral(QuoteDelimitedString),
173+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
174+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
175+
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
170176
/// X'hex value'
171177
HexStringLiteral(String),
172178

@@ -207,6 +213,8 @@ impl Value {
207213
| Value::NationalStringLiteral(s)
208214
| Value::HexStringLiteral(s) => Some(s),
209215
Value::DollarQuotedString(s) => Some(s.value),
216+
Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
217+
Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
210218
_ => None,
211219
}
212220
}
@@ -242,6 +250,8 @@ impl fmt::Display for Value {
242250
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
243251
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
244252
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
253+
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
254+
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
245255
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
246256
Value::Boolean(v) => write!(f, "{v}"),
247257
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
@@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
279289
}
280290
}
281291

292+
/// A quote delimited string literal, e.g. `Q'_abc_'`.
293+
///
294+
/// See [Value::QuoteDelimitedStringLiteral] and/or
295+
/// [Value::NationalQuoteDelimitedStringLiteral].
296+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
297+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
298+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
299+
pub struct QuoteDelimitedString {
300+
/// the quote start character; i.e. the character _after_ the opening `Q'`
301+
pub start_quote: char,
302+
/// the string literal value itself
303+
pub value: String,
304+
/// the quote end character; i.e. the character _before_ the closing `'`
305+
pub end_quote: char,
306+
}
307+
308+
impl fmt::Display for QuoteDelimitedString {
309+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310+
write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote)
311+
}
312+
}
313+
282314
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
283315
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
284316
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
195195
fn supports_interval_options(&self) -> bool {
196196
true
197197
}
198+
199+
fn supports_quote_delimited_string(&self) -> bool {
200+
true
201+
}
198202
}

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {
12091209
fn supports_semantic_view_table_factor(&self) -> bool {
12101210
false
12111211
}
1212+
1213+
/// Support quote delimited string literals, e.g. `Q'{...}'`
1214+
///
1215+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
1216+
fn supports_quote_delimited_string(&self) -> bool {
1217+
false
1218+
}
12121219
}
12131220

12141221
/// This represents the operators for which precedence must be defined

src/dialect/oracle.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
9595
fn supports_group_by_expr(&self) -> bool {
9696
true
9797
}
98+
99+
fn supports_quote_delimited_string(&self) -> bool {
100+
true
101+
}
98102
}

src/parser/merge.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! SQL Parser for a `MERGE` statement
1414
1515
#[cfg(not(feature = "std"))]
16-
use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
16+
use alloc::{boxed::Box, format, vec, vec::Vec};
1717

1818
use crate::{
1919
ast::{

src/parser/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
17541754
| Token::TripleSingleQuotedRawStringLiteral(_)
17551755
| Token::TripleDoubleQuotedRawStringLiteral(_)
17561756
| Token::NationalStringLiteral(_)
1757+
| Token::QuoteDelimitedStringLiteral(_)
1758+
| Token::NationalQuoteDelimitedStringLiteral(_)
17571759
| Token::HexStringLiteral(_) => {
17581760
self.prev_token();
17591761
Ok(Expr::Value(self.parse_value()?))
@@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
27702772
| Token::EscapedStringLiteral(_)
27712773
| Token::UnicodeStringLiteral(_)
27722774
| Token::NationalStringLiteral(_)
2775+
| Token::QuoteDelimitedStringLiteral(_)
2776+
| Token::NationalQuoteDelimitedStringLiteral(_)
27732777
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
27742778
_ => self.expected(
27752779
"either filler, WITH, or WITHOUT in LISTAGG",
@@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
1069710701
Token::NationalStringLiteral(ref s) => {
1069810702
ok_value(Value::NationalStringLiteral(s.to_string()))
1069910703
}
10704+
Token::QuoteDelimitedStringLiteral(v) => {
10705+
ok_value(Value::QuoteDelimitedStringLiteral(v))
10706+
}
10707+
Token::NationalQuoteDelimitedStringLiteral(v) => {
10708+
ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
10709+
}
1070010710
Token::EscapedStringLiteral(ref s) => {
1070110711
ok_value(Value::EscapedStringLiteral(s.to_string()))
1070210712
}

src/tokenizer.rs

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ use alloc::{
2929
vec,
3030
vec::Vec,
3131
};
32-
use core::iter::Peekable;
3332
use core::num::NonZeroU8;
3433
use core::str::Chars;
3534
use core::{cmp, fmt};
35+
use core::{iter::Peekable, str};
3636

3737
#[cfg(feature = "serde")]
3838
use serde::{Deserialize, Serialize};
@@ -46,7 +46,10 @@ use crate::dialect::{
4646
SnowflakeDialect,
4747
};
4848
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
49-
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
49+
use crate::{
50+
ast::{DollarQuotedString, QuoteDelimitedString},
51+
dialect::HiveDialect,
52+
};
5053

5154
/// SQL Token enumeration
5255
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -98,6 +101,12 @@ pub enum Token {
98101
TripleDoubleQuotedRawStringLiteral(String),
99102
/// "National" string literal: i.e: N'string'
100103
NationalStringLiteral(String),
104+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
105+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
106+
QuoteDelimitedStringLiteral(QuoteDelimitedString),
107+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
108+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
109+
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
101110
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102111
EscapedStringLiteral(String),
103112
/// Unicode string literal: i.e: U&'first \000A second'
@@ -292,6 +301,8 @@ impl fmt::Display for Token {
292301
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
293302
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
294303
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
304+
Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
305+
Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
295306
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
296307
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
297308
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@@ -1032,13 +1043,35 @@ impl<'a> Tokenizer<'a> {
10321043
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
10331044
Ok(Some(Token::NationalStringLiteral(s)))
10341045
}
1046+
Some(&q @ 'q') | Some(&q @ 'Q')
1047+
if self.dialect.supports_quote_delimited_string() =>
1048+
{
1049+
chars.next(); // consume and check the next char
1050+
if let Some('\'') = chars.peek() {
1051+
self.tokenize_quote_delimited_string(chars, &[n, q])
1052+
.map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
1053+
} else {
1054+
let s = self.tokenize_word(String::from_iter([n, q]), chars);
1055+
Ok(Some(Token::make_word(&s, None)))
1056+
}
1057+
}
10351058
_ => {
10361059
// regular identifier starting with an "N"
10371060
let s = self.tokenize_word(n, chars);
10381061
Ok(Some(Token::make_word(&s, None)))
10391062
}
10401063
}
10411064
}
1065+
q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
1066+
chars.next(); // consume and check the next char
1067+
if let Some('\'') = chars.peek() {
1068+
self.tokenize_quote_delimited_string(chars, &[q])
1069+
.map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
1070+
} else {
1071+
let s = self.tokenize_word(q, chars);
1072+
Ok(Some(Token::make_word(&s, None)))
1073+
}
1074+
}
10421075
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10431076
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
10441077
let starting_loc = chars.location();
@@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
19942027
)
19952028
}
19962029

2030+
/// Reads a quote delimited string, expecting `chars.next()` to deliver the opening single quote.
2031+
///
2032+
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
2033+
fn tokenize_quote_delimited_string(
2034+
&self,
2035+
chars: &mut State,
2036+
// the prefix that introduced the possible literal or word,
2037+
// e.g. "Q" or "nq"
2038+
literal_prefix: &[char],
2039+
) -> Result<QuoteDelimitedString, TokenizerError> {
2040+
let literal_start_loc = chars.location();
2041+
chars.next();
2042+
2043+
let start_quote_loc = chars.location();
2044+
let (start_quote, end_quote) = match chars.next() {
2045+
None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
2046+
return self.tokenizer_error(
2047+
start_quote_loc,
2048+
format!(
2049+
"Invalid space, tab, newline, or EOF after '{}''",
2050+
String::from_iter(literal_prefix)
2051+
),
2052+
);
2053+
}
2054+
Some(c) => (
2055+
c,
2056+
match c {
2057+
'[' => ']',
2058+
'{' => '}',
2059+
'<' => '>',
2060+
'(' => ')',
2061+
c => c,
2062+
},
2063+
),
2064+
};
2065+
2066+
// read the string literal until the closing "quote character" is immediately followed by a literal single quote
2067+
let mut value = String::new();
2068+
while let Some(ch) = chars.next() {
2069+
if ch == end_quote {
2070+
if let Some('\'') = chars.peek() {
2071+
chars.next(); // ~ consume the quote
2072+
return Ok(QuoteDelimitedString {
2073+
start_quote,
2074+
value,
2075+
end_quote,
2076+
});
2077+
}
2078+
}
2079+
value.push(ch);
2080+
}
2081+
2082+
self.tokenizer_error(literal_start_loc, "Unterminated string literal")
2083+
}
2084+
19972085
/// Read a quoted string.
19982086
fn tokenize_quoted_string(
19992087
&self,

0 commit comments

Comments
 (0)