@@ -29,10 +29,10 @@ use alloc::{
2929 vec,
3030 vec:: Vec ,
3131} ;
32- use core:: iter:: Peekable ;
3332use core:: num:: NonZeroU8 ;
3433use core:: str:: Chars ;
3534use core:: { cmp, fmt} ;
35+ use core:: { iter:: Peekable , str} ;
3636
3737#[ cfg( feature = "serde" ) ]
3838use serde:: { Deserialize , Serialize } ;
@@ -46,7 +46,10 @@ use crate::dialect::{
4646 SnowflakeDialect ,
4747} ;
4848use crate :: keywords:: { Keyword , ALL_KEYWORDS , ALL_KEYWORDS_INDEX } ;
49- use crate :: { ast:: DollarQuotedString , dialect:: HiveDialect } ;
49+ use crate :: {
50+ ast:: { DollarQuotedString , QuoteDelimitedString } ,
51+ dialect:: HiveDialect ,
52+ } ;
5053
5154/// SQL Token enumeration
5255#[ derive( Debug , Clone , PartialEq , PartialOrd , Eq , Ord , Hash ) ]
@@ -98,6 +101,12 @@ pub enum Token {
98101 TripleDoubleQuotedRawStringLiteral ( String ) ,
99102 /// "National" string literal: i.e: N'string'
100103 NationalStringLiteral ( String ) ,
104+ /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
105+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
106+ QuoteDelimitedStringLiteral ( QuoteDelimitedString ) ,
107+ /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
108+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
109+ NationalQuoteDelimitedStringLiteral ( QuoteDelimitedString ) ,
101110 /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102111 EscapedStringLiteral ( String ) ,
103112 /// Unicode string literal: i.e: U&'first \000A second'
@@ -292,6 +301,8 @@ impl fmt::Display for Token {
292301 Token :: TripleDoubleQuotedString ( ref s) => write ! ( f, "\" \" \" {s}\" \" \" " ) ,
293302 Token :: DollarQuotedString ( ref s) => write ! ( f, "{s}" ) ,
294303 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{s}'" ) ,
304+ Token :: QuoteDelimitedStringLiteral ( ref s) => s. fmt ( f) ,
305+ Token :: NationalQuoteDelimitedStringLiteral ( ref s) => write ! ( f, "N{s}" ) ,
295306 Token :: EscapedStringLiteral ( ref s) => write ! ( f, "E'{s}'" ) ,
296307 Token :: UnicodeStringLiteral ( ref s) => write ! ( f, "U&'{s}'" ) ,
297308 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{s}'" ) ,
@@ -1032,13 +1043,35 @@ impl<'a> Tokenizer<'a> {
10321043 self . tokenize_single_quoted_string ( chars, '\'' , backslash_escape) ?;
10331044 Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
10341045 }
1046+ Some ( & q @ 'q' ) | Some ( & q @ 'Q' )
1047+ if self . dialect . supports_quote_delimited_string ( ) =>
1048+ {
1049+ chars. next ( ) ; // consume and check the next char
1050+ if let Some ( '\'' ) = chars. peek ( ) {
1051+ self . tokenize_quote_delimited_string ( chars, & [ n, q] )
1052+ . map ( |s| Some ( Token :: NationalQuoteDelimitedStringLiteral ( s) ) )
1053+ } else {
1054+ let s = self . tokenize_word ( String :: from_iter ( [ n, q] ) , chars) ;
1055+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
1056+ }
1057+ }
10351058 _ => {
10361059 // regular identifier starting with an "N"
10371060 let s = self . tokenize_word ( n, chars) ;
10381061 Ok ( Some ( Token :: make_word ( & s, None ) ) )
10391062 }
10401063 }
10411064 }
1065+ q @ 'Q' | q @ 'q' if self . dialect . supports_quote_delimited_string ( ) => {
1066+ chars. next ( ) ; // consume and check the next char
1067+ if let Some ( '\'' ) = chars. peek ( ) {
1068+ self . tokenize_quote_delimited_string ( chars, & [ q] )
1069+ . map ( |s| Some ( Token :: QuoteDelimitedStringLiteral ( s) ) )
1070+ } else {
1071+ let s = self . tokenize_word ( q, chars) ;
1072+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
1073+ }
1074+ }
10421075 // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10431076 x @ 'e' | x @ 'E' if self . dialect . supports_string_escape_constant ( ) => {
10441077 let starting_loc = chars. location ( ) ;
@@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
19942027 )
19952028 }
19962029
2030+ /// Reads a quote delimited string expecting `chars.next()` to deliver a quote.
2031+ ///
2032+ /// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
2033+ fn tokenize_quote_delimited_string (
2034+ & self ,
2035+ chars : & mut State ,
2036+ // the prefix that introduced the possible literal or word,
2037+ // e.g. "Q" or "nq"
2038+ literal_prefix : & [ char ] ,
2039+ ) -> Result < QuoteDelimitedString , TokenizerError > {
2040+ let literal_start_loc = chars. location ( ) ;
2041+ chars. next ( ) ;
2042+
2043+ let start_quote_loc = chars. location ( ) ;
2044+ let ( start_quote, end_quote) = match chars. next ( ) {
2045+ None | Some ( ' ' ) | Some ( '\t' ) | Some ( '\r' ) | Some ( '\n' ) => {
2046+ return self . tokenizer_error (
2047+ start_quote_loc,
2048+ format ! (
2049+ "Invalid space, tab, newline, or EOF after '{}''" ,
2050+ String :: from_iter( literal_prefix)
2051+ ) ,
2052+ ) ;
2053+ }
2054+ Some ( c) => (
2055+ c,
2056+ match c {
2057+ '[' => ']' ,
2058+ '{' => '}' ,
2059+ '<' => '>' ,
2060+ '(' => ')' ,
2061+ c => c,
2062+ } ,
2063+ ) ,
2064+ } ;
2065+
2066+ // read the string literal until the "quote character" following a by literal quote
2067+ let mut value = String :: new ( ) ;
2068+ while let Some ( ch) = chars. next ( ) {
2069+ if ch == end_quote {
2070+ if let Some ( '\'' ) = chars. peek ( ) {
2071+ chars. next ( ) ; // ~ consume the quote
2072+ return Ok ( QuoteDelimitedString {
2073+ start_quote,
2074+ value,
2075+ end_quote,
2076+ } ) ;
2077+ }
2078+ }
2079+ value. push ( ch) ;
2080+ }
2081+
2082+ self . tokenizer_error ( literal_start_loc, "Unterminated string literal" )
2083+ }
2084+
19972085 /// Read a quoted string.
19982086 fn tokenize_quoted_string (
19992087 & self ,
0 commit comments