1919
2020from typing import Tuple , Union , List , Any
2121
22+ from app .translator .core .custom_types .values import ValueType
2223from app .translator .core .exceptions .parser import TokenizerGeneralException
2324from app .translator .core .mixins .logic import ANDLogicOperatorMixin
2425from app .translator .core .models .field import Keyword , Field
2526from app .translator .core .models .identifier import Identifier
2627from app .translator .core .tokenizer import QueryTokenizer
2728from app .translator .core .custom_types .tokens import OperatorType
29+ from app .translator .platforms .base .lucene .escape_manager import lucene_escape_manager
2830from app .translator .tools .utils import get_match_group
2931
3032
@@ -41,19 +43,21 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
4143 field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
4244 match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
4345 _num_value_pattern = r"\d+(?:\.\d+)*"
44- num_value_pattern = fr"(?P<num_value >{ _num_value_pattern } )\s*"
45- double_quotes_value_pattern = r '"(?P<d_q_value >(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\ }\s]|\\\"|\\)*)"\s*'
46- no_quotes_value_pattern = r "(?P<n_q_value >(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
47- re_value_pattern = r "/(?P<re_value >[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\ }\[\]\s?]+)/\s*"
48- gte_value_pattern = fr"\[\s*(?P<gte_value >{ _num_value_pattern } )\s+TO\s+\*\s*\]"
49- lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<lte_value >{ _num_value_pattern } )\s*\]"
46+ num_value_pattern = fr"(?P<{ ValueType . number_value } >{ _num_value_pattern } )\s*"
47+ double_quotes_value_pattern = fr '"(?P<{ ValueType . double_quotes_value } >(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\} }\s]|\\\"|\\)*)"\s*'
48+ no_quotes_value_pattern = fr "(?P<{ ValueType . no_quotes_value } >(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
49+ re_value_pattern = fr "/(?P<{ ValueType . regular_expression_value } >[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{{\} }\[\]\s?]+)/\s*"
50+ gte_value_pattern = fr"\[\s*(?P<{ ValueType . greater_than_or_equal } >{ _num_value_pattern } )\s+TO\s+\*\s*\]"
51+ lte_value_pattern = fr"\[\s*\*\s+TO\s+(?P<{ ValueType . less_than_or_equal } >{ _num_value_pattern } )\s*\]"
5052 range_value_pattern = fr"{ gte_value_pattern } |{ lte_value_pattern } "
5153 _value_pattern = fr"{ num_value_pattern } |{ re_value_pattern } |{ no_quotes_value_pattern } |{ double_quotes_value_pattern } |{ range_value_pattern } "
52- keyword_pattern = r "(?P<n_q_value >(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
54+ keyword_pattern = fr "(?P<{ ValueType . no_quotes_value } >(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{{ |\\\} }|\\\:|\\)+)(?:\s+|\)|$)"
5355
54- multi_value_pattern = r """\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
56+ multi_value_pattern = fr """\((?P<{ ValueType . value } >[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
5557 multi_value_check_pattern = r"___field___\s*___operator___\s*\("
5658
59+ escape_manager = lucene_escape_manager
60+
5761 wildcard_symbol = "*"
5862
5963 @staticmethod
@@ -69,22 +73,22 @@ def clean_quotes(value: Union[str, int]):
6973 return value
7074
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
    """Pick the operator and raw value for the value group that matched.

    The named value groups are probed in a fixed order; the first one that
    yields a non-None result wins. Number, unquoted, and double-quoted
    values keep the ``operator`` passed in by the caller. A regex value
    forces ``OperatorType.REGEX``, and the two range groups force
    ``OperatorType.GTE`` / ``OperatorType.LTE``.

    If none of these groups matched, the decision is delegated to the
    parent implementation.
    """
    # Probe order matters: it mirrors the precedence of the original checks.
    group_to_operator = (
        (ValueType.number_value, operator),
        (ValueType.regular_expression_value, OperatorType.REGEX),
        (ValueType.no_quotes_value, operator),
        (ValueType.double_quotes_value, operator),
        (ValueType.greater_than_or_equal, OperatorType.GTE),
        (ValueType.less_than_or_equal, OperatorType.LTE),
    )

    for group_name, resolved_operator in group_to_operator:
        found = get_match_group(match, group_name=group_name)
        # Compare against None explicitly: an empty string is still a value.
        if found is not None:
            return resolved_operator, found

    return super().get_operator_and_value(match, operator)
0 commit comments