88 */
99namespace nicoSWD \Rules ;
1010
11- /**
12- * Class Tokenizer
13- * @package nicoSWD\Rules
14- */
11+ use SplPriorityQueue ;
12+ use stdClass ;
13+
1514final class Tokenizer implements TokenizerInterface
1615{
17- /**
18- * @var string
19- */
20- private $ tokens = '
21- ~(
22- (?<And>&&)
23- | (?<Or>\|\|)
24- | (?<NotEqualStrict>!==)
25- | (?<NotEqual><>|!=)
26- | (?<EqualStrict>===)
27- | (?<Equal>==)
28- | (?<In>\bin\b)
29- | (?<Bool>\b(?:true|false)\b)
30- | (?<Null>\bnull\b)
31- | (?<Method>\.\s*[a-zA-Z_]\w*\s*\()
32- | (?<Function>[a-zA-Z_]\w*\s*\()
33- | (?<Variable>[a-zA-Z_]\w*)
34- | (?<Float>-?\d+(?:\.\d+))
35- | (?<Integer>-?\d+)
36- | (?<EncapsedString>"[^"]*"| \'[^ \']* \')
37- | (?<SmallerEqual><=)
38- | (?<GreaterEqual>>=)
39- | (?<Smaller><)
40- | (?<Greater>>)
41- | (?<OpeningParentheses>\()
42- | (?<ClosingParentheses>\))
43- | (?<OpeningArray>\[)
44- | (?<ClosingArray>\])
45- | (?<Comma>,)
46- | (?<Regex>/[^/\*].*/[igm]{0,3})
47- | (?<Comment>//[^\r\n]*|/\*.*?\*/)
48- | (?<Newline>\r?\n)
49- | (?<Space>\s+)
50- | (?<Unknown>.)
51- )~xAs ' ;
// Names of the built-in token types. Each name is passed to registerToken()
// as the token "class" and becomes the named capture group for that token
// in the combined regex built by getRegex().
// NOTE(review): tokenize() appends the matched name to the Tokens\Token
// namespace prefix to form a class name -- presumably one token class
// exists per constant; confirm against the Tokens namespace.
const TOKEN_AND = 'And';
const TOKEN_OR = 'Or';
const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
const TOKEN_NOT_EQUAL = 'NotEqual';
const TOKEN_EQUAL_STRICT = 'EqualStrict';
const TOKEN_EQUAL = 'Equal';
const TOKEN_IN = 'In';
const TOKEN_BOOL = 'Bool';
const TOKEN_NULL = 'Null';
const TOKEN_METHOD = 'Method';
const TOKEN_FUNCTION = 'Function';
const TOKEN_VARIABLE = 'Variable';
const TOKEN_FLOAT = 'Float';
const TOKEN_INTEGER = 'Integer';
const TOKEN_ENCAPSED_STRING = 'EncapsedString';
const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
const TOKEN_GREATER_EQUAL = 'GreaterEqual';
const TOKEN_SMALLER = 'Smaller';
const TOKEN_GREATER = 'Greater';
const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
const TOKEN_OPENING_ARRAY = 'OpeningArray';
const TOKEN_CLOSING_ARRAY = 'ClosingArray';
const TOKEN_COMMA = 'Comma';
const TOKEN_REGEX = 'Regex';
const TOKEN_COMMENT = 'Comment';
const TOKEN_NEWLINE = 'Newline';
const TOKEN_SPACE = 'Space';
const TOKEN_UNKNOWN = 'Unknown';
45+
/**
 * Registered token definitions, keyed by token class name. Each entry is
 * an object with `class`, `regex` and `priority` properties, as created
 * by registerToken().
 *
 * @var stdClass[]
 */
private $internalTokens = [];

/**
 * Cached combined pattern; stays empty until getRegex() builds it.
 *
 * @var string
 */
private $regex = '';

/**
 * Raised by registerToken() so the next getRegex() call rebuilds the
 * cached pattern; cleared again once the pattern has been rebuilt.
 *
 * @var bool
 */
private $regexRequiresReassambly = false;
51+
/**
 * Registers the built-in token set.
 *
 * Every row below is [token class, regex fragment, priority] and is
 * handed to registerToken() in the listed order. Priorities are spaced
 * in steps of five, leaving gaps between the built-in tokens.
 */
public function __construct()
{
    $builtIns = [
        [self::TOKEN_AND,                 '&&',                      145],
        [self::TOKEN_OR,                  '\|\|',                    140],
        [self::TOKEN_NOT_EQUAL_STRICT,    '!==',                     135],
        [self::TOKEN_NOT_EQUAL,           '<>|!=',                   130],
        [self::TOKEN_EQUAL_STRICT,        '===',                     125],
        [self::TOKEN_EQUAL,               '==',                      120],
        [self::TOKEN_IN,                  '\bin\b',                  115],
        [self::TOKEN_BOOL,                '\b(?:true|false)\b',      110],
        [self::TOKEN_NULL,                '\bnull\b',                105],
        [self::TOKEN_METHOD,              '\.\s*[a-zA-Z_]\w*\s*\(',  100],
        [self::TOKEN_FUNCTION,            '[a-zA-Z_]\w*\s*\(',       95],
        [self::TOKEN_FLOAT,               '-?\d+(?:\.\d+)',          90],
        [self::TOKEN_INTEGER,             '-?\d+',                   85],
        [self::TOKEN_ENCAPSED_STRING,     '"[^"]*"|\'[^\']*\'',      80],
        [self::TOKEN_SMALLER_EQUAL,       '<=',                      75],
        [self::TOKEN_GREATER_EQUAL,       '>=',                      70],
        [self::TOKEN_SMALLER,             '<',                       65],
        [self::TOKEN_GREATER,             '>',                       60],
        [self::TOKEN_OPENING_PARENTHESIS, '\(',                      55],
        [self::TOKEN_CLOSING_PARENTHESIS, '\)',                      50],
        [self::TOKEN_OPENING_ARRAY,       '\[',                      45],
        [self::TOKEN_CLOSING_ARRAY,       '\]',                      40],
        [self::TOKEN_COMMA,               ',',                       35],
        [self::TOKEN_REGEX,               '/[^/\*].*/[igm]{0,3}',    30],
        [self::TOKEN_COMMENT,             '//[^\r\n]*|/\*.*?\*/',    25],
        [self::TOKEN_NEWLINE,             '\r?\n',                   20],
        [self::TOKEN_SPACE,               '\s+',                     15],
        [self::TOKEN_VARIABLE,            '[a-zA-Z_]\w*',            10],
        [self::TOKEN_UNKNOWN,             '.',                       5],
    ];

    foreach ($builtIns as $row) {
        list($class, $regex, $priority) = $row;
        $this->registerToken($class, $regex, $priority);
    }
}
5284
5385 /**
54- * @param string $string
55- * @return Stack
86+ * {@inheritdoc}
5687 */
5788 public function tokenize ($ string )
5889 {
5990 $ stack = new Stack ();
91+ $ regex = $ this ->getRegex ();
6092 $ baseNameSpace = __NAMESPACE__ . '\\Tokens \\Token ' ;
6193 $ offset = 0 ;
6294
63- while (preg_match ($ this -> tokens , $ string , $ matches , 0 , $ offset )) {
95+ while (preg_match ($ regex , $ string , $ matches , 0 , $ offset )) {
6496 $ token = $ this ->getMatchedToken ($ matches );
6597 $ className = $ baseNameSpace . $ token ;
6698
@@ -77,8 +109,22 @@ public function tokenize($string)
77109 }
78110
79111 /**
80- * @param string[] $matches
81- * @return string
112+ * {@inheritdoc}
113+ */
public function registerToken($class, $regex, $priority = null)
{
    // No explicit priority: reuse the one already stored for this class,
    // or whatever default getPriority() supplies for an unknown class.
    if ($priority === null) {
        $priority = $this->getPriority($class);
    }

    // Registering an already-known class overwrites its definition.
    $this->internalTokens[$class] = (object) [
        'class'    => $class,
        'regex'    => $regex,
        'priority' => $priority,
    ];

    // The cached pattern is now stale; getRegex() rebuilds it on next use.
    $this->regexRequiresReassambly = true;
}
124+
125+ /**
126+ * @param array $matches
127+ * @return int|string
82128 */
83129 private function getMatchedToken (array $ matches )
84130 {
@@ -90,4 +136,48 @@ private function getMatchedToken(array $matches)
90136
91137 return 'Unknown ' ;
92138 }
93- }
139+
140+ /**
141+ * @return string
142+ */
private function getRegex()
{
    // Serve the cached pattern while it exists and no token has been
    // registered since it was built.
    $cacheIsFresh = $this->regex && !$this->regexRequiresReassambly;

    if ($cacheIsFresh) {
        return $this->regex;
    }

    // One named group per token, in the order the queue yields them.
    $groups = [];

    foreach ($this->getQueue() as $definition) {
        $groups[] = '(?<' . $definition->class . '>' . $definition->regex . ')';
    }

    // "A" anchors each match at the current offset; "s" lets "." match
    // newlines.
    $this->regex = sprintf('~(%s)~As', implode('|', $groups));
    $this->regexRequiresReassambly = false;

    return $this->regex;
}
158+
159+ /**
160+ * @return SplPriorityQueue
161+ */
private function getQueue()
{
    // Builds a fresh priority queue of all registered token definitions;
    // iterating it yields the definitions from highest to lowest priority.
    $queue = new SplPriorityQueue();

    // SplPriorityQueue dequeues elements of equal priority in no
    // particular order, which would make the alternation order of the
    // assembled regex unpredictable whenever two tokens share a priority
    // (e.g. a token registered with the default priority). A descending
    // serial number as second sort key breaks ties in registration order.
    $serial = count($this->internalTokens);

    foreach ($this->internalTokens as $token) {
        // Arrays compare element by element, so the real priority still
        // dominates and the serial only matters on a tie.
        $queue->insert($token, [$token->priority, $serial--]);
    }

    return $queue;
}
172+
173+ /**
174+ * @param string $class
175+ * @return int
176+ */
private function getPriority($class)
{
    // Unknown token classes get the fallback priority of 10.
    if (!isset($this->internalTokens[$class])) {
        return 10;
    }

    // A known class keeps the priority it was registered with.
    return $this->internalTokens[$class]->priority;
}
183+ }
0 commit comments