ALib C++ Framework
by
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1//##################################################################################################
2// ALib C++ Framework
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6//##################################################################################################
7#include "alib_precompile.hpp"
8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Configuration MACRO ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
10#endif
11#if ALIB_C20_MODULES
12 module;
13#endif
14//========================================= Global Fragment ========================================
17//============================================== Module ============================================
18#if ALIB_C20_MODULES
19 module ALib.Expressions.Impl;
20 import ALib.Expressions;
21#else
23#endif
24//========================================== Implementation ========================================
25namespace alib { namespace expressions { namespace detail {
26
27//##################################################################################################
28// Parser
29//##################################################################################################
30
32: compileTimeAllocator(allocator)
33, compiler (pCompiler)
34, unaryOperators (allocator)
35, binaryOperators (allocator) {
36 // characters to be known
37 syntaxTokens [u8'(']= true;
38 syntaxTokens [u8')']= true;
39 syntaxTokens [u8',']= true;
40 operatorChars[u8'?']= true;
41 operatorChars[u8':']= true;
42
43 // define unary ops
44 for( auto& op : compiler.UnaryOperators ) {
45 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
46 "Doubly defined unary operator symbol '{}'.", op )
47
48 unaryOperators.EmplaceUnique(op);
49 for( auto it : op )
50 operatorChars[it]= true;
51 }
52
53 for( auto& op : compiler.AlphabeticUnaryOperatorAliases ) {
54 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
55 "Doubly defined unary operator symbol '{}'.", op.first )
56
57 unaryOperators.EmplaceUnique(op.first);
58 if( !isalpha( op.first.CharAtStart() ) )
59 for( auto it : op.first )
60 operatorChars[it]= true;
61 }
62
63
64 for( auto& op : compiler.BinaryOperators ) {
65 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
66 "Doubly defined binary operator symbol '{}'.", op.first )
67 if( op.first == A_CHAR("[]") ) {
68 syntaxTokens[u8'[']= true;
69 syntaxTokens[u8']']= true;
70 } else {
71 binaryOperators.EmplaceUnique(op.first);
72 for( auto it : op.first )
73 operatorChars[it]= true;
74 } }
75
76 for( auto& op : compiler.AlphabeticBinaryOperatorAliases ) {
77 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
78 "Doubly defined binary operator symbol '{}'.", op.first )
79
80 ALIB_DBG( auto originalOp= )
81 compiler.BinaryOperators.Find( op.second );
82 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
83 "Alias '{}' defined for unknown operator '{}'.",
84 op.first, op.second )
85
86 binaryOperators.EmplaceUnique(op.first);
87 if( !isalpha( op.first.CharAtStart() ) )
88 for( auto it : op.first )
89 operatorChars[it]= true;
90} }
91
92//##################################################################################################
93// Lexer
94//##################################################################################################
95void ParserImpl::NextToken() {
96 scanner.TrimStart();
97 tokPosition= expression.Length() - scanner.Length();
98
99 if( scanner.IsEmpty() ) {
100 token= Tokens::EOT;
101 return;
102 }
103
104 character first= scanner.CharAtStart<NC>();
105
106 //----------------------------------------- Syntax Tokens ----------------------------------------
107 if( syntaxTokens[first] ) {
108 token= Tokens(first);
109 scanner.ConsumeChar();
110 return;
111 }
112
113 //--------------------------------------- Symbolic operators -------------------------------------
114 // read up to 3 operator characters
115 if( operatorChars[first] ) {
116 integer operatorLength= 1;
117 scanner.ConsumeChar();
118 if( operatorChars[scanner.CharAtStart() ] ) {
119 scanner.ConsumeChar();
120 ++operatorLength;
121
122 if( operatorChars[scanner.CharAtStart() ] ) {
123 scanner.ConsumeChar();
124 ++operatorLength;
125 } }
126
127 token= Tokens::SymbolicOp;
128 tokString= String( expression.Buffer() + tokPosition, operatorLength );
129
130 // special treatment for Elvis with spaces "? :"
131 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) ) {
132 // patch existing token and return
133 Substring backup= scanner;
134 if( scanner.TrimStart().CharAtStart() == ':' ) {
135 tokString= A_CHAR("?:");
136 scanner.ConsumeChar();
137 }
138 else
139 scanner= backup;
140 }
141 return;
142 }
143
144 //-------------------------------------- alphabetic operators ------------------------------------
145 if( isalpha( first ) ) {
146 integer len= 1;
147 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
148 ++len;
149 tokString= scanner.Substring<NC>( 0, len );
150 auto hashCode= tokString.HashcodeIgnoreCase();
151
152 // unary
153 {
154 decltype(unaryOperators)::Iterator it;
155 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
156 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
157 || tokString.Equals<NC>( it.Value() ) ) )
158 {
159 scanner.ConsumeChars<NC>( tokString.Length() );
160 token= Tokens::AlphaUnOp;
161 return;
162 } }
163
164 // binary
165 {
166 decltype(binaryOperators)::Iterator it;
167 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
168 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
169 || tokString.Equals<NC>( it.Value() ) ) )
170 {
171 scanner.ConsumeChars<NC>( tokString.Length() );
172 token= Tokens::AlphaBinOp;
173 return;
174 } }
175
176 }
177
178 //------------------------------------------ Identifiers -----------------------------------------
179 if( isalpha( first ) || first == '_' ) {
180 integer endOfIdent= 0;
181 character next= 0;
182 while( ++endOfIdent < scanner.Length()
183 && ( isalnum( next= scanner[endOfIdent] )
184 || next == '_' ) );
185
186 token= Tokens::Identifier;
187 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
188 scanner.ConsumeChars<NC>( endOfIdent );
189 return;
190 }
191
192 //-------------------------------------------- numbers -------------------------------------------
193 if( isdigit( first ) ) {
194 integer endOfDecPart= 0;
195 character next= 0;
196 while( ++endOfDecPart < scanner.Length()
197 && ( isdigit( next= scanner[endOfDecPart] )
198 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
199 );
200
201
202 // float number
203 if( next == numberFormat->DecimalPointChar
204 || next == 'e'
205 || next == 'E'
206 || scanner.Substring( endOfDecPart ).StartsWith( numberFormat->ExponentSeparator ) )
207
208 {
209 auto oldStart= scanner.Buffer();
210 double value;
211 scanner.ConsumeFloat( value, numberFormat );
212 token = Tokens::LitFloat;
213 tokFloat= value;
214
215 String numberParsed( oldStart, scanner.Buffer() - oldStart );
216 tokLiteralHint= numberParsed.IndexOf('e') > 0
217 || numberParsed.IndexOf('E') > 0
218 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
219 ? ASTLiteral::NFHint::Scientific
220 : ASTLiteral::NFHint::NONE;
221 }
222
223 // integer number
224 else {
225 tokLiteralHint= ASTLiteral::NFHint::NONE;
226 if( numberFormat->HexLiteralPrefix.IsNotEmpty()
227 && scanner.StartsWith( numberFormat->HexLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Hexadecimal;
228 else if( numberFormat->OctLiteralPrefix.IsNotEmpty()
229 && scanner.StartsWith( numberFormat->OctLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Octal;
230 else if( numberFormat->BinLiteralPrefix.IsNotEmpty()
231 && scanner.StartsWith( numberFormat->BinLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Binary;
232
233 integer value;
234 scanner.ConsumeInt( value, numberFormat );
235 token= Tokens::LitInteger;
236 tokInteger= value;
237 }
238
239 return;
240 }
241
242 //-------------------------------------------- Strings -------------------------------------------
243 if( first == '"' ) {
244 bool lastWasSlash= false;
245 scanner.ConsumeChar<NC>();
246 character next;
247 while( (next= scanner.ConsumeChar()) != '\0' ) {
248 if( next == '\\' ) { lastWasSlash= true; continue; }
249 if( next == '\"' && !lastWasSlash ) break;
250 lastWasSlash= false;
251 }
252
253 if( next != '"' ) {
254 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation,
255 EXPRESSIONS.GetResource("EE4") );
256 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo,
257 expression, expression.Length() - scanner.Length() );
258 throw e;
259 }
260
261 String quoted( expression.Buffer() + tokPosition + 1,
262 expression.Length() - scanner.Length() - tokPosition -2 );
263 token = Tokens::LitString;
264 tokString.Allocate(compileTimeAllocator, String1K(quoted) << Escape( lang::Switch::Off ) );
265 return;
266 }
267
268 //--------------------------------------- unrecognized token -------------------------------------
269 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxError );
270 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, expression.Length() - scanner.Length() );
271 throw e;
272}
273
274
275
276//##################################################################################################
277// Parser
278//##################################################################################################
279#define Start parseConditional
280
281detail::AST* ParserImpl::Parse( const String& exprString, NumberFormat* nf ) {
282 if( exprString.IsEmpty() )
283 throw Exception( ALIB_CALLER, Exceptions::EmptyExpressionString );
284
285 expression = exprString;
286 numberFormat= nf;
287 ASTs = compileTimeAllocator().New<StdVectorMA<AST*>>( compileTimeAllocator );
288 ASTs->reserve(20);
289
290 // load first token
291 scanner= expression;
292 NextToken();
293
294//ALIB_DBG( lexer.DbgListTokens(); )
295
296 AST* ast= Start();
297
298
299 // if tokens remain, an "operator" would be expected
300 if( token != Tokens::EOT ) {
301 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE5") );
302 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
303 throw e;
304 }
305
306 return ast;
307}
308
309
310AST* ParserImpl::parseConditional() {
311 // parse lhs as simple
312 push( parseBinary() ); // Q
313
314 integer qmPosition= tokPosition;
315
316
317 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") ) {
318 NextToken();
319 push( Start() ); // T
320
321 // expect colon
322 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") ) {
323 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE6") );
324 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
325 throw e;
326 }
327 integer colonPosition= tokPosition;
328
329 NextToken();
330
331 AST* F= Start();
332 AST* T= pop();
333 AST* Q= pop();
334 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
335 }
336
337 // was no conditional
338 return pop();
339}
340
341AST* ParserImpl::parseBinary() {
342 // parse lhs as simple
343 push( parseSimple() );
344
345 // parse
346 integer position= tokPosition;
347 String binOp;
348 for( ;; ) {
349 binOp= getBinaryOp();
350 if( binOp.IsNull() )
351 return pop();
352
353 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
354 if( token == Tokens::BraceOpen ) {
355 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
356 position= tokPosition;
357 continue;
358 }
359 break;
360 }
361
362 // check if tokens remain
363 if( token == Tokens::EOT ) {
364 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE7") );
365 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
366 throw e;
367 }
368
369 AST* lhs= top();
370 AST* rhs= push( parseBinary() );
371
372 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
373 AST* replace = rhs;
374 ASTBinaryOp* parent = nullptr;
375 while( replace->NodeType == AST::Types::BinaryOp
376 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
377 {
378 parent = dynamic_cast<ASTBinaryOp*>(replace);
379 replace= parent->Lhs;
380 }
381
382 pop();
383 pop();
384 if( parent == nullptr )
385 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
386
387 // insert binary at lhs of deepest equal-level binary found.
388 // Its current lhs becomes its new lhs-child's rhs.
389 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
390 return rhs;
391}
392
394 // '(' expr ')' (brackets)
395 if( token == Tokens::BraceOpen ) {
396 NextToken();
397 push( Start() );
398
399 if( token != Tokens::BraceClose ) {
400 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE1"));
401 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
402 throw e;
403 }
404 NextToken();
405 replace( parseSubscript( top() ) );
406 return pop();
407 }
408
409 // unary operator
410 integer position= tokPosition;
411 {
412 String unOp= getUnaryOp();
413 if( unOp.IsNotNull() ) {
414 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
415 replace( parseSubscript( top() ) );
416 return pop();
417 } }
418
419 // terminals
420 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
421 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
422 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
423
424 // allow bin op's names here! This is tricky but right!
425 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) {
426 String name= tokString;
427 NextToken();
428
429 // function
430 if( token == Tokens::BraceOpen ) {
431 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
432 push( astFunction );
433 for(;;) {
434 NextToken();
435 if( token == Tokens::BraceClose ) {
436 NextToken();
437 return pop();
438 }
439 astFunction->Arguments.emplace_back( Start() );
440
441 if( token == Tokens::Comma )
442 continue;
443
444 if( token != Tokens::BraceClose ) {
446 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
447 throw e;
448 }
449
450 NextToken();
451 replace( parseSubscript( astFunction ) );
452 return pop();
453 } }
454
455 // identifier
456 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>( String(compileTimeAllocator, name), position ) ) ) );
457 return pop();
458 }
459
460 //--------------------------------------------- ERRORS -------------------------------------------
461 if( token == Tokens::EOT ) {
463 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
464 throw e;
465 }
466
467 if( token == Tokens::BraceClose ) {
469 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
470 throw e;
471 }
472
473 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose ) {
475 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
476 throw e;
477 }
478
479 if( token == Tokens::Comma ) {
481 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
482 throw e;
483 }
484
485 ALIB_ERROR( "EXPR", "Internal error. This should never happen." )
486 return nullptr;
487}
488
490 if( !HasBits( compiler.CfgCompilation, Compilation::AllowSubscriptOperator )
492 return function;
493
494 integer position= tokPosition;
495
496 NextToken();
497
498 push( Start() );
499
503 throw e;
504 }
505
506 // success
507 NextToken();
508 return compileTimeAllocator().New<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
509}
510
511
512//##################################################################################################
513// Helpers
514//##################################################################################################
515
516
518 if( token == Tokens::SymbolicOp ) {
519 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
520 // token only if all is consumed.
521 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead ) {
522 Substring key= Substring( tokString.Buffer(), partialRead );
523 if( unaryOperators.Contains( key ) ) {
524 if( partialRead == tokString.Length() )
525 NextToken();
526 else {
527 tokString= String( tokString.Buffer() + partialRead,
528 tokString.Length() - partialRead );
529 tokPosition+= partialRead;
530 }
531 return key;
532 } }
535 throw e;
536 }
537 else if ( token == Tokens::AlphaUnOp ) {
538 String alphabeticOperator= tokString;
539 NextToken();
540 return alphabeticOperator;
541 }
542
543 return NULL_STRING;
544}
545
547 if ( token == Tokens::SymbolicOp ) {
548 // ignore ternary
549 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
550 return NULL_STRING;
551
552 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
553 // but are happy with less than available
554 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead ) {
555 Substring key = Substring( tokString.Buffer(), partialRead );
556 if ( binaryOperators.Contains( key ) ) {
557 if ( partialRead == tokString.Length() )
558 NextToken();
559 else {
560 tokString = String( tokString.Buffer() + partialRead,
561 tokString.Length() - partialRead );
562 tokPosition += partialRead;
563 }
564 return key;
565 } }
566
569 throw e;
570 }
571 else if ( token == Tokens::AlphaBinOp ) {
572 String alphabeticOperator= tokString;
573 NextToken();
574 return alphabeticOperator;
575 }
576
577 return NULL_STRING;
578}
579
580
581#undef Start
582
583}}} // namespace [alib::expressions::detail]
#define ALIB_CALLER_NULLED
Definition alib.inl:1105
#define A_CHAR(STR)
Definition alib.inl:1325
#define ALIB_ERROR(domain,...)
Definition alib.inl:1140
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
String expression
The given expression to parse.
integer tokPosition
The position of the token in field expression.
String tokString
String value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
@ SubscriptClose
A closing subscript brace.
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
const String & GetResource(const NString &name)
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
constexpr String NULL_STRING
A nulled string of the default character type.
Definition string.inl:2254
lang::integer integer
Type alias in namespace alib.
Definition integers.inl:149
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2172
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of the ALib Camp class ExpressionsCamp.
strings::TSubstring< character > Substring
Type alias in namespace alib.
exceptions::Exception Exception
Type alias in namespace alib.
Abstract syntax tree node representing binary operators.
Definition ast_impl.inl:197