/* Scanner options: a reentrant 8-bit scanner whose generated symbols use the
 * "Aql" prefix and which exchanges values and locations with the Bison
 * parser. Line counting is enabled; yywrap() and unput() are not used. */
%option reentrant
%option 8bit
%option prefix="Aql"
%option bison-locations
%option bison-bridge
%option yylineno
%option noyywrap nounput batch

/* Exclusive start conditions: while one of them is active, only the rules
 * explicitly prefixed with that condition can match. */
%x BACKTICK
%x FORWARDTICK
%x SINGLE_QUOTE
%x DOUBLE_QUOTE
%x COMMENT_SINGLE
%x COMMENT_MULTI
%x NOT

%top{
/* NOTE(review): the header name of this include is missing in the reviewed
 * text; <stdint.h> is assumed because the actions below use int64_t.
 * Please confirm. */
#include <stdint.h>
}

%{
#include "Basics/Common.h"
#include "Basics/conversions.h"

// introduce the namespace here, otherwise following references to
// the namespace in auto-generated headers might fail

namespace arangodb {
  namespace aql {
    class Query;
    class Parser;
  }
}

#include "Aql/AstNode.h"
#include "Aql/grammar.h"
#include "Aql/Parser.h"

// the scanner's "extra" pointer is the parser that drives it
#define YY_EXTRA_TYPE arangodb::aql::Parser*

// executed before every rule action: records the location of the matched
// text in yylloc, advances the column counter and tells the parser how many
// bytes of the query string have been consumed
#define YY_USER_ACTION                                          \
  yylloc->first_line = (int) yylineno;                          \
  yylloc->first_column = (int) yycolumn;                        \
  yylloc->last_column = (int) (yycolumn + yyleng - 1);          \
  yycolumn += (int) yyleng;                                     \
  yyextra->increaseOffset(yyleng);

#define YY_NO_INPUT 1

// input is not read from a file: the parser copies at most maxBytesToRead
// bytes of the remaining query string into resultBuffer. resultState
// receives the number of bytes copied, or YY_NULL when nothing is left
#define YY_INPUT(resultBuffer, resultState, maxBytesToRead) {   \
  size_t length = yyextra->remainingLength();                   \
  if (length > static_cast<size_t>(maxBytesToRead)) {           \
    length = static_cast<size_t>(maxBytesToRead);               \
  }                                                             \
  if (length > 0) {                                             \
    yyextra->fillBuffer(resultBuffer, length);                  \
    resultState = length;                                       \
  }                                                             \
  else {                                                        \
    resultState = YY_NULL;                                      \
  }                                                             \
}
%}

%%

 /* ---------------------------------------------------------------------------
  * language keywords (matched case-insensitively)
  * --------------------------------------------------------------------------- */

(?i:FOR)       { return T_FOR; }
(?i:LET)       { return T_LET; }
(?i:FILTER)    { return T_FILTER; }
(?i:RETURN)    { return T_RETURN; }
(?i:COLLECT)   { return T_COLLECT; }
(?i:SORT)      { return T_SORT; }
(?i:LIMIT)     { return T_LIMIT; }
(?i:DISTINCT)  { return T_DISTINCT; }
(?i:AGGREGATE) { return T_AGGREGATE; }
(?i:ASC)       { return T_ASC; }
(?i:DESC)      { return T_DESC; }

(?i:NOT) {
  /* no token is returned yet: the rules of the NOT start condition decide
     whether this becomes T_NOT or, when followed by IN, T_NIN */
  BEGIN(NOT);
}

(?i:AND)       { return T_AND; }
(?i:OR)        { return T_OR; }
(?i:IN)        { return T_IN; }
(?i:INTO)      { return T_INTO; }
(?i:WITH)      { return T_WITH; }
(?i:REMOVE)    { return T_REMOVE; }
(?i:INSERT)    { return T_INSERT; }
(?i:UPDATE)    { return T_UPDATE; }
(?i:REPLACE)   { return T_REPLACE; }
(?i:UPSERT)    { return T_UPSERT; }
(?i:GRAPH)     { return T_GRAPH; }
(?i:OUTBOUND)  { return T_OUTBOUND; }
(?i:INBOUND)   { return T_INBOUND; }
(?i:ANY)       { return T_ANY; }
(?i:ALL)       { return T_ALL; }
(?i:NONE)      { return T_NONE; }

 /* ---------------------------------------------------------------------------
  * predefined type literals
  * --------------------------------------------------------------------------- */

(?i:NULL)      { return T_NULL; }
(?i:TRUE)      { return T_TRUE; }
(?i:FALSE)     { return T_FALSE; }

 /* ---------------------------------------------------------------------------
  * operators
  * --------------------------------------------------------------------------- */

"=="           { return T_EQ; }
"!="           { return T_NE; }
">="           { return T_GE; }
">"            { return T_GT; }
"<="           { return T_LE; }
"<"            { return T_LT; }
"="            { return T_ASSIGN; }
"!"            { return T_NOT; }
"&&"           { return T_AND; }
"||"           { return T_OR; }
"+"            { return T_PLUS; }
"-"            { return T_MINUS; }
"*"            { return T_TIMES; }
"/"            { return T_DIV; }
"%"            { return T_MOD; }
"?"            { return T_QUESTION; }
"::"           { return T_SCOPE; }
":"            { return T_COLON; }
".."           { return T_RANGE; }

 /* ---------------------------------------------------------------------------
  * punctuation
  * --------------------------------------------------------------------------- */

","            { return T_COMMA; }
"("            { return T_OPEN; }
")"            { return T_CLOSE; }
"{"            { return T_OBJECT_OPEN; }
"}"            { return T_OBJECT_CLOSE; }
"["            { return T_ARRAY_OPEN; }
"]"            { return T_ARRAY_CLOSE; }

 /* ---------------------------------------------------------------------------
  * identifiers
  * --------------------------------------------------------------------------- */

($?[a-zA-Z][_a-zA-Z0-9]*|_+[a-zA-Z]+[_a-zA-Z0-9]*) {
  /* unquoted string */
  yylval->strval.value = yyextra->query()->registerString(yytext, yyleng);
  yylval->strval.length = yyleng;
  return T_STRING;
}

<INITIAL>` {
  /* string enclosed in backticks: remember where its content starts */
  yyextra->marker(yyextra->queryString() + yyextra->offset());
  BEGIN(BACKTICK);
}

<BACKTICK>` {
  /* end of backtick-enclosed string: the content runs from the marker up to,
     but not including, the closing backtick (1 byte) */
  BEGIN(INITIAL);
  size_t outLength;
  yylval->strval.value = yyextra->query()->registerEscapedString(yyextra->marker(), yyextra->offset() - (yyextra->marker() - yyextra->queryString()) - 1, outLength);
  yylval->strval.length = outLength;
  return T_STRING;
}

<BACKTICK>\\. {
  /* character escaped by backslash */
}

<BACKTICK>\n {
  /* newline character inside backtick */
}

<BACKTICK>. {
  /* any character (except newline) inside backtick */
}

<INITIAL>´ {
  /* string enclosed in forwardticks: remember where its content starts */
  yyextra->marker(yyextra->queryString() + yyextra->offset());
  BEGIN(FORWARDTICK);
}

<FORWARDTICK>´ {
  /* end of forwardtick-enclosed string: 2 is subtracted here (instead of 1)
     for the closing forwardtick */
  BEGIN(INITIAL);
  size_t outLength;
  yylval->strval.value = yyextra->query()->registerEscapedString(yyextra->marker(), yyextra->offset() - (yyextra->marker() - yyextra->queryString()) - 2, outLength);
  yylval->strval.length = outLength;
  return T_STRING;
}

<FORWARDTICK>\\. {
  /* character escaped by backslash */
}

<FORWARDTICK>\n {
  /* newline character inside forwardtick */
}

<FORWARDTICK>. {
  /* any character (except newline) inside forwardtick */
}

 /* ---------------------------------------------------------------------------
  * strings
  * --------------------------------------------------------------------------- */

<INITIAL>\" {
  /* start of a double-quoted string: remember where its content starts */
  yyextra->marker(yyextra->queryString() + yyextra->offset());
  BEGIN(DOUBLE_QUOTE);
}

<DOUBLE_QUOTE>\" {
  /* end of quote-enclosed string */
  BEGIN(INITIAL);
  size_t outLength;
  yylval->strval.value = yyextra->query()->registerEscapedString(yyextra->marker(), yyextra->offset() - (yyextra->marker() - yyextra->queryString()) - 1, outLength);
  yylval->strval.length = outLength;
  return T_QUOTED_STRING;
}

<DOUBLE_QUOTE>\\. {
  /* character escaped by backslash */
}

<DOUBLE_QUOTE>\n {
  /* newline character inside quote */
}

<DOUBLE_QUOTE>. {
  /* any character (except newline) inside quote */
}

<INITIAL>' {
  /* start of a single-quoted string: remember where its content starts */
  yyextra->marker(yyextra->queryString() + yyextra->offset());
  BEGIN(SINGLE_QUOTE);
}

<SINGLE_QUOTE>' {
  /* end of quote-enclosed string */
  BEGIN(INITIAL);
  size_t outLength;
  yylval->strval.value = yyextra->query()->registerEscapedString(yyextra->marker(), yyextra->offset() - (yyextra->marker() - yyextra->queryString()) - 1, outLength);
  yylval->strval.length = outLength;
  return T_QUOTED_STRING;
}

<SINGLE_QUOTE>\\. {
  /* character escaped by backslash */
}

<SINGLE_QUOTE>\n {
  /* newline character inside quote */
}

<SINGLE_QUOTE>. {
  /* any character (except newline) inside quote */
}

 /* ---------------------------------------------------------------------------
  * number literals
  * --------------------------------------------------------------------------- */

(0|[1-9][0-9]*) {
  /* a numeric integer value */
  arangodb::aql::AstNode* node = nullptr;
  auto parser = yyextra;

  try {
    int64_t value1 = arangodb::basics::StringUtils::int64_check(std::string(yytext, yyleng));
    node = parser->ast()->createNodeValueInt(value1);
  }
  catch (...) {
    /* the literal could not be converted to an int64_t: retry as a double.
       The floating-point rule below detects a failed TRI_DoubleString() call
       via TRI_errno(), so the same check is applied here; the try/catch is
       kept so that an exception during the fallback still ends in a warning */
    bool outOfRange = true;

    try {
      double value2 = TRI_DoubleString(yytext);

      if (TRI_errno() == TRI_ERROR_NO_ERROR) {
        node = parser->ast()->createNodeValueDouble(value2);
        outOfRange = false;
      }
    }
    catch (...) {
      /* handled below: outOfRange is still true */
    }

    if (outOfRange) {
      parser->registerWarning(
        TRI_ERROR_QUERY_NUMBER_OUT_OF_RANGE,
        TRI_errno_string(TRI_ERROR_QUERY_NUMBER_OUT_OF_RANGE),
        yylloc->first_line,
        yylloc->first_column);
      node = parser->ast()->createNodeValueNull();
    }
  }

  yylval->node = node;

  return T_INTEGER;
}

(0|[1-9][0-9]*)((\.[0-9]+)?([eE][\-\+]?[0-9]+)?) {
  /* a numeric double value. This pattern also matches plain integers, but
     for matches of equal length the integer rule above is listed first and
     therefore wins */
  arangodb::aql::AstNode* node = nullptr;
  auto parser = yyextra;
  double value = TRI_DoubleString(yytext);

  if (TRI_errno() != TRI_ERROR_NO_ERROR) {
    parser->registerWarning(TRI_ERROR_QUERY_NUMBER_OUT_OF_RANGE, TRI_errno_string(TRI_ERROR_QUERY_NUMBER_OUT_OF_RANGE), yylloc->first_line, yylloc->first_column);
    node = parser->ast()->createNodeValueNull();
  }
  else {
    node = parser->ast()->createNodeValueDouble(value);
  }

  yylval->node = node;

  return T_DOUBLE;
}

 /* ---------------------------------------------------------------------------
  * bind parameters
  * --------------------------------------------------------------------------- */

@@?(_+[a-zA-Z0-9]+[a-zA-Z0-9_]*|[a-zA-Z0-9][a-zA-Z0-9_]*) {
  /* bind parameters must start with a @
     if followed by another @, this is a collection name parameter.
     Only the first @ is stripped from the registered name */
  yylval->strval.value = yyextra->query()->registerString(yytext + 1, yyleng - 1);
  yylval->strval.length = yyleng - 1;
  return T_PARAMETER;
}

 /* ---------------------------------------------------------------------------
  * whitespace etc.
  * --------------------------------------------------------------------------- */

[ \t\r]+ {
  /* whitespace is ignored */
}

[\n] {
  /* a new line starts: reset the column counter */
  yycolumn = 0;
}

 /* ---------------------------------------------------------------------------
  * comments
  * --------------------------------------------------------------------------- */

<INITIAL>"//" {
  BEGIN(COMMENT_SINGLE);
}

<COMMENT_SINGLE>\n {
  /* line numbers are counted elsewhere already */
  yycolumn = 0;
  BEGIN(INITIAL);
}

<COMMENT_SINGLE>[^\n]+ {
  /* everything else */
}

<INITIAL>"/*" {
  BEGIN(COMMENT_MULTI);
}

<COMMENT_MULTI>"*/" {
  BEGIN(INITIAL);
}

<COMMENT_MULTI>[^*\n]+ {
  // eat comment in chunks
}

<COMMENT_MULTI>"*" {
  // eat the lone star
}

<COMMENT_MULTI>\n {
  /* line numbers are counted elsewhere already */
  yycolumn = 0;
}

 /* ---------------------------------------------------------------------------
  * special transformation for NOT IN to T_NIN
  * --------------------------------------------------------------------------- */

<NOT>(?i:IN)/[^_a-zA-Z0-9] {
  /* T_NOT + T_IN => T_NIN
     The trailing context requires that IN is not merely the beginning of a
     longer identifier (e.g. "NOT inactive"); such input falls through to the
     catch-all rule of this start condition and yields T_NOT */
  BEGIN(INITIAL);
  return T_NIN;
}

<NOT>[\r\t\n ] {
  /* ignore whitespace */
}

<NOT>. {
  /* found something different to T_IN */
  /* now push the character back into the input stream and return a T_NOT token */
  /* NOTE(review): YY_USER_ACTION has already advanced yycolumn and the
     parser offset for the character that is pushed back here, so it is
     counted a second time when it is scanned again - confirm whether the
     parser offers a way to undo this */
  BEGIN(INITIAL);
  yyless(0);
  return T_NOT;
}

<NOT><<EOF>> {
  /* make sure that we still return a T_NOT when we reach the end of the input */
  BEGIN(INITIAL);
  return T_NOT;
}

. {
  /* anything else is returned as it is */
  return (int) yytext[0];
}

%%