// Source: mirror of https://gitee.com/bigwinds/arangodb (C++, 1432 lines, 48 KiB)
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2016 by EMC Corporation, All Rights Reserved
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is EMC Corporation
|
|
///
|
|
/// @author Andrey Abramov
|
|
/// @author Vasiliy Nabatchikov
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef _MSC_VER
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#include <stdint.h>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include "parser_context.hpp"
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#if (__GNUC__ >= 7)
|
|
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=0"
|
|
#endif
|
|
#endif
|
|
|
|
using namespace iresearch::iql;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- static variables
|
|
// -----------------------------------------------------------------------------
|
|
|
|
namespace {
|
|
const parser::semantic_type UNKNOWN = 0; // no known value
|
|
const parser::semantic_type TRUE = 1; // expression evaluating to true
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- constructors and destructors
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief create a parsing context over the query text 'sData' that resolves
///        function calls against 'functions'
////////////////////////////////////////////////////////////////////////////////
parser_context::parser_context(
  std::string const& sData, functions const& functions /*= defaults::FUNCTIONS*/
)
  : m_sData(sData),
    m_functions(functions),
    m_nNext(0),
    m_eState(StateType::NONE) {
  // no error, filter or limit has been recorded yet
  m_error.first = false;
  m_filter.first = false;
  m_limit.first = false;

  // position 0 holds the error node (a.k.a. UNKNOWN),
  // position 1 holds the 'BOOL_TRUE' node
  m_nodes.resize(2);
  m_nodes[1].type = query_node::NodeType::BOOL_TRUE;
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- parser operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief error printer
|
|
/// NOTE: this method is not triggered if all GLR states collapse to fail
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void parser_context::yyerror(
|
|
parser::location_type const& location, std::string const& sError
|
|
) {
|
|
m_error.first = true;
|
|
m_error.second.sMessage = sError;
|
|
m_error.second.nStart = location.begin.column;
|
|
m_error.second.nEnd = location.end.column;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief lexical analyzer
|
|
/// The semantic value of the token (if it has one) is stored into the
|
|
/// variable 'value', the position of the token is stored into the
|
|
/// variable 'location' with fields:
|
|
/// begin.line, begin.column, end.line, end.column
|
|
/// @return numeric code which represents a token type
|
|
/// A token type code of zero is returned if the end-of-input is
|
|
/// encountered
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::yylex(
|
|
parser::semantic_type& value, parser::location_type& location
|
|
) {
|
|
location.begin.column = (decltype(location.begin.column))m_nNext;
|
|
|
|
parser::token_type type = next();
|
|
|
|
location.end.column = (decltype(location.begin.column))m_nNext;
|
|
value = UNKNOWN; // reset to undefined
|
|
|
|
return type;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- value operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief create a node representing a sequence literal
|
|
/// @return ID of the node with the literal or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::sequence(
|
|
parser::location_type const& location
|
|
) {
|
|
if (location.end.column < location.begin.column ||
|
|
m_sData.size() < location.end.column) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // index out of bounds
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::SEQUENCE;
|
|
node.sValue = m_sData.substr(
|
|
location.begin.column, location.end.column - location.begin.column
|
|
);
|
|
|
|
return value; // ID of new node
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- node operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief append a value to a node
|
|
/// @return ID of the node with appended value or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::append(
|
|
parser::semantic_type const& value, parser::location_type const& location
|
|
) {
|
|
auto& node = find_node(value);
|
|
|
|
if (query_node::NodeType::SEQUENCE != node.type) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
std::string sValue = m_sData.substr(
|
|
location.begin.column, location.end.column - location.begin.column
|
|
);
|
|
|
|
node.sValue.append(sValue);
|
|
|
|
return value; // ID of modified node
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief boost node rank by the specified value
|
|
/// @return ID of the boosted node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::boost(
|
|
parser::semantic_type const& value, parser::location_type const& location
|
|
) {
|
|
auto& node = find_node(value);
|
|
char const* pcStart = &(m_sData.c_str()[location.begin.column]);
|
|
char* pcNext;
|
|
float fValue = strtof(pcStart, &pcNext);
|
|
|
|
if (pcNext - pcStart != location.end.column - location.begin.column) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
node.fBoost *= fValue;
|
|
|
|
return value; // ID of modified node
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief create a node representing a function call
|
|
/// @return ID of the node with the function call or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::function(
|
|
parser::semantic_type const& name, parser::semantic_type const& args
|
|
) {
|
|
auto& nameNode = find_node(name);
|
|
auto& argsNode = find_node(args);
|
|
size_t nArgsCount;
|
|
bool bArgsDirect = false;
|
|
|
|
if (query_node::NodeType::SEQUENCE != nameNode.type) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // invalid name
|
|
}
|
|
|
|
switch (argsNode.type) {
|
|
case query_node::NodeType::LIST:
|
|
nArgsCount = argsNode.children.size();
|
|
break;
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!argsNode.pFnBoolean && !argsNode.pFnSequence) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // invalid args
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::UNION: // fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::BOOL_TRUE: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE: // fall through
|
|
case query_node::NodeType::SEQUENCE:
|
|
nArgsCount = 1;
|
|
bArgsDirect = true;
|
|
break;
|
|
case query_node::NodeType::UNKNOWN:
|
|
nArgsCount = 0;
|
|
break;
|
|
default:
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // invalid args
|
|
}
|
|
|
|
// function type depends on the parent node and can be any of the following
|
|
auto* pBestBoolFn =
|
|
find_best_function(nameNode.sValue, nArgsCount, m_functions.boolFns);
|
|
auto* pBestOrderFn =
|
|
find_best_function(nameNode.sValue, nArgsCount, m_functions.orderFns);
|
|
auto* pBestSeqFn =
|
|
find_best_function(nameNode.sValue, nArgsCount, m_functions.seqFns);
|
|
|
|
if (!pBestBoolFn && !pBestOrderFn && !pBestSeqFn) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // unknown fn
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
if (bArgsDirect) {
|
|
node.children.emplace_back(args);
|
|
}
|
|
else {
|
|
// after call to creat_node(...) 'nameNode' and 'argsNode' values are undefined
|
|
node.children = find_node(args).children; // get new reference to args node
|
|
}
|
|
|
|
node.pFnBoolean = pBestBoolFn;
|
|
node.pFnOrder = pBestOrderFn;
|
|
node.pFnSequence = pBestSeqFn;
|
|
node.type = query_node::NodeType::FUNCTION;
|
|
node.sValue = nameNode.sValue;
|
|
|
|
return value; // ID of new node
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief append a specified node to the specified list
|
|
/// @return ID of the list node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::list(
|
|
parser::semantic_type const& value1, parser::semantic_type const& value2
|
|
) {
|
|
auto& node1 = find_node(value1);
|
|
auto& node2 = find_node(value2);
|
|
|
|
if (query_node::NodeType::LIST == node1.type) {
|
|
if (query_node::NodeType::LIST == node2.type) {
|
|
node1.children.insert(
|
|
node1.children.end(),
|
|
node2.children.begin(),
|
|
node2.children.end()
|
|
);
|
|
}
|
|
else {
|
|
node1.children.emplace_back(value2);
|
|
}
|
|
|
|
return value1; // ID of modified node
|
|
}
|
|
|
|
if (query_node::NodeType::LIST == node2.type) {
|
|
return list(value2, value1);
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::LIST;
|
|
node.children.emplace_back(value1);
|
|
node.children.emplace_back(value2);
|
|
|
|
return value; // ID of new node
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief negate the result of the specified node a value to a node
|
|
/// @return ID of the negated node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::negation(
|
|
parser::semantic_type const& value
|
|
) {
|
|
auto nodeId = try_eval(value, true, false, false); // only boolean
|
|
auto nodeItr = m_negatedNodeCache.find(nodeId);
|
|
|
|
if (nodeItr != m_negatedNodeCache.end()) {
|
|
return nodeItr->second;
|
|
}
|
|
|
|
auto& node = find_node(nodeId);
|
|
parser::semantic_type newValue;
|
|
|
|
switch (node.type) {
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node.pFnBoolean) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN); // only boolean
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::BOOL_TRUE: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
{
|
|
auto& newNode = create_node(newValue);
|
|
|
|
newNode = node;
|
|
newNode.bNegated = !newNode.bNegated;
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::INTERSECTION:
|
|
{
|
|
auto& newNode = create_node(newValue);
|
|
|
|
newNode = node;
|
|
newNode.type = query_node::NodeType::UNION;
|
|
|
|
for (auto& child: newNode.children) {
|
|
child = negation(child);
|
|
|
|
if (child == UNKNOWN) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::UNION:
|
|
{
|
|
if (node.children.empty()) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
auto itr = node.children.begin();
|
|
|
|
newValue = negation(*(itr++));
|
|
|
|
for (auto end = node.children.end(); itr != end; ++itr) {
|
|
newValue = op_and(newValue, negation(*itr));
|
|
}
|
|
}
|
|
|
|
break;
|
|
default:
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
m_negatedNodeCache.emplace(nodeId, newValue);
|
|
m_negatedNodeCache.emplace(newValue, nodeId);
|
|
|
|
return newValue; // ID of new node
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief construct a range from the operands
|
|
/// @return ID of the range definition node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::range(
|
|
parser::semantic_type const& value1, bool bInclusive1,
|
|
parser::semantic_type const& value2, bool bInclusive2
|
|
) {
|
|
auto minNodeId = try_eval(value1, false, false, true); // only sequence nodes
|
|
auto maxNodeId = value1 == value2 ? minNodeId : try_eval(value2, false, false, true); // only sequence nodes (don't try_eval(...) twice)
|
|
auto& minNode = find_node(minNodeId);
|
|
auto& maxNode = find_node(maxNodeId);
|
|
|
|
// only support values are range parameters
|
|
if (!((query_node::NodeType::FUNCTION == minNode.type && minNode.pFnSequence) || query_node::NodeType::SEQUENCE == minNode.type || query_node::NodeType::UNKNOWN == minNode.type) ||
|
|
!((query_node::NodeType::FUNCTION == maxNode.type && maxNode.pFnSequence) || query_node::NodeType::SEQUENCE == maxNode.type || query_node::NodeType::UNKNOWN == maxNode.type) ||
|
|
(query_node::NodeType::UNKNOWN == minNode.type && query_node::NodeType::UNKNOWN == maxNode.type)) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::RANGE;
|
|
node.bBeginInclusive = bInclusive1;
|
|
node.bEndInclusive = bInclusive2;
|
|
node.children.emplace_back(minNodeId);
|
|
node.children.emplace_back(maxNodeId);
|
|
|
|
return value;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- comparison operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief compare operands for equality
|
|
/// @return ID of the range definition node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::op_eq(
|
|
parser::semantic_type const& value1, parser::semantic_type const& value2
|
|
) {
|
|
auto nameNodeId = try_eval(value1, false, false, true); // accept only sequences
|
|
auto& rangeNodeId = value2;
|
|
auto& nameNode = find_node(nameNodeId);
|
|
auto& rangeNode = find_node(rangeNodeId);
|
|
|
|
// only support values are range parameters
|
|
if (!((query_node::NodeType::FUNCTION == nameNode.type && nameNode.pFnSequence) || query_node::NodeType::SEQUENCE == nameNode.type) ||
|
|
query_node::NodeType::RANGE != rangeNode.type) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::EQUAL;
|
|
node.children.emplace_back(nameNodeId);
|
|
node.children.emplace_back(rangeNodeId);
|
|
|
|
return value;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief compare operands for likeness (i.e. phrase query)
|
|
/// @return ID of the range definition node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::op_like(
|
|
parser::semantic_type const& value1, parser::semantic_type const& value2
|
|
) {
|
|
auto leftNodeId = try_eval(value1, false, false, true); // only sequence
|
|
auto rightNodeId = try_eval(value2, false, false, true); // only sequence
|
|
auto& node1 = find_node(leftNodeId);
|
|
auto& node2 = find_node(rightNodeId);
|
|
|
|
// only support values are range parameters
|
|
if (!((query_node::NodeType::FUNCTION == node1.type && node1.pFnSequence) || query_node::NodeType::SEQUENCE == node1.type) ||
|
|
!((query_node::NodeType::FUNCTION == node2.type && node2.pFnSequence) || query_node::NodeType::SEQUENCE == node2.type)) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::LIKE;
|
|
node.children.emplace_back(leftNodeId);
|
|
node.children.emplace_back(rightNodeId);
|
|
|
|
return value;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- filter operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief logically AND two nodes
|
|
/// @return ID of the logically modified node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::op_and(
|
|
parser::semantic_type const& value1, parser::semantic_type const& value2
|
|
) {
|
|
auto leftNodeId = try_eval(value1, true, false, false); // only boolean
|
|
auto rightNodeId = try_eval(value2, true, false, false); // only boolean
|
|
auto& node1 = find_node(leftNodeId);
|
|
auto& node2 = find_node(rightNodeId);
|
|
|
|
switch (node1.type) {
|
|
case query_node::NodeType::INTERSECTION:
|
|
switch (node2.type) {
|
|
case query_node::NodeType::INTERSECTION:
|
|
{
|
|
parser::semantic_type value = leftNodeId;
|
|
|
|
for (auto& child: node2.children) {
|
|
value = op_and(value, child);
|
|
}
|
|
|
|
return value; // ID of new node
|
|
}
|
|
case query_node::NodeType::UNION:
|
|
return op_and(rightNodeId, leftNodeId);
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node2.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
add_child(node1.children, rightNodeId, false);
|
|
|
|
return leftNodeId; // ID of modified node
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node2.bNegated ? rightNodeId : leftNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::UNION:
|
|
switch (node2.type) {
|
|
case query_node::NodeType::UNION:
|
|
{
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::UNION;
|
|
|
|
for (auto& child1: node1.children) {
|
|
for (auto && child2: node2.children) {
|
|
auto child = op_and(child1, child2);
|
|
|
|
if (child == UNKNOWN) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
add_child(node.children, child, true);
|
|
}
|
|
}
|
|
|
|
return value;
|
|
}
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node2.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
{
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.type = query_node::NodeType::UNION;
|
|
|
|
for (auto& child1: node1.children) {
|
|
parser::semantic_type value2Copy;
|
|
auto& node2Copy = create_node(value2Copy);
|
|
|
|
node2Copy = node2;
|
|
|
|
//auto child = op_and(child1, value2Copy);
|
|
auto child = op_and(child1, rightNodeId);
|
|
|
|
if (child == UNKNOWN) {
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
add_child(node.children, child, true);
|
|
}
|
|
|
|
return node.children.size() == 1 ? node.children[0] : value; // ID of existing/new node
|
|
}
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node2.bNegated ? rightNodeId : leftNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node1.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
switch (node2.type) {
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::UNION:
|
|
return op_and(rightNodeId, leftNodeId);
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node2.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
if (leftNodeId == rightNodeId) {
|
|
return leftNodeId; // ID of unmodified node
|
|
}
|
|
|
|
{
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.children.emplace_back(leftNodeId);
|
|
node.children.emplace_back(rightNodeId);
|
|
node.type = query_node::NodeType::INTERSECTION;
|
|
|
|
return value; // ID of new node
|
|
}
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node2.bNegated ? rightNodeId : leftNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node1.bNegated ? leftNodeId : rightNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief logically OR two nodes
|
|
/// @return ID of the logically modified node or UNKNOWN on error
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::op_or(
|
|
parser::semantic_type const& value1, parser::semantic_type const& value2
|
|
) {
|
|
auto leftNodeId = try_eval(value1, true, false, false); // only boolean
|
|
auto rightNodeId = try_eval(value2, true, false, false); // only boolean
|
|
auto& node1 = find_node(leftNodeId);
|
|
auto& node2 = find_node(rightNodeId);
|
|
|
|
switch (node1.type) {
|
|
case query_node::NodeType::UNION:
|
|
switch (node2.type) {
|
|
case query_node::NodeType::UNION:
|
|
{
|
|
parser::semantic_type value = leftNodeId;
|
|
|
|
for (auto& child: node2.children) {
|
|
value = op_or(value, child);
|
|
}
|
|
|
|
return value; // ID of new node
|
|
}
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node2.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
add_child(node1.children, rightNodeId, true);
|
|
|
|
return leftNodeId; // ID of modified node
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node2.bNegated ? leftNodeId : rightNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node1.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
switch (node2.type) {
|
|
case query_node::NodeType::UNION:
|
|
return op_or(rightNodeId, leftNodeId);
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node2.pFnBoolean) {
|
|
break; // can only have boolean functions in intersections
|
|
}
|
|
// fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
if (leftNodeId == rightNodeId) {
|
|
return leftNodeId; // ID of unmodified node
|
|
}
|
|
|
|
{
|
|
parser::semantic_type value;
|
|
auto& node = create_node(value);
|
|
|
|
node.children.emplace_back(leftNodeId);
|
|
node.children.emplace_back(rightNodeId);
|
|
node.type = query_node::NodeType::UNION;
|
|
|
|
return value; // ID of new node
|
|
}
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node2.bNegated ? leftNodeId : rightNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
break;
|
|
case query_node::NodeType::BOOL_TRUE:
|
|
return node1.bNegated ? rightNodeId : leftNodeId;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
return *const_cast<parser::semantic_type*>(&UNKNOWN);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- query operations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief add an order field
|
|
/// @return success
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool parser_context::addOrder(
|
|
parser::semantic_type const& value, bool bAscending
|
|
) {
|
|
auto nodeId = try_eval(value, false, true, true); // accept order or sequence
|
|
auto& node = find_node(nodeId);
|
|
|
|
switch (node.type) {
|
|
case query_node::NodeType::FUNCTION: // fall through
|
|
if (!node.pFnOrder && !node.pFnSequence) {
|
|
break; // no applicable functions
|
|
}
|
|
case query_node::NodeType::SEQUENCE:
|
|
m_order.emplace_back(nodeId, bAscending);
|
|
|
|
return true;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
return false; // no other types of nodes are supported
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief set a limit on the result set
|
|
/// @return success
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool parser_context::setLimit(parser::semantic_type const& value) {
|
|
auto nodeId = try_eval(value, false, false, true); // only sequence
|
|
auto& node = find_node(nodeId);
|
|
|
|
// only support values that are available during expression compile time
|
|
if (query_node::NodeType::SEQUENCE != node.type) {
|
|
return false; // no other types of nodes are supported
|
|
}
|
|
|
|
char const* pcStart = node.sValue.c_str();
|
|
char* pcNext;
|
|
float fValue = strtof(pcStart, &pcNext);
|
|
|
|
m_limit.second = (size_t)fValue;
|
|
m_limit.first =
|
|
fValue == (float)(m_limit.second) &&
|
|
(size_t)(pcNext - pcStart) == node.sValue.size();
|
|
|
|
return m_limit.first;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief set the filter portion of a query
|
|
/// @return success
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool parser_context::setQuery(parser::semantic_type const& value) {
|
|
auto nodeId = try_eval(value, true, false, false); // only accept boolean
|
|
auto& node = find_node(nodeId);
|
|
|
|
// only support values that are conditional expressions
|
|
switch (node.type) {
|
|
case query_node::NodeType::FUNCTION:
|
|
if (!node.pFnBoolean) {
|
|
break; // only boolean functions allowed as query root
|
|
}
|
|
case query_node::NodeType::UNION: // fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
case query_node::NodeType::BOOL_TRUE: // fall through
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
m_filter.first = true;
|
|
m_filter.second = nodeId;
|
|
|
|
return true;
|
|
default: {} // NOOP
|
|
}
|
|
|
|
m_filter.first = false;
|
|
|
|
return false; // no other types of nodes are supported
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- protected functions
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief retrieve current state of the context
|
|
/// 'pLastError' is set to values from last call to yyerror(...)
|
|
/// @return the next position to be parsed
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser_context::query_state parser_context::current_state() const {
|
|
return {
|
|
/*nOffset =*/ m_nNext,
|
|
/*pnFilter =*/ m_filter.first ? &m_filter.second : nullptr,
|
|
/*order =*/ m_order,
|
|
/*pnLimit =*/ m_limit.first ? &m_limit.second : nullptr,
|
|
/*pError =*/ m_error.first ? &m_error.second : nullptr
|
|
};
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief retrieve a node by 'value'
|
|
/// @return requested node or UNKNOWN if not found
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser_context::query_node const& parser_context::find_node(
|
|
parser::semantic_type const& value
|
|
) const {
|
|
#if defined(__APPLE__)
|
|
#pragma clang diagnostic push
|
|
#pragma clang diagnostic ignored "-Wtautological-compare"
|
|
#elif defined (__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wtype-limits"
|
|
#elif defined(_MSC_VER)
|
|
#pragma warning(disable: 4127) // conditional expression is constant
|
|
#endif
|
|
|
|
// parser::semantic_type may be defined as a signed value in parser.yy
|
|
if (std::is_signed<parser::semantic_type>::value && value < 0) {
|
|
return m_nodes[0];
|
|
}
|
|
|
|
#if defined(__APPLE__)
|
|
#pragma clang diagnostic pop
|
|
#elif defined (__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#elif defined(_MSC_VER)
|
|
#pragma warning(default: 4127)
|
|
#endif
|
|
|
|
return value < m_nodes.size() ? m_nodes[value] : m_nodes[0];
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief output the branch starting at 'root' as a string representation
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void parser_context::print(
|
|
std::ostream & out, parser::semantic_type const& root, bool bBoost, bool bId
|
|
) {
|
|
auto const& node = find_node(root);
|
|
std::string sChildDelim = "";
|
|
|
|
switch (node.type) {
|
|
case query_node::NodeType::UNION:
|
|
if (bBoost) out << node.fBoost << "*";
|
|
out << "{";
|
|
sChildDelim = " || ";
|
|
break;
|
|
case query_node::NodeType::INTERSECTION:
|
|
if (bBoost) out << node.fBoost << "*";
|
|
out << "{";
|
|
sChildDelim = " && ";
|
|
break;
|
|
case query_node::NodeType::EQUAL:
|
|
if (bBoost) out << node.fBoost << "*";
|
|
if (bBoost || bId) out << "(";
|
|
sChildDelim = node.bNegated ? " != " : " == ";
|
|
break;
|
|
case query_node::NodeType::LIKE:
|
|
if (bBoost) out << node.fBoost << "*";
|
|
if (bBoost || bId) out << node.fBoost << "(";
|
|
sChildDelim = node.bNegated ? " !~= " : " ~= ";
|
|
break;
|
|
case query_node::NodeType::FUNCTION: // fall through
|
|
out << "'" << node.sValue << "'(";
|
|
sChildDelim = ", ";
|
|
break;
|
|
case query_node::NodeType::LIST:
|
|
out << "(";
|
|
break;
|
|
case query_node::NodeType::RANGE:
|
|
if (node.children.size() == 2 && node.children[0] == node.children[1]) {
|
|
print(out, node.children[0], bBoost, bId); // same node, i.e. ==
|
|
return;
|
|
}
|
|
out << (node.bBeginInclusive ? "[" : "(");
|
|
sChildDelim = ", ";
|
|
break;
|
|
case query_node::NodeType::SEQUENCE:
|
|
out << "'" << node.sValue << "'";
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
default:
|
|
out << "\?\?\?(" << node.type << ")";
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
}
|
|
|
|
std::string sDelim = "";
|
|
|
|
for (auto& child: node.children) {
|
|
out << sDelim;
|
|
print(out, child, bBoost, bId);
|
|
sDelim = sChildDelim;
|
|
}
|
|
|
|
switch (node.type) {
|
|
case query_node::NodeType::UNION: // fall through
|
|
case query_node::NodeType::INTERSECTION: // fall through
|
|
out << "}";
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
case query_node::NodeType::EQUAL: // fall through
|
|
case query_node::NodeType::LIKE:
|
|
if (bBoost || bId) out << ")";
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
case query_node::NodeType::FUNCTION: // fall through
|
|
case query_node::NodeType::LIST:
|
|
out << ")";
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
case query_node::NodeType::RANGE:
|
|
out << (node.bEndInclusive ? "]" : ")");
|
|
if (bId) out << "@" << root;
|
|
return;
|
|
default: {} // NOOP
|
|
}
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- private functions
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief add a child to children, ensuring there are no duplicates or overlaps
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void parser_context::add_child(
|
|
std::vector<size_t>& children,
|
|
parser::semantic_type const& child,
|
|
bool bRemoveSuperset
|
|
) {
|
|
auto& node = find_node(child);
|
|
std::unordered_set<size_t> subChildren; // only for bRemoveSuperset
|
|
|
|
if (bRemoveSuperset) {
|
|
if (query_node::NodeType::INTERSECTION == node.type) {
|
|
subChildren.insert(node.children.begin(), node.children.end());
|
|
}
|
|
else {
|
|
subChildren.emplace(child);
|
|
}
|
|
}
|
|
|
|
for (auto itr = children.begin(); itr != children.end();) {
|
|
auto& existing = *(itr++);
|
|
|
|
if (existing == child) {
|
|
return; // nothing to do, child already present
|
|
}
|
|
|
|
if (!bRemoveSuperset) {
|
|
continue;
|
|
}
|
|
|
|
// ...........................................................................
|
|
// check if one of the sub-children is an intersection superset
|
|
// e.g. (A && B) || (A && B && C) <-- pick (A && B)
|
|
// ...........................................................................
|
|
auto& existingNode = find_node(existing);
|
|
|
|
if (query_node::NodeType::INTERSECTION != existingNode.type) {
|
|
continue;
|
|
}
|
|
|
|
size_t nMatching = 0;
|
|
|
|
// count number of children from existingNode that exist in child.children
|
|
for (auto& existingSubChild : existingNode.children) {
|
|
if (subChildren.find(existingSubChild) != subChildren.end()) {
|
|
++nMatching;
|
|
}
|
|
}
|
|
|
|
// all existingNode.children in child.children
|
|
// e.g. existing:(A && B), new:(A && B && C) <-- pick existing
|
|
if (existingNode.children.size() == nMatching) {
|
|
return;
|
|
}
|
|
|
|
// all child.children in existingNode.children
|
|
// e.g. existing:(A && B && C), new:(A && B) <-- pick new
|
|
if (subChildren.size() == nMatching) {
|
|
itr = children.erase(itr - 1);
|
|
}
|
|
}
|
|
|
|
children.emplace_back(child);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief create a new node
|
|
/// NOTE: previous results from find_node(...) are undefined after return
|
|
/// @return new node and set 'value' to ID of new node
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser_context::query_node& parser_context::create_node(
  parser::semantic_type& value
) {
  m_nodes.emplace_back(); // append a default-constructed node

  auto& created = m_nodes.back();

  value = m_nodes.size() - 1; // the ID reported back is the node's index

  return created;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief find a function in 'fns' best matching the supplied arguments
|
|
/// @return pointer to function or nullptr if no best match found
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
template <typename function_type>
|
|
function_type const* iresearch::iql::parser_context::find_best_function(
|
|
std::string const& sName,
|
|
size_t nArgsCount,
|
|
std::unordered_multimap<std::string, function_type> const& fns
|
|
) const {
|
|
auto fnItr = fns.equal_range(sName);
|
|
function_type const* pBestFn = nullptr; // init to non-match value
|
|
|
|
// find best matching function based on number of arguments
|
|
for (auto itr = fnItr.first; itr != fnItr.second; ++itr) {
|
|
auto& fn = itr->second;
|
|
|
|
if (fn.m_nFixedArg > nArgsCount) {
|
|
continue; // too many arguments
|
|
}
|
|
|
|
if (fn.m_nFixedArg == nArgsCount || pBestFn == nullptr) {
|
|
if (pBestFn != nullptr && pBestFn->m_nFixedArg == fn.m_nFixedArg) {
|
|
return nullptr; // collision
|
|
}
|
|
|
|
pBestFn = &fn;
|
|
}
|
|
else if (fn.m_bVarArg) {
|
|
if (pBestFn->m_nFixedArg == fn.m_nFixedArg) {
|
|
return nullptr; // collision
|
|
}
|
|
|
|
// fn is a better match since it has more fixed args
|
|
if (pBestFn->m_nFixedArg < fn.m_nFixedArg) {
|
|
pBestFn = &fn;
|
|
}
|
|
}
|
|
}
|
|
|
|
return pBestFn;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief retrieve a node by 'value'
|
|
/// @return requested node or UNKNOWN if not found
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser_context::query_node& parser_context::find_node(
  parser::semantic_type const& value
) {
  // the lookup itself lives in the const overload; call it through a const
  // view of *this and strip the const from the result again
  parser_context const& self = *this;

  return const_cast<parser_context::query_node&>(self.find_node(value));
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next token
|
|
/// @return token type
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::next() {
  auto const unknown = parser::token_type::IQL_UNKNOWN;

  if (!(m_nNext < m_sData.size())) {
    return parser::token_type::IQL_EOF;
  }

  // ...........................................................................
  // inside a quoted sequence: either the closing quote or the sequence body
  // ...........................................................................
  if (StateType::NONE != m_eState) {
    char cQuote;

    if (StateType::SINGLE == m_eState) {
      cQuote = '\'';
    } else if (StateType::DOUBLE == m_eState) {
      cQuote = '"';
    } else {
      return unknown; // unsupported state
    }

    auto const closing = nextQuoted(false); // check for sequence end

    return unknown != closing ? closing : nextSequence(cQuote);
  }

  // ...........................................................................
  // outside of quotes try, in this order: whitespace, quoted literal start,
  // operator, keyword; the first reader that recognizes the input wins
  // ...........................................................................
  parser::token_type token = nextSeperator();

  if (unknown == token) {
    token = nextQuoted(true);
  }

  if (unknown == token) {
    token = nextOperator();
  }

  if (unknown == token) {
    token = nextKeyword();
  }

  if (unknown != token) {
    return token;
  }

  // ...........................................................................
  // bare sequence: runs until whitespace or punctuation, except that the very
  // first character may itself be punctuation
  // ...........................................................................
  auto const nStart = m_nNext;

  for (size_t const nCount = m_sData.size(); m_nNext < nCount; ++m_nNext) {
    auto const ch = static_cast<uint8_t>(m_sData[m_nNext]);

    if (isspace(ch)) {
      break;
    }

    if (m_nNext != nStart && ispunct(ch)) {
      break; // allow 1 char ispunct(...)
    }
  }

  return m_nNext != nStart
    ? parser::token_type::IQL_SEQUENCE : parser::token_type::IQL_UNKNOWN;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next token as a keyword
|
|
/// do not modify m_nNext if IQL_UNKNOWN
|
|
/// @return token type or IQL_UNKNOWN if not a keyword
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::nextKeyword() {
  // keywords are matched case-insensitively, the table holds the upper-case form
  static const std::unordered_map<std::string, parser::token_type> keywords = {
    { "NOT",   parser::token_type::IQL_NOT   },
    { "AND",   parser::token_type::IQL_AND   },
    { "OR",    parser::token_type::IQL_OR    },
    { "ORDER", parser::token_type::IQL_ORDER },
    { "ASC",   parser::token_type::IQL_ASC   },
    { "DESC",  parser::token_type::IQL_DESC  },
    { "LIMIT", parser::token_type::IQL_LIMIT },
  };

  size_t nEnd = m_nNext;

  // find end of token, i.e. the first whitespace or punctuation character
  for (size_t nCount = m_sData.size(); nEnd < nCount; ++nEnd) {
    auto const ch = static_cast<uint8_t>(m_sData[nEnd]);

    if (isspace(ch) || ispunct(ch)) {
      break;
    }
  }

  std::string sValue = m_sData.substr(m_nNext, nEnd - m_nNext); // ci value

  // toupper(...) is only defined for values representable as unsigned char
  // (or EOF), so a plain 'char' holding a non-ASCII byte must be converted
  // first, same as is done for isspace(...)/ispunct(...) above
  std::transform(
    sValue.begin(), sValue.end(), sValue.begin(),
    [](char c)->char {
      return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
    }
  );

  auto itr = keywords.find(sValue);

  if (itr == keywords.end()) {
    return parser::token_type::IQL_UNKNOWN; // m_nNext intentionally untouched
  }

  m_nNext = nEnd; // consume the keyword

  return itr->second;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next token as an operator
|
|
/// do not modify m_nNext if IQL_UNKNOWN
|
|
/// @return token type or IQL_UNKNOWN if not an operator
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::nextOperator() {
  // ...........................................................................
  // double char operators, tried before their single char prefixes
  // ...........................................................................
  struct operator2c_t {
    char const* sText;
    parser::token_type type;
  };
  static const operator2c_t operators2c[] = {
    { "~=", parser::token_type::IQL_LIKE     },
    { "!=", parser::token_type::IQL_NE       },
    { "<>", parser::token_type::IQL_NE       },
    { "<=", parser::token_type::IQL_LE       },
    { "==", parser::token_type::IQL_EQ       },
    { ">=", parser::token_type::IQL_GE       },
    { "&&", parser::token_type::IQL_AMPAMP   },
    { "||", parser::token_type::IQL_PIPEPIPE },
  };

  if (m_nNext + 1 < m_sData.size()) {
    char const* pcStart = m_sData.c_str() + m_nNext;

    for (auto const& op: operators2c) {
      if (0 == strncmp(op.sText, pcStart, 2)) {
        m_nNext += 2;

        return op.type;
      }
    }
  }

  // ...........................................................................
  // single char operators
  // ...........................................................................
  parser::token_type type;

  switch (m_sData[m_nNext]) {
   case ',': type = parser::token_type::IQL_COMMA;     break;
   case '*': type = parser::token_type::IQL_ASTERISK;  break;
   case '<': type = parser::token_type::IQL_LCHEVRON;  break;
   case '>': type = parser::token_type::IQL_RCHEVRON;  break;
   case '!': type = parser::token_type::IQL_EXCLAIM;   break;
   case '(': type = parser::token_type::IQL_LPAREN;    break;
   case ')': type = parser::token_type::IQL_RPAREN;    break;
   case '[': type = parser::token_type::IQL_LSBRACKET; break;
   case ']': type = parser::token_type::IQL_RSBRACKET; break;
   default:
    return parser::token_type::IQL_UNKNOWN; // not an operator, nothing consumed
  }

  ++m_nNext;

  return type;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next token as a start of a quoted sequence
|
|
/// do not modify m_nNext if IQL_UNKNOWN
|
|
/// @return token type or IQL_UNKNOWN if not a quote character
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::nextQuoted(bool bStart) {
  // bStart == true:  a quote opens a new sequence of the matching kind
  // bStart == false: a quote is only accepted if it closes the current sequence
  char const cNext = m_sData[m_nNext];
  auto eQuoteState = StateType::NONE;
  auto eQuoteToken = parser::token_type::IQL_UNKNOWN;

  if ('"' == cNext) {
    eQuoteState = StateType::DOUBLE;
    eQuoteToken = parser::token_type::IQL_DQUOTE;
  } else if ('\'' == cNext) {
    eQuoteState = StateType::SINGLE;
    eQuoteToken = parser::token_type::IQL_SQUOTE;
  } else {
    return parser::token_type::IQL_UNKNOWN; // not a quote character
  }

  // inside an existing sequence that is quoted with the other quote character
  if (!bStart && eQuoteState != m_eState) {
    return parser::token_type::IQL_UNKNOWN;
  }

  ++m_nNext;
  m_eState = bStart ? eQuoteState : StateType::NONE;

  return eQuoteToken;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next separator sequence (whitespace and /* ... */ comments)
|
|
/// @return token type
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::nextSeperator() {
  // Consumes any run of whitespace and terminated /* ... */ comments starting
  // at m_nNext. Returns IQL_SEP if anything was consumed, otherwise
  // IQL_UNKNOWN with m_nNext left unchanged.
  size_t const nCount = m_sData.size();
  char const* pcData = m_sData.c_str();
  bool bSeen = false;

  // iterate instead of recursing once per comment so that a long run of
  // comments in the input cannot grow the call stack
  for (;;) {
    // skip to the end of whitespace
    while (m_nNext < nCount && isspace(static_cast<uint8_t>(m_sData[m_nNext]))) {
      ++m_nNext;
      bSeen = true;
    }

    // treat comments as part of whitespace, i.e. /* ... */
    // both characters of the opener must lie inside the data
    if (!(m_nNext + 1 < nCount) || strncmp("/*", pcData + m_nNext, 2) != 0) {
      break;
    }

    size_t nClose = m_nNext + 2; // +2 for /*

    // find comment terminator, both of its characters must lie inside the data
    while (nClose + 1 < nCount && strncmp("*/", pcData + nClose, 2) != 0) {
      ++nClose;
    }

    if (!(nClose + 1 < nCount)) {
      break; // not a comment since no */ found
    }

    m_nNext = nClose + 2; // +2 for */
    bSeen = true; // continue with any subsequent spaces and comments
  }

  return bSeen ?
    parser::token_type::IQL_SEP : parser::token_type::IQL_UNKNOWN;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief read next sequence terminated by character 'cSep'
|
|
/// do not modify m_nNext if IQL_UNKNOWN
|
|
/// @return token type or IQL_UNKNOWN if not a terminated sequence
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::token_type parser_context::nextSequence(char cSep) {
  size_t const nCount = m_sData.size();
  size_t nPos = m_nNext;

  // advance to the first occurrence of 'cSep' at or after the current position
  while (nPos < nCount && m_sData[nPos] != cSep) {
    ++nPos;
  }

  if (!(nPos < nCount)) {
    return parser::token_type::IQL_UNKNOWN; // unterminated, m_nNext untouched
  }

  m_nNext = nPos; // stop on the terminator itself, it is not consumed here

  return parser::token_type::IQL_SEQUENCE;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief evaluate a node (e.g. a deterministic function node) if possible
|
|
/// @return ID of the node with the evaluated data or the original node if no
|
|
///         further evaluation can be applied, UNKNOWN on name collision
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
parser::semantic_type parser_context::try_eval(
  parser::semantic_type value, bool bBoolean, bool bOrder, bool bSequence
) {
  // bBoolean/bOrder/bSequence select which kinds of function the caller is
  // willing to evaluate; each one is only kept if the node actually has a
  // function of that kind attached
  {
    auto& node = find_node(value);

    if (query_node::NodeType::FUNCTION != node.type) {
      return value; // cannot eval
    }

    bBoolean &= node.pFnBoolean != nullptr;
    bOrder &= node.pFnOrder != nullptr;
    bSequence &= node.pFnSequence != nullptr;
  }

  if ((bBoolean && (bOrder || bSequence)) || (bOrder && bSequence)) {
    return UNKNOWN; // name collision
  }

  if (!bBoolean && !bOrder && !bSequence) {
    return value; // cannot eval
  }

  // ...........................................................................
  // auto-evaluate deterministic values during parsing if possible
  // ...........................................................................

  // Evaluate the arguments first. A recursive try_eval(...) may call
  // create_node(...), after which earlier find_node(...) results are undefined
  // (see create_node), so the function node is looked up again on every access
  // and its children are addressed by index instead of by iterator.
  for (size_t i = 0, nCount = find_node(value).children.size();
       i < nCount;
       ++i) {
    // only sequence nodes
    auto const evaluated =
      try_eval(find_node(value).children[i], false, false, true);

    find_node(value).children[i] = evaluated;
  }

  // No nodes are created between here and the invocation of the deterministic
  // function, so 'node' and the argument views collected below stay valid.
  auto& node = find_node(value);
  std::vector<iresearch::string_ref> fnArgs;

  fnArgs.reserve(node.children.size());

  for (auto const& child: node.children) {
    auto& childNode = find_node(child);

    // only SEQUENCE args are supported by deterministic functions
    if (query_node::NodeType::SEQUENCE != childNode.type) {
      return value; // not all arguments deterministic, cannot eval
    }

    fnArgs.emplace_back(childNode.sValue);
  }

  if (bBoolean) {
    bool bResult = false;

    // check for successful invocation of the deterministic function
    if (!node.pFnBoolean->m_fnDeterminitic(bResult, fnArgs)) {
      return value; // not a deterministic function, cannot eval
    }

    return bResult ? TRUE : negation(TRUE); // TRUE/FALSE eval
  }

  if (bOrder) {
    order_function::deterministic_buffer_t buf;

    // check for successful invocation of the deterministic function
    if (!node.pFnOrder->m_fnDeterminitic(buf, fnArgs)) {
      return value; // not a deterministic function, cannot eval
    }

    // 'node' and 'fnArgs' must not be used past this point
    auto& order_node = create_node(value);

    order_node.sValue = buf;
    order_node.type = query_node::NodeType::SEQUENCE;

    return value; // ID of new node
  }

  if (bSequence) {
    sequence_function::deterministic_buffer_t buf;

    // check for successful invocation of the deterministic function
    if (!node.pFnSequence->m_fnDeterminitic(buf, fnArgs)) {
      return value; // not a deterministic function, cannot eval
    }

    // 'node' and 'fnArgs' must not be used past this point
    auto& seq_node = create_node(value);

    seq_node.sValue = buf;
    seq_node.type = query_node::NodeType::SEQUENCE;

    return value; // ID of new node
  }

  return value; // deterministic evaluation not possible
}
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- END-OF-FILE
|
|
// -----------------------------------------------------------------------------
|