diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index a7bea6b60a..f52357d090 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -408,10 +408,10 @@ void EnumerateCollectionNode::toJsonHelper (triagens::basics::Json& nodes, } //////////////////////////////////////////////////////////////////////////////// -/// @brief get vector of indexes with fields +/// @brief get vector of indices with fields //////////////////////////////////////////////////////////////////////////////// -std::vector EnumerateCollectionNode::getIndexesUnordered (vector attrs) const { +std::vector EnumerateCollectionNode::getIndicesUnordered (vector attrs) const { std::vector out; TRI_document_collection_t* document = _collection->documentCollection(); size_t const n = document->_allIndexes._length; @@ -499,7 +499,7 @@ EnumerateCollectionNode::CompareIndex (TRI_index_t* idx, return match; } -std::vector EnumerateCollectionNode::getIndexesOrdered (IndexMatchVec &attrs) const { +std::vector EnumerateCollectionNode::getIndicesOrdered (IndexMatchVec &attrs) const { std::vector out; TRI_document_collection_t* document = _collection->documentCollection(); @@ -831,6 +831,46 @@ void SortNode::toJsonHelper (triagens::basics::Json& nodes, nodes(json); } +class SortNodeFindMyExpressions : public WalkerWorker { + +public: + size_t _foundCalcNodes; + std::vector> _elms; + std::vector> _myVars; + + SortNodeFindMyExpressions(SortNode* me) + : _foundCalcNodes(0), + _elms(me->getElements()) + { + _myVars.resize(_elms.size()); + } + + bool before (ExecutionNode* en) { + if (en->getType() == triagens::aql::ExecutionNode::CALCULATION) { + auto cn = static_cast(en); + for (size_t n = 0; n < _elms.size(); n++) { + if (_elms[n].first->id == cn->outVariable()->id) { + _myVars[n] = std::make_pair(cn, _elms[n].second); + _foundCalcNodes ++; + break; + } + } + } + return _foundCalcNodes >= _elms.size(); + } +}; + +std::vector> SortNode::getCalcNodePairs () +{ + SortNodeFindMyExpressions findExp(this); + _dependencies[0]->walk(&findExp); + if (findExp._foundCalcNodes < _elements.size()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, + "SortNode wasn't able to locate all its CalculationNodes"); + } + return findExp._myVars; +} + // ----------------------------------------------------------------------------- // --SECTION-- methods of AggregateNode // ----------------------------------------------------------------------------- diff --git a/arangod/Aql/ExecutionNode.h b/arangod/Aql/ExecutionNode.h index b3743682cd..79f2805f53 100644 --- a/arangod/Aql/ExecutionNode.h +++ b/arangod/Aql/ExecutionNode.h @@ -654,10 +654,10 @@ namespace triagens { } //////////////////////////////////////////////////////////////////////////////// -/// @brief get vector of indexes that has any match in its fields with +/// @brief get vector of indices that has any match in its fields with //////////////////////////////////////////////////////////////////////////////// - std::vector getIndexesUnordered (vector attrs) const; + std::vector getIndicesUnordered (vector attrs) const; enum MatchType { FULL_MATCH, @@ -689,7 +689,7 @@ namespace triagens { /// the specified indexes. //////////////////////////////////////////////////////////////////////////////// - std::vector getIndexesOrdered (IndexMatchVec &attrs) const; + std::vector getIndicesOrdered (IndexMatchVec &attrs) const; TRI_vocbase_t* vocbase () const { return _vocbase; @@ -1487,6 +1487,10 @@ namespace triagens { std::vector> getElements () { return _elements; } + + + std::vector> getCalcNodePairs (); + // ----------------------------------------------------------------------------- // --SECTION-- private variables // ----------------------------------------------------------------------------- diff --git a/arangod/Aql/Expression.cpp b/arangod/Aql/Expression.cpp index bcabf5fc73..150dccfb74 100644 --- a/arangod/Aql/Expression.cpp +++ b/arangod/Aql/Expression.cpp @@ -334,6 +334,65 @@ AqlValue Expression::executeSimpleExpression (AstNode const* node, THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unhandled type in simple expression"); } + +bool Expression::isSimpleAccessReference() const { + if (!isSimple()) { + return false; + } + + auto expNode = _node; + + if (expNode->type != triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { + return false; + } + + while (expNode->type == triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { + expNode = expNode->getMember (0); + } + + return (expNode->type == triagens::aql::NODE_TYPE_REFERENCE); +} + +std::pair Expression::getAccessNRef() const { + if (!isSimple()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, + "getAccessNRef works only on simple expressions!"); + } + + auto expNode = _node; + std::vector attributeVector; + + if (expNode->type != triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, + "getAccessNRef works only on simple expressions!"); + } + + while (expNode->type == triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { + attributeVector.push_back(expNode->getStringValue()); + expNode = expNode->getMember (0); + } + + std::string attributeVectorStr = ""; + for (auto oneAttr = attributeVector.rbegin(); + oneAttr != attributeVector.rend(); + ++oneAttr) { + if (attributeVectorStr.size() > 0) + attributeVectorStr += std::string("."); + attributeVectorStr += *oneAttr; + } + + if (expNode->type != triagens::aql::NODE_TYPE_REFERENCE) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, + "getAccessNRef works only on simple expressions!"); + } + + auto var = static_cast(expNode->getData()); + + return std::make_pair(attributeVectorStr, var->name); + +} + + // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- diff --git a/arangod/Aql/Expression.h b/arangod/Aql/Expression.h index 13faa8cd01..3c8a034d07 100644 --- a/arangod/Aql/Expression.h +++ b/arangod/Aql/Expression.h @@ -146,10 +146,23 @@ namespace triagens { /// @brief check whether this is a simple expression. //////////////////////////////////////////////////////////////////////////////// - bool isSimple () { + bool isSimple () const { return _type == SIMPLE; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief check whether this is a simple access to a Reference. +//////////////////////////////////////////////////////////////////////////////// + + bool isSimpleAccessReference () const; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief this gives you ("variable.access", "Reference") +/// call isSimpleAccessReference in advance to enshure no exceptions. +//////////////////////////////////////////////////////////////////////////////// + + std::pair getAccessNRef() const; + // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 9cff6bf3be..aed6522060 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -374,7 +374,7 @@ class FilterToEnumCollFinder : public WalkerWorker { } } else { - std::vector idxs = node->getIndexesUnordered(attrs); + std::vector idxs = node->getIndicesUnordered(attrs); // make one new plan for every index in that replaces the // enumerate collection node with a RangeIndexNode . . . for (auto idx: idxs) { @@ -543,6 +543,8 @@ int triagens::aql::useIndexRange (Optimizer* opt, //////////////////////////////////////////////////////////////////////////////// class sortToIndexNode : public WalkerWorker { + using EN = triagens::aql::ExecutionNode; + SortNode *_sortNode; RangesInfo* _ranges; ExecutionPlan* _plan; @@ -569,180 +571,106 @@ class sortToIndexNode : public WalkerWorker { _idsToRemove.push_back(Node->id()); } - bool before (ExecutionNode* en) { - std::cout << "type:" << en->getTypeString() << "\n"; - size_t n = _vars.size(); - auto outvar = en->getVariablesSetHere(); - - if ((_executionNodesFound < n) && - en->getType() == triagens::aql::ExecutionNode::CALCULATION) { - - // Look up whether this is one of the calculation nodes we reference. - for (size_t i = 0; i < n; i++) { - if (_vars[i]->id == outvar[0]->id) { - _myVars[i] = static_cast(en); - _executionNodesFound++; - _idsToRemove.push_back (en->id()); - break; - } - } - if (_executionNodesFound == n) { - // ok we got all, study them. - // TODO - } - } - else if (_executionNodesFound == n) { - if (en->getType() == triagens::aql::ExecutionNode::FILTER) { - /// skip. we don't care. - /// TODO: check whether to ABORT here? - } - if (en->getType() == triagens::aql::ExecutionNode::SORT) { - return true; // pulling two sorts together is done elsewhere. - } - if (en->getType() == triagens::aql::ExecutionNode::LIMIT) { - return true; // LIMIT is criterion to stop - } - if (en->getType() == triagens::aql::ExecutionNode::INDEX_RANGE) { - // TODO: we should also match INDEX_RANGE later on. - // todo: this may only be done if there is a full index match. - } - else if (en->getType() == triagens::aql::ExecutionNode::ENUMERATE_COLLECTION) { - /* - std::cout << "blub\n"; - auto JsonPlan = _plan->toJson(TRI_UNKNOWN_MEM_ZONE, false); - auto JsonString = JsonPlan.toString(); - std::cout <<"Old Plan: \n" << JsonString << "\n"; - */ - size_t nVarsIndexable = 0; - std::vector> rangeInfo; - std::vector attributeVector; - EnumerateCollectionNode::IndexMatchVec attrs; - std::string collectionName; - - auto node = static_cast(en); - auto var = node->getVariablesSetHere()[0]; // should only be 1 - - auto sortElements = _sortNode->getElements(); - - for (size_t n = 0; n < sortElements.size(); n++) { - // we should have already made shure this works above. - TRI_ASSERT(sortElements[n].first->id == _myVars[n]->outVariable()->id); - bool ASC = sortElements[n].second; /// TODO what to do with this? - - auto exp = _myVars[n]->expression(); - - if (!exp->isSimple()) { - break; // nott simple? stop evaluation. - } - - auto expNode = exp->node(); - - if (expNode->type != triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { - break; // we only support attribute accesses. - } - - // digg through nested Attributes: - while (expNode->type == triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) { - attributeVector.push_back(expNode->getStringValue()); - expNode = expNode->getMember (0); - } - - // And concatenate the attributes again in reverse order: - std::string attributeVectorStr = ""; - for (auto oneAttr = attributeVector.rbegin(); - oneAttr != attributeVector.rend(); - ++oneAttr) { - if (attributeVectorStr.size() > 0) - attributeVectorStr += std::string("."); - attributeVectorStr += *oneAttr; - } - - // we now should have a Collection Reference: - if (expNode->type != triagens::aql::NODE_TYPE_REFERENCE) { - break; // some other operation - can't work with this. - } - - auto subVar = static_cast(expNode->getData()); - if (subVar->name != var->name) { - // No, the requested collection is not a reference to this. - break; - } - expNode = exp->node(); - - attrs.push_back(std::make_pair(attributeVectorStr, ASC)); - collectionName = expNode->getStringValue(); - - rangeInfo.push_back(std::vector()); - rangeInfo.at(nVarsIndexable).push_back(new RangeInfo(var->name, /// todo: asc/desc - attributeVectorStr, - ///(ASC)? a:b, - ///(ASC)? b:a, - - nullptr, nullptr)); - nVarsIndexable++; - } - - if (nVarsIndexable == 0) { - return true; // we didn't find anything replaceable by this index - } - - auto indexes = node->getIndexesOrdered(attrs); - - if (indexes.size() == 0) { - return true; - } - - - // make one new plan for every index in that replaces the - // enumerate collection node with a RangeIndexNode . . . - for (auto idx: indexes) { - - //can only use the index if it is a skip list or (a hash and we - //are checking equality) - std::cout << "FOUND INDEX!\n"; - - auto newPlan = _plan->clone(); - ExecutionNode* newNode = nullptr; - try{ - newNode = new IndexRangeNode( newPlan->nextId(), - node->vocbase(), - node->collection(), - node->outVariable(), - idx.index,/// TODO: estimate cost on match quality - rangeInfo); - newPlan->registerNode(newNode); - } - catch (...) { - if (newNode != nullptr) { - delete newNode; - } - delete newPlan; - throw; - } - newPlan->replaceNode(newPlan->getNodeById(node->id()), newNode); - - auto JsonPlan = newPlan->toJson(TRI_UNKNOWN_MEM_ZONE, false); - auto JsonString = JsonPlan.toString(); - std::cout <<"New Plan: \n" << JsonString << "\n"; - - if (idx.fullmatch) { // if the index replaces the sort, remove it. - for (auto idToRemove = _idsToRemove.begin(); - idToRemove != _idsToRemove.end(); - ++idToRemove) { - newPlan->unlinkNode(newPlan->getNodeById(*idToRemove)); - } - } - JsonPlan = newPlan->toJson(TRI_UNKNOWN_MEM_ZONE, false); - JsonString = JsonPlan.toString(); - std::cout <<"removed foo \n" << JsonString << "\n"; - - _out.push_back(newPlan); - } - - } - } - return false; + void RemoveSortNode (ExecutionPlan *newPlan) { + for (auto idToRemove = _idsToRemove.begin(); + idToRemove != _idsToRemove.end(); + ++idToRemove) { + newPlan->unlinkNode(newPlan->getNodeById(*idToRemove)); } + } + + void handleEnumerateCollectionNode(EnumerateCollectionNode* node) + { + auto collectionName = node->getVariablesSetHere()[0]->name; + auto sortParams = _sortNode->getCalcNodePairs(); + + EnumerateCollectionNode::IndexMatchVec attrs; + std::vector> rangeInfo; + size_t nVarsIndexable = 0; + + for (size_t n = 0; n < sortParams.size(); n++) { + bool ASC = sortParams[n].second; + auto oneSortExpression = sortParams[n].first->expression(); + _idsToRemove.push_back(sortParams[n].first->id()); + + if (!oneSortExpression->isSimpleAccessReference()) { + continue; + } + + auto simpleExpression = oneSortExpression->getAccessNRef(); + + if (simpleExpression.second != collectionName) { + continue; + } + + attrs.push_back(std::make_pair(simpleExpression.first, ASC)); + + rangeInfo.push_back(std::vector()); + + rangeInfo.at(nVarsIndexable).push_back(new RangeInfo(collectionName, + simpleExpression.first, + nullptr, nullptr)); + nVarsIndexable++; + } + + if (nVarsIndexable == 0) { + return; // we didn't find anything replaceable by indice + } + + auto indices = node->getIndicesOrdered(attrs); + + // make one new plan for each index that replaces this + // EnumerateCollectionNode with an IndexRangeNode + for (auto idx: indices) { + //can only use the index if it is a skip list or (a hash and we + //are checking equality) + auto newPlan = _plan->clone(); + ExecutionNode* newNode = nullptr; + try { + newNode = new IndexRangeNode( newPlan->nextId(), + node->vocbase(), + node->collection(), + node->outVariable(), + idx.index,/// TODO: estimate cost on match quality + rangeInfo); + newPlan->registerNode(newNode); + } + catch (...) { + if (newNode != nullptr) { + delete newNode; + } + delete newPlan; + throw; + } + + newPlan->replaceNode(newPlan->getNodeById(node->id()), newNode); + + if (idx.fullmatch) { // if the index superseedes the sort, remove it. + RemoveSortNode(newPlan); + } + _out.push_back(newPlan); + } + } + + bool before (ExecutionNode* en) { + std::cout << "type:" << en->getTypeString() << "\n"; + switch (en->getType()) { + default: // skip. we don't care. + case EN::FILTER: // skip. we don't care. + return false; /// TODO: check whether to ABORT here? + case EN::SORT: // pulling two sorts together is done elsewhere. + return en != _sortNode; + case EN::LIMIT: // LIMIT is criterion to stop + return true; + case EN::INDEX_RANGE: + // TODO: we should also match INDEX_RANGE later on. + // todo: this may only be done if there is a full index match. + return true; + case EN::ENUMERATE_COLLECTION: + handleEnumerateCollectionNode(static_cast(en)); + return true; // no matching index found. + } + } }; diff --git a/arangod/VocBase/index.cpp b/arangod/VocBase/index.cpp index 23c28fcd19..a5c7653c6d 100644 --- a/arangod/VocBase/index.cpp +++ b/arangod/VocBase/index.cpp @@ -1158,19 +1158,30 @@ static int FillLookupSLOperator (TRI_index_operator_t* slOperator, for (size_t j = 0; j < relationOperator->_numFields; ++j) { TRI_json_t* jsonObject = (TRI_json_t*) TRI_AtVector(&(relationOperator->_parameters->_value._objects), j); + // find out if the search value is a list or an array if ((TRI_IsListJson(jsonObject) || TRI_IsArrayJson(jsonObject)) && slOperator->_type != TRI_EQ_INDEX_OPERATOR) { - // non-equality operator used on complex data type, this is disallowed + // non-equality operator used on list or array data type, this is disallowed + // because we need to shape these objects first. however, at this place (index lookup) + // we never want to create new shapes so we will have a problem if we cannot find an + // existing shape for the search value. in this case we would need to raise an error + // but then the query results would depend on the state of the shaper and if it had + // seen previous such objects + + // we still allow looking for list or array values using equality. this is safe. return TRI_ERROR_BAD_PARAMETER; } + // now shape the search object (but never create any new shapes) TRI_shaped_json_t* shapedObject = TRI_ShapedJsonJson(document->getShaper(), jsonObject, false); // ONLY IN INDEX, PROTECTED by RUNTIME if (shapedObject != nullptr) { + // found existing shape relationOperator->_fields[j] = *shapedObject; // shallow copy here is ok TRI_Free(TRI_UNKNOWN_MEM_ZONE, shapedObject); // don't require storage anymore } else { + // shape not found return TRI_RESULT_ELEMENT_NOT_FOUND; } }