diff --git a/.gitignore b/.gitignore index bc55957536..0f684a89e9 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ Debug32/ Release64/ Release32/ WindowsLibraries/ +cluster/ core TAGS diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index 428f98a498..995382588b 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -486,8 +486,8 @@ void Optimizer::setupRules() { patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, true); // patch update statements - registerRule("geo-index-optimizer", optimizeGeoIndexRule, - geoDistanceRule, DoesNotCreateAdditionalPlans, true); + registerRule("geo-index-optimizer", geoIndexRule, + applyGeoIndexRule, DoesNotCreateAdditionalPlans, true); if (arangodb::ServerState::instance()->isCoordinator()) { // distribute operations in cluster diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index 9a98ce0f1b..44bb2ed5b0 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -145,6 +145,8 @@ class Optimizer { // remove redundant OR conditions removeRedundantOrRule_pass6 = 820, + applyGeoIndexRule = 825, + useIndexesRule_pass6 = 830, // try to remove filters covered by index ranges @@ -197,9 +199,8 @@ class Optimizer { removeSatelliteJoinsRule_pass10 = 1045, // recognize that a RemoveNode can be moved to the shards - undistributeRemoveAfterEnumCollRule_pass10 = 1050, + undistributeRemoveAfterEnumCollRule_pass10 = 1050 - geoDistanceRule = 1060 }; public: diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 56ac6be5c5..ef579bf5a3 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -51,6 +51,8 @@ #include #include +#include + using namespace arangodb; using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; @@ -2727,6 +2729,7 @@ void arangodb::aql::distributeFilternCalcToClusterRule( void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* 
rule) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER DISTRIBUTE SORT RULE"; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::GATHER, true); @@ -2782,6 +2785,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, stopSearching = true; break; case EN::SORT: + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "APPLY DISTRIBUTE SORT"; auto thisSortNode = static_cast(inspectNode); // remember our cursor... @@ -2789,7 +2793,9 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, // then unlink the filter/calculator from the plan plan->unlinkNode(inspectNode); // and re-insert into plan in front of the remoteNode - plan->insertDependency(rn, inspectNode); + if(thisSortNode->_reinsertInCluster){ + plan->insertDependency(rn, inspectNode); + } gatherNode->setElements(thisSortNode->getElements()); modified = true; // ready to rumble! @@ -3919,34 +3925,168 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, } +/////////////////////////////////////////////////////////////////////////////// +// GEO RULE /////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// +// Description of what this Rule tries to achieve: +// https://docs.google.com/document/d/1G57UP08ZFywUXKi5cLvEIKpZP-AUKGwG9oAnFOX8LLo +// - -struct GeoIndexInfo { - EnumerateCollectionNode* _collectionNode; - Collection const* _collection; - std::shared_ptr _index; - std::vector _longitude; - std::vector _latitude; +struct GeoIndexInfo{ + operator bool() const { return distanceNode && valid; } + void invalidate() { valid = false; } + GeoIndexInfo() + : collectionNode(nullptr) + , executionNode(nullptr) + , indexNode(nullptr) + , setter(nullptr) + , expressionParent(nullptr) + , expressionNode(nullptr) + , distanceNode(nullptr) + , index(nullptr) + , range(nullptr) + , executionNodeType(EN::ILLEGAL) + , within(false) + , lessgreaterequal(false) + , 
valid(true) + , constantPair{nullptr,nullptr} + {} + EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode + ExecutionNode* executionNode; // start node that is a sort or filter + IndexNode* indexNode; // AstNode that is the parent of the Node + CalculationNode* setter; // node that has contains the condition for filter or sort + AstNode* expressionParent; // AstNode that is the parent of the Node + AstNode* expressionNode; // AstNode that contains the sort/filter condition + AstNode* distanceNode; // AstNode that contains the distance parameters + std::shared_ptr index; //pointer to geoindex + AstNode const* range; // range for within + ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter + bool within; // is this a within lookup + bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) + bool valid; // contains this node a valid condition + std::vector longitude; // access path to longitude + std::vector latitude; // access path to latitude + std::pair constantPair; }; +////////////////////////////////////////////////////////////////////// +//candidate checking + +AstNode* isValueOrRefNode(AstNode* node){ + //TODO - implement me + return node; +} + +GeoIndexInfo isDistanceFunction(AstNode* distanceNode, AstNode* expressionParent){ + // the expression must exist and it must be a function call + auto rv = GeoIndexInfo{}; + if(distanceNode->type != NODE_TYPE_FCALL) { + return rv; + } + + //get the ast node of the expression + auto func = static_cast(distanceNode->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || distanceNode->numMembers() != 1 ) { + return rv; + } + rv.distanceNode = distanceNode; + rv.expressionNode = distanceNode; + rv.expressionParent = expressionParent; + return rv; +} + +GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ + // 
binary compare must be on top + bool dist_first = true; + bool lessEqual = true; + auto rv = GeoIndexInfo{}; + if( node->type != NODE_TYPE_OPERATOR_BINARY_GE + && node->type != NODE_TYPE_OPERATOR_BINARY_GT + && node->type != NODE_TYPE_OPERATOR_BINARY_LE + && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + + return rv; + } else { + if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ + dist_first = false; + } + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT){ + lessEqual = false; + } + + if(node->numMembers() != 2){ + return rv; + } + + AstNode* first = node->getMember(0); + AstNode* second = node->getMember(1); + + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ + if (dist_first && dist_fun && value_node){ + dist_fun.within = true; + dist_fun.range = value_node; + dist_fun.lessgreaterequal = lessEqual; + } else { + dist_fun.invalidate(); + } + return dist_fun; + }; + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, expressionParent), isValueOrRefNode(second)); + if (!rv) { + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, expressionParent), isValueOrRefNode(first)); + } -// TODO - remove debug code -#ifdef OBIDEBUG - #define OBILEVEL ERR -#else - #define OBILEVEL TRACE -#endif -static boost::optional -geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan){ + if(rv){ + //this must be set after checking if the node contains a distance node. 
+ rv.expressionNode = node; + } + + return rv; +} + +GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, GeoIndexInfo(*condition)(AstNode*, AstNode*)){ + // returns on first hit + if (!root){ + return GeoIndexInfo{}; + } + std::vector> nodestack; + nodestack.push_back({root,nullptr}); + + while(nodestack.size()){ + auto current = nodestack.back(); + nodestack.pop_back(); + GeoIndexInfo rv = condition(current.first,current.second); + if (rv) { + return rv; + } + + if (type == EN::FILTER){ + if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current.first->numMembers(); ++i){ + nodestack.push_back({current.first->getMember(i),current.first}); + } + } + } else if (type == EN::SORT) { + // must be the only sort condition + } + } + return GeoIndexInfo{}; +} + +GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; - LOG(OBILEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ - LOG(OBILEVEL) << " not both args are of type attribute access"; - return boost::none; + info.invalidate(); + return info; } // expect access of the for doc.attribute @@ -3956,14 +4096,13 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN SV accessPath1{pair.first->getString()}; SV accessPath2{pair.second->getString()}; - LOG(OBILEVEL) << " got setter"; if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); + auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable 
geo-indexes - LOG(OBILEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = *indexShardPtr.get(); @@ -3974,140 +4113,364 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN continue; } -#ifdef OBIDEBUG - //FIXME - REMOVE DEBUG CODE LATER - auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; - for(auto vec : vecs ){ - for(auto path : vec){ - std::cout << "AccessPath VECTOR: "; - for(auto word : path){ - std::cout << word << " "; - } - std::cout << std::endl; - } - } -#endif - - //check access paths of attribues in ast and those in index match + //check access paths of attributes in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ - return GeoIndexInfo{collNode, coll, indexShardPtr, std::move(accessPath1), std::move(accessPath2) }; + info.collectionNode = collNode; + info.index = indexShardPtr; + info.longitude = std::move(accessPath1); + info.latitude = std::move(accessPath2); + return info; } } } } - return boost::none; + info.invalidate(); + return info; } -void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, - ExecutionPlan* plan, - Optimizer::Rule const* rule) { +bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ + if(!info){ + return false; + } - LOG(OBILEVEL) << "ENTER GEO RULE"; + auto const& functionArguments = info.distanceNode->getMember(0); + if(functionArguments->numMembers() < 4){ + return false; + } + + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; + + GeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); + GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); + //info now conatins access path to collection + + // xor only one 
argument pair shall have a geoIndex + if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ + info.invalidate(); + return false; + } + + GeoIndexInfo res; + if(result1){ + info = std::move(result1); + info.constantPair = std::move(argPair2); + } else { + info = std::move(result2); + info.constantPair = std::move(argPair1); + } + + return true; +} + +//checks a single sort or filter node +GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ + ExecutionNode* setter = nullptr; + auto rv = GeoIndexInfo{}; + switch(type){ + case EN::SORT: { + auto node = static_cast(n); + auto& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion + if ( !(elements.size() == 1 && elements[0].second)) { + //test on second makes sure the SORT is ascending + return rv; + } + + //variable of sort expression + auto variable = elements[0].first; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the calculation + setter = plan->getVarSetBy(variable->id); + } + break; + + case EN::FILTER: { + auto node = static_cast(n); + + // filter nodes always have one input variable + auto varsUsedHere = node->getVariablesUsedHere(); + TRI_ASSERT(varsUsedHere.size() == 1); + + // now check who introduced our variable + auto variable = varsUsedHere[0]; + setter = plan->getVarSetBy(variable->id); + } + break; + + default: + return rv; + } + + // common part - extract astNode from setter witch is a calculation node + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return rv; + } + + auto expression = static_cast(setter)->expression(); + + // the expression must exist and it must have an astNode + if (expression == nullptr || expression->node() == nullptr){ + // not the right type of node + return rv; + } + AstNode* node = expression->nodeForModification(); + + //FIXME -- 
technical debt -- code duplication / not all cases covered + switch(type){ + case EN::SORT: { + // check comma separated parts of condition cond0, cond1, cond2 + rv = isDistanceFunction(node,nullptr); + } + break; + + case EN::FILTER: { + rv = iterativePreorderWithCondition(type, node, &isGeoFilterExpression); + } + break; + + default: + rv.invalidate(); // not required but make sure the result is invalid + } + + rv.executionNode = n; + rv.executionNodeType = type; + rv.setter = static_cast(setter); + + checkDistanceArguments(rv, plan); + + return rv; +}; + +////////////////////////////////////////////////////////////////////// +//modify plan + +// builds a condition that can be used with the index interface and +// contains all parameters required by the GeoIndex +std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, + bool lessEqual = false, AstNode const* withRange = nullptr){ + + AstNode* lat = info.constantPair.first; + AstNode* lon = info.constantPair.second; + auto ast = plan->getAst(); + auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); + + auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); + nAryAnd->reserve(withRange ? 
4 : 2); + + auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); + auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); + nAryAnd->addMember(latEq); + + auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); + auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); + nAryAnd->addMember(lonEq); + + if(info.within){ + auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); + auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, info.range); + nAryAnd->addMember(withEq); + + auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); + auto lessValue = ast->createNodeValueBool(info.lessgreaterequal); + auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); + nAryAnd->addMember(lessEq); + } + + auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); + + auto condition = std::make_unique(ast); + condition->andCombine(unAryOr); + condition->normalize(plan); + return condition; +} + +void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ + if( info.expressionParent && info.executionNodeType == EN::FILTER) { + + auto ast = plan->getAst(); + CalculationNode* newNode = nullptr; + Expression* expr = new Expression(ast, static_cast(info.setter)->expression()->nodeForModification()->clone(ast)); + + try { + newNode = new CalculationNode(plan, plan->nextId(), expr, static_cast(info.setter)->outVariable()); + } catch (...) 
{ + delete expr; + throw; + } + + plan->registerNode(newNode); + plan->replaceNode(info.setter, newNode); + + bool done = false; + ast->traverseAndModify(newNode->expression()->nodeForModification(),[&done](AstNode* node, void* data){ + if(done){ + return node; + } + if(node->type == NODE_TYPE_OPERATOR_BINARY_AND){ + for(std::size_t i = 0; i < node->numMembers(); i++){ + if(isGeoFilterExpression(node->getMemberUnchecked(i),node)){ + done = true; + return node->getMemberUnchecked(i ? 0 : 1); + } + } + } + return node; + }, + nullptr); + + if(done){ + return; + } + + auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); + if(newNode->expression()->nodeForModification() == replaceInfo.expressionParent){ + if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ + for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ + if(replaceInfo.expressionParent->getMember(i) != replaceInfo.expressionNode){ + newNode->expression()->replaceNode(replaceInfo.expressionParent->getMember(i)); + return; + } + } + } + } + + //else { + // // COULD BE IMPROVED + // if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ + // // delete ast node - we would need the parent of expression parent to delete the node + // // we do not have it available here so we just replace the the node with true + // return; + // } + //} + + //fallback + auto replacement = ast->createNodeValueBool(true); + for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ + if(replaceInfo.expressionParent->getMember(i) == replaceInfo.expressionNode){ + replaceInfo.expressionParent->removeMemberUnchecked(i); + replaceInfo.expressionParent->addMember(replacement); + } + } + + } +} + +// applys the optimization for a candidate +bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, GeoIndexInfo& second){ + if(!first && !second){ + return false; + } 
+ + if(!first){ + first = std::move(second); + second.invalidate(); + } + + // We are not allowed to be a inner loop + if(first.collectionNode->isInInnerLoop() && first.executionNodeType == EN::SORT){ + return false; + } + + std::unique_ptr condition; + condition = buildGeoCondition(plan,first); + + auto inode = new IndexNode( + plan, plan->nextId(), first.collectionNode->vocbase(), + first.collectionNode->collection(), first.collectionNode->outVariable(), + std::vector{Transaction::IndexHandle{first.index}}, + condition.get(), false); + plan->registerNode(inode); + condition.release(); + + plan->replaceNode(first.collectionNode,inode); + + replaceGeoCondition(plan, first); + replaceGeoCondition(plan, second); + + // if executionNode is sort OR a filter without further sub conditions + // the node can be unlinked + auto unlinkNode = [&](GeoIndexInfo& info){ + if(info && !info.expressionParent){ + if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { + plan->unlinkNode(info.executionNode); + } else if (info.executionNodeType == EN::SORT){ + //make sure sort is not reinserted in cluster + static_cast(info.executionNode)->_reinsertInCluster = false; + } + } + }; + + unlinkNode(first); + unlinkNode(second); + + //signal that plan has been changed + return true; +}; + +void arangodb::aql::geoIndexRule(Optimizer* opt, + ExecutionPlan* plan, + Optimizer::Rule const* rule) { + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; bool modified = false; + //inspect each return node and work upwards to SingletonNode + plan->findEndNodes(nodes, true); + //ExecutionPlan* newPlan = nullptr; + for (auto& node : nodes) { + GeoIndexInfo sortInfo{}; + GeoIndexInfo filterInfo{}; + auto current = node; - plan->findNodesOfType(nodes, EN::SORT, true); + while (current){ + switch(current->getType()) { + case EN::SORT:{ + sortInfo = identifyGeoOptimizationCandidate(EN::SORT, 
plan, current); + } + break ; + case EN::FILTER:{ + filterInfo = identifyGeoOptimizationCandidate(EN::FILTER, plan, current); + } + break; + case EN::ENUMERATE_COLLECTION:{ + EnumerateCollectionNode* collnode = static_cast(current); + if( (sortInfo && sortInfo.collectionNode!= collnode) + ||(filterInfo && filterInfo.collectionNode != collnode) + ){ + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ + modified = true; + filterInfo.invalidate(); + sortInfo.invalidate(); + } + } + break; - for (auto const& n : nodes) { - auto node = static_cast(n); - auto const& elements = node->getElements(); + case EN::INDEX: + case EN::COLLECT:{ + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].second)) { - continue; + default:{} //skip - do nothing + break; + } + + current = current->getFirstDependency(); //inspect next node } - - //variable of sort expression - auto const variable = elements[0].first; - TRI_ASSERT(variable != nullptr); - - //// find the expression that is bound to the variable - // get the expression node that holds the cacluation - auto setter = plan->getVarSetBy(variable->id); - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - continue; - } - - // downcast to calculation node and get expression - auto cn = static_cast(setter); - auto const expression = cn->expression(); - - // the expression must exist and it must be a function call - if (expression == nullptr || expression->node() == nullptr || - expression->node()->type != NODE_TYPE_FCALL) { - // not the right type of node - continue; - } - - //get the ast node of the expression - AstNode const* funcNode = expression->node(); - auto func = static_cast(funcNode->getData()); - - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array 
- if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { - continue; - } - - LOG(OBILEVEL) << " FOUND DISTANCE RULE"; - - auto const& distanceArgs = funcNode->getMember(0); - if(distanceArgs->numMembers() != 4){ - continue; - } - - std::pair argPair1 = { distanceArgs->getMember(0), distanceArgs->getMember(1) }; - std::pair argPair2 = { distanceArgs->getMember(2), distanceArgs->getMember(3) }; - - auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); - auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - continue; - } - - LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - - if(!result1){ - result1 = std::move(result2); - } - - LOG(OBILEVEL) << " attributes: " << result1.get()._longitude[0] - << ", " << result1.get()._longitude - << " of collection:" << result1.get()._collection->getName() - << " are geoindexed"; - - break; //remove this to make use of the index - - auto cnode = result1.get()._collectionNode; - auto& idxPtr = result1.get()._index; - - //create new index node and register it - auto condition = std::make_unique(plan->getAst()); //What is this condition exactly about - condition->normalize(plan); - auto inode = new IndexNode( - plan, plan->nextId(), cnode->vocbase(), - cnode->collection(), cnode->outVariable(), - std::vector{Transaction::IndexHandle{idxPtr}}, - condition.get(), !elements[0].second); - plan->registerNode(inode); - condition.release(); - - plan->unlinkNode(n); - plan->replaceNode(cnode,inode); - - //signal that plan has been changed - modified=true; - } opt->addPlan(plan, rule, modified); - - LOG(OBILEVEL) << "EXIT GEO RULE"; - LOG(OBILEVEL) << ""; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; } - diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index cedcab94fa..2f848fef30 100644 --- 
a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -201,7 +201,7 @@ void prepareTraversalsRule(Optimizer* opt, ExecutionPlan* plan, /// @brief moves simple subqueries one level higher void inlineSubqueriesRule(Optimizer*, ExecutionPlan*, Optimizer::Rule const*); -void optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); +void geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/SortNode.cpp b/arangod/Aql/SortNode.cpp index cc260878ee..8fa0facb1d 100644 --- a/arangod/Aql/SortNode.cpp +++ b/arangod/Aql/SortNode.cpp @@ -32,7 +32,7 @@ using namespace arangodb::aql; SortNode::SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, base), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, base), _reinsertInCluster(true), _elements(elements), _stable(stable){} /// @brief toVelocyPack, for SortNode void SortNode::toVelocyPackHelper(VPackBuilder& nodes, bool verbose) const { diff --git a/arangod/Aql/SortNode.h b/arangod/Aql/SortNode.h index 6f30c99316..6338ec5acf 100644 --- a/arangod/Aql/SortNode.h +++ b/arangod/Aql/SortNode.h @@ -53,7 +53,7 @@ class SortNode : public ExecutionNode { public: SortNode(ExecutionPlan* plan, size_t id, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, id), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, id), _reinsertInCluster(true), _elements(elements), _stable(stable) {} SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable); @@ -120,6 +120,9 @@ class SortNode : public ExecutionNode { /// values (e.g. 
when a FILTER condition exists that guarantees this) void removeConditions(size_t count); + // reinsert node when building gather node - this is used e.g for the geo-index + bool _reinsertInCluster; + private: /// @brief pairs, consisting of variable and sort direction /// (true = ascending | false = descending) diff --git a/arangod/GeoIndex/GeoIndex.cpp b/arangod/GeoIndex/GeoIndex.cpp index 9c39fd261d..a8ee88240e 100644 --- a/arangod/GeoIndex/GeoIndex.cpp +++ b/arangod/GeoIndex/GeoIndex.cpp @@ -1991,19 +1991,14 @@ typedef struct { GeoFix dist; } hpot; // pot for putting on the heap -bool hpotcompare(hpot a, hpot b) { return (a.dist > b.dist); } +static bool hpotcompare(hpot const& a, hpot const& b) { return (a.dist > b.dist); } typedef struct { int slot; double snmd; } hslot; // pot for putting on the heap -bool hslotcompare(hslot a, hslot b) { - if (a.snmd > b.snmd) - return true; - else - return false; -} +static bool hslotcompare(hslot const& a, hslot const& b) { return (a.snmd > b.snmd); } typedef struct { GeoIx* Ix; /* GeoIndex */ @@ -2098,7 +2093,7 @@ GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count) { } if (gcr->slotheap.size() != 0) { slox = gcr->slotheap.front().slot; - gcr->slotsnmd = GeoSNMD(&gcr->gd, (gcr->Ix)->gc + slox); + gcr->slotsnmd = gcr->slotheap.front().snmd; } } else { hp.pot = pot.LorLeaf; @@ -2118,7 +2113,7 @@ GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count) { } else { if (gcr->slotheap.size() == 0) break; // that's all there is slox = gcr->slotheap.front().slot; - tsnmd = GeoSNMD(&gcr->gd, (gcr->Ix)->gc + slox); + tsnmd = gcr->slotheap.front().snmd; r = GeoResultsGrow(gr); if (r == -1) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->snmd); @@ -2134,7 +2129,7 @@ GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count) { gcr->slotheap.pop_back(); if (gcr->slotheap.size() != 0) { slox = gcr->slotheap.front().slot; - gcr->slotsnmd = GeoSNMD(&gcr->gd, (gcr->Ix)->gc + slox); + gcr->slotsnmd = gcr->slotheap.front().snmd; } } } 
diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 64e526824a..0d605f9a02 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -21,13 +21,141 @@ /// @author Dr. Frank Celler //////////////////////////////////////////////////////////////////////////////// -#include "GeoIndex.h" -#include "Logger/Logger.h" +#include "Aql/Ast.h" +#include "Aql/AstNode.h" +#include "Aql/SortCondition.h" #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" +#include "GeoIndex.h" +#include "Indexes/GeoIndex.h" +#include "Logger/Logger.h" #include "VocBase/transaction.h" using namespace arangodb; +GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const* cond, + arangodb::aql::Variable const* var) + : IndexIterator(collection, trx, mmdr, index), + _index(index), + _cursor(nullptr), + _coor(), + _condition(cond), + _variable(var), + _lat(0), + _lon(0), + _near(true), + _withinRange(0), + _withinLessEq(false) + // lookup will hold the information if this is a cursor for + // near/within and the reference point + //_lookups(trx, node, reference, index->fields()), + { + evaluateCondition(); + } + +void GeoIndexIterator::evaluateCondition() { + if (_condition) { + auto numMembers = _condition->numMembers(); + + if(numMembers >= 2){ + _lat = _condition->getMember(0)->getMember(1)->getDoubleValue(); + _lon = _condition->getMember(1)->getMember(1)->getDoubleValue(); + } + + if (numMembers == 2){ //near + _near = true; + } else { //within + _near = false; + _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); + _withinLessEq = _condition->getMember(3)->getMember(1)->getDoubleValue(); + } + + } else { + LOG(ERR) << "No Condition passed to GeoIndexIterator constructor"; + } + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT evaluate Condition"; +} + +IndexLookupResult GeoIndexIterator::next() 
{ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER next"; + if (!_cursor){ + createCursor(_lat,_lon); + } + + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,1)); + if(coords && coords->length){ + if(_near || GeoIndex_distance(&_coor, &coords->coordinates[0]) <= _withinRange ){ + auto revision = ::GeoIndex::toRevision(coords->coordinates[0].data); + return IndexLookupResult{revision}; + } + } + // if there are no more results we return the default constructed IndexLookupResult + return IndexLookupResult{}; +} + +void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER nextBabies " << batchSize; + if (!_cursor){ + createCursor(_lat,_lon); + } + + result.clear(); + if (batchSize > 0) { + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,batchSize)); + size_t length = coords ? coords->length : 0; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "length " << length; + if (!length){ + return; + } + + + for(std::size_t index = 0; index < length; ++index){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "near " << _near << " max allowed range: " << _withinRange + // << " actual range: " << GeoIndex_distance(&_coor, &coords->coordinates[index]) ; + if (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "add above to result" ; + result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + } else { + break; + } + } + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT nextBabies " << result.size(); +} + +::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ + if(_cursor){ + ::GeoIndex_CursorFree(_cursor); + } + _cursor = c; + return _cursor; +} + +::GeoCursor* GeoIndexIterator::createCursor(double lat, double lon){ + _coor = GeoCoordinate{lat, lon, 0}; + return replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &_coor)); +} + +/// @brief creates an IndexIterator for the given Condition 
+IndexIterator* GeoIndex::iteratorForCondition( + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, bool) const { + TRI_IF_FAILURE("HashIndex::noIterator") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + return new GeoIndexIterator(_collection, trx, mmdr, this, node, reference); +} + + +void GeoIndexIterator::reset() { + replaceCursor(nullptr); +} GeoIndex::GeoIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection* collection, VPackSlice const& info) diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 1d62a954f6..ca0195d7cd 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -27,6 +27,7 @@ #include "Basics/Common.h" #include "GeoIndex/GeoIndex.h" #include "Indexes/Index.h" +#include "Indexes/IndexIterator.h" #include "VocBase/vocbase.h" #include "VocBase/voc-types.h" @@ -37,8 +38,49 @@ static_assert(sizeof(GeoCoordinate::data) >= sizeof(TRI_voc_rid_t), "invalid size of GeoCoordinate.data"); namespace arangodb { +class GeoIndex; + +class GeoIndexIterator final : public IndexIterator { + public: + +/// @brief Construct an GeoIndexIterator based on Ast Conditions + GeoIndexIterator(LogicalCollection* collection, arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*); + + ~GeoIndexIterator() { + replaceCursor(nullptr); + }; + + char const* typeName() const override { return "geo-index-iterator"; } + + IndexLookupResult next() override; + + void nextBabies(std::vector&, size_t) override; + + void reset() override; + + private: + ::GeoCursor* replaceCursor(::GeoCursor* c); + ::GeoCursor* createCursor(double lat, double lon); + void evaluateCondition(); //called in constructor + + GeoIndex const* _index; + ::GeoCursor* _cursor; + ::GeoCoordinate _coor; + arangodb::aql::AstNode const* _condition; + arangodb::aql::Variable const* _variable; + 
double _lat; + double _lon; + bool _near; + double _withinRange; + double _withinLessEq; +}; class GeoIndex final : public Index { +friend class GeoIndexIterator; public: GeoIndex() = delete; @@ -66,6 +108,12 @@ class GeoIndex final : public Index { return TRI_IDX_TYPE_GEO2_INDEX; } + IndexIterator* iteratorForCondition(arangodb::Transaction*, + ManagedDocumentResult*, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*, + bool) const override; + bool allowExpansion() const override { return false; } bool canBeDropped() const override { return true; } diff --git a/arangod/Indexes/IndexIterator.h b/arangod/Indexes/IndexIterator.h index 84614a0120..d69b1d4724 100644 --- a/arangod/Indexes/IndexIterator.h +++ b/arangod/Indexes/IndexIterator.h @@ -21,6 +21,30 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// +// In order to implement a new IndexIterator the following functions need to be +// implemented. +// +// typeName() returns a string describing the type of the indexIterator +// +// The next() function of the IndexIterator returns IndexLookupResults that are +// created from RevisionIds. If there is nothing more to return a default +// constructed IndexLookupResult is returned. +// +// reset() resets the iterator +// +// optional - default implementation provided: +// +// nextBabies() gets more than one result, the function is meant to increase +// performance when receiving a single result from the index is more expensive +// per item than the item costs when receiving multiple results. +// +// skip(trySkip, skipped) tries to skip the next trySkip elements +// +// When finished you need to implement the function: +// virtual IndexIterator* iteratorForCondition(...)
+// so that there is a way to create an iterator for the index + + #ifndef ARANGOD_INDEXES_INDEX_ITERATOR_H #define ARANGOD_INDEXES_INDEX_ITERATOR_H 1 diff --git a/arangod/RestHandler/RestDocumentHandler.cpp b/arangod/RestHandler/RestDocumentHandler.cpp index 632c9ab1df..5e2dcb8120 100644 --- a/arangod/RestHandler/RestDocumentHandler.cpp +++ b/arangod/RestHandler/RestDocumentHandler.cpp @@ -202,7 +202,7 @@ bool RestDocumentHandler::readSingleDocument(bool generateBody) { // check for an etag bool isValidRevision; TRI_voc_rid_t const ifNoneRid = - extractRevision("if-none-match", nullptr, isValidRevision); + extractRevision("if-none-match", isValidRevision); if (!isValidRevision) { generateError(rest::ResponseCode::BAD, TRI_ERROR_HTTP_BAD_PARAMETER, "invalid revision number"); @@ -213,7 +213,7 @@ bool RestDocumentHandler::readSingleDocument(bool generateBody) { options.ignoreRevs = true; TRI_voc_rid_t const ifRid = - extractRevision("if-match", nullptr, isValidRevision); + extractRevision("if-match", isValidRevision); if (!isValidRevision) { generateError(rest::ResponseCode::BAD, TRI_ERROR_HTTP_BAD_PARAMETER, "invalid revision number"); @@ -392,7 +392,7 @@ bool RestDocumentHandler::modifyDocument(bool isPatch) { if (!isArrayCase) { TRI_voc_rid_t revision = 0; bool isValidRevision; - revision = extractRevision("if-match", nullptr, isValidRevision); + revision = extractRevision("if-match", isValidRevision); if (!isValidRevision) { generateError(rest::ResponseCode::BAD, TRI_ERROR_HTTP_BAD_PARAMETER, "invalid revision number"); @@ -498,7 +498,7 @@ bool RestDocumentHandler::deleteDocument() { TRI_voc_rid_t revision = 0; if (suffixes.size() == 2) { bool isValidRevision = false; - revision = extractRevision("if-match", nullptr, isValidRevision); + revision = extractRevision("if-match", isValidRevision); if (!isValidRevision) { generateError(rest::ResponseCode::BAD, TRI_ERROR_HTTP_BAD_PARAMETER, "invalid revision number"); diff --git
a/arangod/RestHandler/RestVocbaseBaseHandler.cpp b/arangod/RestHandler/RestVocbaseBaseHandler.cpp index cc0b850741..5f7cc9e2ee 100644 --- a/arangod/RestHandler/RestVocbaseBaseHandler.cpp +++ b/arangod/RestHandler/RestVocbaseBaseHandler.cpp @@ -568,7 +568,6 @@ void RestVocbaseBaseHandler::generateTransactionError( //////////////////////////////////////////////////////////////////////////////// TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header, - char const* parameter, bool& isValid) { isValid = true; bool found; @@ -603,20 +602,6 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header, return rid; } - if (parameter != nullptr) { - std::string const& etag2 = _request->value(parameter, found); - - if (found) { - TRI_voc_rid_t rid = 0; - - bool isOld; - rid = TRI_StringToRidWithCheck(etag2, isOld, false); - isValid = (rid != 0); - - return rid; - } - } - return 0; } diff --git a/arangod/RestHandler/RestVocbaseBaseHandler.h b/arangod/RestHandler/RestVocbaseBaseHandler.h index e73fcfa266..7081fdfd36 100644 --- a/arangod/RestHandler/RestVocbaseBaseHandler.h +++ b/arangod/RestHandler/RestVocbaseBaseHandler.h @@ -258,7 +258,7 @@ class RestVocbaseBaseHandler : public RestBaseHandler { /// @note @FA{header} must be lowercase. 
////////////////////////////////////////////////////////////////////////////// - TRI_voc_rid_t extractRevision(char const*, char const*, bool&); + TRI_voc_rid_t extractRevision(char const*, bool&); ////////////////////////////////////////////////////////////////////////////// /// @brief extracts a boolean parameter value diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js new file mode 100644 index 0000000000..318bf39457 --- /dev/null +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -0,0 +1,300 @@ +/*jshint globalstrict:false, strict:false, maxlen: 500 */ +/*global assertEqual, assertFalse, assertTrue, assertNotEqual, AQL_EXPLAIN, AQL_EXECUTE */ + +// execute with: +// ./scripts/unittest shell_server_aql --test js/server/tests/aql/aql-optimizer-geoindex.js + +//////////////////////////////////////////////////////////////////////////////// +/// @brief tests for optimizer rules +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2010-2012 triagens GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Christoph Uhde +/// @author Copyright 2016, ArangoDB GmbH, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +const expect = require('chai').expect; +var internal = require("internal"); +var jsunity = require("jsunity"); +var helper = require("@arangodb/aql-helper"); +var isEqual = helper.isEqual; +var findExecutionNodes = helper.findExecutionNodes; +var findReferencedNodes = helper.findReferencedNodes; +var getQueryMultiplePlansAndExecutions = helper.getQueryMultiplePlansAndExecutions; +var removeAlwaysOnClusterRules = helper.removeAlwaysOnClusterRules; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function optimizerRuleTestSuite() { + // quickly disable tests here + var enabled = { + basics : true, + removeNodes : true, + sorted : true + } + + var ruleName = "use-geoindex"; + var secondRuleName = "use-geoindexes"; + var removeCalculationNodes = "remove-unnecessary-calculations-2"; + var colName = "UnitTestsAqlOptimizer" + ruleName.replace(/-/g, "_"); + var colNameOther = colName + "_XX"; + + // various choices to control the optimizer: + var paramNone = { optimizer: { rules: [ "-all" ] } }; + var paramIndexFromSort = { optimizer: { rules: [ "-all", "+" + ruleName ] } }; + var paramIndexRange = { optimizer: { rules: [ "-all", "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange = { optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName, "+" + removeCalculationNodes ] } + }; + var paramIndexFromSort_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + removeCalculationNodes ] } + }; + + var geocol; + var 
sortArray = function (l, r) { + if (l[0] !== r[0]) { + return l[0] < r[0] ? -1 : 1; + } + if (l[1] !== r[1]) { + return l[1] < r[1] ? -1 : 1; + } + return 0; + }; + var hasSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 1, query.string + " Has SortNode "); + }; + var hasNoSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 0, query.string + " Has no SortNode"); + }; + var hasFilterNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 1, query.string + " Has FilterNode"); + }; + var hasNoFilterNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 0, query.string + " Has no FilterNode"); + }; + var hasNoIndexNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, query.string + " Has no IndexNode"); + }; + var hasNoResultsNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "NoResultsNode").length, 1, query.string + " Has NoResultsNode"); + }; + var hasCalculationNodes = function (plan,query, countXPect) { + assertEqual(findExecutionNodes(plan, "CalculationNode").length, + countXPect, "Has " + countXPect + " CalculationNode"); + }; + var hasIndexNode = function (plan,query) { + var rn = findExecutionNodes(plan,"IndexNode"); + assertEqual(rn.length, 1, query.string + "Has IndexNode"); + return; + }; + var isNodeType = function(node, type) { + assertEqual(node.type, type, query.string + " check whether this node is of type "+type); + }; + + var geodistance = function(latitude1, longitude1, latitude2, longitude2) { + //if (TYPEWEIGHT(latitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(latitude2) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude2) !== TYPEWEIGHT_NUMBER) { + // WARN('DISTANCE', INTERNAL.errors.ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH); + // return null; + //} + + //var p1 = 
AQL_TO_NUMBER(latitude1) * (Math.PI / 180.0); + //var p2 = AQL_TO_NUMBER(latitude2) * (Math.PI / 180.0); + //var d1 = AQL_TO_NUMBER(latitude2 - latitude1) * (Math.PI / 180.0); + //var d2 = AQL_TO_NUMBER(longitude2 - longitude1) * (Math.PI / 180.0); + + var p1 = (latitude1) * (Math.PI / 180.0); + var p2 = (latitude2) * (Math.PI / 180.0); + var d1 = (latitude2 - latitude1) * (Math.PI / 180.0); + var d2 = (longitude2 - longitude1) * (Math.PI / 180.0); + + var a = Math.sin(d1 / 2.0) * Math.sin(d1 / 2.0) + + Math.cos(p1) * Math.cos(p2) * + Math.sin(d2 / 2.0) * Math.sin(d2 / 2.0); + var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1.0 - a)); + + return (6371e3 * c); + } + + + return { + + //////////////////////////////////////////////////////////////////////////////// + /// @brief set up + //////////////////////////////////////////////////////////////////////////////// + + setUp : function () { + var loopto = 10; + + internal.db._drop(colName); + geocol = internal.db._create(colName); + geocol.ensureIndex({type:"geo", fields:["lat","lon"]}) + for (lat=-40; lat <=40 ; ++lat){ + for (lon=-40; lon <= 40; ++lon){ + geocol.insert({lat,lon}); + } + } + }, + + //////////////////////////////////////////////////////////////////////////////// + /// @brief tear down + //////////////////////////////////////////////////////////////////////////////// + + tearDown : function () { + internal.db._drop(colName); + internal.db._drop(colNameOther); + geocol = null; + }, + + testRuleBasics : function () { + if(enabled.basics){ + geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + + var queries = [ + //query clust sort filter index + { string : "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + 
}, + { string : "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : true + , index : true + }, + ]; + + queries.forEach(function(query) { + var result = AQL_EXPLAIN(query.string); + + // //optimized on cluster + // if (query[1]) { + // assertNotEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + // else { + // assertEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + + //sort nodes + if (query.sort) { + hasSortNode(result,query); + } else { + hasNoSortNode(result,query); + } + + //filter nodes + if (query.filter) { + hasFilterNode(result,query); + } else { + hasNoFilterNode(result,query); + } + + if (query.index){ + hasIndexNode(result,query); + } else { + hasNoIndexNode(result,query); + } + + }); + } + }, // testRuleBasics + + testRuleRemoveNodes : function () { + if(enabled.removeNodes){ + var queries = [ + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 111200 RETURN d", false, false, false ], +// [ "FOR i IN 1..2 FOR d IN geocol SORT distance(i,2,d.lat,d.lon) 
ASC LIMIT 5 RETURN d", false, false, false ], + ]; + + var expected = [ + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + ] + + queries.forEach(function(query, qindex) { + var result = AQL_EXECUTE(query[0]); + expect(expected[qindex].length).to.be.equal(result.json.length) + pairs = result.json.map(function(res){ + return [res.lat,res.lon]; + }); + //internal.print(pairs) + assertEqual(expected[qindex].sort(),pairs.sort()) + //expect(expected[qindex].sort()).to.be.equal(result.json.sort()) + }); + } + }, // testRuleSort + + testRuleSorted : function(){ + if(enabled.sorted){ + var old=0; + var query = "FOR d IN " + colName + " SORT distance(d.lat, d.lon, 0, 0) RETURN distance(d.lat, d.lon, 0, 0)"; + var result = AQL_EXECUTE(query); + distances = result.json.map(d => { return parseFloat(d.toFixed(5))}); + //internal.print(distances); + old=0; + distances.forEach(d => { assertTrue( d >= old); old = d; }); + } + } //testSorted + + }; // test dictionary (return) +} // optimizerRuleTestSuite + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +jsunity.run(optimizerRuleTestSuite); + +return jsunity.done(); diff --git a/js/server/tests/aql/aql-optimizer-indexes.js b/js/server/tests/aql/aql-optimizer-indexes.js index 344cd23beb..7c5889bf62 100644 --- a/js/server/tests/aql/aql-optimizer-indexes.js +++ b/js/server/tests/aql/aql-optimizer-indexes.js @@ -431,7 +431,7 @@ function optimizerIndexesTestSuite () { assertEqual("SingletonNode", nodeTypes[0], query); assertNotEqual(-1, nodeTypes.indexOf("IndexNode"), query); - + var results = AQL_EXECUTE(query); assertEqual([ 12 ], results.json, query); assertEqual(0, results.stats.scannedFull); diff --git a/lib/Logger/LogTopic.cpp b/lib/Logger/LogTopic.cpp index 91a294354f..6e52bd728f 100644 --- 
a/lib/Logger/LogTopic.cpp +++ b/lib/Logger/LogTopic.cpp @@ -47,6 +47,7 @@ LogTopic Logger::COMMUNICATION("communication", LogLevel::INFO); LogTopic Logger::COMPACTOR("compactor"); LogTopic Logger::CONFIG("config"); LogTopic Logger::DATAFILES("datafiles", LogLevel::INFO); +LogTopic Logger::DEVEL("development", LogLevel::DEBUG); LogTopic Logger::GRAPHS("graphs", LogLevel::INFO); LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO); LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress diff --git a/lib/Logger/Logger.h b/lib/Logger/Logger.h index 3529e9b9f4..f635577976 100644 --- a/lib/Logger/Logger.h +++ b/lib/Logger/Logger.h @@ -129,12 +129,13 @@ class Logger { public: static LogTopic AGENCY; static LogTopic AGENCYCOMM; - static LogTopic COLLECTOR; - static LogTopic COMPACTOR; - static LogTopic COMMUNICATION; - static LogTopic CONFIG; static LogTopic CLUSTER; + static LogTopic COLLECTOR; + static LogTopic COMMUNICATION; + static LogTopic COMPACTOR; + static LogTopic CONFIG; static LogTopic DATAFILES; + static LogTopic DEVEL; static LogTopic GRAPHS; static LogTopic HEARTBEAT; static LogTopic MEMORY; diff --git a/scripts/perfanalysis.cpp b/scripts/perfanalysis.cpp index 01a635b561..535a43dcb2 100644 --- a/scripts/perfanalysis.cpp +++ b/scripts/perfanalysis.cpp @@ -1,5 +1,5 @@ // Compile with -// g++ perfanalysis.cpp -o perfanalyis -std=c++11 -Wall -O3 +// g++ perfanalysis.cpp -o perfanalyis -std=c++14 -Wall -O3 #include #include diff --git a/scripts/setupPerfEvents.sh b/scripts/setupPerfEvents.sh index 36af7a012f..75eb924f8d 100755 --- a/scripts/setupPerfEvents.sh +++ b/scripts/setupPerfEvents.sh @@ -8,24 +8,26 @@ # document operations. 
Run this script with sudo when the ArangoDB # process is already running: # -# ./setupPerfEvents.sh +# sudo ./setupPerfEvents.sh # # Now you are able to recrod the event with: # -# sudo perf record -e "probe_arangod:*" -aR sleep 60 +# sudo perf record -e "probe_arangod:*" -aR # -# The above command will get sample data for 60 seconds. A file "perf.data" is -# written to the current directory. Dump the events in this file with: +# The above command will get sample data indefinitely, hit Ctrl-C when +# the measurement is finished. A file "perf.data" is written to the +# current directory. Dump the events in this file with: # # sudo perf script > perf.history # # This logs the times when individual threads hit the events. # Use the program perfanalyis.cpp in this directory in the following way: +# (for compilation instructions see at the top of perfanalysis.cpp) # -# sudo ./perfanalyis < perf.history > perf.statistics +# ./scripts/perfanalyis < perf.history > perf.statistics # # This will group enter and exit events of functions together, compute the time -# spent and sort by function. When finised remove all events with: +# spent and sort by function. When finished remove all events with: # # sudo perf probe -d "probe_arangod:*" # @@ -65,6 +67,10 @@ main(){ addEvent handleRequest handleRequest@HttpServer.cpp addEvent handleWrite handleWrite@SocketTask.cpp + echo "work in LogicalCollection" + addEvent logicalInsertDocument insertDocument@LogicalCollection.cpp + addEvent logicalInsert insert@LogicalCollection.cpp + addEvent tcp_sendmsg addEvent tcp_recvmsg