diff --git a/Documentation/Books/Users/Aql/Operations.mdpp b/Documentation/Books/Users/Aql/Operations.mdpp index 03a9fc5920..45563e6af2 100644 --- a/Documentation/Books/Users/Aql/Operations.mdpp +++ b/Documentation/Books/Users/Aql/Operations.mdpp @@ -228,7 +228,18 @@ contains the group value. The second form does the same as the first form, but additionally introduces a variable (specified by *groups*) that contains all elements that fell into the -group. Specifying the *INTO* clause is optional- +group. This works as follows: The *groups* variable is a list containing +as many elements as there are in the group. Each member of that list is +a JSON object in which the value of every variable that is defined in the +AQL query is bound to the corresponding attribute. Note that this considers +all variables that are defined before the *COLLECT* statement, but not those on +the top level (outside of any *FOR*), unless the *COLLECT* statement is itself +on the top level, in which case all variables are taken. Furthermore note +that it is possible that the optimizer moves *LET* statements out of *FOR* +statements to improve performance. In a future version of ArangoDB we plan +to allow to configure exactly the values of which variables are copied +into the *groups* variable, since excessive copying can have a negative +impact on performance. Specifying the *INTO* clause is optional. ``` FOR u IN users diff --git a/arangod/Aql/AqlValue.cpp b/arangod/Aql/AqlValue.cpp index 4420852a24..bf8d1cf512 100644 --- a/arangod/Aql/AqlValue.cpp +++ b/arangod/Aql/AqlValue.cpp @@ -724,6 +724,7 @@ Json AqlValue::extractListMember (triagens::arango::AqlTransaction* trx, auto vecCollection = (*it)->getDocumentCollection(0); return (*it)->getValue(p - totalSize, 0).toJson(trx, vecCollection); } + totalSize += (*it)->size(); } break; // fall-through to returning null } @@ -762,6 +763,8 @@ AqlValue AqlValue::CreateFromBlocks (triagens::arango::AqlTransaction* trx, for (RegisterId j = 0; j < n; ++j) { if (variableNames[j][0] != '\0') { // temporaries don't have a name and won't be included + // Variables from depth 0 are excluded, too, unless the + // COLLECT statement is on level 0 as well. values.set(variableNames[j].c_str(), current->getValue(i, j).toJson(trx, current->getDocumentCollection(j))); } } diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index 5e8ea9e0fb..6db5510d51 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -78,12 +78,14 @@ AggregatorGroup::~AggregatorGroup () { void AggregatorGroup::initialize (size_t capacity) { TRI_ASSERT(capacity > 0); + groupValues.clear(); + collections.clear(); groupValues.reserve(capacity); collections.reserve(capacity); for (size_t i = 0; i < capacity; ++i) { - groupValues[i] = AqlValue(); - collections[i] = nullptr; + groupValues.emplace_back(); + collections.push_back(nullptr); } } @@ -716,7 +718,8 @@ AqlItemBlock* EnumerateCollectionBlock::getSome (size_t, // atLeast, } if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return nullptr; } @@ -797,7 +800,8 @@ size_t EnumerateCollectionBlock::skipSome (size_t atLeast, size_t atMost) { while (skipped < atLeast) { if (_buffer.empty()) { - if (! getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! getBlock(toFetch, toFetch)) { _done = true; return skipped; } @@ -1168,7 +1172,8 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, // try again! if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize) + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch) || (! initIndex())) { _done = true; return nullptr; @@ -1274,7 +1279,8 @@ size_t IndexRangeBlock::skipSome (size_t atLeast, while (skipped < atLeast) { if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize) + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch) || (! initIndex())) { _done = true; return skipped; @@ -1726,7 +1732,8 @@ AqlItemBlock* EnumerateListBlock::getSome (size_t, size_t atMost) { // try again! if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return nullptr; } @@ -1847,7 +1854,8 @@ size_t EnumerateListBlock::skipSome (size_t atLeast, size_t atMost) { while (skipped < atLeast) { if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return skipped; } @@ -2073,7 +2081,7 @@ AqlItemBlock* CalculationBlock::getSome (size_t atLeast, size_t atMost) { unique_ptr res(ExecutionBlock::getSomeWithoutRegisterClearout( - DefaultBatchSize, DefaultBatchSize)); + atLeast, atMost)); if (res.get() == nullptr) { return nullptr; @@ -2341,6 +2349,10 @@ bool FilterBlock::hasMore () { } if (_buffer.empty()) { + // QUESTION: Is this sensible? Asking whether there is more might + // trigger an expensive fetching operation, even if later on only + // a single document is needed due to a LIMIT... + // However, how should we know this here? if (! getBlock(DefaultBatchSize, DefaultBatchSize)) { _done = true; return false; @@ -2395,13 +2407,17 @@ AggregateBlock::AggregateBlock (ExecutionEngine* engine, } // iterate over all our variables - for (auto it = en->getRegisterPlan()->varInfo.begin(); - it != en->getRegisterPlan()->varInfo.end(); ++it) { - // find variable in the global variable map - auto itVar = en->_variableMap.find((*it).first); + for (auto& vi : en->getRegisterPlan()->varInfo) { + if (vi.second.depth > 0 || en->getDepth() == 1) { + // Do not keep variables from depth 0, unless we are depth 1 ourselves + // (which means no FOR in which we are contained) + + // find variable in the global variable map + auto itVar = en->_variableMap.find(vi.first); - if (itVar != en->_variableMap.end()) { - _variableNames[(*it).second.registerId] = (*itVar).second; + if (itVar != en->_variableMap.end()) { + _variableNames[vi.second.registerId] = (*itVar).second; + } } } } diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index 096bb5dc73..a69bb6e8a4 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -2043,22 +2043,35 @@ ExecutionNode* AggregateNode::clone (ExecutionPlan* plan, //////////////////////////////////////////////////////////////////////////////// struct UserVarFinder : public WalkerWorker { - UserVarFinder () {}; + UserVarFinder (int mindepth) : mindepth(mindepth), depth(-1) {}; ~UserVarFinder () {}; std::vector userVars; + int mindepth; // minimal depth to consider + int depth; bool enterSubquery (ExecutionNode*, ExecutionNode*) override final { return false; } - bool before (ExecutionNode* en) override final { - auto vars = en->getVariablesSetHere(); - for (auto v : vars) { - if (v->isUserDefined()) { - userVars.push_back(v); + void after (ExecutionNode* en) override final { + if (en->getType() == ExecutionNode::SINGLETON) { + depth = 0; + } + else if (en->getType() == ExecutionNode::ENUMERATE_COLLECTION || + en->getType() == ExecutionNode::INDEX_RANGE || + en->getType() == ExecutionNode::ENUMERATE_LIST || + en->getType() == ExecutionNode::AGGREGATE) { + depth += 1; + } + // Now depth is set correct for this node. + if (depth >= mindepth) { + auto vars = en->getVariablesSetHere(); + for (auto v : vars) { + if (v->isUserDefined()) { + userVars.push_back(v); + } } } - return false; } }; @@ -2069,10 +2082,18 @@ std::vector AggregateNode::getVariablesUsedHere () const { } if (_outVariable != nullptr) { // Here we have to find all user defined variables in this query - // amonst our dependencies: - UserVarFinder finder; + // amongst our dependencies: + UserVarFinder finder(1); auto myselfasnonconst = const_cast(this); myselfasnonconst->walk(&finder); + if (finder.depth == 1) { + // we are toplevel, let's run again with mindepth = 0 + finder.userVars.clear(); + finder.mindepth = 0; + finder.depth = -1; + finder.reset(); + myselfasnonconst->walk(&finder); + } for (auto x : finder.userVars) { v.insert(x); }