From 4e48475a9ff21c062ff2cde934513f20eb0fe7e2 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Fri, 28 Nov 2014 11:21:47 +0100 Subject: [PATCH 1/5] Fix a bug in list access for AqlValues. --- arangod/Aql/AqlValue.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arangod/Aql/AqlValue.cpp b/arangod/Aql/AqlValue.cpp index 4420852a24..bf8d1cf512 100644 --- a/arangod/Aql/AqlValue.cpp +++ b/arangod/Aql/AqlValue.cpp @@ -724,6 +724,7 @@ Json AqlValue::extractListMember (triagens::arango::AqlTransaction* trx, auto vecCollection = (*it)->getDocumentCollection(0); return (*it)->getValue(p - totalSize, 0).toJson(trx, vecCollection); } + totalSize += (*it)->size(); } break; // fall-through to returning null } @@ -762,6 +763,8 @@ AqlValue AqlValue::CreateFromBlocks (triagens::arango::AqlTransaction* trx, for (RegisterId j = 0; j < n; ++j) { if (variableNames[j][0] != '\0') { // temporaries don't have a name and won't be included + // Variables from depth 0 are excluded, too, unless the + // COLLECT statement is on level 0 as well. values.set(variableNames[j].c_str(), current->getValue(i, j).toJson(trx, current->getDocumentCollection(j))); } } From 676ec3694d1e082dee0c10c74c154f98f7ffb75c Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Fri, 28 Nov 2014 11:43:09 +0100 Subject: [PATCH 2/5] Choose more sensible block sizes for fetch from dependency. --- arangod/Aql/ExecutionBlock.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index 9827aa77c2..a55d392daa 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -716,7 +716,8 @@ AqlItemBlock* EnumerateCollectionBlock::getSome (size_t, // atLeast, } if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return nullptr; } @@ -797,7 +798,8 @@ size_t EnumerateCollectionBlock::skipSome (size_t atLeast, size_t atMost) { while (skipped < atLeast) { if (_buffer.empty()) { - if (! getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! getBlock(toFetch, toFetch)) { _done = true; return skipped; } @@ -1168,7 +1170,8 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, // try again! if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize) + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch) || (! initIndex())) { _done = true; return nullptr; @@ -1274,7 +1277,8 @@ size_t IndexRangeBlock::skipSome (size_t atLeast, while (skipped < atLeast) { if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize) + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch) || (! initIndex())) { _done = true; return skipped; @@ -1726,7 +1730,8 @@ AqlItemBlock* EnumerateListBlock::getSome (size_t, size_t atMost) { // try again! if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return nullptr; } @@ -1847,7 +1852,8 @@ size_t EnumerateListBlock::skipSome (size_t atLeast, size_t atMost) { while (skipped < atLeast) { if (_buffer.empty()) { - if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { + size_t toFetch = (std::min)(DefaultBatchSize, atMost); + if (! ExecutionBlock::getBlock(toFetch, toFetch)) { _done = true; return skipped; } @@ -2073,7 +2079,7 @@ AqlItemBlock* CalculationBlock::getSome (size_t atLeast, size_t atMost) { unique_ptr res(ExecutionBlock::getSomeWithoutRegisterClearout( - DefaultBatchSize, DefaultBatchSize)); + atLeast, atMost)); if (res.get() == nullptr) { return nullptr; @@ -2341,6 +2347,10 @@ bool FilterBlock::hasMore () { } if (_buffer.empty()) { + // QUESTION: Is this sensible? Asking whether there is more might + // trigger an expensive fetching operation, even if later on only + // a single document is needed due to a LIMIT... + // However, how should we know this here? if (! getBlock(DefaultBatchSize, DefaultBatchSize)) { _done = true; return false; From a76e22f03a9681c689e745caf10b40b2cf91da27 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Fri, 28 Nov 2014 13:05:15 +0100 Subject: [PATCH 3/5] Only put non-toplevel vars into the INTO variable in a COLLECT. This restores the behaviour of 2.2. --- arangod/Aql/ExecutionBlock.cpp | 16 ++++++++------ arangod/Aql/ExecutionNode.cpp | 39 ++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index a55d392daa..97d48a4fc8 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -2405,13 +2405,17 @@ AggregateBlock::AggregateBlock (ExecutionEngine* engine, } // iterate over all our variables - for (auto it = en->getRegisterPlan()->varInfo.begin(); - it != en->getRegisterPlan()->varInfo.end(); ++it) { - // find variable in the global variable map - auto itVar = en->_variableMap.find((*it).first); + for (auto& vi : en->getRegisterPlan()->varInfo) { + if (vi.second.depth > 0 || en->getDepth() == 1) { + // Do not keep variables from depth 0, unless we are depth 1 ourselves + // (which means no FOR in which we are contained) + + // find variable in the global variable map + auto itVar = en->_variableMap.find(vi.first); - if (itVar != en->_variableMap.end()) { - _variableNames[(*it).second.registerId] = (*itVar).second; + if (itVar != en->_variableMap.end()) { + _variableNames[vi.second.registerId] = (*itVar).second; + } } } } diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index 096bb5dc73..a69bb6e8a4 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -2043,22 +2043,35 @@ ExecutionNode* AggregateNode::clone (ExecutionPlan* plan, //////////////////////////////////////////////////////////////////////////////// struct UserVarFinder : public WalkerWorker { - UserVarFinder () {}; + UserVarFinder (int mindepth) : mindepth(mindepth), depth(-1) {}; ~UserVarFinder () {}; std::vector userVars; + int mindepth; // minimal depth to consider + int depth; bool enterSubquery (ExecutionNode*, ExecutionNode*) override final { return false; } - bool before (ExecutionNode* en) override final { - auto vars = en->getVariablesSetHere(); - for (auto v : vars) { - if (v->isUserDefined()) { - userVars.push_back(v); + void after (ExecutionNode* en) override final { + if (en->getType() == ExecutionNode::SINGLETON) { + depth = 0; + } + else if (en->getType() == ExecutionNode::ENUMERATE_COLLECTION || + en->getType() == ExecutionNode::INDEX_RANGE || + en->getType() == ExecutionNode::ENUMERATE_LIST || + en->getType() == ExecutionNode::AGGREGATE) { + depth += 1; + } + // Now depth is set correct for this node. + if (depth >= mindepth) { + auto vars = en->getVariablesSetHere(); + for (auto v : vars) { + if (v->isUserDefined()) { + userVars.push_back(v); + } } } - return false; } }; @@ -2069,10 +2082,18 @@ std::vector AggregateNode::getVariablesUsedHere () const { } if (_outVariable != nullptr) { // Here we have to find all user defined variables in this query - // amonst our dependencies: - UserVarFinder finder; + // amongst our dependencies: + UserVarFinder finder(1); auto myselfasnonconst = const_cast(this); myselfasnonconst->walk(&finder); + if (finder.depth == 1) { + // we are toplevel, let's run again with mindepth = 0 + finder.userVars.clear(); + finder.mindepth = 0; + finder.depth = -1; + finder.reset(); + myselfasnonconst->walk(&finder); + } for (auto x : finder.userVars) { v.insert(x); } From 2a03393121b881a133723843f95e140d9349bbca Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Fri, 28 Nov 2014 13:25:56 +0100 Subject: [PATCH 4/5] Improve documentation of COLLECT AQL statement. --- Documentation/Books/Users/Aql/Operations.mdpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Documentation/Books/Users/Aql/Operations.mdpp b/Documentation/Books/Users/Aql/Operations.mdpp index 03a9fc5920..45563e6af2 100644 --- a/Documentation/Books/Users/Aql/Operations.mdpp +++ b/Documentation/Books/Users/Aql/Operations.mdpp @@ -228,7 +228,18 @@ contains the group value. The second form does the same as the first form, but additionally introduces a variable (specified by *groups*) that contains all elements that fell into the -group. Specifying the *INTO* clause is optional- +group. This works as follows: The *groups* variable is a list containing +as many elements as there are in the group. Each member of that list is +a JSON object in which the value of every variable that is defined in the +AQL query is bound to the corresponding attribute. Note that this considers +all variables that are defined before the *COLLECT* statement, but not those on +the top level (outside of any *FOR*), unless the *COLLECT* statement is itself +on the top level, in which case all variables are taken. Furthermore note +that it is possible that the optimizer moves *LET* statements out of *FOR* +statements to improve performance. In a future version of ArangoDB we plan +to allow to configure exactly the values of which variables are copied +into the *groups* variable, since excessive copying can have a negative +impact on performance. Specifying the *INTO* clause is optional. ``` FOR u IN users From 0445b547be07a0271330de5f5d80d3f931f92a16 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Fri, 28 Nov 2014 13:31:27 +0100 Subject: [PATCH 5/5] Fix a buffer overflow that did not have any effect. --- arangod/Aql/ExecutionBlock.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index 97d48a4fc8..7e9d8d5001 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -78,12 +78,14 @@ AggregatorGroup::~AggregatorGroup () { void AggregatorGroup::initialize (size_t capacity) { TRI_ASSERT(capacity > 0); + groupValues.clear(); + collections.clear(); groupValues.reserve(capacity); collections.reserve(capacity); for (size_t i = 0; i < capacity; ++i) { - groupValues[i] = AqlValue(); - collections[i] = nullptr; + groupValues.emplace_back(); + collections.push_back(nullptr); } }