1
0
Fork 0

Merge branch 'devel' of https://github.com/triAGENS/ArangoDB into devel

This commit is contained in:
Jan Steemann 2014-11-28 13:58:59 +01:00
commit bc8579f20b
4 changed files with 76 additions and 25 deletions

View File

@ -228,7 +228,18 @@ contains the group value.
The second form does the same as the first form, but additionally introduces a
variable (specified by *groups*) that contains all elements that fell into the
group. Specifying the *INTO* clause is optional.
group. This works as follows: The *groups* variable is a list containing
as many elements as there are in the group. Each member of that list is
a JSON object in which the value of every variable that is defined in the
AQL query is bound to the corresponding attribute. Note that this considers
all variables that are defined before the *COLLECT* statement, but not those on
the top level (outside of any *FOR*), unless the *COLLECT* statement is itself
on the top level, in which case all variables are taken. Furthermore note
that it is possible that the optimizer moves *LET* statements out of *FOR*
statements to improve performance. In a future version of ArangoDB we plan
to allow configuring exactly which variables' values are copied
into the *groups* variable, since excessive copying can have a negative
impact on performance. Specifying the *INTO* clause is optional.
```
FOR u IN users

View File

@ -724,6 +724,7 @@ Json AqlValue::extractListMember (triagens::arango::AqlTransaction* trx,
auto vecCollection = (*it)->getDocumentCollection(0);
return (*it)->getValue(p - totalSize, 0).toJson(trx, vecCollection);
}
totalSize += (*it)->size();
}
break; // fall-through to returning null
}
@ -762,6 +763,8 @@ AqlValue AqlValue::CreateFromBlocks (triagens::arango::AqlTransaction* trx,
for (RegisterId j = 0; j < n; ++j) {
if (variableNames[j][0] != '\0') {
// temporaries don't have a name and won't be included
// Variables from depth 0 are excluded, too, unless the
// COLLECT statement is on level 0 as well.
values.set(variableNames[j].c_str(), current->getValue(i, j).toJson(trx, current->getDocumentCollection(j)));
}
}

View File

@ -78,12 +78,14 @@ AggregatorGroup::~AggregatorGroup () {
void AggregatorGroup::initialize (size_t capacity) {
TRI_ASSERT(capacity > 0);
groupValues.clear();
collections.clear();
groupValues.reserve(capacity);
collections.reserve(capacity);
for (size_t i = 0; i < capacity; ++i) {
groupValues[i] = AqlValue();
collections[i] = nullptr;
groupValues.emplace_back();
collections.push_back(nullptr);
}
}
@ -716,7 +718,8 @@ AqlItemBlock* EnumerateCollectionBlock::getSome (size_t, // atLeast,
}
if (_buffer.empty()) {
if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) {
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! ExecutionBlock::getBlock(toFetch, toFetch)) {
_done = true;
return nullptr;
}
@ -797,7 +800,8 @@ size_t EnumerateCollectionBlock::skipSome (size_t atLeast, size_t atMost) {
while (skipped < atLeast) {
if (_buffer.empty()) {
if (! getBlock(DefaultBatchSize, DefaultBatchSize)) {
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! getBlock(toFetch, toFetch)) {
_done = true;
return skipped;
}
@ -1168,7 +1172,8 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast,
// try again!
if (_buffer.empty()) {
if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! ExecutionBlock::getBlock(toFetch, toFetch)
|| (! initIndex())) {
_done = true;
return nullptr;
@ -1274,7 +1279,8 @@ size_t IndexRangeBlock::skipSome (size_t atLeast,
while (skipped < atLeast) {
if (_buffer.empty()) {
if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! ExecutionBlock::getBlock(toFetch, toFetch)
|| (! initIndex())) {
_done = true;
return skipped;
@ -1726,7 +1732,8 @@ AqlItemBlock* EnumerateListBlock::getSome (size_t, size_t atMost) {
// try again!
if (_buffer.empty()) {
if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) {
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! ExecutionBlock::getBlock(toFetch, toFetch)) {
_done = true;
return nullptr;
}
@ -1847,7 +1854,8 @@ size_t EnumerateListBlock::skipSome (size_t atLeast, size_t atMost) {
while (skipped < atLeast) {
if (_buffer.empty()) {
if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) {
size_t toFetch = (std::min)(DefaultBatchSize, atMost);
if (! ExecutionBlock::getBlock(toFetch, toFetch)) {
_done = true;
return skipped;
}
@ -2073,7 +2081,7 @@ AqlItemBlock* CalculationBlock::getSome (size_t atLeast,
size_t atMost) {
unique_ptr<AqlItemBlock> res(ExecutionBlock::getSomeWithoutRegisterClearout(
DefaultBatchSize, DefaultBatchSize));
atLeast, atMost));
if (res.get() == nullptr) {
return nullptr;
@ -2341,6 +2349,10 @@ bool FilterBlock::hasMore () {
}
if (_buffer.empty()) {
// QUESTION: Is this sensible? Asking whether there is more might
// trigger an expensive fetching operation, even if later on only
// a single document is needed due to a LIMIT...
// However, how should we know this here?
if (! getBlock(DefaultBatchSize, DefaultBatchSize)) {
_done = true;
return false;
@ -2395,13 +2407,17 @@ AggregateBlock::AggregateBlock (ExecutionEngine* engine,
}
// iterate over all our variables
for (auto it = en->getRegisterPlan()->varInfo.begin();
it != en->getRegisterPlan()->varInfo.end(); ++it) {
// find variable in the global variable map
auto itVar = en->_variableMap.find((*it).first);
for (auto& vi : en->getRegisterPlan()->varInfo) {
if (vi.second.depth > 0 || en->getDepth() == 1) {
// Do not keep variables from depth 0, unless we are depth 1 ourselves
// (which means no FOR in which we are contained)
// find variable in the global variable map
auto itVar = en->_variableMap.find(vi.first);
if (itVar != en->_variableMap.end()) {
_variableNames[(*it).second.registerId] = (*itVar).second;
if (itVar != en->_variableMap.end()) {
_variableNames[vi.second.registerId] = (*itVar).second;
}
}
}
}

View File

@ -2043,22 +2043,35 @@ ExecutionNode* AggregateNode::clone (ExecutionPlan* plan,
////////////////////////////////////////////////////////////////////////////////
struct UserVarFinder : public WalkerWorker<ExecutionNode> {
UserVarFinder () {};
UserVarFinder (int mindepth) : mindepth(mindepth), depth(-1) {};
~UserVarFinder () {};
std::vector<Variable const*> userVars;
int mindepth; // minimal depth to consider
int depth;
bool enterSubquery (ExecutionNode*, ExecutionNode*) override final {
return false;
}
bool before (ExecutionNode* en) override final {
auto vars = en->getVariablesSetHere();
for (auto v : vars) {
if (v->isUserDefined()) {
userVars.push_back(v);
void after (ExecutionNode* en) override final {
if (en->getType() == ExecutionNode::SINGLETON) {
depth = 0;
}
else if (en->getType() == ExecutionNode::ENUMERATE_COLLECTION ||
en->getType() == ExecutionNode::INDEX_RANGE ||
en->getType() == ExecutionNode::ENUMERATE_LIST ||
en->getType() == ExecutionNode::AGGREGATE) {
depth += 1;
}
// Now depth is set correctly for this node.
if (depth >= mindepth) {
auto vars = en->getVariablesSetHere();
for (auto v : vars) {
if (v->isUserDefined()) {
userVars.push_back(v);
}
}
}
return false;
}
};
@ -2069,10 +2082,18 @@ std::vector<Variable const*> AggregateNode::getVariablesUsedHere () const {
}
if (_outVariable != nullptr) {
// Here we have to find all user defined variables in this query
// amongst our dependencies:
UserVarFinder finder;
// amongst our dependencies:
UserVarFinder finder(1);
auto myselfasnonconst = const_cast<AggregateNode*>(this);
myselfasnonconst->walk(&finder);
if (finder.depth == 1) {
// we are toplevel, let's run again with mindepth = 0
finder.userVars.clear();
finder.mindepth = 0;
finder.depth = -1;
finder.reset();
myselfasnonconst->walk(&finder);
}
for (auto x : finder.userVars) {
v.insert(x);
}