// mirror of https://gitee.com/bigwinds/arangodb
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Dr. Frank Celler
|
|
/// @author Martin Schoenert
|
|
/// @author Max Neunhoeffer
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef LIB_BASICS_ASSOC_MULTI_H
|
|
#define LIB_BASICS_ASSOC_MULTI_H 1
|
|
|
|
// Activate for additional debugging:
|
|
// #define TRI_CHECK_MULTI_POINTER_HASH 1
|
|
|
|
#include "Basics/Common.h"
|
|
#include "Basics/JsonHelper.h"
|
|
#include "Basics/logging.h"
|
|
#include "Basics/memory-map.h"
|
|
#include "Basics/Mutex.h"
|
|
#include "Basics/MutexLocker.h"
|
|
#include "Basics/prime-numbers.h"
|
|
|
|
#include <thread>
|
|
#include <velocypack/Builder.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
namespace arangodb {
|
|
namespace basics {
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief associative array of pointers, tolerating repeated keys.
|
|
///
|
|
/// This is a data structure that can store pointers to elements. Each element
|
|
/// has a unique key (for example a certain attribute) and multiple
|
|
/// elements in the associative array can have the same key. Every element
|
|
/// can be at most once in the array.
|
|
/// We want to offer constant time complexity for the following
|
|
/// operations:
|
|
/// - insert pointer to a element into the array
|
|
/// - lookup pointer to a element in the array
|
|
/// - delete pointer to a element from the array
|
|
/// - find one pointer to a element with a given key
|
|
/// Furthermore, we want to offer O(n) complexity for the following
|
|
/// operation:
|
|
/// - find all pointers whose elements have a given key k, where n is
|
|
/// the number of elements in the array with this key
|
|
/// To this end, we use a hash table and ask the user to provide the following:
|
|
/// - a way to hash elements by their keys, and to hash keys themselves,
|
|
/// - a way to hash elements by their full identity
|
|
/// - a way to compare a key to the key of a given element
|
|
/// - a way to compare two elements, either by their keys or by their full
|
|
/// identities.
|
|
/// To avoid unnecessary comparisons the user can guarantee that s/he will
|
|
/// only try to store non-identical elements into the array. This enables
|
|
/// the code to skip comparisons which would otherwise be necessary to
|
|
/// ensure uniqueness.
|
|
/// The idea of the algorithm is as follows: Each slot in the hash table
|
|
/// contains a pointer to the actual element, as well as two unsigned
|
|
/// integers "prev" and "next" (being indices in the hash table) to
|
|
/// organise a linked list of entries, *within the same hash table*. All
|
|
/// elements with the same key are kept in a doubly linked list. The first
|
|
/// element in such a linked list is kept at the position determined by
|
|
/// its hash with respect to its key (or in the first free slot after this
|
|
/// position). All further elements in such a linked list are kept at the
|
|
/// position determined by its hash with respect to its full identity
|
|
/// (or in the first free slot after this position). Provided the hash
|
|
/// table is large enough and the hash functions distribute well enough,
|
|
/// this gives the proposed complexity.
|
|
///
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// One hash-table slot. Besides the element pointer it stores prev/next
// indices (into the same table) that form a doubly linked list of all
// elements sharing a key, plus an optional cached hash value.
template <class Element, class IndexType, bool useHashCache>
struct Entry {
 private:
  uint64_t hashCache;  // cache the hash value, this stores the
                       // hashByKey for the first element in the
                       // linked list and the hashByElm for all
                       // others

 public:
  Element* ptr;    // a pointer to the data stored in this slot
  IndexType next;  // index of the data following in the linked
                   // list of all items with the same key
  IndexType prev;  // index of the data preceding in the linked
                   // list of all items with the same key

  uint64_t readHashCache() { return hashCache; }
  void writeHashCache(uint64_t v) { hashCache = v; }
};
|
|
|
|
// Specialization without the hash cache: saves 8 bytes per slot.
// readHashCache() returns a dummy value (callers guard with the
// useHashCache template flag) and writeHashCache() must never be called.
template <class Element, class IndexType>
struct Entry<Element, IndexType, false> {
  Element* ptr;    // a pointer to the data stored in this slot
  IndexType next;  // index of the data following in the linked
                   // list of all items with the same key
  IndexType prev;  // index of the data preceding in the linked
                   // list of all items with the same key

  uint64_t readHashCache() { return 0; }
  void writeHashCache(uint64_t v) { TRI_ASSERT(false); }
};
|
|
|
|
template <class Key, class Element, class IndexType = size_t,
|
|
bool useHashCache = true>
|
|
class AssocMulti {
|
|
private:
|
|
typedef void UserData;
|
|
|
|
public:
|
|
static IndexType const INVALID_INDEX = ((IndexType)0) - 1;
|
|
|
|
typedef std::function<uint64_t(UserData*, Key const*)> HashKeyFuncType;
|
|
typedef std::function<uint64_t(UserData*, Element const*, bool)>
|
|
HashElementFuncType;
|
|
typedef std::function<bool(UserData*, Key const*, Element const*)>
|
|
IsEqualKeyElementFuncType;
|
|
typedef std::function<bool(UserData*, Element const*, Element const*)>
|
|
IsEqualElementElementFuncType;
|
|
typedef std::function<void(Element*)> CallbackElementFuncType;
|
|
|
|
private:
|
|
typedef Entry<Element, IndexType, useHashCache> EntryType;
|
|
|
|
struct Bucket {
|
|
IndexType _nrAlloc; // the size of the table
|
|
IndexType _nrUsed; // the number of used entries
|
|
IndexType _nrCollisions; // the number of entries that have
|
|
// a key that was previously in the table
|
|
EntryType* _table; // the table itself
|
|
|
|
Bucket() : _nrAlloc(0), _nrUsed(0), _nrCollisions(0), _table(nullptr) {}
|
|
// Intentionally no destructor, the AssocMulti class takes
|
|
// care of freeing the tables!
|
|
};
|
|
|
|
std::vector<Bucket> _buckets;
|
|
size_t _bucketsMask;
|
|
|
|
#ifdef TRI_INTERNAL_STATS
|
|
uint64_t _nrFinds; // statistics: number of lookup calls
|
|
uint64_t _nrAdds; // statistics: number of insert calls
|
|
uint64_t _nrRems; // statistics: number of remove calls
|
|
uint64_t _nrResizes; // statistics: number of resizes
|
|
|
|
uint64_t _nrProbes; // statistics: number of misses in FindElementPlace
|
|
// and LookupByElement, used by insert, lookup and
|
|
// remove
|
|
uint64_t _nrProbesF; // statistics: number of misses while looking up
|
|
uint64_t _nrProbesD; // statistics: number of misses while removing
|
|
#endif
|
|
|
|
HashKeyFuncType const _hashKey;
|
|
HashElementFuncType const _hashElement;
|
|
IsEqualKeyElementFuncType const _isEqualKeyElement;
|
|
IsEqualElementElementFuncType const _isEqualElementElement;
|
|
IsEqualElementElementFuncType const _isEqualElementElementByKey;
|
|
|
|
std::function<std::string()> _contextCallback;
|
|
|
|
public:
|
|
  // Constructor: stores the user-supplied hash/equality callbacks, rounds
  // numberBuckets down to a power of two, and allocates each bucket's
  // initial table. If any allocation throws, all previously allocated
  // tables are freed before the exception is rethrown.
  AssocMulti(HashKeyFuncType hashKey, HashElementFuncType hashElement,
             IsEqualKeyElementFuncType isEqualKeyElement,
             IsEqualElementElementFuncType isEqualElementElement,
             IsEqualElementElementFuncType isEqualElementElementByKey,
             size_t numberBuckets = 1, IndexType initialSize = 64,
             std::function<std::string()> contextCallback =
                 []() -> std::string { return ""; })
      :
#ifdef TRI_INTERNAL_STATS
        _nrFinds(0),
        _nrAdds(0),
        _nrRems(0),
        _nrResizes(0),
        _nrProbes(0),
        _nrProbesF(0),
        _nrProbesD(0),
#endif
        _hashKey(hashKey),
        _hashElement(hashElement),
        _isEqualKeyElement(isEqualKeyElement),
        _isEqualElementElement(isEqualElementElement),
        _isEqualElementElementByKey(isEqualElementElementByKey),
        _contextCallback(contextCallback) {

    // Make the number of buckets a power of two:
    // (rounds DOWN, e.g. 3 -> 2; _bucketsMask then selects a bucket from
    // the top-level hash via bitwise AND)
    size_t ex = 0;
    size_t nr = 1;
    numberBuckets >>= 1;
    while (numberBuckets > 0) {
      ex += 1;
      numberBuckets >>= 1;
      nr <<= 1;
    }
    numberBuckets = nr;
    _bucketsMask = nr - 1;

    try {
      for (size_t j = 0; j < numberBuckets; j++) {
        _buckets.emplace_back();
        Bucket& b = _buckets.back();
        b._nrAlloc = initialSize;
        // set to nullptr first so the catch block below can safely
        // delete[] even if the allocation on the next line throws
        b._table = nullptr;

        // may fail...
        b._table = new EntryType[b._nrAlloc];

#ifdef __linux__
        // For large tables, advise the kernel that access will be random
        // to avoid useless read-ahead (page-aligned start required).
        if (b._nrAlloc > 1000000) {
          uintptr_t mem = reinterpret_cast<uintptr_t>(b._table);
          uintptr_t pageSize = getpagesize();
          mem = (mem / pageSize) * pageSize;
          void* memptr = reinterpret_cast<void*>(mem);
          TRI_MMFileAdvise(memptr, b._nrAlloc * sizeof(EntryType),
                           TRI_MADVISE_RANDOM);
        }
#endif

        for (IndexType i = 0; i < b._nrAlloc; i++) {
          invalidateEntry(b, i);
        }
      }
    } catch (...) {
      // roll back: free everything allocated so far, then rethrow
      for (auto& b : _buckets) {
        delete[] b._table;
        b._table = nullptr;
        b._nrAlloc = 0;
      }
      throw;
    }
  }
|
|
|
|
~AssocMulti() {
|
|
for (auto& b : _buckets) {
|
|
if (b._table != nullptr) {
|
|
delete[] b._table;
|
|
b._table = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief return the memory used by the hash table
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
size_t memoryUsage() const {
|
|
size_t res = 0;
|
|
// size_t count = 0;
|
|
for (auto& b : _buckets) {
|
|
res += static_cast<size_t>(b._nrAlloc) * sizeof(EntryType);
|
|
// std::cout << "Bucket: " << count++ << " _nrAlloc=" << b._nrAlloc
|
|
// << " _nrUsed=" << b._nrUsed << std::endl;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief size(), return the number of items stored
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
size_t size() const {
|
|
size_t res = 0;
|
|
for (auto& b : _buckets) {
|
|
res += static_cast<size_t>(b._nrUsed);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Appends information about statistics in the given VPackBuilder
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
void appendToVelocyPack(VPackBuilder& builder) {
|
|
builder.add("buckets", VPackValue(VPackValueType::Array));
|
|
for (auto& b : _buckets) {
|
|
builder.openObject();
|
|
builder.add("nrAlloc", VPackValue(b._nrAlloc));
|
|
builder.add("nrUsed", VPackValue(b._nrUsed));
|
|
builder.close();
|
|
}
|
|
builder.close(); // buckets
|
|
builder.add("nrBuckets", VPackValue(_buckets.size()));
|
|
builder.add("totalUsed", VPackValue(size()));
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Appends information about statistics in the given json.
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
void appendToJson(TRI_memory_zone_t* zone, Json& json) {
|
|
Json bkts(zone, Json::Array);
|
|
for (auto& b : _buckets) {
|
|
Json bucketInfo(zone, Json::Object);
|
|
bucketInfo("nrAlloc", Json(static_cast<double>(b._nrAlloc)));
|
|
bucketInfo("nrUsed", Json(static_cast<double>(b._nrUsed)));
|
|
bkts.add(bucketInfo);
|
|
}
|
|
json("buckets", bkts);
|
|
json("nrBuckets", Json(static_cast<double>(_buckets.size())));
|
|
json("totalUsed", Json(static_cast<double>(size())));
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief capacity(), return the number of allocated items
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
size_t capacity() const {
|
|
size_t res = 0;
|
|
for (auto& b : _buckets) {
|
|
res += static_cast<size_t>(b._nrAlloc);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief return the element at position.
|
|
/// this may return a nullptr
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
Element* at(Bucket& b, size_t position) const {
|
|
return b._table[position].ptr;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief adds a key/element to the array
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
Element* insert(UserData* userData, Element* element, bool overwrite,
|
|
bool checkEquality) {
|
|
// if the checkEquality flag is not set, we do not check for element
|
|
// equality we use this flag to speed up initial insertion into the
|
|
// index, i.e. when the index is built for a collection and we know
|
|
// for sure no duplicate elements will be inserted
|
|
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
|
|
// compute the hash by the key only first
|
|
uint64_t hashByKey = _hashElement(userData, element, true);
|
|
Bucket& b = _buckets[hashByKey & _bucketsMask];
|
|
|
|
auto result =
|
|
doInsert(userData, element, hashByKey, b, overwrite, checkEquality);
|
|
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief adds multiple elements to the array
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
int batchInsert(UserData* userData, std::vector<Element*> const* data,
|
|
size_t numThreads) {
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
std::atomic<int> res(TRI_ERROR_NO_ERROR);
|
|
|
|
std::vector<Element*> const& elements = *(data);
|
|
|
|
if (elements.size() < numThreads) {
|
|
numThreads = elements.size();
|
|
}
|
|
if (numThreads > _buckets.size()) {
|
|
numThreads = _buckets.size();
|
|
}
|
|
|
|
size_t const chunkSize = elements.size() / numThreads;
|
|
|
|
typedef std::vector<std::pair<Element*, uint64_t>> DocumentsPerBucket;
|
|
|
|
arangodb::Mutex bucketMapLocker;
|
|
|
|
std::unordered_map<uint64_t, std::vector<DocumentsPerBucket>> allBuckets;
|
|
|
|
// partition the work into some buckets
|
|
{
|
|
std::function<void(size_t, size_t)> partitioner;
|
|
partitioner = [&](size_t lower, size_t upper) -> void {
|
|
try {
|
|
std::unordered_map<uint64_t, DocumentsPerBucket> partitions;
|
|
|
|
for (size_t i = lower; i < upper; ++i) {
|
|
uint64_t hashByKey = _hashElement(userData, elements[i], true);
|
|
auto bucketId = hashByKey & _bucketsMask;
|
|
|
|
auto it = partitions.find(bucketId);
|
|
|
|
if (it == partitions.end()) {
|
|
it = partitions.emplace(bucketId, DocumentsPerBucket()).first;
|
|
}
|
|
|
|
(*it).second.emplace_back(elements[i], hashByKey);
|
|
}
|
|
|
|
// transfer ownership to the central map
|
|
MUTEX_LOCKER(mutexLocker, bucketMapLocker);
|
|
|
|
for (auto& it : partitions) {
|
|
auto it2 = allBuckets.find(it.first);
|
|
|
|
if (it2 == allBuckets.end()) {
|
|
it2 = allBuckets.emplace(it.first,
|
|
std::vector<DocumentsPerBucket>()).first;
|
|
}
|
|
|
|
(*it2).second.emplace_back(std::move(it.second));
|
|
}
|
|
} catch (...) {
|
|
res = TRI_ERROR_INTERNAL;
|
|
}
|
|
};
|
|
|
|
std::vector<std::thread> threads;
|
|
threads.reserve(numThreads);
|
|
|
|
try {
|
|
for (size_t i = 0; i < numThreads; ++i) {
|
|
size_t lower = i * chunkSize;
|
|
size_t upper = (i + 1) * chunkSize;
|
|
|
|
if (i + 1 == numThreads) {
|
|
// last chunk. account for potential rounding errors
|
|
upper = elements.size();
|
|
} else if (upper > elements.size()) {
|
|
upper = elements.size();
|
|
}
|
|
|
|
threads.emplace_back(std::thread(partitioner, lower, upper));
|
|
}
|
|
} catch (...) {
|
|
res = TRI_ERROR_INTERNAL;
|
|
}
|
|
|
|
for (size_t i = 0; i < threads.size(); ++i) {
|
|
// must join threads, otherwise the program will crash
|
|
threads[i].join();
|
|
}
|
|
}
|
|
|
|
if (res.load() != TRI_ERROR_NO_ERROR) {
|
|
return res.load();
|
|
}
|
|
|
|
// now the data is partitioned...
|
|
|
|
// now insert the bucket data in parallel
|
|
{
|
|
auto inserter = [&](size_t chunk) -> void {
|
|
try {
|
|
for (auto const& it : allBuckets) {
|
|
uint64_t bucketId = it.first;
|
|
|
|
if (bucketId % numThreads != chunk) {
|
|
// we're not responsible for this bucket!
|
|
continue;
|
|
}
|
|
|
|
// we're responsible for this bucket!
|
|
Bucket& b = _buckets[bucketId];
|
|
|
|
for (auto const& it2 : it.second) {
|
|
for (auto const& it3 : it2) {
|
|
doInsert(userData, it3.first, it3.second, b, true, false);
|
|
}
|
|
}
|
|
}
|
|
} catch (...) {
|
|
res = TRI_ERROR_INTERNAL;
|
|
}
|
|
};
|
|
|
|
std::vector<std::thread> threads;
|
|
threads.reserve(numThreads);
|
|
|
|
try {
|
|
for (size_t i = 0; i < numThreads; ++i) {
|
|
threads.emplace_back(std::thread(inserter, i));
|
|
}
|
|
} catch (...) {
|
|
res = TRI_ERROR_INTERNAL;
|
|
}
|
|
|
|
for (size_t i = 0; i < threads.size(); ++i) {
|
|
// must join threads, otherwise the program will crash
|
|
threads[i].join();
|
|
}
|
|
}
|
|
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
if (res.load() != TRI_ERROR_NO_ERROR) {
|
|
// Rollback such that the data can be deleted outside
|
|
try {
|
|
for (auto const& d : *data) {
|
|
remove(userData, d);
|
|
}
|
|
} catch (...) {
|
|
}
|
|
}
|
|
return res.load();
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief a method to iterate over all elements in the hash
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
void invokeOnAllElements(CallbackElementFuncType callback) {
|
|
for (auto& b : _buckets) {
|
|
if (b._table == nullptr) {
|
|
continue;
|
|
}
|
|
|
|
for (size_t i = 0; i < b._nrAlloc; ++i) {
|
|
if (b._table[i].ptr == nullptr) {
|
|
continue;
|
|
}
|
|
callback(b._table[i].ptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief adds a key/element to the array
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
Element* doInsert(UserData* userData, Element* element, uint64_t hashByKey,
|
|
Bucket& b, bool const overwrite, bool const checkEquality) {
|
|
// if the checkEquality flag is not set, we do not check for element
|
|
// equality we use this flag to speed up initial insertion into the
|
|
// index, i.e. when the index is built for a collection and we know
|
|
// for sure no duplicate elements will be inserted
|
|
|
|
// if we were adding and the table is more than 2/3 full, extend it
|
|
if (2 * b._nrAlloc < 3 * b._nrUsed) {
|
|
resizeInternal(userData, b, 2 * b._nrAlloc + 1);
|
|
}
|
|
|
|
#ifdef TRI_INTERNAL_STATS
|
|
// update statistics
|
|
_nrAdds++;
|
|
#endif
|
|
|
|
IndexType hashIndex = hashToIndex(hashByKey);
|
|
IndexType i = hashIndex % b._nrAlloc;
|
|
|
|
// If this slot is free, just use it:
|
|
if (nullptr == b._table[i].ptr) {
|
|
b._table[i].ptr = element;
|
|
b._table[i].next = INVALID_INDEX;
|
|
b._table[i].prev = INVALID_INDEX;
|
|
if (useHashCache) {
|
|
b._table[i].writeHashCache(hashByKey);
|
|
}
|
|
b._nrUsed++;
|
|
// no collision generated here!
|
|
return nullptr;
|
|
}
|
|
|
|
// Now find the first slot with an entry with the same key
|
|
// that is the start of a linked list, or a free slot:
|
|
while (b._table[i].ptr != nullptr &&
|
|
(b._table[i].prev != INVALID_INDEX ||
|
|
(useHashCache && b._table[i].readHashCache() != hashByKey) ||
|
|
!_isEqualElementElementByKey(userData, element, b._table[i].ptr))) {
|
|
i = incr(b, i);
|
|
#ifdef TRI_INTERNAL_STATS
|
|
// update statistics
|
|
_ProbesA++;
|
|
#endif
|
|
}
|
|
|
|
// If this is free, we are the first with this key:
|
|
if (nullptr == b._table[i].ptr) {
|
|
b._table[i].ptr = element;
|
|
b._table[i].next = INVALID_INDEX;
|
|
b._table[i].prev = INVALID_INDEX;
|
|
if (useHashCache) {
|
|
b._table[i].writeHashCache(hashByKey);
|
|
}
|
|
b._nrUsed++;
|
|
// no collision generated here either!
|
|
return nullptr;
|
|
}
|
|
|
|
Element* old;
|
|
|
|
// Otherwise, entry i points to the beginning of the linked
|
|
// list of which we want to make element a member. Perhaps an
|
|
// equal element is right here:
|
|
if (checkEquality &&
|
|
_isEqualElementElement(userData, element, b._table[i].ptr)) {
|
|
old = b._table[i].ptr;
|
|
if (overwrite) {
|
|
TRI_ASSERT(!useHashCache || b._table[i].readHashCache() == hashByKey);
|
|
b._table[i].ptr = element;
|
|
}
|
|
return old;
|
|
}
|
|
|
|
// Now find a new home for element in this linked list:
|
|
uint64_t hashByElm;
|
|
IndexType j =
|
|
findElementPlace(userData, b, element, checkEquality, hashByElm);
|
|
|
|
old = b._table[j].ptr;
|
|
|
|
// if we found an element, return
|
|
if (old != nullptr) {
|
|
if (overwrite) {
|
|
if (useHashCache) {
|
|
b._table[j].writeHashCache(hashByElm);
|
|
}
|
|
b._table[j].ptr = element;
|
|
}
|
|
return old;
|
|
}
|
|
|
|
// add a new element to the associative array and linked list (in pos 2):
|
|
b._table[j].ptr = element;
|
|
b._table[j].next = b._table[i].next;
|
|
b._table[j].prev = i;
|
|
if (useHashCache) {
|
|
b._table[j].writeHashCache(hashByElm);
|
|
}
|
|
b._table[i].next = j;
|
|
// Finally, we need to find the successor to patch it up:
|
|
if (b._table[j].next != INVALID_INDEX) {
|
|
b._table[b._table[j].next].prev = j;
|
|
}
|
|
b._nrUsed++;
|
|
b._nrCollisions++;
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief insertFirst, special version of insert, when it is known that the
|
|
/// element is the first in the hash with its key, and the hash of the key
|
|
/// is already known. This is for example the case when resizing.
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
IndexType insertFirst(UserData* userData, Bucket& b, Element* element,
|
|
uint64_t hashByKey) {
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
|
|
#ifdef TRI_INTERNAL_STATS
|
|
// update statistics
|
|
_nrAdds++;
|
|
#endif
|
|
|
|
IndexType hashIndex = hashToIndex(hashByKey);
|
|
IndexType i = hashIndex % b._nrAlloc;
|
|
|
|
// If this slot is free, just use it:
|
|
if (nullptr == b._table[i].ptr) {
|
|
b._table[i].ptr = element;
|
|
b._table[i].next = INVALID_INDEX;
|
|
b._table[i].prev = INVALID_INDEX;
|
|
if (useHashCache) {
|
|
b._table[i].writeHashCache(hashByKey);
|
|
}
|
|
b._nrUsed++;
|
|
// no collision generated here!
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
return i;
|
|
}
|
|
|
|
// Now find the first slot with an entry with the same key
|
|
// that is the start of a linked list, or a free slot:
|
|
while (b._table[i].ptr != nullptr) {
|
|
i = incr(b, i);
|
|
#ifdef TRI_INTERNAL_STATS
|
|
// update statistics
|
|
_ProbesA++;
|
|
#endif
|
|
}
|
|
|
|
// We are the first with this key:
|
|
b._table[i].ptr = element;
|
|
b._table[i].next = INVALID_INDEX;
|
|
b._table[i].prev = INVALID_INDEX;
|
|
if (useHashCache) {
|
|
b._table[i].writeHashCache(hashByKey);
|
|
}
|
|
b._nrUsed++;
|
|
// no collision generated here either!
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
check(userData, true, true);
|
|
#endif
|
|
return i;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief insertFurther, special version of insert, when it is known
|
|
/// that the element is not the first in the hash with its key, and
|
|
/// the hash of the key and the element is already known. This is for
|
|
/// example the case when resizing.
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // insertFurther: special version of insert for an element that is known
  // NOT to be the first in the hash with its key. `firstPosition` is the
  // slot of the head of the key's linked list; `hashByElm` is the element's
  // full-identity hash. Used for example when resizing.
  void insertFurther(UserData* userData, Bucket& b, Element* element,
                     uint64_t hashByKey, uint64_t hashByElm,
                     IndexType firstPosition) {
#ifdef TRI_CHECK_MULTI_POINTER_HASH
    check(userData, true, true);
#endif

#ifdef TRI_INTERNAL_STATS
    // update statistics
    _nrAdds++;
#endif

    // We already know the beginning of the doubly linked list:

    // Now find a new home for element in this linked list:
    // non-head elements are placed at (or after) the slot given by their
    // full-identity hash
    IndexType hashIndex = hashToIndex(hashByElm);
    IndexType j = hashIndex % b._nrAlloc;

    // linear probing for a free slot
    while (b._table[j].ptr != nullptr) {
      j = incr(b, j);
#ifdef TRI_INTERNAL_STATS
      _nrProbes++;
#endif
    }

    // add the element to the hash and linked list (in pos 2):
    // splice j in directly behind firstPosition
    b._table[j].ptr = element;
    b._table[j].next = b._table[firstPosition].next;
    b._table[j].prev = firstPosition;
    if (useHashCache) {
      b._table[j].writeHashCache(hashByElm);
    }
    b._table[firstPosition].next = j;
    // Finally, we need to find the successor to patch it up:
    if (b._table[j].next != INVALID_INDEX) {
      b._table[b._table[j].next].prev = j;
    }
    b._nrUsed++;
    b._nrCollisions++;

#ifdef TRI_CHECK_MULTI_POINTER_HASH
    check(userData, true, true);
#endif
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief lookups an element given an element
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
public:
|
|
Element* lookup(UserData* userData, Element const* element) const {
|
|
IndexType i;
|
|
|
|
#ifdef TRI_INTERNAL_STATS
|
|
// update statistics
|
|
_nrFinds++;
|
|
#endif
|
|
|
|
Bucket* b;
|
|
i = lookupByElement(userData, element, b);
|
|
return b->_table[i].ptr;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief lookups an element given a key
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // Returns up to `limit` (0 == unlimited) elements whose key equals
  // `key`. The caller owns the returned vector (raw pointer for historical
  // API reasons); it is empty when nothing matches.
  std::vector<Element*>* lookupByKey(UserData* userData, Key const* key,
                                     size_t limit = 0) const {
    std::unique_ptr<std::vector<Element*>> result(new std::vector<Element*>());

    // compute the hash
    uint64_t hashByKey = _hashKey(userData, key);
    Bucket const& b = _buckets[hashByKey & _bucketsMask];
    IndexType hashIndex = hashToIndex(hashByKey);
    IndexType i = hashIndex % b._nrAlloc;

#ifdef TRI_INTERNAL_STATS
    // update statistics
    _nrFinds++;
#endif

    // search the table
    // Linear probing: skip slots that are not a linked-list head
    // (prev != INVALID_INDEX), whose cached key hash differs, or whose
    // key does not match.
    while (b._table[i].ptr != nullptr &&
           (b._table[i].prev != INVALID_INDEX ||
            (useHashCache && b._table[i].readHashCache() != hashByKey) ||
            !_isEqualKeyElement(userData, key, b._table[i].ptr))) {
      i = incr(b, i);
#ifdef TRI_INTERNAL_STATS
      _nrProbesF++;
#endif
    }

    if (b._table[i].ptr != nullptr) {
      // We found the beginning of the linked list:
      // collect elements by following the next-chain
      do {
        result->push_back(b._table[i].ptr);
        i = b._table[i].next;
      } while (i != INVALID_INDEX && (limit == 0 || result->size() < limit));
    }

    // return whatever we found
    return result.release();
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief looks up all elements with the same key as a given element
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // Like lookupByKey, but the key is taken from `element` (hashed and
  // compared via the by-key callbacks). Returns up to `limit` (0 ==
  // unlimited) matches; caller owns the returned vector.
  std::vector<Element*>* lookupWithElementByKey(UserData* userData,
                                                Element const* element,
                                                size_t limit = 0) const {
    std::unique_ptr<std::vector<Element*>> result(new std::vector<Element*>());

    // compute the hash
    uint64_t hashByKey = _hashElement(userData, element, true);
    Bucket const& b = _buckets[hashByKey & _bucketsMask];
    IndexType hashIndex = hashToIndex(hashByKey);
    IndexType i = hashIndex % b._nrAlloc;

#ifdef TRI_INTERNAL_STATS
    // update statistics
    _nrFinds++;
#endif

    // search the table
    // Linear probing: skip slots that are not a linked-list head, whose
    // cached key hash differs, or whose key does not match.
    while (b._table[i].ptr != nullptr &&
           (b._table[i].prev != INVALID_INDEX ||
            (useHashCache && b._table[i].readHashCache() != hashByKey) ||
            !_isEqualElementElementByKey(userData, element, b._table[i].ptr))) {
      i = incr(b, i);
#ifdef TRI_INTERNAL_STATS
      _nrProbesF++;
#endif
    }

    if (b._table[i].ptr != nullptr) {
      // We found the beginning of the linked list:
      // collect elements by following the next-chain
      do {
        result->push_back(b._table[i].ptr);
        i = b._table[i].next;
      } while (i != INVALID_INDEX && (limit == 0 || result->size() < limit));
    }

    // return whatever we found
    return result.release();
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief looks up all elements with the same key as a given element,
|
|
/// continuation
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // Continuation lookup: `element` must be the last element returned by a
  // previous lookup with the same key; returns up to `limit` (0 ==
  // unlimited) of the FOLLOWING elements in that key's linked list.
  // Caller owns the returned vector. Returns nullptr only in the
  // "cannot really happen" case that the list head is not found.
  std::vector<Element*>* lookupWithElementByKeyContinue(
      UserData* userData, Element const* element, size_t limit = 0) const {
    std::unique_ptr<std::vector<Element*>> result(new std::vector<Element*>());

    uint64_t hashByKey = _hashElement(userData, element, true);
    Bucket const& b = _buckets[hashByKey & _bucketsMask];
    uint64_t hashByElm;
    // locate `element` itself (non-head elements live at their
    // full-identity hash position)
    IndexType i = findElementPlace(userData, b, element, true, hashByElm);
    if (b._table[i].ptr == nullptr) {
      // This can only happen if the element was the first in its doubly
      // linked list (after all, the caller guaranteed that element was
      // the last of a previous lookup). To cover this case, we have to
      // look in the position given by the hashByKey:
      i = hashToIndex(hashByKey) % b._nrAlloc;

      // Now find the first slot with an entry with the same key
      // that is the start of a linked list, or a free slot:
      while (
          b._table[i].ptr != nullptr &&
          (b._table[i].prev != INVALID_INDEX ||
           (useHashCache && b._table[i].readHashCache() != hashByKey) ||
           !_isEqualElementElementByKey(userData, element, b._table[i].ptr))) {
        i = incr(b, i);
#ifdef TRI_INTERNAL_STATS
        _nrProbes++;
#endif
      }

      if (b._table[i].ptr == nullptr) {
        // This cannot really happen, but we handle it gracefully anyway
        return nullptr;
      }
    }

    // continue search of the table
    // walk the next-chain starting AFTER the slot found above
    while (true) {
      i = b._table[i].next;
      if (i == INVALID_INDEX || (limit != 0 && result->size() >= limit)) {
        break;
      }
      result->push_back(b._table[i].ptr);
    }

    // return whatever we found
    return result.release();
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief looks up all elements with the same key as a given element,
|
|
/// continuation
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // Continuation lookup by key: identical semantics to
  // lookupWithElementByKeyContinue; kept as a separate name for API
  // symmetry with lookupByKey.
  std::vector<Element*>* lookupByKeyContinue(UserData* userData,
                                             Element const* element,
                                             size_t limit = 0) const {
    return lookupWithElementByKeyContinue(userData, element, limit);
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief removes an element from the array, caller is responsible to free it
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
  // Removes `element` from the array and returns the stored pointer (the
  // caller is responsible for freeing it), or nullptr when not found.
  // Three cases: sole member of its list, head of a list (successor is
  // promoted to head), or interior/tail member (unlinked). In all cases
  // the resulting hole is healed so linear probing stays consistent.
  Element* remove(UserData* userData, Element const* element) {
    IndexType j = 0;

#ifdef TRI_INTERNAL_STATS
    // update statistics
    _nrRems++;
#endif

#ifdef TRI_CHECK_MULTI_POINTER_HASH
    check(userData, true, true);
#endif
    Bucket* b;
    IndexType i = lookupByElement(userData, element, b);
    if (b->_table[i].ptr == nullptr) {
      // not found
      return nullptr;
    }

    Element* old = b->_table[i].ptr;
    // We have to delete entry i
    if (b->_table[i].prev == INVALID_INDEX) {
      // This is the first in its linked list.
      j = b->_table[i].next;
      if (j == INVALID_INDEX) {
        // The only one in its linked list, simply remove it and heal
        // the hole:
        invalidateEntry(*b, i);
#ifdef TRI_CHECK_MULTI_POINTER_HASH
        check(userData, false, false);
#endif
        healHole(userData, *b, i);
        // this element did not create a collision
      } else {
        // There is at least one successor in position j.
        // Promote the successor to list head by moving it into slot i.
        b->_table[j].prev = INVALID_INDEX;
        moveEntry(*b, j, i);
        if (useHashCache) {
          // We need to exchange the hashCache value by that of the key:
          // (list heads cache hashByKey, non-heads cache hashByElm)
          b->_table[i].writeHashCache(
              _hashElement(userData, b->_table[i].ptr, true));
        }
#ifdef TRI_CHECK_MULTI_POINTER_HASH
        check(userData, false, false);
#endif
        healHole(userData, *b, j);
        b->_nrCollisions--;  // one collision less
      }
    } else {
      // This one is not the first in its linked list
      // unlink i from its predecessor and (if present) successor
      j = b->_table[i].prev;
      b->_table[j].next = b->_table[i].next;
      j = b->_table[i].next;
      if (j != INVALID_INDEX) {
        // We are not the last in the linked list.
        b->_table[j].prev = b->_table[i].prev;
      }
      invalidateEntry(*b, i);
#ifdef TRI_CHECK_MULTI_POINTER_HASH
      check(userData, false, false);
#endif
      healHole(userData, *b, i);
      b->_nrCollisions--;
    }
    b->_nrUsed--;
#ifdef TRI_CHECK_MULTI_POINTER_HASH
    check(userData, true, true);
#endif
    // return success
    return old;
  }
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief resize the array
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief resize the array
///
/// @param userData  opaque context forwarded to the hash callbacks
/// @param size      requested total capacity, distributed over all buckets
/// @return TRI_ERROR_NO_ERROR on success, TRI_ERROR_BAD_PARAMETER if the
///         requested size is too small for the current fill level of any
///         bucket, TRI_ERROR_OUT_OF_MEMORY if allocation fails
//////////////////////////////////////////////////////////////////////////////

int resize(UserData* userData, size_t size) noexcept {
  size /= _buckets.size();

  // Validate the requested size against ALL buckets up front, so that a
  // too-small size never leaves the hash partially resized (the original
  // interleaved check could resize earlier buckets before a later bucket
  // failed the check and aborted with TRI_ERROR_BAD_PARAMETER):
  for (auto& b : _buckets) {
    if (2 * (2 * size + 1) < 3 * b._nrUsed) {
      return TRI_ERROR_BAD_PARAMETER;
    }
  }

  for (auto& b : _buckets) {
    try {
      resizeInternal(userData, b, 2 * size + 1);
    } catch (...) {
      // resizeInternal rolls the affected bucket back on failure; report
      // the error instead of letting the exception escape (noexcept).
      return TRI_ERROR_OUT_OF_MEMORY;
    }
  }
  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief return selectivity, this is a number s with 0.0 < s <= 1.0. If
|
|
/// s == 1.0 this means that every document is identified uniquely by its
|
|
/// key. It is computed as
|
|
/// number of different keys/number of elements in table
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
double selectivity() {
|
|
size_t nrUsed = 0;
|
|
size_t nrCollisions = 0;
|
|
for (auto& b : _buckets) {
|
|
nrUsed += b._nrUsed;
|
|
nrCollisions += b._nrCollisions;
|
|
}
|
|
return nrUsed > 0
|
|
? static_cast<double>(nrUsed - nrCollisions) /
|
|
static_cast<double>(nrUsed)
|
|
: 1.0;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief iteration over all pointers in the hash array, the callback
|
|
/// function is called on the Element* for each thingy stored in the hash
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief iteration over all pointers in the hash array, the callback
/// function is called on the Element* for each thingy stored in the hash
///
/// @param userData  opaque context (kept for interface symmetry with the
///                  other methods; not forwarded to the callback, whose
///                  declared signature only takes the Element*)
/// @param callback  invoked once per non-empty slot
//////////////////////////////////////////////////////////////////////////////

void iterate(UserData* userData, std::function<void(Element*)> callback) {
  (void)userData;  // unused, see above
  for (auto& b : _buckets) {
    for (IndexType i = 0; i < b._nrAlloc; i++) {
      if (b._table[i].ptr != nullptr) {
        // BUGFIX: the callback is declared as void(Element*), but was
        // previously invoked as callback(userData, ptr), which does not
        // compile when this template is instantiated. Call it with the
        // Element* only, matching the declared signature.
        callback(b._table[i].ptr);
      }
    }
  }
}
|
|
|
|
private:
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief increment IndexType by 1 modulo _nrAlloc:
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief increment IndexType by 1 modulo _nrAlloc:
//////////////////////////////////////////////////////////////////////////////

inline IndexType incr(Bucket const& b, IndexType i) const {
  // Step one slot forward, wrapping around at the end of the table.
  IndexType next = static_cast<IndexType>(i + 1);
  // Subtract the table size on wrap-around instead of using the (much
  // slower) modulo operator:
  return next < b._nrAlloc ? next
                           : static_cast<IndexType>(next - b._nrAlloc);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief resize the array, internal method
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
void resizeInternal(UserData* userData, Bucket& b, size_t size) {
  // Reallocates bucket b to hold (roughly) <size> slots and rehashes all
  // entries into the new table, preserving the order of each per-key
  // linked list. Throws (typically std::bad_alloc) on allocation failure,
  // in which case the bucket is rolled back to its previous table.
  std::string const cb(_contextCallback());

  LOG_TRACE("resizing index %s, target size: %llu", cb.c_str(),
            (unsigned long long)size);

  LOG_ACTION("index-resize %s, target size: %llu", cb.c_str(),
             (unsigned long long)size);
  double start = TRI_microtime();

  // Keep the old table around until rehashing is complete:
  EntryType* oldTable = b._table;
  IndexType oldAlloc = b._nrAlloc;

  // Round the requested size up to a prime, which improves the spread of
  // the modulo-based slot computation:
  b._nrAlloc =
      static_cast<IndexType>(TRI_NearPrime(static_cast<uint64_t>(size)));

  try {
    b._table = new EntryType[b._nrAlloc];
#ifdef __linux__
    if (b._nrAlloc > 1000000) {
      // For large tables, tell the kernel that access will be random so
      // it does not waste effort on readahead:
      uintptr_t mem = reinterpret_cast<uintptr_t>(b._table);
      uintptr_t pageSize = getpagesize();
      // round down to a page boundary, as required by madvise:
      mem = (mem / pageSize) * pageSize;
      void* memptr = reinterpret_cast<void*>(mem);
      TRI_MMFileAdvise(memptr, b._nrAlloc * sizeof(EntryType),
                       TRI_MADVISE_RANDOM);
    }
#endif

    // Mark every slot of the new table as empty:
    IndexType i;
    for (i = 0; i < b._nrAlloc; i++) {
      invalidateEntry(b, i);
    }
  } catch (...) {
    // Roll back to the old table so the bucket stays usable, then
    // propagate the error to the caller:
    b._nrAlloc = oldAlloc;
    b._table = oldTable;
    throw;
  }

  b._nrUsed = 0;
  b._nrCollisions = 0;
#ifdef TRI_INTERNAL_STATS
  _nrResizes++;
#endif

  // table is already clear by allocate, copy old data
  IndexType j;
  for (j = 0; j < oldAlloc; j++) {
    if (oldTable[j].ptr != nullptr && oldTable[j].prev == INVALID_INDEX) {
      // This is a "first" one in its doubly linked list:
      uint64_t hashByKey;
      if (useHashCache) {
        // list heads cache the hash-by-key, so we can reuse it:
        hashByKey = oldTable[j].readHashCache();
      } else {
        hashByKey = _hashElement(userData, oldTable[j].ptr, true);
      }
      IndexType insertPosition =
          insertFirst(userData, b, oldTable[j].ptr, hashByKey);
      // Now walk to the end of the list:
      IndexType k = j;
      while (oldTable[k].next != INVALID_INDEX) {
        k = oldTable[k].next;
      }
      // Now insert all of them backwards, not repeating k:
      // (inserting back-to-front after the head preserves the original
      // list order in the new table)
      while (k != j) {
        uint64_t hashByElm;
        if (useHashCache) {
          hashByElm = oldTable[k].readHashCache();
        } else {
          hashByElm = _hashElement(userData, oldTable[k].ptr, false);
        }
        insertFurther(userData, b, oldTable[k].ptr, hashByKey, hashByElm,
                      insertPosition);
        k = oldTable[k].prev;
      }
    }
  }

  delete[] oldTable;

  LOG_TRACE("resizing index %s done", cb.c_str());

  LOG_TIMER((TRI_microtime() - start), "index-resize, %s, target size: %llu",
            cb.c_str(), (unsigned long long)size);
}
|
|
|
|
#ifdef TRI_CHECK_MULTI_POINTER_HASH
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internal debugging check function
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
bool check(UserData* userData, bool checkCount, bool checkPositions) const {
  // Debug-only consistency checker (compiled under
  // TRI_CHECK_MULTI_POINTER_HASH). Verifies, for every bucket:
  //   - prev/next pointers of the doubly linked lists are symmetric,
  //   - the lists contain no cycles,
  //   - (checkCount) the _nrUsed counter matches the number of occupied
  //     slots,
  //   - (checkPositions) every entry is reachable from its hash position
  //     without passing a slot that would have terminated the probe
  //     sequence earlier, and the hash cache (if used) is correct.
  // Prints diagnostics to std::cout and returns true iff everything is
  // consistent.
  std::cout << "Performing AssocMulti check " << checkCount << checkPositions
            << std::endl;
  bool ok = true;
  for (auto& b : _buckets) {
    IndexType i, ii, j, k;

    IndexType count = 0;

    for (i = 0; i < b._nrAlloc; i++) {
      if (b._table[i].ptr != nullptr) {
        count++;
        // prev must point back at us via next:
        if (b._table[i].prev != INVALID_INDEX) {
          if (b._table[b._table[i].prev].next != i) {
            std::cout << "Alarm prev " << i << std::endl;
            ok = false;
          }
        }

        // next must point back at us via prev:
        if (b._table[i].next != INVALID_INDEX) {
          if (b._table[b._table[i].next].prev != i) {
            std::cout << "Alarm next " << i << std::endl;
            ok = false;
          }
        }
        // Walk the list starting at i; if we ever come back to i, the
        // list has a cycle:
        ii = i;
        j = b._table[ii].next;
        while (j != INVALID_INDEX) {
          if (j == i) {
            std::cout << "Alarm cycle " << i << std::endl;
            ok = false;
            break;
          }
          ii = j;
          j = b._table[ii].next;
        }
      }
    }
    if (checkCount && count != b._nrUsed) {
      std::cout << "Alarm _nrUsed wrong " << b._nrUsed << " != " << count
                << "!" << std::endl;
      ok = false;
    }
    if (checkPositions) {
      for (i = 0; i < b._nrAlloc; i++) {
        if (b._table[i].ptr != nullptr) {
          IndexType hashIndex;
          if (b._table[i].prev == INVALID_INDEX) {
            // We are the first in a linked list.
            // List heads are placed by their hash-by-key:
            uint64_t hashByKey =
                _hashElement(userData, b._table[i].ptr, true);
            hashIndex = hashToIndex(hashByKey);
            j = hashIndex % b._nrAlloc;
            if (useHashCache && b._table[i].readHashCache() != hashByKey) {
              std::cout << "Alarm hashCache wrong " << i << std::endl;
            }
            // Every slot on the probe path from j to i must be occupied
            // and must not be another list head with the same key (either
            // would have stopped the lookup before reaching i):
            for (k = j; k != i;) {
              if (b._table[k].ptr == nullptr ||
                  (b._table[k].prev == INVALID_INDEX &&
                   _isEqualElementElementByKey(userData, b._table[i].ptr,
                                               b._table[k].ptr))) {
                ok = false;
                std::cout << "Alarm pos bykey: " << i << std::endl;
              }
              k = incr(b, k);
            }
          } else {
            // We are not the first in a linked list.
            // Non-head entries are placed by their hash-by-element:
            uint64_t hashByElm =
                _hashElement(userData, b._table[i].ptr, false);
            hashIndex = hashToIndex(hashByElm);
            j = hashIndex % b._nrAlloc;
            if (useHashCache && b._table[i].readHashCache() != hashByElm) {
              std::cout << "Alarm hashCache wrong " << i << std::endl;
            }
            // The probe path from j to i must be fully occupied and free
            // of entries equal to this one (duplicates by identity):
            for (k = j; k != i;) {
              if (b._table[k].ptr == nullptr ||
                  _isEqualElementElement(userData, b._table[i].ptr,
                                         b._table[k].ptr)) {
                ok = false;
                std::cout << "Alarm unique: " << k << ", " << i << std::endl;
              }
              k = incr(b, k);
            }
          }
        }
      }
    }
  }
  if (!ok) {
    std::cout << "Something is wrong!" << std::endl;
  }
  return ok;
}
|
|
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief find an element or its place using the element hash function
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief find an element or its place using the element hash function
///
/// This either finds a place to store element or an entry in the table
/// that is equal to element. If checkEquality is set to false, the caller
/// guarantees that there is no entry that compares equal to element in
/// the table, which saves a lot of element comparisons. This function
/// always returns an index into the table, which is either empty or holds
/// an entry that compares equal to element. The element's hash value is
/// returned through hashByElm.
//////////////////////////////////////////////////////////////////////////////

inline IndexType findElementPlace(UserData* userData, Bucket const& b,
                                  Element const* element, bool checkEquality,
                                  uint64_t& hashByElm) const {
  hashByElm = _hashElement(userData, element, false);
  IndexType pos = hashToIndex(hashByElm) % b._nrAlloc;

  // Linear probing from the hash position:
  for (;;) {
    if (b._table[pos].ptr == nullptr) {
      // free slot found
      break;
    }
    // The hash-cache comparison (when enabled) cheaply rules out most
    // non-matches before the expensive element comparison runs:
    if (checkEquality &&
        (!useHashCache || b._table[pos].readHashCache() == hashByElm) &&
        _isEqualElementElement(userData, element, b._table[pos].ptr)) {
      // found an entry equal to element
      break;
    }
    pos = incr(b, pos);
#ifdef TRI_INTERNAL_STATS
    _nrProbes++;
#endif
  }
  return pos;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief find an element or its place by key or element identity
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
IndexType lookupByElement(UserData* userData, Element const* element,
                          Bucket*& buck) const {
  // This performs a complete lookup for an element. It returns a slot
  // number. This slot is either empty or contains an element that
  // compares equal to element.
  // <buck> is set to the bucket responsible for the element's key, so
  // callers (e.g. remove) can modify the right bucket afterwards.
  uint64_t hashByKey = _hashElement(userData, element, true);
  Bucket const& b = _buckets[hashByKey & _bucketsMask];
  buck = const_cast<Bucket*>(&b);
  IndexType hashIndex = hashToIndex(hashByKey);
  IndexType i = hashIndex % b._nrAlloc;

  // Now find the first slot with an entry with the same key
  // that is the start of a linked list, or a free slot:
  // (prev == INVALID_INDEX marks a list head; the hash cache, when
  // enabled, cheaply rules out most non-matching keys)
  while (b._table[i].ptr != nullptr &&
         (b._table[i].prev != INVALID_INDEX ||
          (useHashCache && b._table[i].readHashCache() != hashByKey) ||
          !_isEqualElementElementByKey(userData, element, b._table[i].ptr))) {
    i = incr(b, i);
#ifdef TRI_INTERNAL_STATS
    _nrProbes++;
#endif
  }

  if (b._table[i].ptr != nullptr) {
    // It might be right here!
    // (the list head may itself be the element we are looking for)
    if (_isEqualElementElement(userData, element, b._table[i].ptr)) {
      return i;
    }

    // Now we have to look for it in its hash position:
    // (non-head entries are placed by their hash-by-element)
    uint64_t hashByElm;
    IndexType j = findElementPlace(userData, b, element, true, hashByElm);

    // We have either found an equal element or nothing:
    return j;
  }

  // If we get here, no element with the same key is in the array, so
  // we will not be able to find it anywhere!
  return i;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief helper to decide whether something is between two places
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief helper to decide whether something is between two places
///
/// Returns whether or not x lies strictly behind from and before or equal
/// to to in the cyclic order of table slots. If x is equal to from, the
/// result is always false. If from is equal to to, the result is always
/// true.
//////////////////////////////////////////////////////////////////////////////

static inline bool isBetween(IndexType from, IndexType x, IndexType to) {
  if (from < to) {
    // interval (from, to] does not wrap around the end of the table
    return from < x && x <= to;
  }
  // interval wraps around: x is either behind from or at/before to
  return x > from || x <= to;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief helper to invalidate a slot
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief helper to invalidate a slot
///
/// Resets slot i of bucket b to the empty state: no element pointer, no
/// linked-list neighbours, and (if the hash cache is in use) a zeroed
/// cached hash value.
//////////////////////////////////////////////////////////////////////////////

inline void invalidateEntry(Bucket& b, IndexType i) {
  auto& slot = b._table[i];
  slot.ptr = nullptr;
  slot.next = INVALID_INDEX;
  slot.prev = INVALID_INDEX;
  if (useHashCache) {
    slot.writeHashCache(0);
  }
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief helper to move an entry from one slot to another
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief helper to move an entry from one slot to another
///
/// Moves an entry, adjusting the doubly linked list pointers of its
/// neighbours, but does not take care of the hole left behind. <to> must
/// be an unused slot; <from> can be any element of a linked list. After
/// the move, slot <from> is invalidated.
//////////////////////////////////////////////////////////////////////////////

inline void moveEntry(Bucket& b, IndexType from, IndexType to) {
  // Copy the whole entry into its new slot:
  b._table[to] = b._table[from];
  auto& moved = b._table[to];
  // Re-point the neighbours in the doubly linked list at the new slot:
  if (moved.prev != INVALID_INDEX) {
    b._table[moved.prev].next = to;
  }
  if (moved.next != INVALID_INDEX) {
    b._table[moved.next].prev = to;
  }
  // The old slot becomes a hole (caller is responsible for healing it):
  invalidateEntry(b, from);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief helper to heal a hole where we deleted something
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
void healHole(UserData* userData, Bucket& b, IndexType i) {
  // Repairs the probe sequence after slot i was emptied: walks forward
  // through the following occupied slots and moves back any entry whose
  // hash position lies outside the cyclic range (i, current], so that
  // linear-probing lookups never hit an empty slot before finding their
  // entry. Terminates at the first truly free slot.
  IndexType j = incr(b, i);

  while (b._table[j].ptr != nullptr) {
    // Find out where this element ought to be:
    // If it is the start of one of the linked lists, we need to hash
    // by key, otherwise, we hash by the full identity of the element:
    uint64_t hash = _hashElement(userData, b._table[j].ptr,
                                 b._table[j].prev == INVALID_INDEX);
    IndexType hashIndex = hashToIndex(hash);
    IndexType k = hashIndex % b._nrAlloc;
    if (!isBetween(i, k, j)) {
      // The entry's home slot k is not between the hole and its current
      // position, so the hole would break its probe chain:
      // we have to move j to i:
      moveEntry(b, j, i);
      i = j;  // Now heal this hole at j,
              // j will be incremented right away
    }
    j = incr(b, j);
#ifdef TRI_INTERNAL_STATS
    _nrProbesD++;
#endif
  }
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// @brief convert a 64bit hash value to an index of type IndexType
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief convert a 64bit hash value to an index of type IndexType
///
/// When IndexType is 64 bits wide the hash is used as is; otherwise it is
/// folded down to 32 bits via TRI_64to32.
//////////////////////////////////////////////////////////////////////////////

inline IndexType hashToIndex(uint64_t const h) const {
  // sizeof is a compile-time constant, so the dead branch is removed by
  // the compiler:
  if (sizeof(IndexType) == 8) {
    return static_cast<IndexType>(h);
  }
  return static_cast<IndexType>(TRI_64to32(h));
}
|
|
};
|
|
|
|
} // namespace arangodb::basics
|
|
} // namespace arangodb
|
|
|
|
#endif
|