////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
////////////////////////////////////////////////////////////////////////////////

#include "HashIndex.h"

#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/SortCondition.h"
#include "Basics/Exceptions.h"
#include "Indexes/SimpleAttributeEqualityMatcher.h"
#include "VocBase/transaction.h"
#include "VocBase/VocShaper.h"

using namespace arangodb;

////////////////////////////////////////////////////////////////////////////////
/// @brief frees an index element
////////////////////////////////////////////////////////////////////////////////

static void FreeElement(TRI_index_element_t* element) {
  TRI_index_element_t::freeElement(element);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief determines if two elements are equal
////////////////////////////////////////////////////////////////////////////////

static bool IsEqualElementElement(void* userData,
                                  TRI_index_element_t const* left,
                                  TRI_index_element_t const* right) {
  return left->document() == right->document();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief generates a hash value for a given key
////////////////////////////////////////////////////////////////////////////////

static uint64_t HashKey(void* userData,
                        TRI_hash_index_search_value_t const* key) {
  uint64_t hash = 0x0123456789abcdef;

  for (size_t j = 0; j < key->_length; ++j) {
    // ignore the sid for hashing
    hash = fasthash64(key->_values[j]._data.data, key->_values[j]._data.length,
                      hash);
  }

  return hash;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief determines if a key corresponds to an element
////////////////////////////////////////////////////////////////////////////////

static bool IsEqualKeyElement(void* userData,
                              TRI_hash_index_search_value_t const* left,
                              TRI_index_element_t const* right) {
  TRI_ASSERT(right->document() != nullptr);

  for (size_t j = 0; j < left->_length; ++j) {
    TRI_shaped_json_t* leftJson = &left->_values[j];
    TRI_shaped_sub_t* rightSub = &right->subObjects()[j];

    if (leftJson->_sid != rightSub->_sid) {
      return false;
    }

    auto length = leftJson->_data.length;

    char const* rightData;
    size_t rightLength;
    TRI_InspectShapedSub(rightSub, right->document(), rightData, rightLength);

    if (length != rightLength) {
      return false;
    }

    if (length > 0 && memcmp(leftJson->_data.data, rightData, length) != 0) {
      return false;
    }
  }

  return true;
}

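////////////////////////////////////////////////////////////////////////////////
/// @brief determines if a key corresponds to an element, ignoring the
/// precomputed hash value
////////////////////////////////////////////////////////////////////////////////
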
static bool IsEqualKeyElementHash(
    void* userData, TRI_hash_index_search_value_t const* left,
    uint64_t const hash,  // has been computed but is not used here
    TRI_index_element_t const* right) {
  return IsEqualKeyElement(userData, left, right);
}

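////////////////////////////////////////////////////////////////////////////////
/// @brief fetches the next document from the iterator, refilling the result
/// buffer from the remaining lookup values; returns nullptr when exhausted
////////////////////////////////////////////////////////////////////////////////
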
TRI_doc_mptr_t* HashIndexIterator::next() {
  while (true) {
    if (_posInBuffer >= _buffer.size()) {
      if (_position >= _keys.size()) {
        // we're at the end of the lookup values
        return nullptr;
      }

      // We have to refill the buffer
      _buffer.clear();
      _posInBuffer = 0;

      int res = _index->lookup(_trx, _keys[_position++], _buffer);

      if (res != TRI_ERROR_NO_ERROR) {
        THROW_ARANGO_EXCEPTION(res);
      }
    }

    if (!_buffer.empty()) {
      // found something
      return _buffer.at(_posInBuffer++);
    }
  }
}

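////////////////////////////////////////////////////////////////////////////////
/// @brief resets the iterator to the first lookup value
////////////////////////////////////////////////////////////////////////////////
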
void HashIndexIterator::reset() {
  _buffer.clear();
  _position = 0;
  _posInBuffer = 0;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create the unique array
////////////////////////////////////////////////////////////////////////////////

HashIndex::UniqueArray::UniqueArray(
    TRI_HashArray_t* hashArray, HashElementFunc* hashElement,
    IsEqualElementElementByKey* isEqualElElByKey)
    : _hashArray(hashArray),
      _hashElement(hashElement),
      _isEqualElElByKey(isEqualElElByKey) {
  TRI_ASSERT(_hashArray != nullptr);
  TRI_ASSERT(_hashElement != nullptr);
  TRI_ASSERT(_isEqualElElByKey != nullptr);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the unique array
////////////////////////////////////////////////////////////////////////////////

HashIndex::UniqueArray::~UniqueArray() {
  if (_hashArray != nullptr) {
    _hashArray->invokeOnAllElements(FreeElement);
  }

  delete _hashArray;
  delete _hashElement;
  delete _isEqualElElByKey;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create the multi array
////////////////////////////////////////////////////////////////////////////////

HashIndex::MultiArray::MultiArray(TRI_HashArrayMulti_t* hashArray,
                                  HashElementFunc* hashElement,
                                  IsEqualElementElementByKey* isEqualElElByKey)
    : _hashArray(hashArray),
      _hashElement(hashElement),
      _isEqualElElByKey(isEqualElElByKey) {
  TRI_ASSERT(_hashArray != nullptr);
  TRI_ASSERT(_hashElement != nullptr);
  TRI_ASSERT(_isEqualElElByKey != nullptr);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the multi array
////////////////////////////////////////////////////////////////////////////////

HashIndex::MultiArray::~MultiArray() {
  if (_hashArray != nullptr) {
    _hashArray->invokeOnAllElements(FreeElement);
  }

  delete _hashArray;
  delete _hashElement;
  delete _isEqualElElByKey;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create an index search value
////////////////////////////////////////////////////////////////////////////////

TRI_hash_index_search_value_t::TRI_hash_index_search_value_t()
    : _length(0), _values(nullptr) {}

TRI_hash_index_search_value_t::~TRI_hash_index_search_value_t() { destroy(); }

void TRI_hash_index_search_value_t::reserve(size_t n) {
  TRI_ASSERT(_values == nullptr);
  _values = static_cast<TRI_shaped_json_t*>(
      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, n * sizeof(TRI_shaped_json_t), true));

  if (_values == nullptr) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
  }

  _length = n;
}

void TRI_hash_index_search_value_t::destroy() {
  if (_values != nullptr) {
    for (size_t i = 0; i < _length; ++i) {
      TRI_DestroyShapedJson(TRI_UNKNOWN_MEM_ZONE, &_values[i]);
    }

    TRI_Free(TRI_UNKNOWN_MEM_ZONE, _values);
    _values = nullptr;
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create the index
////////////////////////////////////////////////////////////////////////////////

HashIndex::HashIndex(
    TRI_idx_iid_t iid, TRI_document_collection_t* collection,
    std::vector<std::vector<arangodb::basics::AttributeName>> const& fields,
    bool unique, bool sparse)
    : PathBasedIndex(iid, collection, fields, unique, sparse, false),
      _uniqueArray(nullptr) {
  uint32_t indexBuckets = 1;

  if (collection != nullptr) {
    // collection is a nullptr in the coordinator case
    indexBuckets = collection->_info.indexBuckets();
  }

  auto func = std::make_unique<HashElementFunc>(_paths.size());
  auto compare = std::make_unique<IsEqualElementElementByKey>(_paths.size());

  if (unique) {
    auto array = std::make_unique<TRI_HashArray_t>(
        HashKey, *(func.get()), IsEqualKeyElementHash, IsEqualElementElement,
        *(compare.get()), indexBuckets,
        []() -> std::string { return "unique hash-array"; });

    _uniqueArray =
        new HashIndex::UniqueArray(array.get(), func.get(), compare.get());
    array.release();
  } else {
    _multiArray = nullptr;

    auto array = std::make_unique<TRI_HashArrayMulti_t>(
        HashKey, *(func.get()), IsEqualKeyElement, IsEqualElementElement,
        *(compare.get()), indexBuckets, 64,
        []() -> std::string { return "multi hash-array"; });

    _multiArray =
        new HashIndex::MultiArray(array.get(), func.get(), compare.get());

    array.release();
  }
  compare.release();

  func.release();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create an index stub with a hard-coded selectivity estimate
/// this is used in the cluster coordinator case
////////////////////////////////////////////////////////////////////////////////

HashIndex::HashIndex(VPackSlice const& slice)
    : PathBasedIndex(slice, false), _uniqueArray(nullptr) {}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroys the index
////////////////////////////////////////////////////////////////////////////////

HashIndex::~HashIndex() {
  if (_unique) {
    delete _uniqueArray;
  } else {
    delete _multiArray;
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief returns a selectivity estimate for the index
////////////////////////////////////////////////////////////////////////////////

double HashIndex::selectivityEstimate() const {
  if (_unique) {
    return 1.0;
  }

  if (_multiArray == nullptr) {
    // use hard-coded selectivity estimate in case of cluster coordinator
    return _selectivityEstimate;
  }

  double estimate = _multiArray->_hashArray->selectivity();
  TRI_ASSERT(estimate >= 0.0 &&
             estimate <= 1.00001);  // floating-point tolerance
  return estimate;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief returns the index memory usage
////////////////////////////////////////////////////////////////////////////////

size_t HashIndex::memory() const {
  if (_unique) {
    return static_cast<size_t>(elementSize() *
                                   _uniqueArray->_hashArray->size() +
                               _uniqueArray->_hashArray->memoryUsage());
  }

  return static_cast<size_t>(elementSize() * _multiArray->_hashArray->size() +
                             _multiArray->_hashArray->memoryUsage());
}

////////////////////////////////////////////////////////////////////////////////
/// @brief return a velocypack representation of the index
////////////////////////////////////////////////////////////////////////////////

void HashIndex::toVelocyPack(VPackBuilder& builder, bool withFigures) const {
  Index::toVelocyPack(builder, withFigures);
  builder.add("unique", VPackValue(_unique));
  builder.add("sparse", VPackValue(_sparse));
}

////////////////////////////////////////////////////////////////////////////////
/// @brief return a velocypack representation of the index figures
////////////////////////////////////////////////////////////////////////////////

void HashIndex::toVelocyPackFigures(VPackBuilder& builder) const {
  TRI_ASSERT(builder.isOpenObject());
  builder.add("memory", VPackValue(memory()));
  if (_unique) {
    _uniqueArray->_hashArray->appendToVelocyPack(builder);
  } else {
    _multiArray->_hashArray->appendToVelocyPack(builder);
  }
}

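////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the hash index
////////////////////////////////////////////////////////////////////////////////
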
int HashIndex::insert(arangodb::Transaction* trx, TRI_doc_mptr_t const* doc,
                      bool isRollback) {
  if (_unique) {
    return insertUnique(trx, doc, isRollback);
  }

  return insertMulti(trx, doc, isRollback);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief removes an entry from the hash array part of the hash index
////////////////////////////////////////////////////////////////////////////////

int HashIndex::remove(arangodb::Transaction* trx, TRI_doc_mptr_t const* doc,
                      bool isRollback) {
  if (_unique) {
    return removeUnique(trx, doc, isRollback);
  }

  return removeMulti(trx, doc, isRollback);
}

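////////////////////////////////////////////////////////////////////////////////
/// @brief inserts multiple documents into the hash index
////////////////////////////////////////////////////////////////////////////////
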
int HashIndex::batchInsert(arangodb::Transaction* trx,
                           std::vector<TRI_doc_mptr_t const*> const* documents,
                           size_t numThreads) {
  if (_unique) {
    return batchInsertUnique(trx, documents, numThreads);
  }

  return batchInsertMulti(trx, documents, numThreads);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief provides a size hint for the hash index
////////////////////////////////////////////////////////////////////////////////

int HashIndex::sizeHint(arangodb::Transaction* trx, size_t size) {
  if (_sparse) {
    // for sparse indexes, we assume that we will have fewer index entries
    // than if the index were fully populated
    size /= 5;
  }

  if (_unique) {
    return _uniqueArray->_hashArray->resize(trx, size);
  }

  return _multiArray->_hashArray->resize(trx, size);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
////////////////////////////////////////////////////////////////////////////////

int HashIndex::lookup(arangodb::Transaction* trx,
                      TRI_hash_index_search_value_t* searchValue,
                      std::vector<TRI_doc_mptr_t*>& documents) const {
  if (_unique) {
    TRI_index_element_t* found =
        _uniqueArray->_hashArray->findByKey(trx, searchValue);

    if (found != nullptr) {
      // unique hash index: maximum number is 1
      documents.emplace_back(found->document());
    }

    return TRI_ERROR_NO_ERROR;
  }

  std::vector<TRI_index_element_t*>* results = nullptr;
  try {
    results = _multiArray->_hashArray->lookupByKey(trx, searchValue);
  } catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }
  if (results != nullptr) {
    try {
      for (size_t i = 0; i < results->size(); i++) {
        documents.emplace_back((*results)[i]->document());
      }
      delete results;
    } catch (...) {
      delete results;
      return TRI_ERROR_OUT_OF_MEMORY;
    }
  }
  return TRI_ERROR_NO_ERROR;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects,
/// returning at most batchSize results per call and supporting continuation
/// via the next pointer
////////////////////////////////////////////////////////////////////////////////

int HashIndex::lookup(arangodb::Transaction* trx,
                      TRI_hash_index_search_value_t* searchValue,
                      std::vector<TRI_doc_mptr_copy_t>& documents,
                      TRI_index_element_t*& next, size_t batchSize) const {
  if (_unique) {
    next = nullptr;
    TRI_index_element_t* found =
        _uniqueArray->_hashArray->findByKey(trx, searchValue);

    if (found != nullptr) {
      // unique hash index: maximum number is 1
      documents.emplace_back(*(found->document()));
    }
    return TRI_ERROR_NO_ERROR;
  }

  std::vector<TRI_index_element_t*>* results = nullptr;

  if (next == nullptr) {
    try {
      results =
          _multiArray->_hashArray->lookupByKey(trx, searchValue, batchSize);
    } catch (...) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }
  } else {
    try {
      results =
          _multiArray->_hashArray->lookupByKeyContinue(trx, next, batchSize);
    } catch (...) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }
  }

  if (results != nullptr) {
    if (results->size() > 0) {
      next = results->back();  // for continuation the next time
      try {
        for (size_t i = 0; i < results->size(); i++) {
          documents.emplace_back(*((*results)[i]->document()));
        }
      } catch (...) {
        delete results;
        return TRI_ERROR_OUT_OF_MEMORY;
      }
    } else {
      next = nullptr;
    }
    delete results;
  } else {
    next = nullptr;
  }
  return TRI_ERROR_NO_ERROR;
}

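////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the unique hash array
////////////////////////////////////////////////////////////////////////////////
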
int HashIndex::insertUnique(arangodb::Transaction* trx,
                            TRI_doc_mptr_t const* doc, bool isRollback) {
  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& it : elements) {
      // free all elements to prevent leak
      FreeElement(it);
    }

    return res;
  }

  auto work =
      [this, trx](TRI_index_element_t* element, bool isRollback) -> int {
    TRI_IF_FAILURE("InsertHashIndex") { return TRI_ERROR_DEBUG; }
    return _uniqueArray->_hashArray->insert(trx, element);
  };

  size_t const n = elements.size();

  for (size_t i = 0; i < n; ++i) {
    auto hashElement = elements[i];
    res = work(hashElement, isRollback);

    if (res != TRI_ERROR_NO_ERROR) {
      for (size_t j = i; j < n; ++j) {
        // Free all elements that are not yet in the index
        FreeElement(elements[j]);
      }
      // Already indexed elements will be removed by the rollback
      break;
    }
  }
  return res;
}

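////////////////////////////////////////////////////////////////////////////////
/// @brief inserts multiple documents into the unique hash array
////////////////////////////////////////////////////////////////////////////////
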
int HashIndex::batchInsertUnique(
    arangodb::Transaction* trx,
    std::vector<TRI_doc_mptr_t const*> const* documents, size_t numThreads) {
  std::vector<TRI_index_element_t*> elements;
  elements.reserve(documents->size());

  for (auto& doc : *documents) {
    int res = fillElement(elements, doc);

    if (res != TRI_ERROR_NO_ERROR) {
      for (auto& it : elements) {
        // free all elements to prevent leak
        FreeElement(it);
      }
      return res;
    }
  }

  int res = _uniqueArray->_hashArray->batchInsert(trx, &elements, numThreads);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& it : elements) {
      // free all elements to prevent leak
      FreeElement(it);
    }
  }

  return res;
}

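////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the multi hash array
////////////////////////////////////////////////////////////////////////////////
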
int HashIndex::insertMulti(arangodb::Transaction* trx,
                           TRI_doc_mptr_t const* doc, bool isRollback) {
  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  auto work = [this, trx](TRI_index_element_t*& element, bool isRollback) {
    TRI_IF_FAILURE("InsertHashIndex") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    TRI_index_element_t* found =
        _multiArray->_hashArray->insert(trx, element, false, true);

    if (found != nullptr) {
      // already got the exact same index entry. now free our local element...
      FreeElement(element);
      // we're not responsible for this element anymore
      element = nullptr;
    }
  };

  size_t const n = elements.size();

  for (size_t i = 0; i < n; ++i) {
    auto hashElement = elements[i];

    try {
      work(hashElement, isRollback);
    } catch (arangodb::basics::Exception const& ex) {
      res = ex.code();
    } catch (...) {
      res = TRI_ERROR_OUT_OF_MEMORY;
    }

    if (res != TRI_ERROR_NO_ERROR) {
      for (size_t j = i; j < n; ++j) {
        // Free all elements that are not yet in the index
        FreeElement(elements[j]);
      }
      for (size_t j = 0; j < i; ++j) {
        // Remove all already indexed elements and free them
        if (elements[j] != nullptr) {
          removeMultiElement(trx, elements[j], isRollback);
        }
      }

      return res;
    }
  }

  return TRI_ERROR_NO_ERROR;
}

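////////////////////////////////////////////////////////////////////////////////
/// @brief inserts multiple documents into the multi hash array
////////////////////////////////////////////////////////////////////////////////
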
int HashIndex::batchInsertMulti(
    arangodb::Transaction* trx,
    std::vector<TRI_doc_mptr_t const*> const* documents, size_t numThreads) {
  std::vector<TRI_index_element_t*> elements;

  for (auto& doc : *documents) {
    int res = fillElement(elements, doc);

    if (res != TRI_ERROR_NO_ERROR) {
      // Filling the elements failed for some reason. Assume the whole batch
      // insert has failed
      for (auto& el : elements) {
        // Free all elements that are not yet in the index
        FreeElement(el);
      }
      return res;
    }
  }
  return _multiArray->_hashArray->batchInsert(trx, &elements, numThreads);
}

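////////////////////////////////////////////////////////////////////////////////
/// @brief removes an element from the unique hash array and frees it
////////////////////////////////////////////////////////////////////////////////
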
int HashIndex::removeUniqueElement(arangodb::Transaction* trx,
                                   TRI_index_element_t* element,
                                   bool isRollback) {
  TRI_IF_FAILURE("RemoveHashIndex") { return TRI_ERROR_DEBUG; }
  TRI_index_element_t* old = _uniqueArray->_hashArray->remove(trx, element);

  // this might happen when rolling back
  if (old == nullptr) {
    if (isRollback) {
      return TRI_ERROR_NO_ERROR;
    }
    return TRI_ERROR_INTERNAL;
  }

  FreeElement(old);

  return TRI_ERROR_NO_ERROR;
}

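////////////////////////////////////////////////////////////////////////////////
/// @brief removes a document from the unique hash array
////////////////////////////////////////////////////////////////////////////////
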
int HashIndex::removeUnique(arangodb::Transaction* trx,
                            TRI_doc_mptr_t const* doc, bool isRollback) {
  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  for (auto& hashElement : elements) {
    int result = removeUniqueElement(trx, hashElement, isRollback);

    // we may be looping through this multiple times, and if an error
    // occurs, we want to keep it
    if (result != TRI_ERROR_NO_ERROR) {
      res = result;
    }
    FreeElement(hashElement);
  }

  return res;
}

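////////////////////////////////////////////////////////////////////////////////
/// @brief removes an element from the multi hash array and frees it
////////////////////////////////////////////////////////////////////////////////
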
int HashIndex::removeMultiElement(arangodb::Transaction* trx,
                                  TRI_index_element_t* element,
                                  bool isRollback) {
  TRI_IF_FAILURE("RemoveHashIndex") { return TRI_ERROR_DEBUG; }

  TRI_index_element_t* old = _multiArray->_hashArray->remove(trx, element);

  if (old == nullptr) {
    // not found
    if (isRollback) {  // ignore in this case, because it can happen
      return TRI_ERROR_NO_ERROR;
    }
    return TRI_ERROR_INTERNAL;
  }
  FreeElement(old);

  return TRI_ERROR_NO_ERROR;
}

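////////////////////////////////////////////////////////////////////////////////
/// @brief removes a document from the multi hash array
////////////////////////////////////////////////////////////////////////////////
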
int HashIndex::removeMulti(arangodb::Transaction* trx,
                           TRI_doc_mptr_t const* doc, bool isRollback) {
  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    // do not fall through to the loop below: the elements were just freed
    return res;
  }

  for (auto& hashElement : elements) {
    int result = removeMultiElement(trx, hashElement, isRollback);

    // we may be looping through this multiple times, and if an error
    // occurs, we want to keep it
    if (result != TRI_ERROR_NO_ERROR) {
      res = result;
    }

    FreeElement(hashElement);
  }

  return res;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief checks whether the index supports the condition
////////////////////////////////////////////////////////////////////////////////

bool HashIndex::supportsFilterCondition(
    arangodb::aql::AstNode const* node,
    arangodb::aql::Variable const* reference, size_t itemsInIndex,
    size_t& estimatedItems, double& estimatedCost) const {
  SimpleAttributeEqualityMatcher matcher(fields());
  return matcher.matchAll(this, node, reference, itemsInIndex, estimatedItems,
                          estimatedCost);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates an IndexIterator for the given Condition
////////////////////////////////////////////////////////////////////////////////

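// Illustrative example (not from the original source): with index fields
// [a, b] and a filter condition such as `a == 1 && b IN [1, 2, 3]`, the
// permutation logic below expands the IN list into three search values
// ({a: 1, b: 1}, {a: 1, b: 2} and {a: 1, b: 3}), each of which is looked up
// separately by the resulting iterator.
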
IndexIterator* HashIndex::iteratorForCondition(
    arangodb::Transaction* trx, IndexIteratorContext* context,
    arangodb::aql::Ast* ast, arangodb::aql::AstNode const* node,
    arangodb::aql::Variable const* reference, bool reverse) const {
  TRI_ASSERT(node->type == aql::NODE_TYPE_OPERATOR_NARY_AND);

  SimpleAttributeEqualityMatcher matcher(fields());
  size_t const n = _fields.size();
  TRI_ASSERT(node->numMembers() == n);

  // initialize permutations
  std::vector<PermutationState> permutationStates;
  permutationStates.reserve(n);
  size_t maxPermutations = 1;

  std::pair<arangodb::aql::Variable const*,
            std::vector<arangodb::basics::AttributeName>> paramPair;

  for (size_t i = 0; i < n; ++i) {
    auto comp = node->getMemberUnchecked(i);
    auto attrNode = comp->getMember(0);
    auto valNode = comp->getMember(1);

    paramPair.first = nullptr;
    paramPair.second.clear();

    if (!attrNode->isAttributeAccessForVariable(paramPair) ||
        paramPair.first != reference) {
      attrNode = comp->getMember(1);
      valNode = comp->getMember(0);

      if (!attrNode->isAttributeAccessForVariable(paramPair) ||
          paramPair.first != reference) {
        return nullptr;
      }
    }

    size_t attributePosition = SIZE_MAX;
    for (size_t j = 0; j < _fields.size(); ++j) {
      if (arangodb::basics::AttributeName::isIdentical(
              _fields[j], paramPair.second, true)) {
        attributePosition = j;
        break;
      }
    }

    if (attributePosition == SIZE_MAX) {
      // index attribute not found in condition. this is a severe error
      THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL);
    }

    arangodb::aql::AstNodeType type = comp->type;

    if (comp->type == aql::NODE_TYPE_OPERATOR_BINARY_EQ) {
      permutationStates.emplace_back(
          PermutationState(type, valNode, attributePosition, 1));
      TRI_IF_FAILURE("HashIndex::permutationEQ") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }
    } else if (comp->type == aql::NODE_TYPE_OPERATOR_BINARY_IN) {
      if (isAttributeExpanded(attributePosition)) {
        type = aql::NODE_TYPE_OPERATOR_BINARY_EQ;
        permutationStates.emplace_back(
            PermutationState(type, valNode, attributePosition, 1));
        TRI_IF_FAILURE("HashIndex::permutationArrayIN") {
          THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
        }
      } else {
        if (valNode->numMembers() == 0) {
          return nullptr;
        }
        permutationStates.emplace_back(PermutationState(
            type, valNode, attributePosition, valNode->numMembers()));
        TRI_IF_FAILURE("HashIndex::permutationIN") {
          THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
        }
        maxPermutations *= valNode->numMembers();
      }
    }
  }

  if (permutationStates.empty()) {
    // can only be caused by empty IN lists
    return nullptr;
  }

  std::vector<TRI_hash_index_search_value_t*> searchValues;
  searchValues.reserve(maxPermutations);

  try {
    // create all permutations
    auto shaper = _collection->getShaper();
    size_t current = 0;
    bool done = false;
    while (!done) {
      auto searchValue = std::make_unique<TRI_hash_index_search_value_t>();
      searchValue->reserve(n);

      bool valid = true;
      for (size_t i = 0; i < n; ++i) {
        auto& state = permutationStates[i];
        std::shared_ptr<VPackBuilder> valBuilder =
            state.getValue()->toVelocyPackValue();

        if (valBuilder == nullptr) {
          valid = false;
          break;
        }

        auto shaped =
            TRI_ShapedJsonVelocyPack(shaper, valBuilder->slice(), false);

        if (shaped == nullptr) {
          // no such shape exists. this means we won't find this value and can
          // go on with the next permutation
          valid = false;
          break;
        }

        searchValue->_values[state.attributePosition] = *shaped;
        TRI_Free(shaper->memoryZone(), shaped);
      }

      if (valid) {
        searchValues.push_back(searchValue.get());
        searchValue.release();
      }

      // now permute
      while (true) {
        if (++permutationStates[current].current <
            permutationStates[current].n) {
          current = 0;
          // abort inner iteration
          break;
        }

        permutationStates[current].current = 0;

        if (++current >= n) {
          done = true;
          break;
        }
        // next inner iteration
      }
    }

    TRI_ASSERT(searchValues.size() <= maxPermutations);

    // Create the iterator
    TRI_IF_FAILURE("HashIndex::noIterator") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
  } catch (...) {
    // prevent a leak here
    for (auto& it : searchValues) {
      delete it;
    }
    throw;
  }

  return new HashIndexIterator(trx, this, searchValues);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief specializes the condition for use with the index
////////////////////////////////////////////////////////////////////////////////

arangodb::aql::AstNode* HashIndex::specializeCondition(
    arangodb::aql::AstNode* node,
    arangodb::aql::Variable const* reference) const {
  SimpleAttributeEqualityMatcher matcher(fields());
  return matcher.specializeAll(this, node, reference);
}