1
0
Fork 0
arangodb/arangod/Indexes/HashIndex.cpp

663 lines
22 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// @brief hash index
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "HashIndex.h"
#include "VocBase/transaction.h"
#include "VocBase/VocShaper.h"
using namespace triagens::arango;
// -----------------------------------------------------------------------------
// --SECTION-- private functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief frees an index element
///
/// Free-function wrapper around TRI_index_element_t::free, so that it can be
/// passed around as a plain function (the array destructors in this file hand
/// it to invokeOnAllElements).
////////////////////////////////////////////////////////////////////////////////

static void FreeElement (TRI_index_element_t* element) {
  TRI_index_element_t::free(element);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief determines if two elements are equal
///
/// Two elements compare equal when their document() pointers are identical;
/// the indexed values themselves are not inspected.
////////////////////////////////////////////////////////////////////////////////

static bool IsEqualElementElement (TRI_index_element_t const* left,
                                   TRI_index_element_t const* right) {
  auto leftDocument = left->document();
  auto rightDocument = right->document();

  return leftDocument == rightDocument;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief given a key generates a hash integer
///
/// The hash of each value is chained into the next one by passing the running
/// result as the last argument of fasthash64, starting from a fixed constant.
////////////////////////////////////////////////////////////////////////////////

static uint64_t HashKey (TRI_index_search_value_t const* key) {
  uint64_t result = 0x0123456789abcdef;

  for (size_t i = 0; i < key->_length; ++i) {
    auto const& value = key->_values[i];

    // only the raw data is hashed, the shape id (_sid) is ignored
    result = fasthash64(value._data.data, value._data.length, result);
  }

  return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief determines if a key corresponds to an element
///
/// Compares the search values in left position by position with the
/// sub-objects of right. A position matches when the shape ids are equal, the
/// data lengths are equal and the data bytes are identical.
////////////////////////////////////////////////////////////////////////////////

static bool IsEqualKeyElement (TRI_index_search_value_t const* left,
                               TRI_index_element_t const* right) {
  TRI_ASSERT_EXPENSIVE(right->document() != nullptr);

  for (size_t i = 0; i < left->_length; ++i) {
    TRI_shaped_json_t* keyValue = &left->_values[i];
    TRI_shaped_sub_t* elementSub = &right->subObjects()[i];

    // values of different shapes are never considered equal
    if (keyValue->_sid != elementSub->_sid) {
      return false;
    }

    auto const keyLength = keyValue->_data.length;

    char const* elementData;
    size_t elementLength;
    TRI_InspectShapedSub(elementSub, right->document(), elementData, elementLength);

    if (keyLength != elementLength) {
      return false;
    }

    if (keyLength == 0) {
      // both sides are empty, nothing left to compare at this position
      continue;
    }

    if (memcmp(keyValue->_data.data, elementData, keyLength) != 0) {
      return false;
    }
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief fills the index search from hash index element
///
/// Allocates key->_values (keyEntrySize() bytes from TRI_UNKNOWN_MEM_ZONE) and
/// fills one entry per index path from the element's sub-objects. On success
/// the caller is responsible for releasing key->_values again.
///
/// @return TRI_ERROR_NO_ERROR on success, TRI_ERROR_OUT_OF_MEMORY if the
///         allocation failed (key->_length is not set in that case)
////////////////////////////////////////////////////////////////////////////////

static int FillIndexSearchValueByHashIndexElement (HashIndex const* hashIndex,
                                                   TRI_index_search_value_t* key,
                                                   TRI_index_element_t const* element) {
  void* memory = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, hashIndex->keyEntrySize(), false);
  key->_values = static_cast<TRI_shaped_json_t*>(memory);

  if (key->_values == nullptr) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  char const* shapedJson = element->document()->getShapedJsonPtr();  // ONLY IN INDEX
  size_t const numPaths = hashIndex->paths().size();

  for (size_t pos = 0; pos < numPaths; ++pos) {
    auto& source = element->subObjects()[pos];
    auto& target = key->_values[pos];

    // the shape id is copied first, the remainder is filled in by the
    // inspect call
    target._sid = source._sid;
    TRI_InspectShapedSub(&source, shapedJson, target);
  }

  key->_length = numPaths;

  return TRI_ERROR_NO_ERROR;
}
// -----------------------------------------------------------------------------
// --SECTION-- class HashIndex::UniqueArray
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief create the unique array
///
/// Both pointers must be non-null. The object deletes them in its destructor,
/// i.e. it takes over ownership of the hash array and the hash functor.
////////////////////////////////////////////////////////////////////////////////

HashIndex::UniqueArray::UniqueArray (TRI_HashArray_t* hashArray,
                                     HashElementFunc* hashElement)
  : _hashArray(hashArray),
    _hashElement(hashElement) {

  TRI_ASSERT(_hashArray != nullptr);
  TRI_ASSERT(_hashElement != nullptr);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the unique array
///
/// Frees every element still stored in the hash array, then the hash array
/// itself and finally the hash functor.
////////////////////////////////////////////////////////////////////////////////

HashIndex::UniqueArray::~UniqueArray () {
  if (_hashArray != nullptr) {
    // the elements have to be released before their container goes away
    _hashArray->invokeOnAllElements(FreeElement);
    delete _hashArray;
  }

  delete _hashElement;
}
// -----------------------------------------------------------------------------
// --SECTION-- class HashIndex::MultiArray
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief create the multi array
///
/// All three pointers must be non-null. The object deletes them in its
/// destructor, i.e. it takes over ownership of the hash array, the hash
/// functor and the by-key comparison functor.
////////////////////////////////////////////////////////////////////////////////

HashIndex::MultiArray::MultiArray (TRI_HashArrayMulti_t* hashArray,
                                   HashElementFunc* hashElement,
                                   IsEqualElementElementByKey* isEqualElElByKey)
  : _hashArray(hashArray),
    _hashElement(hashElement),
    _isEqualElElByKey(isEqualElElByKey) {

  TRI_ASSERT(_hashArray != nullptr);
  TRI_ASSERT(_hashElement != nullptr);
  TRI_ASSERT(_isEqualElElByKey != nullptr);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the multi array
///
/// Frees every element still stored in the hash array, then the hash array
/// itself, the hash functor and the by-key comparison functor.
////////////////////////////////////////////////////////////////////////////////

HashIndex::MultiArray::~MultiArray () {
  if (_hashArray != nullptr) {
    // the elements have to be released before their container goes away
    _hashArray->invokeOnAllElements(FreeElement);
    delete _hashArray;
  }

  delete _hashElement;
  delete _isEqualElElByKey;
}
// -----------------------------------------------------------------------------
// --SECTION-- class HashIndex
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief create the index
///
/// Builds either the unique or the multi hash array wrapper, depending on
/// the unique flag. The helper objects are held in unique_ptrs while the
/// wrapper is being constructed and are only released once the wrapper (which
/// deletes them in its destructor) exists.
////////////////////////////////////////////////////////////////////////////////

HashIndex::HashIndex (TRI_idx_iid_t iid,
                      TRI_document_collection_t* collection,
                      std::vector<std::vector<triagens::basics::AttributeName>> const& fields,
                      bool unique,
                      bool sparse)
  : PathBasedIndex(iid, collection, fields, unique, sparse),
    _uniqueArray(nullptr) {

  // collection is a nullptr in the coordinator case, a single bucket is used
  // then
  uint32_t indexBuckets = (collection == nullptr) ? 1 : collection->_info._indexBuckets;

  std::unique_ptr<HashElementFunc> hashFunc(new HashElementFunc(_paths.size()));

  if (! unique) {
    _multiArray = nullptr;

    std::unique_ptr<IsEqualElementElementByKey> compareFunc(new IsEqualElementElementByKey(_paths.size()));

    std::unique_ptr<TRI_HashArrayMulti_t> multiArray(new TRI_HashArrayMulti_t(HashKey,
                                                                              *hashFunc,
                                                                              IsEqualKeyElement,
                                                                              IsEqualElementElement,
                                                                              *compareFunc,
                                                                              indexBuckets,
                                                                              64,
                                                                              [] () -> std::string { return "multi hash-array"; }));

    _multiArray = new HashIndex::MultiArray(multiArray.get(), hashFunc.get(), compareFunc.get());

    // the wrapper is responsible for these from now on
    compareFunc.release();
    multiArray.release();
  }
  else {
    std::unique_ptr<TRI_HashArray_t> uniqueArray(new TRI_HashArray_t(HashKey,
                                                                     *hashFunc,
                                                                     IsEqualKeyElement,
                                                                     IsEqualElementElement,
                                                                     indexBuckets,
                                                                     [] () -> std::string { return "unique hash-array"; }));

    _uniqueArray = new HashIndex::UniqueArray(uniqueArray.get(), hashFunc.get());

    // the wrapper is responsible for the array from now on
    uniqueArray.release();
  }

  // in both cases the wrapper now owns the hash functor
  hashFunc.release();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroys the index
///
/// Deletes whichever array wrapper matches the index type (unique or multi).
////////////////////////////////////////////////////////////////////////////////

HashIndex::~HashIndex () {
  if (! _unique) {
    delete _multiArray;
    return;
  }

  delete _uniqueArray;
}
// -----------------------------------------------------------------------------
// --SECTION-- public methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief returns a selectivity estimate for the index
///
/// A unique index always reports 1.0. For a non-unique index the value is
/// taken from the underlying multi hash array.
////////////////////////////////////////////////////////////////////////////////

double HashIndex::selectivityEstimate () const {
  if (! _unique) {
    double const estimate = _multiArray->_hashArray->selectivity();
    // allow for a little floating-point tolerance above 1.0
    TRI_ASSERT(estimate >= 0.0 && estimate <= 1.00001);
    return estimate;
  }

  return 1.0;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief returns the index memory usage
///
/// Computed as keyEntrySize() times the number of entries in the active hash
/// array, plus the memory usage reported by that hash array.
////////////////////////////////////////////////////////////////////////////////

size_t HashIndex::memory () const {
  if (! _unique) {
    auto const& multi = _multiArray->_hashArray;
    return static_cast<size_t>(keyEntrySize() * multi->size() +
                               multi->memoryUsage());
  }

  auto const& unique = _uniqueArray->_hashArray;
  return static_cast<size_t>(keyEntrySize() * unique->size() +
                             unique->memoryUsage());
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return a JSON representation of the index
///
/// Starts from the representation produced by Index::toJson and adds the
/// "unique" and "sparse" attributes of this index to it.
///
/// @param zone         memory zone passed to Index::toJson and used for the
///                     Json values created here
/// @param withFigures  passed through unchanged to Index::toJson
////////////////////////////////////////////////////////////////////////////////
triagens::basics::Json HashIndex::toJson (TRI_memory_zone_t* zone,
bool withFigures) const {
auto json = Index::toJson(zone, withFigures);
// add the hash index flags on top of the attributes of the base class
json("unique", triagens::basics::Json(zone, _unique))
("sparse", triagens::basics::Json(zone, _sparse));
return json;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return a JSON representation of the index figures
///
/// The result is an object with a "memory" attribute (the value of memory()
/// as a double) to which the active hash array appends its own figures.
////////////////////////////////////////////////////////////////////////////////

triagens::basics::Json HashIndex::toJsonFigures (TRI_memory_zone_t* zone) const {
  triagens::basics::Json json(zone, triagens::basics::Json::Object);

  json("memory", triagens::basics::Json(static_cast<double>(memory())));

  // let the hash array that is actually in use add its statistics
  if (! _unique) {
    _multiArray->_hashArray->appendToJson(zone, json);
  }
  else {
    _uniqueArray->_hashArray->appendToJson(zone, json);
  }

  return json;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the hash index
///
/// Dispatches to insertUnique or insertMulti, depending on the index type.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::insert (TRI_doc_mptr_t const* doc,
                       bool isRollback) {
  if (! _unique) {
    return insertMulti(doc, isRollback);
  }

  return insertUnique(doc, isRollback);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes an entry from the hash array part of the hash index
///
/// Dispatches to removeUnique or removeMulti, depending on the index type.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::remove (TRI_doc_mptr_t const* doc,
                       bool isRollback) {
  if (! _unique) {
    return removeMulti(doc, isRollback);
  }

  return removeUnique(doc, isRollback);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief provides a size hint for the hash index
///
/// Resizes the active hash array to the hinted size. For sparse indexes only
/// a fifth of the hinted size is used.
///
/// @return the result of the hash array's resize call
////////////////////////////////////////////////////////////////////////////////

int HashIndex::sizeHint (size_t size) {
  if (_sparse) {
    // a sparse index is expected to hold fewer entries than a fully
    // populated one, so the hint is scaled down
    size /= 5;
  }

  if (! _unique) {
    return _multiArray->_hashArray->resize(size);
  }

  return _uniqueArray->_hashArray->resize(size);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
///
/// Appends a copy of every matching document to documents.
///
/// @return TRI_ERROR_NO_ERROR on success (also when nothing matched),
///         TRI_ERROR_OUT_OF_MEMORY if the multi-array lookup or collecting
///         its results threw an exception
////////////////////////////////////////////////////////////////////////////////

int HashIndex::lookup (TRI_index_search_value_t* searchValue,
                       std::vector<TRI_doc_mptr_copy_t>& documents) const {
  if (_unique) {
    // unique hash index: maximum number is 1
    TRI_index_element_t* match = _uniqueArray->_hashArray->findByKey(searchValue);

    if (match != nullptr) {
      documents.emplace_back(*(match->document()));
    }
    return TRI_ERROR_NO_ERROR;
  }

  std::vector<TRI_index_element_t*>* matches = nullptr;

  try {
    matches = _multiArray->_hashArray->lookupByKey(searchValue);
  }
  catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  if (matches == nullptr) {
    return TRI_ERROR_NO_ERROR;
  }

  int res = TRI_ERROR_NO_ERROR;

  try {
    for (auto* element : *matches) {
      documents.emplace_back(*(element->document()));
    }
  }
  catch (...) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  }

  // the result vector must be released on success and on failure
  delete matches;

  return res;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
///
/// Batched variant. If next is a nullptr a new lookup is started, otherwise
/// the lookup is continued after next. On return next is the last element of
/// the current batch, or a nullptr if the batch was empty (or the index is
/// unique).
///
/// @return TRI_ERROR_NO_ERROR on success, TRI_ERROR_OUT_OF_MEMORY if the
///         lookup or collecting its results threw an exception
////////////////////////////////////////////////////////////////////////////////

int HashIndex::lookup (TRI_index_search_value_t* searchValue,
                       std::vector<TRI_doc_mptr_copy_t>& documents,
                       TRI_index_element_t*& next,
                       size_t batchSize) const {
  if (_unique) {
    // unique hash index: maximum number is 1, so there is nothing to continue
    next = nullptr;

    TRI_index_element_t* match = _uniqueArray->_hashArray->findByKey(searchValue);

    if (match != nullptr) {
      documents.emplace_back(*(match->document()));
    }
    return TRI_ERROR_NO_ERROR;
  }

  std::vector<TRI_index_element_t*>* batch = nullptr;

  try {
    if (next == nullptr) {
      batch = _multiArray->_hashArray->lookupByKey(searchValue, batchSize);
    }
    else {
      batch = _multiArray->_hashArray->lookupByKeyContinue(next, batchSize);
    }
  }
  catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  if (batch == nullptr || batch->empty()) {
    // nothing (more) found
    next = nullptr;
    delete batch;
    return TRI_ERROR_NO_ERROR;
  }

  // for continuation the next time
  next = batch->back();

  int res = TRI_ERROR_NO_ERROR;

  try {
    for (auto* element : *batch) {
      documents.emplace_back(*(element->document()));
    }
  }
  catch (...) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  }

  delete batch;

  return res;
}
// -----------------------------------------------------------------------------
// --SECTION-- private methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the unique hash array
///
/// Builds the index elements for the document and inserts them one by one.
///
/// @return TRI_ERROR_NO_ERROR on success. If building the elements fails, all
///         elements built so far are freed and the error is returned without
///         touching the index. If an insert fails, the elements that are not
///         in the index yet are freed and the error is returned.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::insertUnique (TRI_doc_mptr_t const* doc,
                             bool isRollback) {
  auto allocate = [this] () -> TRI_index_element_t* {
    return TRI_index_element_t::allocate(keyEntrySize(), false);
  };

  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(allocate, elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    // do not insert a partially built set of elements, and do not let a later
    // successful insert hide this error
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  auto work = [this] (TRI_index_element_t* element, bool isRollback) -> int {
    TRI_IF_FAILURE("InsertHashIndex") {
      return TRI_ERROR_DEBUG;
    }

    TRI_index_search_value_t key;
    int res = FillIndexSearchValueByHashIndexElement(this, &key, element);

    if (res != TRI_ERROR_NO_ERROR) {
      // out of memory
      return res;
    }

    res = _uniqueArray->_hashArray->insert(&key, element, isRollback);

    // the search value was only needed for the insert call
    if (key._values != nullptr) {
      TRI_Free(TRI_UNKNOWN_MEM_ZONE, key._values);
    }

    return res;
  };

  size_t const count = elements.size();

  for (size_t i = 0; i < count; ++i) {
    auto hashElement = elements[i];
    res = work(hashElement, isRollback);

    if (res != TRI_ERROR_NO_ERROR) {
      for (size_t j = i; j < count; ++j) {
        // Free all elements that are not yet in the index
        FreeElement(elements[j]);
      }
      // Already indexed elements will be removed by the rollback
      return res;
    }
  }

  return res;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document into the multi hash array
///
/// Builds the index elements for the document and inserts them one by one.
///
/// @return TRI_ERROR_NO_ERROR on success. If building the elements fails, all
///         elements built so far are freed and the error is returned without
///         touching the index. If an insert fails, the elements that are not
///         in the index yet are freed, the ones already inserted are removed
///         again and the error is returned.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::insertMulti (TRI_doc_mptr_t const* doc,
                            bool isRollback) {
  auto allocate = [this] () -> TRI_index_element_t* {
    return TRI_index_element_t::allocate(keyEntrySize(), false);
  };

  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(allocate, elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    // do not insert a partially built set of elements, and do not let a later
    // successful insert hide this error
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  auto work = [this] (TRI_index_element_t* element, bool isRollback) -> int {
    TRI_IF_FAILURE("InsertHashIndex") {
      return TRI_ERROR_DEBUG;
    }

    TRI_index_element_t* found = _multiArray->_hashArray->insert(element, false, true);

    if (found != nullptr) {   // bad, can only happen if we are in a rollback
      if (isRollback) {       // in which case we silently ignore it
        return TRI_ERROR_NO_ERROR;
      }

      // This is TRI_RESULT_ELEMENT_EXISTS, but this should not happen:
      return TRI_ERROR_INTERNAL;
    }

    return TRI_ERROR_NO_ERROR;
  };

  size_t const count = elements.size();

  for (size_t i = 0; i < count; ++i) {
    auto hashElement = elements[i];
    res = work(hashElement, isRollback);

    if (res != TRI_ERROR_NO_ERROR) {
      for (size_t j = i; j < count; ++j) {
        // Free all elements that are not yet in the index
        FreeElement(elements[j]);
      }
      for (size_t j = 0; j < i; ++j) {
        // Remove all already indexed elements and free them
        removeMultiElement(elements[j], isRollback);
      }
      return res;
    }
  }

  return res;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes a single element from the unique hash array
///
/// The element returned by the hash array's remove call is freed here; the
/// element passed in is not freed by this function.
///
/// @return TRI_ERROR_NO_ERROR if an element was removed, or if nothing was
///         found during a rollback; TRI_ERROR_INTERNAL if nothing was found
///         outside of a rollback
////////////////////////////////////////////////////////////////////////////////

int HashIndex::removeUniqueElement (TRI_index_element_t* element, bool isRollback) {
  TRI_IF_FAILURE("RemoveHashIndex") {
    return TRI_ERROR_DEBUG;
  }

  TRI_index_element_t* removed = _uniqueArray->_hashArray->remove(element);

  if (removed != nullptr) {
    FreeElement(removed);
    return TRI_ERROR_NO_ERROR;
  }

  // nothing was removed: this might happen when rolling back
  if (! isRollback) {
    return TRI_ERROR_INTERNAL;
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes a document from the unique hash array
///
/// Builds the index elements for the document, uses them to remove the
/// matching entries and frees them afterwards.
///
/// @return TRI_ERROR_NO_ERROR if all entries were removed. If building the
///         elements fails, that error is returned. Otherwise the first error
///         reported for any element is returned; the remaining elements are
///         still processed.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::removeUnique (TRI_doc_mptr_t const* doc, bool isRollback) {
  auto allocate = [this] () -> TRI_index_element_t* {
    return TRI_index_element_t::allocate(keyEntrySize(), false);
  };

  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(allocate, elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  for (auto& hashElement : elements) {
    int result = removeUniqueElement(hashElement, isRollback);

    // there may be several elements: a later success must not hide an
    // earlier failure
    if (result != TRI_ERROR_NO_ERROR && res == TRI_ERROR_NO_ERROR) {
      res = result;
    }

    // the lookup element was allocated above and is never stored in the index
    FreeElement(hashElement);
  }

  return res;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes a single element from the multi hash array
///
/// The element returned by the hash array's remove call is freed here; the
/// element passed in is not freed by this function.
///
/// @return TRI_ERROR_NO_ERROR if an element was removed, or if nothing was
///         found during a rollback; TRI_ERROR_INTERNAL if nothing was found
///         outside of a rollback
////////////////////////////////////////////////////////////////////////////////

int HashIndex::removeMultiElement (TRI_index_element_t* element, bool isRollback) {
  TRI_IF_FAILURE("RemoveHashIndex") {
    return TRI_ERROR_DEBUG;
  }

  TRI_index_element_t* removed = _multiArray->_hashArray->remove(element);

  if (removed != nullptr) {
    FreeElement(removed);
    return TRI_ERROR_NO_ERROR;
  }

  // not found: ignored during a rollback, because it can happen then
  if (! isRollback) {
    return TRI_ERROR_INTERNAL;
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes a document from the multi hash array
///
/// Builds the index elements for the document, uses them to remove the
/// matching entries and frees them afterwards.
///
/// @return TRI_ERROR_NO_ERROR if all entries were removed. If building the
///         elements fails, the elements built so far are freed and that error
///         is returned. Otherwise the first error reported for any element is
///         returned; the remaining elements are still processed.
////////////////////////////////////////////////////////////////////////////////

int HashIndex::removeMulti (TRI_doc_mptr_t const* doc, bool isRollback) {
  auto allocate = [this] () -> TRI_index_element_t* {
    return TRI_index_element_t::allocate(keyEntrySize(), false);
  };

  std::vector<TRI_index_element_t*> elements;
  int res = fillElement(allocate, elements, doc);

  if (res != TRI_ERROR_NO_ERROR) {
    // same handling as in removeUnique: free what was built and report the
    // failure instead of working with a partial set of elements
    for (auto& hashElement : elements) {
      FreeElement(hashElement);
    }
    return res;
  }

  for (auto& hashElement : elements) {
    int result = removeMultiElement(hashElement, isRollback);

    // there may be several elements: a later success must not hide an
    // earlier failure
    if (result != TRI_ERROR_NO_ERROR && res == TRI_ERROR_NO_ERROR) {
      res = result;
    }

    // the lookup element was allocated above and is never stored in the index
    FreeElement(hashElement);
  }

  return res;
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End: