1
0
Fork 0
arangodb/arangod/HashIndex/hash-array.cpp

394 lines
13 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// @brief hash array implementation
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
/// @author Dr. Oreste Costa-Panaia
/// @author Martin Schoenert
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2004-2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "hash-array.h"
#include "Basics/fasthash.h"
#include "Basics/logging.h"
#include "Indexes/HashIndex.h"
#include "VocBase/document-collection.h"
#include "VocBase/VocShaper.h"
// -----------------------------------------------------------------------------
// --SECTION-- COMPARISON
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief determines if a search key corresponds to an index element
///
/// Key and element correspond if, for each of the _numFields indexed fields,
/// the shape id, the data length and the raw data bytes are identical.
///
/// @param left   the search key holding one shaped json value per field
/// @param right  the index element to compare against; its document must not
///               be a nullptr
/// @return true if all fields are equal, false otherwise
////////////////////////////////////////////////////////////////////////////////

bool TRI_hash_array_t::isEqualKeyElement (TRI_index_search_value_t const* left,
                                          TRI_index_element_t const* right) const {
  TRI_ASSERT_EXPENSIVE(right->document() != nullptr);

  for (size_t field = 0; field < _numFields; ++field) {
    TRI_shaped_json_t* keyJson = &left->_values[field];
    TRI_shaped_sub_t* elementSub = &right->subObjects()[field];

    // values with different shape ids are never equal
    if (keyJson->_sid != elementSub->_sid) {
      return false;
    }

    auto keyLength = keyJson->_data.length;

    // fetch pointer and length of the element's data for this field
    char const* elementData;
    size_t elementLength;
    TRI_InspectShapedSub(elementSub, right->document(), elementData, elementLength);

    // lengths must agree; the bytes are only compared if there are any
    if (keyLength != elementLength ||
        (keyLength > 0 &&
         memcmp(keyJson->_data.data, elementData, keyLength) != 0)) {
      return false;
    }
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief given a key generates a hash integer
///
/// The data blocks of all _numFields key values are hashed in field order,
/// each step using the previous result as the seed. Shape ids are not part of
/// the hash.
///
/// @param key  the search key to hash
/// @return the combined 64 bit hash value
////////////////////////////////////////////////////////////////////////////////

uint64_t TRI_hash_array_t::hashKey (TRI_index_search_value_t const* key) const {
  uint64_t seed = 0x0123456789abcdef;

  size_t field = 0;
  while (field < _numFields) {
    // only the data block is hashed, the sid is ignored
    auto const& blob = key->_values[field]._data;
    seed = fasthash64(blob.data, blob.length, seed);
    ++field;
  }

  return seed;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief given an element generates a hash integer
///
/// The data blocks of all _numFields sub-objects are hashed in field order,
/// each step using the previous result as the seed. Shape ids are not part of
/// the hash.
///
/// @param element  the index element to hash
/// @return the combined 64 bit hash value
////////////////////////////////////////////////////////////////////////////////

uint64_t TRI_hash_array_t::hashElement (TRI_index_element_t const* element) const {
  uint64_t seed = 0x0123456789abcdef;

  for (size_t field = 0; field < _numFields; ++field) {
    auto* sub = &element->subObjects()[field];

    // look up where the data block of this field lives and how long it is
    char const* bytes;
    size_t numBytes;
    TRI_InspectShapedSub(sub, element->document(), bytes, numBytes);

    // only the data block is hashed, the sid is ignored
    seed = fasthash64(bytes, numBytes, seed);
  }

  return seed;
}
// -----------------------------------------------------------------------------
// --SECTION-- HASH ARRAY
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
// --SECTION-- private functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief resizes the array
///
/// Allocates a new slot table with targetSize entries for the bucket, re-hashes
/// all elements of the old table into it and frees the old table.
///
/// @param hashIndex    the index the array belongs to, only used for logging
/// @param b            the bucket to resize
/// @param targetSize   the number of slots of the new table, must be > 0
/// @param allowShrink  if false, nothing is done when the bucket already has
///                     at least targetSize slots
/// @return TRI_ERROR_NO_ERROR on success (including the no-op case),
///         TRI_ERROR_OUT_OF_MEMORY if the new table could not be allocated; in
///         that case the bucket is left unchanged
////////////////////////////////////////////////////////////////////////////////

int TRI_hash_array_t::resizeInternal (triagens::arango::HashIndex* hashIndex,
                                      Bucket& b,
                                      uint64_t targetSize,
                                      bool allowShrink) {
  if (b._nrAlloc >= targetSize && ! allowShrink) {
    return TRI_ERROR_NO_ERROR;
  }

  // only log performance infos for indexes with more than this number of entries
  static uint64_t const NotificationSizeThreshold = 131072;

  double start = TRI_microtime();
  if (targetSize > NotificationSizeThreshold) {
    LOG_ACTION("index-resize %s, target size: %llu",
               hashIndex->context().c_str(),
               (unsigned long long) targetSize);
  }

  TRI_index_element_t** oldTable = b._table;
  uint64_t oldAlloc = b._nrAlloc;

  TRI_ASSERT(targetSize > 0);

  try {
    // the trailing "()" value-initializes the array, i.e. every slot starts
    // out as nullptr. This is required because a nullptr marks a free slot in
    // all probing loops; a plain "new T*[n]" would leave the slots
    // uninitialized
    b._table = new TRI_index_element_t* [targetSize]();
  }
  catch (...) {
    // the assignment did not happen, so b._table still is the old table
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  b._nrAlloc = targetSize;

  if (b._nrUsed > 0) {
    uint64_t const n = b._nrAlloc;

    // NOTE(review): the probing below assumes the new table has more slots
    // than there are elements to move, so a free slot is always found --
    // confirm for callers that pass allowShrink = true
    for (uint64_t j = 0; j < oldAlloc; j++) {
      TRI_index_element_t* element = oldTable[j];

      if (element == nullptr) {
        continue;
      }

      // linear probing: start at the element's hash position, walk to the end
      // of the table and wrap around to the beginning if necessary
      uint64_t i, k;
      i = k = hashElement(element) % n;

      for (; i < n && b._table[i] != nullptr; ++i);
      if (i == n) {
        for (i = 0; i < k && b._table[i] != nullptr; ++i);
      }

      // only the pointer is moved into the new table, the element itself
      // stays where it is
      b._table[i] = element;
    }
  }

  delete [] oldTable;

  LOG_TIMER((TRI_microtime() - start),
            "index-resize %s, target size: %llu",
            hashIndex->context().c_str(),
            (unsigned long long) targetSize);

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief triggers a resize if necessary
///
/// The bucket is grown to 2 * _nrAlloc + 1 slots as soon as more than two
/// thirds of its slots are in use (2 * _nrAlloc < 3 * _nrUsed).
///
/// @param hashIndex  the index the array belongs to, passed on to the resize
/// @param b          the bucket to check
/// @return false if a resize was necessary but failed, true otherwise
////////////////////////////////////////////////////////////////////////////////

bool TRI_hash_array_t::checkResize (triagens::arango::HashIndex* hashIndex,
                                    Bucket& b) {
  bool const needsGrowth = (2 * b._nrAlloc < 3 * b._nrUsed);

  if (! needsGrowth) {
    // still enough free slots, nothing to do
    return true;
  }

  return resizeInternal(hashIndex, b, 2 * b._nrAlloc + 1, false) == TRI_ERROR_NO_ERROR;
}
// -----------------------------------------------------------------------------
// --SECTION-- public functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief get the hash array's memory usage
///
/// Only the slot tables of the buckets (one pointer per allocated slot) are
/// counted here.
///
/// @return the number of bytes occupied by the slot tables of all buckets
////////////////////////////////////////////////////////////////////////////////

size_t TRI_hash_array_t::memoryUsage () {
  size_t total = 0;

  for (size_t idx = 0; idx < _buckets.size(); ++idx) {
    total += static_cast<size_t>(_buckets[idx]._nrAlloc * sizeof(TRI_index_element_t*));
  }

  return total;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief resizes the hash table
///
/// Requests (3 * size / 2 + 1) / <number of buckets> slots for every bucket.
/// Buckets are never shrunk by this call. Processing stops at the first
/// bucket that fails to resize.
///
/// @param hashIndex  the index the array belongs to, passed on to the resize
/// @param size       the expected total number of elements
/// @return TRI_ERROR_NO_ERROR on success, otherwise the error code of the
///         first failed bucket resize
////////////////////////////////////////////////////////////////////////////////

int TRI_hash_array_t::resize (triagens::arango::HashIndex* hashIndex,
                              size_t size) {
  for (auto& bucket : _buckets) {
    // slots requested for each single bucket
    uint64_t const perBucket = (uint64_t) (3 * size / 2 + 1) / _buckets.size();

    int const res = resizeInternal(hashIndex, bucket, perBucket, false);

    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief finds an element given a key, return nullptr if not found
///
/// The bucket is selected from the key's hash value, then the bucket's table
/// is probed linearly, starting at hash % _nrAlloc and wrapping around at the
/// end of the table.
///
/// NOTE(review): the "not found" result relies on the bucket having at least
/// one free slot; the resize policy presumably guarantees this -- confirm.
///
/// @param key  the search key
/// @return the matching element, or nullptr if there is none
////////////////////////////////////////////////////////////////////////////////

TRI_index_element_t* TRI_hash_array_t::findByKey (TRI_index_search_value_t* key) const {
  uint64_t const hash = hashKey(key);
  Bucket const& bucket = _buckets[hash & _bucketsMask];

  uint64_t const capacity = bucket._nrAlloc;
  uint64_t const home = hash % capacity;

  // first pass: from the home position up to the end of the table
  uint64_t pos = home;
  while (pos < capacity &&
         bucket._table[pos] != nullptr &&
         ! isEqualKeyElement(key, bucket._table[pos])) {
    ++pos;
  }

  if (pos == capacity) {
    // second pass: wrap around and continue up to the home position
    pos = 0;
    while (pos < home &&
           bucket._table[pos] != nullptr &&
           ! isEqualKeyElement(key, bucket._table[pos])) {
      ++pos;
    }
  }

  // the slot we stopped at either is free (nullptr, i.e. not found) or holds
  // the element we were looking for
  return bucket._table[pos];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adds a key/element to the array
///
/// This function claims the ownership of the sub-objects in the inserted
/// element.
///
/// @param hashIndex   the index the array belongs to, passed on to the resize
/// @param key         the key of the element, used for hashing and for the
///                    uniqueness check
/// @param element     the element to store; the pointer itself is put into
///                    the table
/// @param isRollback  not used by this implementation
/// @return TRI_ERROR_NO_ERROR on success,
///         TRI_ERROR_OUT_OF_MEMORY if the bucket had to grow but could not,
///         TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED if an element with an
///         equal key is already present
////////////////////////////////////////////////////////////////////////////////

int TRI_hash_array_t::insert (triagens::arango::HashIndex* hashIndex,
                              TRI_index_search_value_t const* key,
                              TRI_index_element_t const* element,
                              bool isRollback) {
  uint64_t const hash = hashKey(key);
  Bucket& b = _buckets[hash & _bucketsMask];

  // grow the bucket first if it has become too full
  if (! checkResize(hashIndex, b)) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  // must be read after the potential resize
  uint64_t const capacity = b._nrAlloc;
  uint64_t const home = hash % capacity;

  // probe linearly for either a free slot or an element with an equal key,
  // first from the home position to the end of the table ...
  uint64_t i = home;
  while (i < capacity &&
         b._table[i] != nullptr &&
         ! isEqualKeyElement(key, b._table[i])) {
    ++i;
  }

  if (i == capacity) {
    // ... then from the beginning of the table up to the home position
    i = 0;
    while (i < home &&
           b._table[i] != nullptr &&
           ! isEqualKeyElement(key, b._table[i])) {
      ++i;
    }
  }

  // an occupied slot means an element with the same key already exists
  if (b._table[i] != nullptr) {
    return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED;
  }

  b._table[i] = const_cast<TRI_index_element_t*>(element);
  TRI_ASSERT(b._table[i] != nullptr && b._table[i]->document() != nullptr);
  b._nrUsed++;

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes an element from the array
///
/// The element is located by its hash value and identified by its document
/// pointer. The stored element is freed, and the elements following it in the
/// probe sequence are moved up so that lookups do not hit a gap. If the bucket
/// becomes empty, it is resized to initialSize() slots.
///
/// @param hashIndex  the index the array belongs to, passed on to the resize
/// @param element    an element describing what to remove
/// @return TRI_ERROR_NO_ERROR on success, TRI_RESULT_ELEMENT_NOT_FOUND if no
///         element with the same document is stored
////////////////////////////////////////////////////////////////////////////////

int TRI_hash_array_t::remove (triagens::arango::HashIndex* hashIndex,
                              TRI_index_element_t* element) {
  uint64_t const hash = hashElement(element);
  Bucket& bucket = _buckets[hash & _bucketsMask];

  uint64_t const capacity = bucket._nrAlloc;
  uint64_t const start = hash % capacity;

  // probe linearly for the slot that holds the same document, first from the
  // start position to the end of the table ...
  uint64_t hole = start;
  while (hole < capacity &&
         bucket._table[hole] != nullptr &&
         element->document() != bucket._table[hole]->document()) {
    ++hole;
  }

  if (hole == capacity) {
    // ... then from the beginning of the table up to the start position
    hole = 0;
    while (hole < start &&
           bucket._table[hole] != nullptr &&
           element->document() != bucket._table[hole]->document()) {
      ++hole;
    }
  }

  TRI_index_element_t* stored = bucket._table[hole];

  // a free slot means there is no such element
  if (stored == nullptr) {
    return TRI_RESULT_ELEMENT_NOT_FOUND;
  }

  // destroy the stored element and mark its slot as free
  TRI_index_element_t::free(stored);
  bucket._table[hole] = nullptr;
  bucket._nrUsed--;

  // now check the following slots for elements to move closer to their hash
  // position, so that there are no gaps in any probe sequence
  uint64_t probe = TRI_IncModU64(hole, capacity);

  while (bucket._table[probe] != nullptr) {
    uint64_t const wanted = hashElement(bucket._table[probe]) % capacity;

    // the element at "probe" is moved into the hole unless its hash position
    // lies cyclically in the range (hole, probe]
    if ((hole < probe && ! (hole < wanted && wanted <= probe)) ||
        (probe < hole && ! (hole < wanted || wanted <= probe))) {
      bucket._table[hole] = bucket._table[probe];
      bucket._table[probe] = nullptr;
      hole = probe;
    }

    probe = TRI_IncModU64(probe, capacity);
  }

  // give back memory once the bucket is empty; the result of the resize is
  // not checked
  if (bucket._nrUsed == 0) {
    resizeInternal (hashIndex, bucket, initialSize(), true);
  }

  return TRI_ERROR_NO_ERROR;
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End: