1
0
Fork 0

Merge branch 'array_indexing' of github.com:arangodb/arangodb into array_indexing

This commit is contained in:
Michael Hackstein 2015-08-21 16:21:38 +02:00
commit 85c5885c76
5 changed files with 276 additions and 266 deletions

View File

@ -34,7 +34,6 @@
#include "Basics/fasthash.h" #include "Basics/fasthash.h"
#include "Basics/logging.h" #include "Basics/logging.h"
#include "Indexes/HashIndex.h" #include "Indexes/HashIndex.h"
#include "Indexes/Index.h"
#include "VocBase/document-collection.h" #include "VocBase/document-collection.h"
#include "VocBase/VocShaper.h" #include "VocBase/VocShaper.h"
@ -46,12 +45,11 @@
/// @brief determines if a key corresponds to an element /// @brief determines if a key corresponds to an element
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static bool IsEqualKeyElement (TRI_hash_array_t const* array, bool TRI_hash_array_t::isEqualKeyElement (TRI_index_search_value_t const* left,
TRI_index_search_value_t const* left, TRI_index_element_t const* right) const {
TRI_index_element_t const* right) {
TRI_ASSERT_EXPENSIVE(right->document() != nullptr); TRI_ASSERT_EXPENSIVE(right->document() != nullptr);
for (size_t j = 0; j < array->_numFields; ++j) { for (size_t j = 0; j < _numFields; ++j) {
TRI_shaped_json_t* leftJson = &left->_values[j]; TRI_shaped_json_t* leftJson = &left->_values[j];
TRI_shaped_sub_t* rightSub = &right->subObjects()[j]; TRI_shaped_sub_t* rightSub = &right->subObjects()[j];
@ -81,11 +79,10 @@ static bool IsEqualKeyElement (TRI_hash_array_t const* array,
/// @brief given a key generates a hash integer /// @brief given a key generates a hash integer
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static uint64_t HashKey (TRI_hash_array_t const* array, uint64_t TRI_hash_array_t::hashKey (TRI_index_search_value_t const* key) const {
TRI_index_search_value_t const* key) {
uint64_t hash = 0x0123456789abcdef; uint64_t hash = 0x0123456789abcdef;
for (size_t j = 0; j < array->_numFields; ++j) { for (size_t j = 0; j < _numFields; ++j) {
// ignore the sid for hashing // ignore the sid for hashing
hash = fasthash64(key->_values[j]._data.data, key->_values[j]._data.length, hash = fasthash64(key->_values[j]._data.data, key->_values[j]._data.length,
hash); hash);
@ -98,11 +95,10 @@ static uint64_t HashKey (TRI_hash_array_t const* array,
/// @brief given an element generates a hash integer /// @brief given an element generates a hash integer
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static uint64_t HashElement (TRI_hash_array_t* array, uint64_t TRI_hash_array_t::hashElement (TRI_index_element_t const* element) const {
TRI_index_element_t* element) {
uint64_t hash = 0x0123456789abcdef; uint64_t hash = 0x0123456789abcdef;
for (size_t j = 0; j < array->_numFields; j++) { for (size_t j = 0; j < _numFields; j++) {
char const* data; char const* data;
size_t length; size_t length;
TRI_InspectShapedSub(&element->subObjects()[j], element->document(), data, length); TRI_InspectShapedSub(&element->subObjects()[j], element->document(), data, length);
@ -119,55 +115,20 @@ static uint64_t HashElement (TRI_hash_array_t* array,
// --SECTION-- HASH ARRAY // --SECTION-- HASH ARRAY
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
// --SECTION-- private defines
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief initial preallocation size of the hash table when the table is
/// first created
/// setting this to a high value will waste memory but reduce the number of
/// reallocations/repositionings necessary when the table grows
////////////////////////////////////////////////////////////////////////////////
static inline uint64_t InitialSize () {
return 251;
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- private functions // --SECTION-- private functions
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief allocate memory for the hash table
////////////////////////////////////////////////////////////////////////////////
static int AllocateTable (TRI_hash_array_t* array,
uint64_t numElements) {
size_t const size = static_cast<size_t>
(sizeof(TRI_index_element_t*) * numElements);
TRI_index_element_t** table = static_cast<TRI_index_element_t**>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, size, true));
if (table == nullptr) {
return TRI_ERROR_OUT_OF_MEMORY;
}
array->_table = table;
array->_nrAlloc = numElements;
return TRI_ERROR_NO_ERROR;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief resizes the array /// @brief resizes the array
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static int ResizeHashArray (triagens::arango::HashIndex* hashIndex, int TRI_hash_array_t::resizeInternal (triagens::arango::HashIndex* hashIndex,
TRI_hash_array_t* array, Bucket& b,
uint64_t targetSize, uint64_t targetSize,
bool allowShrink) { bool allowShrink) {
if (array->_nrAlloc >= targetSize && ! allowShrink) {
if (b._nrAlloc >= targetSize && ! allowShrink) {
return TRI_ERROR_NO_ERROR; return TRI_ERROR_NO_ERROR;
} }
@ -181,45 +142,45 @@ static int ResizeHashArray (triagens::arango::HashIndex* hashIndex,
(unsigned long long) targetSize); (unsigned long long) targetSize);
} }
TRI_index_element_t** oldTable = array->_table; TRI_index_element_t** oldTable = b._table;
uint64_t oldAlloc = array->_nrAlloc; uint64_t oldAlloc = b._nrAlloc;
TRI_ASSERT(targetSize > 0); TRI_ASSERT(targetSize > 0);
int res = AllocateTable(array, targetSize); try {
b._table = new TRI_index_element_t* [targetSize];
if (res != TRI_ERROR_NO_ERROR) {
return res;
} }
catch (...) {
return TRI_ERROR_OUT_OF_MEMORY;
}
b._nrAlloc = targetSize;
if (array->_nrUsed > 0) { if (b._nrUsed > 0) {
uint64_t const n = array->_nrAlloc; uint64_t const n = b._nrAlloc;
for (uint64_t j = 0; j < oldAlloc; j++) { for (uint64_t j = 0; j < oldAlloc; j++) {
TRI_index_element_t* element = oldTable[j]; TRI_index_element_t* element = oldTable[j];
if (element != nullptr) { if (element != nullptr) {
uint64_t i, k; uint64_t i, k;
i = k = HashElement(array, element) % n; i = k = hashElement(element) % n;
for (; i < n && array->_table[i] != nullptr; ++i); for (; i < n && b._table[i] != nullptr; ++i);
if (i == n) { if (i == n) {
for (i = 0; i < k && array->_table[i] != nullptr; ++i); for (i = 0; i < k && b._table[i] != nullptr; ++i);
} }
TRI_ASSERT_EXPENSIVE(i < n);
// ..................................................................... // .....................................................................
// add a new element to the associative array // add a new element to the associative array
// memcpy ok here since we are simply moving array items internally // memcpy ok here since we are simply moving array items internally
// ..................................................................... // .....................................................................
array->_table[i] = element; b._table[i] = element;
} }
} }
} }
TRI_Free(TRI_UNKNOWN_MEM_ZONE, oldTable); delete [] oldTable;
LOG_TIMER((TRI_microtime() - start), LOG_TIMER((TRI_microtime() - start),
"index-resize %s, target size: %llu", "index-resize %s, target size: %llu",
@ -233,10 +194,10 @@ static int ResizeHashArray (triagens::arango::HashIndex* hashIndex,
/// @brief triggers a resize if necessary /// @brief triggers a resize if necessary
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
static bool CheckResize (triagens::arango::HashIndex* hashIndex, bool TRI_hash_array_t::checkResize (triagens::arango::HashIndex* hashIndex,
TRI_hash_array_t* array) { Bucket& b) {
if (array->_nrAlloc < 2 * array->_nrUsed) { if (2 * b._nrAlloc < 3 * b._nrUsed) {
int res = ResizeHashArray(hashIndex, array, 2 * array->_nrAlloc + 1, false); int res = resizeInternal(hashIndex, b, 2 * b._nrAlloc + 1, false);
if (res != TRI_ERROR_NO_ERROR) { if (res != TRI_ERROR_NO_ERROR) {
return false; return false;
@ -246,59 +207,6 @@ static bool CheckResize (triagens::arango::HashIndex* hashIndex,
return true; return true;
} }
// -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief initialises an array
////////////////////////////////////////////////////////////////////////////////
int TRI_InitHashArray (TRI_hash_array_t* array,
size_t numFields) {
TRI_ASSERT(numFields > 0);
array->_numFields = numFields;
array->_table = nullptr;
array->_nrUsed = 0;
array->_nrAlloc = 0;
return AllocateTable(array, InitialSize());
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroys an array, but does not free the pointer
////////////////////////////////////////////////////////////////////////////////
void TRI_DestroyHashArray (TRI_hash_array_t* array) {
if (array == nullptr) {
return;
}
// ...........................................................................
// Go through each item in the array and remove any internal allocated memory
// ...........................................................................
// array->_table might be NULL if array initialisation fails
if (array->_table != nullptr) {
TRI_index_element_t** p;
TRI_index_element_t** e;
p = array->_table;
e = p + array->_nrAlloc;
for (; p < e; ++p) {
auto item = *p;
if (item != nullptr) {
TRI_index_element_t::free(item);
}
}
TRI_Free(TRI_UNKNOWN_MEM_ZONE, array->_table);
}
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- public functions // --SECTION-- public functions
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -307,71 +215,57 @@ void TRI_DestroyHashArray (TRI_hash_array_t* array) {
/// @brief get the hash array's memory usage /// @brief get the hash array's memory usage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
size_t TRI_MemoryUsageHashArray (TRI_hash_array_t const* array) { size_t TRI_hash_array_t::memoryUsage () {
if (array == nullptr) { size_t sum = 0;
return 0; for (auto& b : _buckets) {
sum += (size_t) (b._nrAlloc * sizeof(TRI_index_element_t*));
} }
return sum;
size_t tableSize = (size_t) (array->_nrAlloc *
sizeof(TRI_index_element_t*));
size_t memberSize = (size_t) (array->_nrUsed *
(sizeof(TRI_index_element_t) +
array->_numFields * sizeof(TRI_shaped_sub_t)));
return (size_t) (tableSize + memberSize);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief resizes the hash table /// @brief resizes the hash table
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_ResizeHashArray (triagens::arango::HashIndex* hashIndex, int TRI_hash_array_t::resize (triagens::arango::HashIndex* hashIndex,
TRI_hash_array_t* array, size_t size) {
size_t size) { int res = TRI_ERROR_NO_ERROR;
return ResizeHashArray(hashIndex, array, (uint64_t) (2 * size + 1), false); for (auto& b : _buckets) {
res = resizeInternal(hashIndex, b,
(uint64_t) (3 * size / 2 + 1) / _buckets.size(),
false);
if (res != TRI_ERROR_NO_ERROR) {
return res;
}
}
return res;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief finds an element given a key, return NULL if not found /// @brief finds an element given a key, return NULL if not found
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
TRI_index_element_t* TRI_LookupByKeyHashArray (TRI_hash_array_t const* array, TRI_index_element_t* TRI_hash_array_t::findByKey (TRI_index_search_value_t* key) const {
TRI_index_search_value_t* key) { uint64_t i = hashKey(key);
uint64_t const n = array->_nrAlloc; Bucket const& b = _buckets[i & _bucketsMask];
uint64_t i, k;
i = k = HashKey(array, key) % n; uint64_t const n = b._nrAlloc;
i = i % n;
uint64_t k = i;
for (; i < n && array->_table[i] != nullptr && for (; i < n && b._table[i] != nullptr &&
! IsEqualKeyElement(array, key, array->_table[i]); ++i); ! isEqualKeyElement(key, b._table[i]); ++i);
if (i == n) { if (i == n) {
for (i = 0; i < k && array->_table[i] != nullptr && for (i = 0; i < k && b._table[i] != nullptr &&
! IsEqualKeyElement(array, key, array->_table[i]); ++i); ! isEqualKeyElement(key, b._table[i]); ++i);
} }
TRI_ASSERT_EXPENSIVE(i < n);
// ........................................................................... // ...........................................................................
// return whatever we found, this is nullptr if the thing was not found // return whatever we found, this is nullptr if the thing was not found
// and otherwise a valid pointer // and otherwise a valid pointer
// ........................................................................... // ...........................................................................
return array->_table[i]; return b._table[i];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief finds an element given a key, return NULL if not found
////////////////////////////////////////////////////////////////////////////////
TRI_index_element_t* TRI_FindByKeyHashArray (TRI_hash_array_t const* array,
TRI_index_search_value_t* key) {
TRI_index_element_t* element = TRI_LookupByKeyHashArray(array, key);
if (element != nullptr && IsEqualKeyElement(array, key, element)) {
return element;
}
return nullptr;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -381,35 +275,33 @@ TRI_index_element_t* TRI_FindByKeyHashArray (TRI_hash_array_t const* array,
/// element. /// element.
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_InsertKeyHashArray (triagens::arango::HashIndex* hashIndex, int TRI_hash_array_t::insert (triagens::arango::HashIndex* hashIndex,
TRI_hash_array_t* array, TRI_index_search_value_t const* key,
TRI_index_search_value_t const* key, TRI_index_element_t const* element,
TRI_index_element_t const* element, bool isRollback) {
bool isRollback) {
// ........................................................................... // ...........................................................................
// we are adding and the table is more than half full, extend it // we are adding and the bucket's table is more than two-thirds full, extend it
// ........................................................................... // ...........................................................................
if (! CheckResize(hashIndex, array)) { uint64_t i = hashKey(key);
Bucket& b = _buckets[i & _bucketsMask];
if (! checkResize(hashIndex, b)) {
return TRI_ERROR_OUT_OF_MEMORY; return TRI_ERROR_OUT_OF_MEMORY;
} }
const uint64_t n = array->_nrAlloc; uint64_t const n = b._nrAlloc;
uint64_t i, k; i = i % n;
uint64_t k = i;
i = k = HashKey(array, key) % n; for (; i < n && b._table[i] != nullptr &&
! isEqualKeyElement(key, b._table[i]); ++i);
for (; i < n && array->_table[i] != nullptr &&
! IsEqualKeyElement(array, key, array->_table[i]); ++i);
if (i == n) { if (i == n) {
for (i = 0; i < k && array->_table[i] != nullptr && for (i = 0; i < k && b._table[i] != nullptr &&
! IsEqualKeyElement(array, key, array->_table[i]); ++i); ! isEqualKeyElement(key, b._table[i]); ++i);
} }
TRI_ASSERT_EXPENSIVE(i < n); TRI_index_element_t* arrayElement = b._table[i];
TRI_index_element_t* arrayElement = array->_table[i];
// ........................................................................... // ...........................................................................
// if we found an element, return // if we found an element, return
@ -419,9 +311,9 @@ int TRI_InsertKeyHashArray (triagens::arango::HashIndex* hashIndex,
return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED; return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED;
} }
array->_table[i] = const_cast<TRI_index_element_t*>(element); b._table[i] = const_cast<TRI_index_element_t*>(element);
TRI_ASSERT(array->_table[i] != nullptr && array->_table[i]->document() != nullptr); TRI_ASSERT(b._table[i] != nullptr && b._table[i]->document() != nullptr);
array->_nrUsed++; b._nrUsed++;
return TRI_ERROR_NO_ERROR; return TRI_ERROR_NO_ERROR;
} }
@ -430,24 +322,23 @@ int TRI_InsertKeyHashArray (triagens::arango::HashIndex* hashIndex,
/// @brief removes an element from the array /// @brief removes an element from the array
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_RemoveElementHashArray (triagens::arango::HashIndex* hashIndex, int TRI_hash_array_t::remove (triagens::arango::HashIndex* hashIndex,
TRI_hash_array_t* array, TRI_index_element_t* element) {
TRI_index_element_t* element) { uint64_t i = hashElement(element);
uint64_t const n = array->_nrAlloc; Bucket& b = _buckets[i & _bucketsMask];
uint64_t i, k;
i = k = HashElement(array, element) % n; uint64_t const n = b._nrAlloc;
i = i % n;
uint64_t k = i;
for (; i < n && array->_table[i] != nullptr && for (; i < n && b._table[i] != nullptr &&
element->document() != array->_table[i]->document(); ++i); element->document() != b._table[i]->document(); ++i);
if (i == n) { if (i == n) {
for (i = 0; i < k && array->_table[i] != nullptr && for (i = 0; i < k && b._table[i] != nullptr &&
element->document() != array->_table[i]->document(); ++i); element->document() != b._table[i]->document(); ++i);
} }
TRI_ASSERT_EXPENSIVE(i < n); TRI_index_element_t* arrayElement = b._table[i];
TRI_index_element_t* arrayElement = array->_table[i];
// ........................................................................... // ...........................................................................
// if we did not find such an item return error code // if we did not find such an item return error code
@ -463,8 +354,8 @@ int TRI_RemoveElementHashArray (triagens::arango::HashIndex* hashIndex,
// ........................................................................... // ...........................................................................
TRI_index_element_t::free(arrayElement); TRI_index_element_t::free(arrayElement);
array->_table[i] = nullptr; b._table[i] = nullptr;
array->_nrUsed--; b._nrUsed--;
// ........................................................................... // ...........................................................................
// and now check the following places for items to move closer together // and now check the following places for items to move closer together
@ -473,20 +364,20 @@ int TRI_RemoveElementHashArray (triagens::arango::HashIndex* hashIndex,
k = TRI_IncModU64(i, n); k = TRI_IncModU64(i, n);
while (array->_table[k] != nullptr) { while (b._table[k] != nullptr) {
uint64_t j = HashElement(array, array->_table[k]) % n; uint64_t j = hashElement(b._table[k]) % n;
if ((i < k && ! (i < j && j <= k)) || (k < i && ! (i < j || j <= k))) { if ((i < k && ! (i < j && j <= k)) || (k < i && ! (i < j || j <= k))) {
array->_table[i] = array->_table[k]; b._table[i] = b._table[k];
array->_table[k] = nullptr; b._table[k] = nullptr;
i = k; i = k;
} }
k = TRI_IncModU64(k, n); k = TRI_IncModU64(k, n);
} }
if (array->_nrUsed == 0) { if (b._nrUsed == 0) {
ResizeHashArray(hashIndex, array, InitialSize(), true); resizeInternal (hashIndex, b, initialSize(), true);
} }
return TRI_ERROR_NO_ERROR; return TRI_ERROR_NO_ERROR;

View File

@ -33,13 +33,12 @@
#include "Basics/Common.h" #include "Basics/Common.h"
#include "Indexes/Index.h"
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- forward declarations // --SECTION-- forward declarations
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
struct TRI_index_element_t;
struct TRI_index_search_value_s;
namespace triagens { namespace triagens {
namespace arango { namespace arango {
class HashIndex; class HashIndex;
@ -47,43 +46,151 @@ namespace triagens {
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- public types // --SECTION-- TRI_hash_array_t
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief associative array /// @brief associative array
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
typedef struct TRI_hash_array_s { class TRI_hash_array_t {
size_t _numFields; // the number of fields indexed size_t _numFields; // the number of fields indexed
uint64_t _nrAlloc; // the size of the table struct Bucket {
uint64_t _nrUsed; // the number of used entries
TRI_index_element_t** _table; // the table itself, aligned to a cache line boundary uint64_t _nrAlloc; // the size of the table
} uint64_t _nrUsed; // the number of used entries
TRI_hash_array_t;
// ----------------------------------------------------------------------------- TRI_index_element_t** _table; // the table itself, aligned to a cache line boundary
// --SECTION-- HASH ARRAY };
// -----------------------------------------------------------------------------
std::vector<Bucket> _buckets;
size_t _bucketsMask;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors // --SECTION-- constructors and destructors
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief initialises an array /// @brief constructor
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_InitHashArray (TRI_hash_array_t*, public:
size_t); TRI_hash_array_t (size_t numFields, size_t numberBuckets = 1)
: _numFields(numFields) {
// Make the number of buckets a power of two:
size_t ex = 0;
size_t nr = 1;
numberBuckets >>= 1;
while (numberBuckets > 0) {
ex += 1;
numberBuckets >>= 1;
nr <<= 1;
}
numberBuckets = nr;
_bucketsMask = nr - 1;
try {
for (size_t j = 0; j < numberBuckets; j++) {
_buckets.emplace_back();
Bucket& b = _buckets.back();
b._nrAlloc = initialSize();
b._table = nullptr;
// may fail...
b._table = new TRI_index_element_t* [b._nrAlloc];
for (uint64_t i = 0; i < b._nrAlloc; i++) {
b._table[i] = nullptr;
}
}
}
catch (...) {
for (auto& b : _buckets) {
delete [] b._table;
b._table = nullptr;
b._nrAlloc = 0;
}
throw;
}
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief destroys an array, but does not free the pointer /// @brief destructor
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void TRI_DestroyHashArray (TRI_hash_array_t*); ~TRI_hash_array_t () {
for (auto& b : _buckets) {
for (size_t i = 0; i < b._nrAlloc; i++) {
TRI_index_element_t* p = b._table[i];
if (p != nullptr) {
TRI_index_element_t::free(p);
}
}
delete [] b._table;
b._table = nullptr;
b._nrAlloc = 0;
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adhere to the rule of five
////////////////////////////////////////////////////////////////////////////////
TRI_hash_array_t (TRI_hash_array_t const&) = delete; // copy constructor
TRI_hash_array_t (TRI_hash_array_t&&) = delete; // move constructor
TRI_hash_array_t& operator= (TRI_hash_array_t const&) = delete; // op =
TRI_hash_array_t& operator= (TRI_hash_array_t&&) = delete; // op =
////////////////////////////////////////////////////////////////////////////////
/// @brief initial preallocation size of the hash table when the table is
/// first created
/// setting this to a high value will waste memory but reduce the number of
/// reallocations/repositionings necessary when the table grows
////////////////////////////////////////////////////////////////////////////////
private:
static uint64_t initialSize () {
return 251;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief determines if a key corresponds to an element
////////////////////////////////////////////////////////////////////////////////
bool isEqualKeyElement (TRI_index_search_value_t const* left,
TRI_index_element_t const* right) const;
////////////////////////////////////////////////////////////////////////////////
/// @brief given a key generates a hash integer
////////////////////////////////////////////////////////////////////////////////
uint64_t hashKey (TRI_index_search_value_t const* key) const;
////////////////////////////////////////////////////////////////////////////////
/// @brief given an element generates a hash integer
////////////////////////////////////////////////////////////////////////////////
uint64_t hashElement (TRI_index_element_t const* element) const;
////////////////////////////////////////////////////////////////////////////////
/// @brief resize the hash array
////////////////////////////////////////////////////////////////////////////////
int resizeInternal (triagens::arango::HashIndex* hashIndex,
Bucket& b,
uint64_t targetSize,
bool allowShrink);
////////////////////////////////////////////////////////////////////////////////
/// @brief check a resize of the hash array
////////////////////////////////////////////////////////////////////////////////
bool checkResize (triagens::arango::HashIndex* hashIndex,
Bucket& b);
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- public functions // --SECTION-- public functions
@ -93,40 +200,50 @@ void TRI_DestroyHashArray (TRI_hash_array_t*);
/// @brief get the hash array's memory usage /// @brief get the hash array's memory usage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
size_t TRI_MemoryUsageHashArray (TRI_hash_array_t const*); public:
size_t memoryUsage ();
////////////////////////////////////////////////////////////////////////////////
/// @brief get the number of elements in the hash
////////////////////////////////////////////////////////////////////////////////
size_t size () {
size_t sum = 0;
for (auto& b : _buckets) {
sum += static_cast<size_t>(b._nrUsed);
}
return sum;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief resizes the hash table /// @brief resizes the hash table
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_ResizeHashArray (triagens::arango::HashIndex*, int resize (triagens::arango::HashIndex*, size_t);
TRI_hash_array_t*,
size_t);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief finds an element given a key, returns NULL if not found /// @brief finds an element given a key, returns NULL if not found
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
TRI_index_element_t* TRI_FindByKeyHashArray (TRI_hash_array_t const*, TRI_index_element_t* findByKey (TRI_index_search_value_t* key) const;
struct TRI_index_search_value_s* key);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief adds a key/element to the array /// @brief adds a key/element to the array
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_InsertKeyHashArray (triagens::arango::HashIndex*, int insert (triagens::arango::HashIndex*,
TRI_hash_array_t*, TRI_index_search_value_t const* key,
struct TRI_index_search_value_s const* key, TRI_index_element_t const* element,
TRI_index_element_t const* element, bool isRollback);
bool isRollback);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief removes an element from the array /// @brief removes an element from the array
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int TRI_RemoveElementHashArray (triagens::arango::HashIndex*, int remove (triagens::arango::HashIndex*,
TRI_hash_array_t*, TRI_index_element_t* element);
TRI_index_element_t* element);
};
#endif #endif

View File

@ -131,15 +131,6 @@ static int FillIndexSearchValueByHashIndexElement (HashIndex const* hashIndex,
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/* /*
<<<<<<< HEAD
// TODO needs to be extracted as a helper function
if ( triagens::basics::TRI_AttributeNamesHaveExpansion(hashIndex->fields()[j]) ) {
TRI_shape_t const* shape = shaper->lookupShapeId(shapedObject._sid);
if (shape->_type >= TRI_SHAPE_LIST && shape->_type <= TRI_SHAPE_HOMOGENEOUS_SIZED_LIST) {
std::cout << "Should expand here" << std::endl;
auto json = triagens::basics::Json(TRI_UNKNOWN_MEM_ZONE, TRI_JsonShapedJson(shaper, &shapedObject));
std::cout << "Is Array " << json.isArray() << " :: " << json << std::endl;
=======
static int HashIndexHelper (HashIndex const* hashIndex, static int HashIndexHelper (HashIndex const* hashIndex,
TRI_hash_index_element_t* hashElement, TRI_hash_index_element_t* hashElement,
TRI_doc_mptr_t const* document) { TRI_doc_mptr_t const* document) {
@ -182,7 +173,6 @@ static int HashIndexHelper (HashIndex const* hashIndex,
if (sparse) { if (sparse) {
// no need to continue // no need to continue
return res; return res;
>>>>>>> origin/eimerung_hashindex
} }
} }
} }
@ -203,7 +193,7 @@ static TRI_vector_pointer_t HashIndex_find (TRI_hash_array_t const* hashArray,
// to locate the hash array entry by key. // to locate the hash array entry by key.
// ............................................................................. // .............................................................................
TRI_index_element_t* result = TRI_FindByKeyHashArray(hashArray, key); TRI_index_element_t* result = hashArray->findByKey(key);
if (result != nullptr) { if (result != nullptr) {
// unique hash index: maximum number is 1 // unique hash index: maximum number is 1
@ -226,7 +216,7 @@ static int HashIndex_find (TRI_hash_array_t const* hashArray,
// to locate the hash array entry by key. // to locate the hash array entry by key.
// ............................................................................. // .............................................................................
TRI_index_element_t* found = TRI_FindByKeyHashArray(hashArray, key); TRI_index_element_t* found = hashArray->findByKey(key);
if (found != nullptr) { if (found != nullptr) {
// unique hash index: maximum number is 1 // unique hash index: maximum number is 1
@ -259,9 +249,11 @@ HashIndex::HashIndex (TRI_idx_iid_t iid,
TRI_ASSERT(iid != 0); TRI_ASSERT(iid != 0);
if (unique) { if (unique) {
_hashArray._table = nullptr; _hashArray = nullptr;
try {
if (TRI_InitHashArray(&_hashArray, _paths.size()) != TRI_ERROR_NO_ERROR) { _hashArray = new TRI_hash_array_t(_paths.size());
}
catch (...) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
} }
} }
@ -298,7 +290,8 @@ HashIndex::HashIndex (TRI_idx_iid_t iid,
HashIndex::~HashIndex () { HashIndex::~HashIndex () {
if (_unique) { if (_unique) {
TRI_DestroyHashArray(&_hashArray); delete _hashArray;
_hashArray = nullptr;
} }
else { else {
delete _multi._hashElement; delete _multi._hashElement;
@ -327,8 +320,8 @@ double HashIndex::selectivityEstimate () const {
size_t HashIndex::memory () const { size_t HashIndex::memory () const {
if (_unique) { if (_unique) {
return static_cast<size_t>(keyEntrySize() * _hashArray._nrUsed + return static_cast<size_t>(keyEntrySize() * _hashArray->size() +
TRI_MemoryUsageHashArray(&_hashArray)); _hashArray->memoryUsage());
} }
return static_cast<size_t>(keyEntrySize() * _multi._hashArray->size() + return static_cast<size_t>(keyEntrySize() * _multi._hashArray->size() +
@ -381,7 +374,7 @@ int HashIndex::sizeHint (size_t size) {
} }
if (_unique) { if (_unique) {
return TRI_ResizeHashArray(this, &_hashArray, size); return _hashArray->resize(this, size);
} }
else { else {
return _multi._hashArray->resize(size); return _multi._hashArray->resize(size);
@ -396,7 +389,7 @@ int HashIndex::sizeHint (size_t size) {
// FIXME: use std::vector here as well // FIXME: use std::vector here as well
TRI_vector_pointer_t HashIndex::lookup (TRI_index_search_value_t* searchValue) const { TRI_vector_pointer_t HashIndex::lookup (TRI_index_search_value_t* searchValue) const {
if (_unique) { if (_unique) {
return HashIndex_find(&_hashArray, searchValue); return HashIndex_find(_hashArray, searchValue);
} }
std::vector<TRI_index_element_t*>* results std::vector<TRI_index_element_t*>* results
@ -421,7 +414,7 @@ int HashIndex::lookup (TRI_index_search_value_t* searchValue,
std::vector<TRI_doc_mptr_copy_t>& documents) const { std::vector<TRI_doc_mptr_copy_t>& documents) const {
if (_unique) { if (_unique) {
return HashIndex_find(&_hashArray, searchValue, documents); return HashIndex_find(_hashArray, searchValue, documents);
} }
std::vector<TRI_index_element_t*>* results = nullptr; std::vector<TRI_index_element_t*>* results = nullptr;
@ -457,7 +450,7 @@ int HashIndex::lookup (TRI_index_search_value_t* searchValue,
if (_unique) { if (_unique) {
next = nullptr; next = nullptr;
return HashIndex_find(&_hashArray, searchValue, documents); return HashIndex_find(_hashArray, searchValue, documents);
} }
std::vector<TRI_index_element_t*>* results = nullptr; std::vector<TRI_index_element_t*>* results = nullptr;
@ -530,7 +523,7 @@ int HashIndex::insertUnique (TRI_doc_mptr_t const* doc,
return res; return res;
} }
res = TRI_InsertKeyHashArray(this, &_hashArray, &key, element, isRollback); res = _hashArray->insert(this, &key, element, isRollback);
if (key._values != nullptr) { if (key._values != nullptr) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, key._values); TRI_Free(TRI_UNKNOWN_MEM_ZONE, key._values);
@ -611,7 +604,7 @@ int HashIndex::removeUniqueElement(TRI_index_element_t* element, bool isRollback
return TRI_ERROR_DEBUG; return TRI_ERROR_DEBUG;
} }
int res = TRI_RemoveElementHashArray(this, &_hashArray, element); int res = _hashArray->remove (this, element);
// this might happen when rolling back // this might happen when rolling back
if (res == TRI_RESULT_ELEMENT_NOT_FOUND) { if (res == TRI_RESULT_ELEMENT_NOT_FOUND) {

View File

@ -270,7 +270,7 @@ namespace triagens {
TRI_HashArrayMulti_t; TRI_HashArrayMulti_t;
union { union {
TRI_hash_array_t _hashArray; // the hash array itself, unique values TRI_hash_array_t* _hashArray; // the hash array itself, unique values
struct { struct {
TRI_HashArrayMulti_t* _hashArray; // the hash array itself, non-unique values TRI_HashArrayMulti_t* _hashArray; // the hash array itself, non-unique values
HashElementFunc* _hashElement; // hash function for elements HashElementFunc* _hashElement; // hash function for elements

View File

@ -117,6 +117,15 @@ struct TRI_index_element_t {
return new (space) TRI_index_element_t(); return new (space) TRI_index_element_t();
} }
////////////////////////////////////////////////////////////////////////////////
/// @brief computes the byte size of one index element holding numSubs
/// sub-objects: one TRI_doc_mptr_t pointer plus numSubs entries of
/// TRI_shaped_sub_t
////////////////////////////////////////////////////////////////////////////////
static size_t memoryUsage (size_t numSubs) {
  // fixed part: the pointer to the document
  size_t const pointerBytes = sizeof(TRI_doc_mptr_t*);
  // variable part: one TRI_shaped_sub_t per indexed sub-object
  size_t const subObjectBytes = sizeof(TRI_shaped_sub_t) * numSubs;
  return pointerBytes + subObjectBytes;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief Free the index element. /// @brief Free the index element.
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////