1
0
Fork 0

Use multiple buckets in AssocMulti hash.

This commit is contained in:
Max Neunhoeffer 2015-05-17 11:32:58 -07:00
parent fe7eb0d1ad
commit ead85dfd7c
1 changed files with 325 additions and 255 deletions

View File

@ -121,6 +121,7 @@ namespace triagens {
// list of all items with the same key // list of all items with the same key
}; };
struct Bucket {
IndexType _nrAlloc; // the size of the table IndexType _nrAlloc; // the size of the table
IndexType _nrUsed; // the number of used entries IndexType _nrUsed; // the number of used entries
IndexType _nrCollisions; // the number of entries that have IndexType _nrCollisions; // the number of entries that have
@ -128,6 +129,16 @@ namespace triagens {
Entry* _table; // the table itself Entry* _table; // the table itself
Bucket () : _nrAlloc(0), _nrUsed(0), _nrCollisions(0),
_table(nullptr) {
}
// Intentionally no destructor, the AssocMulti class takes
// care of freeing the tables!
};
std::vector<Bucket> _buckets;
size_t _bucketsMask;
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
uint64_t _nrFinds; // statistics: number of lookup calls uint64_t _nrFinds; // statistics: number of lookup calls
uint64_t _nrAdds; // statistics: number of insert calls uint64_t _nrAdds; // statistics: number of insert calls
@ -162,11 +173,8 @@ namespace triagens {
IsEqualKeyElementFuncType isEqualKeyElement, IsEqualKeyElementFuncType isEqualKeyElement,
IsEqualElementElementFuncType isEqualElementElement, IsEqualElementElementFuncType isEqualElementElement,
IsEqualElementElementFuncType isEqualElementElementByKey, IsEqualElementElementFuncType isEqualElementElementByKey,
IndexType initialSize = 64) size_t numberBuckets = 1,
: _nrAlloc(initialSize), IndexType initialSize = 64) :
_nrUsed(0),
_nrCollisions(0),
_table(nullptr),
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrFinds(0), _nrAdds(0), _nrRems(0), _nrResizes(0), _nrFinds(0), _nrAdds(0), _nrRems(0), _nrResizes(0),
_nrProbes(0), _nrProbesF(0), _nrProbesD(0), _nrProbes(0), _nrProbesF(0), _nrProbesD(0),
@ -177,16 +185,36 @@ namespace triagens {
_isEqualElementElement(isEqualElementElement), _isEqualElementElement(isEqualElementElement),
_isEqualElementElementByKey(isEqualElementElementByKey) { _isEqualElementElementByKey(isEqualElementElementByKey) {
// Round the requested number of buckets down to a power of two (a
// request of 0 yields 1 bucket), so that a bucket can be selected by
// masking the key hash with _bucketsMask:
size_t nr = 1;
numberBuckets >>= 1;
while (numberBuckets > 0) {
  numberBuckets >>= 1;
  nr <<= 1;
}
numberBuckets = nr;
_bucketsMask = nr - 1;

try {
  // one allocation for the bucket headers instead of repeated growth
  _buckets.reserve(numberBuckets);
  for (size_t j = 0; j < numberBuckets; j++) {
    _buckets.emplace_back();
    Bucket& b = _buckets.back();
    b._nrAlloc = initialSize;
    b._table = new Entry[b._nrAlloc];
    for (IndexType i = 0; i < b._nrAlloc; i++) {
      invalidateEntry(b, i);
    }
  }
}
catch (...) {
  // The destructor is not run for an object whose constructor throws,
  // and Bucket intentionally has no destructor, so the tables that were
  // already allocated must be released here. A bucket whose allocation
  // failed still has _table == nullptr, for which delete [] is a no-op.
  for (auto& b : _buckets) {
    delete [] b._table;
    b._table = nullptr;
    b._nrAlloc = 0;
  }
  throw;
}
} }
@ -196,9 +224,11 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
~AssocMulti () {
  // Bucket deliberately has no destructor of its own, so the table of
  // every bucket is released here.
  for (Bucket& bucket : _buckets) {
    if (nullptr == bucket._table) {
      continue;
    }
    delete [] bucket._table;
    bucket._table = nullptr;
  }
}
@ -212,7 +242,14 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
size_t memoryUsage () const { size_t memoryUsage () const {
return static_cast<size_t> (_nrAlloc * sizeof(Entry)); size_t res = 0;
size_t count = 0;
for (auto& b : _buckets) {
res += static_cast<size_t> (b._nrAlloc) * sizeof(Entry);
std::cout << "Bucket: " << count++ << " _nrAlloc=" << b._nrAlloc
<< " _nrUsed=" << b._nrUsed << std::endl;
}
return res;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -220,7 +257,11 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
size_t size () const { size_t size () const {
return static_cast<size_t>(_nrUsed); size_t res = 0;
for (auto& b : _buckets) {
res += static_cast<size_t>(b._nrUsed);
}
return res;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -228,7 +269,11 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
size_t capacity () const { size_t capacity () const {
return static_cast<size_t>(_nrAlloc); size_t res = 0;
for (auto& b : _buckets) {
res += static_cast<size_t>(b._nrAlloc);
}
return res;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -236,8 +281,8 @@ namespace triagens {
/// this may return a nullptr /// this may return a nullptr
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
Element* at (Bucket& b, size_t position) const {
  // No bounds check is performed on position; the stored pointer of the
  // slot is returned as is.
  Entry const& slot = b._table[position];
  return slot.ptr;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -259,9 +304,13 @@ namespace triagens {
check(true, true); check(true, true);
#endif #endif
// compute the hash by the key only first
uint64_t hashByKey = _hashElement(element, true);
Bucket& b = _buckets[hashByKey & _bucketsMask];
// if we were adding and the table is more than 2/3 full, extend it // if we were adding and the table is more than 2/3 full, extend it
if (2 * _nrAlloc < 3 * _nrUsed) { if (2 * b._nrAlloc < 3 * b._nrUsed) {
resizeInternal(2 * _nrAlloc + 1); resizeInternal(b, 2 * b._nrAlloc + 1);
} }
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
@ -269,15 +318,13 @@ namespace triagens {
_nrAdds++; _nrAdds++;
#endif #endif
// compute the hash by the key only first
uint64_t hashByKey = _hashElement(element, true);
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
// If this slot is free, just use it: // If this slot is free, just use it:
if (nullptr == _table[i].ptr) { if (nullptr == b._table[i].ptr) {
_table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX }; b._table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX };
_nrUsed++; b._nrUsed++;
// no collision generated here! // no collision generated here!
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -287,12 +334,12 @@ namespace triagens {
// Now find the first slot with an entry with the same key // Now find the first slot with an entry with the same key
// that is the start of a linked list, or a free slot: // that is the start of a linked list, or a free slot:
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(_table[i].prev != INVALID_INDEX || (b._table[i].prev != INVALID_INDEX ||
_table[i].hashCache != hashByKey || b._table[i].hashCache != hashByKey ||
! _isEqualElementElementByKey(element, _table[i].ptr)) ! _isEqualElementElementByKey(element, b._table[i].ptr))
) { ) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
// update statistics // update statistics
_ProbesA++; _ProbesA++;
@ -301,9 +348,9 @@ namespace triagens {
} }
// If this is free, we are the first with this key: // If this is free, we are the first with this key:
if (nullptr == _table[i].ptr) { if (nullptr == b._table[i].ptr) {
_table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX }; b._table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX };
_nrUsed++; b._nrUsed++;
// no collision generated here either! // no collision generated here either!
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -315,11 +362,11 @@ namespace triagens {
// list of which we want to make element a member. Perhaps an // list of which we want to make element a member. Perhaps an
// equal element is right here: // equal element is right here:
if (checkEquality && if (checkEquality &&
_isEqualElementElement(element, _table[i].ptr)) { _isEqualElementElement(element, b._table[i].ptr)) {
old = _table[i].ptr; old = b._table[i].ptr;
if (overwrite) { if (overwrite) {
TRI_ASSERT(_table[i].hashCache == hashByKey); TRI_ASSERT(b._table[i].hashCache == hashByKey);
_table[i].ptr = element; b._table[i].ptr = element;
} }
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -329,15 +376,15 @@ namespace triagens {
// Now find a new home for element in this linked list: // Now find a new home for element in this linked list:
uint64_t hashByElm; uint64_t hashByElm;
IndexType j = findElementPlace(element, checkEquality, hashByElm); IndexType j = findElementPlace(b, element, checkEquality, hashByElm);
old = _table[j].ptr; old = b._table[j].ptr;
// if we found an element, return // if we found an element, return
if (old != nullptr) { if (old != nullptr) {
if (overwrite) { if (overwrite) {
_table[j].hashCache = hashByElm; b._table[j].hashCache = hashByElm;
_table[j].ptr = element; b._table[j].ptr = element;
} }
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -346,14 +393,14 @@ namespace triagens {
} }
// add a new element to the associative array and linked list (in pos 2): // add a new element to the associative array and linked list (in pos 2):
_table[j] = { hashByElm, element, _table[i].next, i }; b._table[j] = { hashByElm, element, b._table[i].next, i };
_table[i].next = j; b._table[i].next = j;
// Finally, we need to find the successor to patch it up: // Finally, we need to find the successor to patch it up:
if (_table[j].next != INVALID_INDEX) { if (b._table[j].next != INVALID_INDEX) {
_table[_table[j].next].prev = j; b._table[b._table[j].next].prev = j;
} }
_nrUsed++; b._nrUsed++;
_nrCollisions++; b._nrCollisions++;
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -369,7 +416,7 @@ namespace triagens {
private: private:
void insertFirst (Element* element, uint64_t hashByKey) { void insertFirst (Bucket& b, Element* element, uint64_t hashByKey) {
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -381,12 +428,12 @@ namespace triagens {
#endif #endif
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
// If this slot is free, just use it: // If this slot is free, just use it:
if (nullptr == _table[i].ptr) { if (nullptr == b._table[i].ptr) {
_table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX }; b._table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX };
_nrUsed++; b._nrUsed++;
// no collision generated here! // no collision generated here!
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -395,8 +442,8 @@ namespace triagens {
// Now find the first slot with an entry with the same key // Now find the first slot with an entry with the same key
// that is the start of a linked list, or a free slot: // that is the start of a linked list, or a free slot:
while (_table[i].ptr != nullptr) { while (b._table[i].ptr != nullptr) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
// update statistics // update statistics
_ProbesA++; _ProbesA++;
@ -404,8 +451,8 @@ namespace triagens {
} }
// We are the first with this key: // We are the first with this key:
_table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX }; b._table[i] = { hashByKey, element, INVALID_INDEX, INVALID_INDEX };
_nrUsed++; b._nrUsed++;
// no collision generated here either! // no collision generated here either!
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -419,7 +466,7 @@ namespace triagens {
/// example the case when resizing. /// example the case when resizing.
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void insertFurther (Element* element, void insertFurther (Bucket& b, Element* element,
uint64_t hashByKey, uint64_t hashByElm) { uint64_t hashByKey, uint64_t hashByElm) {
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -432,18 +479,18 @@ namespace triagens {
// We need the beginning of the doubly linked list: // We need the beginning of the doubly linked list:
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
TRI_ASSERT(nullptr != _table[i].ptr); TRI_ASSERT(nullptr != b._table[i].ptr);
// Find the first slot with an entry with the same key // Find the first slot with an entry with the same key
// that is the start of a linked list, or a free slot: // that is the start of a linked list, or a free slot:
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(_table[i].prev != INVALID_INDEX || (b._table[i].prev != INVALID_INDEX ||
_table[i].hashCache != hashByKey || b._table[i].hashCache != hashByKey ||
! _isEqualElementElementByKey(element, _table[i].ptr)) ! _isEqualElementElementByKey(element, b._table[i].ptr))
) { ) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
// update statistics // update statistics
_ProbesA++; _ProbesA++;
@ -452,31 +499,31 @@ namespace triagens {
} }
// If this is free, we are the first with this key, a contradiction: // If this is free, we are the first with this key, a contradiction:
TRI_ASSERT(nullptr != _table[i].ptr); TRI_ASSERT(nullptr != b._table[i].ptr);
// Now, entry i points to the beginning of the linked // Now, entry i points to the beginning of the linked
// list of which we want to make element a member. // list of which we want to make element a member.
// Now find a new home for element in this linked list: // Now find a new home for element in this linked list:
hashIndex = hashToIndex(hashByElm); hashIndex = hashToIndex(hashByElm);
IndexType j = hashIndex % _nrAlloc; IndexType j = hashIndex % b._nrAlloc;
while (_table[j].ptr != nullptr) { while (b._table[j].ptr != nullptr) {
j = incr(j); j = incr(b, j);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbes++; _nrProbes++;
#endif #endif
} }
// add the element to the hash and linked list (in pos 2): // add the element to the hash and linked list (in pos 2):
_table[j] = { hashByElm, element, _table[i].next, i }; b._table[j] = { hashByElm, element, b._table[i].next, i };
_table[i].next = j; b._table[i].next = j;
// Finally, we need to find the successor to patch it up: // Finally, we need to find the successor to patch it up:
if (_table[j].next != INVALID_INDEX) { if (b._table[j].next != INVALID_INDEX) {
_table[_table[j].next].prev = j; b._table[b._table[j].next].prev = j;
} }
_nrUsed++; b._nrUsed++;
_nrCollisions++; b._nrCollisions++;
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
@ -497,8 +544,9 @@ namespace triagens {
_nrFinds++; _nrFinds++;
#endif #endif
i = lookupByElement(element); Bucket* b;
return _table[i].ptr; i = lookupByElement(element, b);
return b->_table[i].ptr;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -512,8 +560,9 @@ namespace triagens {
// compute the hash // compute the hash
uint64_t hashByKey = _hashKey(key); uint64_t hashByKey = _hashKey(key);
Bucket const& b = _buckets[hashByKey & _bucketsMask];
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
// update statistics // update statistics
@ -521,23 +570,23 @@ namespace triagens {
#endif #endif
// search the table // search the table
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(_table[i].prev != INVALID_INDEX || (b._table[i].prev != INVALID_INDEX ||
_table[i].hashCache != hashByKey || b._table[i].hashCache != hashByKey ||
! _isEqualKeyElement(key, _table[i].ptr)) ! _isEqualKeyElement(key, b._table[i].ptr))
) { ) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbesF++; _nrProbesF++;
#endif #endif
} }
if (_table[i].ptr != nullptr) { if (b._table[i].ptr != nullptr) {
// We found the beginning of the linked list: // We found the beginning of the linked list:
do { do {
result->push_back(_table[i].ptr); result->push_back(b._table[i].ptr);
i = _table[i].next; i = b._table[i].next;
} }
while (i != INVALID_INDEX && while (i != INVALID_INDEX &&
(limit == 0 || result->size() < limit)); (limit == 0 || result->size() < limit));
@ -559,8 +608,9 @@ namespace triagens {
// compute the hash // compute the hash
uint64_t hashByKey = _hashElement(element, true); uint64_t hashByKey = _hashElement(element, true);
Bucket const& b = _buckets[hashByKey & _bucketsMask];
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
// update statistics // update statistics
@ -568,23 +618,23 @@ namespace triagens {
#endif #endif
// search the table // search the table
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(_table[i].prev != INVALID_INDEX || (b._table[i].prev != INVALID_INDEX ||
_table[i].hashCache != hashByKey || b._table[i].hashCache != hashByKey ||
! _isEqualElementElementByKey(element, _table[i].ptr)) ! _isEqualElementElementByKey(element, b._table[i].ptr))
) { ) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbesF++; _nrProbesF++;
#endif #endif
} }
if (_table[i].ptr != nullptr) { if (b._table[i].ptr != nullptr) {
// We found the beginning of the linked list: // We found the beginning of the linked list:
do { do {
result->push_back(_table[i].ptr); result->push_back(b._table[i].ptr);
i = _table[i].next; i = b._table[i].next;
} }
while (i != INVALID_INDEX && while (i != INVALID_INDEX &&
(limit == 0 || result->size() < limit)); (limit == 0 || result->size() < limit));
@ -605,20 +655,22 @@ namespace triagens {
std::unique_ptr<std::vector<Element*>> result std::unique_ptr<std::vector<Element*>> result
(new std::vector<Element*>()); (new std::vector<Element*>());
uint64_t hashByKey = _hashElement(element, true);
Bucket const& b = _buckets[hashByKey & _bucketsMask];
uint64_t hashByElm; uint64_t hashByElm;
IndexType i = findElementPlace(element, true, hashByElm); IndexType i = findElementPlace(b, element, true, hashByElm);
if (_table[i].ptr == nullptr) { if (b._table[i].ptr == nullptr) {
return nullptr; return nullptr;
} }
// compute the hash // compute the hash
// continue search of the table // continue search of the table
while (true) { while (true) {
i = _table[i].next; i = b._table[i].next;
if (i == INVALID_INDEX || (limit != 0 && result->size() >= limit)) { if (i == INVALID_INDEX || (limit != 0 && result->size() >= limit)) {
break; break;
} }
result->push_back(_table[i].ptr); result->push_back(b._table[i].ptr);
} }
// return whatever we found // return whatever we found
@ -651,56 +703,57 @@ namespace triagens {
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
#endif #endif
IndexType i = lookupByElement(element); Bucket* b;
if (_table[i].ptr == nullptr) { IndexType i = lookupByElement(element, b);
if (b->_table[i].ptr == nullptr) {
return nullptr; return nullptr;
} }
Element* old = _table[i].ptr; Element* old = b->_table[i].ptr;
// We have to delete entry i // We have to delete entry i
if (_table[i].prev == INVALID_INDEX) { if (b->_table[i].prev == INVALID_INDEX) {
// This is the first in its linked list. // This is the first in its linked list.
j = _table[i].next; j = b->_table[i].next;
if (j == INVALID_INDEX) { if (j == INVALID_INDEX) {
// The only one in its linked list, simply remove it and heal // The only one in its linked list, simply remove it and heal
// the hole: // the hole:
invalidateEntry(i); invalidateEntry(*b, i);
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(false, false); check(false, false);
#endif #endif
healHole(i); healHole(*b, i);
// this element did not create a collision // this element did not create a collision
} }
else { else {
// There is at least one successor in position j. // There is at least one successor in position j.
_table[j].prev = INVALID_INDEX; b->_table[j].prev = INVALID_INDEX;
moveEntry(j, i); moveEntry(*b, j, i);
// We need to exchange the hashCache value by that of the key: // We need to exchange the hashCache value by that of the key:
_table[i].hashCache = _hashElement(_table[i].ptr, true); b->_table[i].hashCache = _hashElement(b->_table[i].ptr, true);
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(false, false); check(false, false);
#endif #endif
healHole(j); healHole(*b, j);
_nrCollisions--; // one collision less b->_nrCollisions--; // one collision less
} }
} }
else { else {
// This one is not the first in its linked list // This one is not the first in its linked list
j = _table[i].prev; j = b->_table[i].prev;
_table[j].next = _table[i].next; b->_table[j].next = b->_table[i].next;
j = _table[i].next; j = b->_table[i].next;
if (j != INVALID_INDEX) { if (j != INVALID_INDEX) {
// We are not the last in the linked list. // We are not the last in the linked list.
_table[j].prev = _table[i].prev; b->_table[j].prev = b->_table[i].prev;
} }
invalidateEntry(i); invalidateEntry(*b, i);
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(false, false); check(false, false);
#endif #endif
healHole(i); healHole(*b, i);
_nrCollisions--; b->_nrCollisions--;
} }
_nrUsed--; b->_nrUsed--;
#ifdef TRI_CHECK_MULTI_POINTER_HASH #ifdef TRI_CHECK_MULTI_POINTER_HASH
check(true, true); check(true, true);
#endif #endif
@ -713,16 +766,19 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int resize (IndexType size) throw() {
  // size is treated as a total over all buckets: every bucket gets an
  // equal share of it.
  size /= _buckets.size();

  // Validate all buckets before touching any of them, so that
  // TRI_ERROR_BAD_PARAMETER is never returned after some buckets have
  // already been resized.
  for (auto const& b : _buckets) {
    if (2 * (2*size+1) < 3 * b._nrUsed) {
      return TRI_ERROR_BAD_PARAMETER;
    }
  }

  try {
    for (auto& b : _buckets) {
      resizeInternal(b, 2*size+1);
    }
  }
  catch (...) {
    // any exception from resizeInternal is reported as out of memory
    return TRI_ERROR_OUT_OF_MEMORY;
  }
  return TRI_ERROR_NO_ERROR;
}
@ -734,8 +790,14 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
double selectivity () { double selectivity () {
return _nrUsed > 0 ? size_t nrUsed = 0;
(_nrUsed - _nrCollisions) / _nrUsed : size_t nrCollisions = 0;
for (auto& b : _buckets) {
nrUsed += b._nrUsed;
nrCollisions += b._nrCollisions;
}
return nrUsed > 0 ?
(nrUsed - nrCollisions) / nrUsed :
1.0; 1.0;
} }
@ -745,9 +807,11 @@ namespace triagens {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void iterate (std::function<void(Element*)> callback) {
  // Visit every slot of every bucket and hand each non-null element
  // pointer to the callback.
  for (Bucket& bucket : _buckets) {
    IndexType pos = 0;
    while (pos < bucket._nrAlloc) {
      Element* elm = bucket._table[pos].ptr;
      if (elm != nullptr) {
        callback(elm);
      }
      ++pos;
    }
  }
}
@ -762,39 +826,39 @@ namespace triagens {
/// @brief increment IndexType by 1 modulo _nrAlloc: /// @brief increment IndexType by 1 modulo _nrAlloc:
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
inline IndexType incr (Bucket const& b, IndexType i) const {
  // step to the next slot, wrapping around at the end of the bucket's table
  ++i;
  IndexType const wrapped = i - b._nrAlloc;
  if (i < b._nrAlloc) {
    return i;
  }
  return wrapped;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief resize the array, internal method /// @brief resize the array, internal method
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void resizeInternal (IndexType size) { void resizeInternal (Bucket& b, IndexType size) {
LOG_ACTION("edge-index-resize, target size: %llu", LOG_ACTION("edge-index-resize, target size: %llu",
(unsigned long long) size); (unsigned long long) size);
double start = TRI_microtime(); double start = TRI_microtime();
Entry* oldTable = _table; Entry* oldTable = b._table;
IndexType oldAlloc = _nrAlloc; IndexType oldAlloc = b._nrAlloc;
_nrAlloc = TRI_NearPrime(size); b._nrAlloc = TRI_NearPrime(size);
try { try {
_table = new Entry[_nrAlloc]; b._table = new Entry[b._nrAlloc];
IndexType i; IndexType i;
for (i = 0; i < _nrAlloc; i++) { for (i = 0; i < b._nrAlloc; i++) {
invalidateEntry(i); invalidateEntry(b, i);
} }
} }
catch (...) { catch (...) {
_nrAlloc = oldAlloc; b._nrAlloc = oldAlloc;
_table = oldTable; b._table = oldTable;
throw; throw;
} }
_nrUsed = 0; b._nrUsed = 0;
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrResizes++; _nrResizes++;
#endif #endif
@ -805,7 +869,7 @@ namespace triagens {
if (oldTable[j].ptr != nullptr && if (oldTable[j].ptr != nullptr &&
oldTable[j].prev == INVALID_INDEX) { oldTable[j].prev == INVALID_INDEX) {
// This is a "first" one in its doubly linked list: // This is a "first" one in its doubly linked list:
insertFirst(oldTable[j].ptr, oldTable[j].hashCache); insertFirst(b, oldTable[j].ptr, oldTable[j].hashCache);
uint64_t hashByKey = oldTable[j].hashCache; uint64_t hashByKey = oldTable[j].hashCache;
// Now walk to the end of the list: // Now walk to the end of the list:
IndexType k = j; IndexType k = j;
@ -814,7 +878,7 @@ namespace triagens {
} }
// Now insert all of them backwards, not repeating k: // Now insert all of them backwards, not repeating k:
while (k != j) { while (k != j) {
insertFurther(oldTable[k].ptr, hashByKey, insertFurther(b, oldTable[k].ptr, hashByKey,
oldTable[k].hashCache); oldTable[k].hashCache);
k = oldTable[k].prev; k = oldTable[k].prev;
} }
@ -836,29 +900,30 @@ namespace triagens {
bool check (bool checkCount, bool checkPositions) const { bool check (bool checkCount, bool checkPositions) const {
std::cout << "Performing AssocMulti check " << checkCount std::cout << "Performing AssocMulti check " << checkCount
<< checkPositions << std::endl; << checkPositions << std::endl;
for (auto& b : _buckets) {
IndexType i, ii, j, k; IndexType i, ii, j, k;
bool ok = true; bool ok = true;
IndexType count = 0; IndexType count = 0;
for (i = 0;i < _nrAlloc;i++) { for (i = 0;i < b._nrAlloc;i++) {
if (_table[i].ptr != nullptr) { if (b._table[i].ptr != nullptr) {
count++; count++;
if (_table[i].prev != INVALID_INDEX) { if (b._table[i].prev != INVALID_INDEX) {
if (_table[_table[i].prev].next != i) { if (b._table[b._table[i].prev].next != i) {
std::cout << "Alarm prev " << i << std::endl; std::cout << "Alarm prev " << i << std::endl;
ok = false; ok = false;
} }
} }
if (_table[i].next != INVALID_INDEX) { if (b._table[i].next != INVALID_INDEX) {
if (_table[_table[i].next].prev != i) { if (b._table[b._table[i].next].prev != i) {
std::cout << "Alarm next " << i << std::endl; std::cout << "Alarm next " << i << std::endl;
ok = false; ok = false;
} }
} }
ii = i; ii = i;
j = _table[ii].next; j = b._table[ii].next;
while (j != INVALID_INDEX) { while (j != INVALID_INDEX) {
if (j == i) { if (j == i) {
std::cout << "Alarm cycle " << i << std::endl; std::cout << "Alarm cycle " << i << std::endl;
@ -866,55 +931,56 @@ namespace triagens {
break; break;
} }
ii = j; ii = j;
j = _table[ii].next; j = b._table[ii].next;
} }
} }
} }
if (checkCount && count != _nrUsed) { if (checkCount && count != b._nrUsed) {
std::cout << "Alarm _nrUsed wrong " << _nrUsed << " != " std::cout << "Alarm _nrUsed wrong " << b._nrUsed << " != "
<< count << "!" << std::endl; << count << "!" << std::endl;
ok = false; ok = false;
} }
if (checkPositions) { if (checkPositions) {
for (i = 0;i < _nrAlloc;i++) { for (i = 0;i < b._nrAlloc;i++) {
if (_table[i].ptr != nullptr) { if (b._table[i].ptr != nullptr) {
IndexType hashIndex; IndexType hashIndex;
if (_table[i].prev == INVALID_INDEX) { if (b._table[i].prev == INVALID_INDEX) {
// We are the first in a linked list. // We are the first in a linked list.
uint64_t hashByKey = _hashElement(_table[i].ptr, true); uint64_t hashByKey = _hashElement(b._table[i].ptr, true);
hashIndex = hashToIndex(hashByKey); hashIndex = hashToIndex(hashByKey);
j = hashIndex % _nrAlloc; j = hashIndex % b._nrAlloc;
if (_table[i].hashCache != hashByKey) { if (b._table[i].hashCache != hashByKey) {
std::cout << "Alarm hashCache wrong " << i << std::endl; std::cout << "Alarm hashCache wrong " << i << std::endl;
} }
for (k = j; k != i; ) { for (k = j; k != i; ) {
if (_table[k].ptr == nullptr || if (b._table[k].ptr == nullptr ||
(_table[k].prev == INVALID_INDEX && (b._table[k].prev == INVALID_INDEX &&
_isEqualElementElementByKey(_table[i].ptr, _isEqualElementElementByKey(b._table[i].ptr,
_table[k].ptr))) { b._table[k].ptr))) {
ok = false; ok = false;
std::cout << "Alarm pos bykey: " << i << std::endl; std::cout << "Alarm pos bykey: " << i << std::endl;
} }
k = incr(k); k = incr(b, k);
} }
} }
else { else {
// We are not the first in a linked list. // We are not the first in a linked list.
uint64_t hashByElm = _hashElement(_table[i].ptr, false); uint64_t hashByElm = _hashElement(b._table[i].ptr, false);
hashIndex = hashToIndex(hashByElm); hashIndex = hashToIndex(hashByElm);
j = hashIndex % _nrAlloc; j = hashIndex % b._nrAlloc;
if (_table[i].hashCache != hashByElm) { if (b._table[i].hashCache != hashByElm) {
std::cout << "Alarm hashCache wrong " << i << std::endl; std::cout << "Alarm hashCache wrong " << i << std::endl;
} }
for (k = j; k != i; ) { for (k = j; k != i; ) {
if (_table[k].ptr == nullptr || if (b._table[k].ptr == nullptr ||
_isEqualElementElement(_table[i].ptr, _isEqualElementElement(b._table[i].ptr,
_table[k].ptr)) { b._table[k].ptr)) {
ok = false; ok = false;
std::cout << "Alarm unique: " << k << ", " std::cout << "Alarm unique: " << k << ", "
<< i << std::endl; << i << std::endl;
} }
k = incr(k); k = incr(b, k);
}
} }
} }
} }
@ -932,7 +998,8 @@ namespace triagens {
/// @brief find an element or its place using the element hash function /// @brief find an element or its place using the element hash function
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
inline IndexType findElementPlace (Element const* element, inline IndexType findElementPlace (Bucket const& b,
Element const* element,
bool checkEquality, bool checkEquality,
uint64_t& hashByElm) const { uint64_t& hashByElm) const {
@ -946,13 +1013,13 @@ namespace triagens {
hashByElm = _hashElement(element, false); hashByElm = _hashElement(element, false);
IndexType hashindex = hashToIndex(hashByElm); IndexType hashindex = hashToIndex(hashByElm);
IndexType i = hashindex % _nrAlloc; IndexType i = hashindex % b._nrAlloc;
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(! checkEquality || (! checkEquality ||
_table[i].hashCache != hashByElm || b._table[i].hashCache != hashByElm ||
! _isEqualElementElement(element, _table[i].ptr))) { ! _isEqualElementElement(element, b._table[i].ptr))) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbes++; _nrProbes++;
#endif #endif
@ -964,35 +1031,38 @@ namespace triagens {
/// @brief find an element or its place by key or element identity /// @brief find an element or its place by key or element identity
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
IndexType lookupByElement (Element const* element) const { IndexType lookupByElement (Element const* element,
Bucket*& buck) const {
// This performs a complete lookup for an element. It returns a slot // This performs a complete lookup for an element. It returns a slot
// number. This slot is either empty or contains an element that // number. This slot is either empty or contains an element that
// compares equal to element. // compares equal to element.
uint64_t hashByKey = _hashElement(element, true); uint64_t hashByKey = _hashElement(element, true);
Bucket const& b = _buckets[hashByKey & _bucketsMask];
buck = const_cast<Bucket*>(&b);
IndexType hashIndex = hashToIndex(hashByKey); IndexType hashIndex = hashToIndex(hashByKey);
IndexType i = hashIndex % _nrAlloc; IndexType i = hashIndex % b._nrAlloc;
// Now find the first slot with an entry with the same key // Now find the first slot with an entry with the same key
// that is the start of a linked list, or a free slot: // that is the start of a linked list, or a free slot:
while (_table[i].ptr != nullptr && while (b._table[i].ptr != nullptr &&
(_table[i].prev != INVALID_INDEX || (b._table[i].prev != INVALID_INDEX ||
_table[i].hashCache != hashByKey || b._table[i].hashCache != hashByKey ||
! _isEqualElementElementByKey(element, _table[i].ptr))) { ! _isEqualElementElementByKey(element, b._table[i].ptr))) {
i = incr(i); i = incr(b, i);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbes++; _nrProbes++;
#endif #endif
} }
if (_table[i].ptr != nullptr) { if (b._table[i].ptr != nullptr) {
// It might be right here! // It might be right here!
if (_isEqualElementElement(element, _table[i].ptr)) { if (_isEqualElementElement(element, b._table[i].ptr)) {
return i; return i;
} }
// Now we have to look for it in its hash position: // Now we have to look for it in its hash position:
uint64_t hashByElm; uint64_t hashByElm;
IndexType j = findElementPlace(element, true, hashByElm); IndexType j = findElementPlace(b, element, true, hashByElm);
// We have either found an equal element or nothing: // We have either found an equal element or nothing:
return j; return j;
@ -1022,50 +1092,50 @@ namespace triagens {
/// @brief helper to invalidate a slot /// @brief helper to invalidate a slot
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Marks slot i of the table as unused by overwriting the whole entry.
// The element pointer becomes nullptr, which the probing loops in this
// file test to recognise a free slot, and both linked-list links are set
// to INVALID_INDEX. The leading 0 is presumably the cached hash value
// (field order of Entry is not visible here -- confirm against the
// struct definition).
// Neighbouring entries are not touched: any prev/next links that still
// point at slot i must be fixed by the caller.
inline void invalidateEntry (IndexType i) { inline void invalidateEntry (Bucket& b, IndexType i) {
_table[i] = { 0, nullptr, INVALID_INDEX, INVALID_INDEX }; b._table[i] = { 0, nullptr, INVALID_INDEX, INVALID_INDEX };
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief helper to move an entry from one slot to another /// @brief helper to move an entry from one slot to another
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Relocates the entry stored in slot `from` to slot `to` within the same
// table and keeps the doubly linked list it belongs to consistent.
//   from - slot currently holding the entry; may be any member of a
//          linked list (head, middle or tail)
//   to   - destination slot; must be unused, its old content is
//          overwritten without any check
inline void moveEntry (IndexType from, IndexType to) { inline void moveEntry (Bucket& b, IndexType from, IndexType to) {
// Moves an entry and adjusts the linked lists, but does not take care
// of the hole left behind at `from`: that slot is only invalidated, so
// repairing the probe sequence is up to the caller.
// Copy the complete entry (including its prev/next links) first ...
_table[to] = _table[from]; b._table[to] = b._table[from];
// ... then redirect the predecessor's forward link to the new slot,
// if there is a predecessor ...
if (_table[to].prev != INVALID_INDEX) { if (b._table[to].prev != INVALID_INDEX) {
_table[_table[to].prev].next = to; b._table[b._table[to].prev].next = to;
} }
// ... and the successor's backward link, if there is a successor.
if (_table[to].next != INVALID_INDEX) { if (b._table[to].next != INVALID_INDEX) {
_table[_table[to].next].prev = to; b._table[b._table[to].next].prev = to;
} }
// Finally mark the source slot as free.
invalidateEntry(from); invalidateEntry(b, from);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief helper to heal a hole where we deleted something /// @brief helper to heal a hole where we deleted something
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void healHole (IndexType i) { void healHole (Bucket& b, IndexType i) {
IndexType j = incr(i); IndexType j = incr(b, i);
while (_table[j].ptr != nullptr) { while (b._table[j].ptr != nullptr) {
// Find out where this element ought to be: // Find out where this element ought to be:
// If it is the start of one of the linked lists, we need to hash // If it is the start of one of the linked lists, we need to hash
// by key, otherwise, we hash by the full identity of the element: // by key, otherwise, we hash by the full identity of the element:
uint64_t hash = _hashElement(_table[j].ptr, uint64_t hash = _hashElement(b._table[j].ptr,
_table[j].prev == INVALID_INDEX); b._table[j].prev == INVALID_INDEX);
IndexType hashIndex = hashToIndex(hash); IndexType hashIndex = hashToIndex(hash);
IndexType k = hashIndex % _nrAlloc; IndexType k = hashIndex % b._nrAlloc;
if (! isBetween(i, k, j)) { if (! isBetween(i, k, j)) {
// we have to move j to i: // we have to move j to i:
moveEntry(j, i); moveEntry(b, j, i);
i = j; // Now heal this hole at j, i = j; // Now heal this hole at j,
// j will be incremented right away // j will be incremented right away
} }
j = incr(j); j = incr(b, j);
#ifdef TRI_INTERNAL_STATS #ifdef TRI_INTERNAL_STATS
_nrProbesD++; _nrProbesD++;
#endif #endif