
optimized memory usage of multi hash index

Jan Steemann 2014-10-11 18:38:22 +02:00
parent fefd527b9f
commit ce8ac10f59
5 changed files with 249 additions and 65 deletions

View File

@ -126,16 +126,32 @@ static inline size_t GetBlockSize (size_t blockNumber) {
return (size_t) (BLOCK_SIZE_UNIT << blockNumber);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the size of a single entry
////////////////////////////////////////////////////////////////////////////////
static inline size_t TableEntrySize () {
return sizeof(TRI_hash_index_element_multi_t);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the size of a single overflow entry
////////////////////////////////////////////////////////////////////////////////
static inline size_t OverflowEntrySize () {
return sizeof(TRI_hash_index_element_overflow_t);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a storage location from the freelist
////////////////////////////////////////////////////////////////////////////////
static TRI_hash_index_element_multi_t* GetFromFreelist (TRI_hash_array_multi_t* array) {
static TRI_hash_index_element_overflow_t* GetFromFreelist (TRI_hash_array_multi_t* array) {
if (array->_freelist == nullptr) {
size_t blockSize = GetBlockSize(array->_blocks._length);
TRI_ASSERT(blockSize > 0);
auto begin = static_cast<TRI_hash_index_element_multi_t*>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, blockSize * sizeof(TRI_hash_index_element_multi_t), true));
auto begin = static_cast<TRI_hash_index_element_overflow_t*>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, blockSize * OverflowEntrySize(), true));
if (begin == nullptr) {
return nullptr;
@ -168,7 +184,7 @@ static TRI_hash_index_element_multi_t* GetFromFreelist (TRI_hash_array_multi_t*
////////////////////////////////////////////////////////////////////////////////
static void ReturnToFreelist (TRI_hash_array_multi_t* array,
TRI_hash_index_element_multi_t* element) {
TRI_hash_index_element_overflow_t* element) {
element->_document = nullptr;
element->_next = array->_freelist;
array->_freelist = element;
@ -189,6 +205,19 @@ static void DestroyElement (TRI_hash_array_multi_t* array,
element->_next = nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroys an element, removing any allocated memory
////////////////////////////////////////////////////////////////////////////////
static void DestroyElement (TRI_hash_array_multi_t* array,
TRI_hash_index_element_overflow_t* element) {
TRI_ASSERT_EXPENSIVE(element != nullptr);
TRI_ASSERT_EXPENSIVE(element->_document != nullptr);
element->_document = nullptr;
element->_next = nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief initial preallocation size of the hash table when the table is
/// first created
@ -200,18 +229,6 @@ static inline uint64_t InitialSize () {
return 251;
}
// -----------------------------------------------------------------------------
// --SECTION-- private functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief return the size of a single entry
////////////////////////////////////////////////////////////////////////////////
static inline size_t TableEntrySize () {
return sizeof(TRI_hash_index_element_multi_t);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief allocate memory for the hash table
///
@ -351,12 +368,17 @@ void TRI_DestroyHashArrayMulti (TRI_hash_array_multi_t* array) {
e = p + array->_nrAlloc;
for (; p < e; ++p) {
auto current = p;
if (p->_document != nullptr) {
// destroy overflow elements
auto current = p->_next;
while (current != nullptr) {
auto ptr = current->_next;
DestroyElement(array, current);
current = ptr;
}
while (current != nullptr && current->_document != nullptr) {
auto ptr = current->_next;
DestroyElement(array, current);
current = ptr;
// destroy the element itself
DestroyElement(array, p);
}
}
@ -397,10 +419,9 @@ size_t TRI_MemoryUsageHashArrayMulti (TRI_hash_array_multi_t const* array) {
size_t tableSize = (size_t) (array->_nrAlloc * TableEntrySize() + 64);
size_t memberSize = (size_t) (array->_nrUsed * array->_numFields * sizeof(TRI_shaped_sub_t));
size_t overflowUsed = (size_t) (array->_nrOverflowUsed * array->_numFields * sizeof(TRI_shaped_sub_t));
size_t overflowAlloc = (size_t) (array->_nrOverflowAlloc * TableEntrySize());
size_t overflowAlloc = (size_t) (array->_nrOverflowAlloc * OverflowEntrySize());
return (size_t) (tableSize + memberSize + overflowUsed + overflowAlloc);
return (size_t) (tableSize + memberSize + overflowAlloc);
}
////////////////////////////////////////////////////////////////////////////////
@ -409,7 +430,15 @@ size_t TRI_MemoryUsageHashArrayMulti (TRI_hash_array_multi_t const* array) {
int TRI_ResizeHashArrayMulti (TRI_hash_array_multi_t* array,
size_t size) {
return ResizeHashArray(array, (uint64_t) (2 * size + 1), false);
// use fewer than 1 table element per document
// we do this because we expect duplicate values, which are stored in the overflow
// items (which are allocated separately)
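// for example, a requested size of 1000 yields a targetSize of 750, which is
// then bumped to the odd value 751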
size_t targetSize = 0.75 * size;
if ((targetSize & 1) == 0) {
// make odd
targetSize++;
}
return ResizeHashArray(array, (uint64_t) targetSize, false);
}
// -----------------------------------------------------------------------------
@ -444,20 +473,17 @@ TRI_vector_pointer_t TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const
TRI_ASSERT_EXPENSIVE(i < n);
if (array->_table[i]._document != nullptr) {
auto current = &array->_table[i];
// add the element itself
TRI_PushBackVectorPointer(&result, array->_table[i]._document);
// add the overflow elements
auto current = array->_table[i]._next;
while (current != nullptr) {
if (IsEqualKeyElement(array, key, current)) {
TRI_PushBackVectorPointer(&result, current);
}
TRI_PushBackVectorPointer(&result, current->_document);
current = current->_next;
}
}
// ...........................................................................
// return whatever we found -- which could be an empty vector list if nothing
// matches.
// ...........................................................................
return result;
}
@ -476,8 +502,6 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array,
return TRI_ERROR_OUT_OF_MEMORY;
}
element->_next = nullptr;
uint64_t const n = array->_nrAlloc;
uint64_t i, k;
@ -504,7 +528,13 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array,
if (found) {
if (isRollback) {
auto current = arrayElement;
if (arrayElement->_document == element->_document) {
DestroyElement(array, element);
return TRI_RESULT_ELEMENT_EXISTS;
}
auto current = arrayElement->_next;
while (current != nullptr) {
if (current->_document == element->_document) {
DestroyElement(array, element);
@ -521,14 +551,22 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array,
return TRI_ERROR_OUT_OF_MEMORY;
}
element->_next = arrayElement->_next;
*ptr = *element;
// link our element at the list head
ptr->_document = element->_document;
ptr->_next = arrayElement->_next;
arrayElement->_next = ptr;
// it is ok to destroy the element here, because we have copied its internals before!
DestroyElement(array, element);
return TRI_ERROR_NO_ERROR;
}
TRI_ASSERT(arrayElement->_next == nullptr);
*arrayElement = *element;
// not found in list, now insert
element->_next = nullptr;
*arrayElement = *element;
array->_nrUsed++;
return TRI_ERROR_NO_ERROR;
@ -555,10 +593,6 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array,
TRI_hash_index_element_multi_t* arrayElement = &array->_table[i];
// ...........................................................................
// if we did not find such an item return false
// ...........................................................................
bool found = (arrayElement->_document != nullptr);
if (! found) {
@ -566,34 +600,39 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array,
}
if (arrayElement->_document != element->_document) {
auto current = arrayElement;
while (current->_next != nullptr) {
if (current->_next->_document == element->_document) {
auto ptr = current->_next->_next;
DestroyElement(array, current->_next);
ReturnToFreelist(array, current->_next);
// look in the overflow list for the sought document
auto next = &(arrayElement->_next);
while (*next != nullptr) {
if ((*next)->_document == element->_document) {
auto ptr = (*next)->_next;
DestroyElement(array, *next);
ReturnToFreelist(array, *next);
*next = ptr;
current->_next = ptr;
return TRI_ERROR_NO_ERROR;
}
current = current->_next;
next = &((*next)->_next);
}
return TRI_RESULT_ELEMENT_NOT_FOUND;
}
// the element itself is the document to remove
if (arrayElement->_next != nullptr) {
auto ptr = arrayElement->_next;
DestroyElement(array, arrayElement);
*arrayElement = *ptr;
ReturnToFreelist(array, ptr);
auto next = arrayElement->_next;
// copy data from first overflow element into ourselves
arrayElement->_document = next->_document;
arrayElement->_next = next->_next;
// and remove the first overflow element
DestroyElement(array, next);
ReturnToFreelist(array, next);
return TRI_ERROR_NO_ERROR;
}
// ...........................................................................
// remove item
// ...........................................................................
TRI_ASSERT(arrayElement->_next == nullptr);
DestroyElement(array, arrayElement);
array->_nrUsed--;
@ -617,6 +656,7 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array,
}
if (array->_nrUsed == 0) {
TRI_ASSERT(array->_nrOverflowUsed == 0);
ResizeHashArray(array, InitialSize(), true);
}
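The changes above thread duplicate documents through a singly linked chain of slim overflow entries hanging off the in-table element: lookups push the _document pointers, inserts link new entries at the head of the chain, and removals either unlink a chain entry or pull the first overflow entry into the table slot. Below is a minimal standalone sketch of that pattern; Bucket, OverflowEntry, Document, Insert and Remove are illustrative stand-ins, not ArangoDB identifiers, and hashing, block allocation and resizing are omitted.

struct Document;                    // stand-in for TRI_doc_mptr_t

struct OverflowEntry {              // slim overflow entry: document + next only
  Document* document;
  OverflowEntry* next;
};

struct Bucket {                     // in-table element for one hash slot
  Document* document;               // nullptr means the slot is empty
  OverflowEntry* next;              // chain of further documents with an equal key
};

// insert a document: the first one occupies the bucket itself,
// duplicates are linked at the head of the overflow chain
void Insert (Bucket& bucket, Document* doc, OverflowEntry* spare) {
  if (bucket.document == nullptr) {
    bucket.document = doc;
    bucket.next = nullptr;
    return;
  }
  spare->document = doc;
  spare->next = bucket.next;
  bucket.next = spare;
}

// remove a document; returns the overflow entry that became unused
// (the caller would hand it back to a freelist), or nullptr
OverflowEntry* Remove (Bucket& bucket, Document* doc) {
  if (bucket.document == doc) {
    OverflowEntry* first = bucket.next;
    if (first != nullptr) {
      bucket.document = first->document;    // copy the first overflow entry into the slot
      bucket.next = first->next;            // and unlink it
      return first;
    }
    bucket.document = nullptr;              // slot becomes empty
    return nullptr;
  }
  for (OverflowEntry** next = &bucket.next; *next != nullptr; next = &(*next)->next) {
    if ((*next)->document == doc) {
      OverflowEntry* found = *next;
      *next = found->next;                  // unlink from the chain
      return found;
    }
  }
  return nullptr;                           // document was not in this bucket
}

In the real index, the bucket corresponds to TRI_hash_index_element_multi_t, the overflow entry to the new TRI_hash_index_element_overflow_t, and unused entries are returned to the block-allocated freelist (GetFromFreelist / ReturnToFreelist) instead of being freed individually.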

View File

@ -40,6 +40,7 @@
// --SECTION-- forward declarations
// -----------------------------------------------------------------------------
struct TRI_hash_index_element_overflow_s;
struct TRI_hash_index_element_multi_s;
struct TRI_index_search_value_s;
@ -62,7 +63,7 @@ typedef struct TRI_hash_array_multi_s {
struct TRI_hash_index_element_multi_s* _table; // the table itself, aligned to a cache line boundary
struct TRI_hash_index_element_multi_s* _tablePtr; // the table itself
struct TRI_hash_index_element_multi_s* _freelist;
struct TRI_hash_index_element_overflow_s* _freelist;
TRI_vector_pointer_t _blocks;
}

View File

@ -411,7 +411,7 @@ static TRI_index_result_t MultiHashIndex_find (TRI_hash_index_t* hashIndex,
}
for (size_t j = 0; j < result._length; ++j) {
results._documents[j] = ((TRI_hash_index_element_multi_t*)(result._buffer[j]))->_document;
results._documents[j] = ((TRI_doc_mptr_t*) result._buffer[j]);
}
}

View File

@ -64,10 +64,16 @@ typedef struct TRI_hash_index_element_s {
}
TRI_hash_index_element_t;
typedef struct TRI_hash_index_element_overflow_s {
struct TRI_doc_mptr_t* _document;
struct TRI_hash_index_element_overflow_s* _next;
}
TRI_hash_index_element_overflow_t;
typedef struct TRI_hash_index_element_multi_s {
struct TRI_doc_mptr_t* _document;
struct TRI_shaped_sub_s* _subObjects;
struct TRI_hash_index_element_multi_s* _next;
struct TRI_hash_index_element_overflow_s* _next;
}
TRI_hash_index_element_multi_t;
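The memory saving comes from the new overflow type dropping the _subObjects pointer that the in-table entries still carry. Below is a small compilable sketch of the per-entry difference; element_multi, element_overflow, doc_mptr and shaped_sub are illustrative stand-ins rather than the real definitions, and the actual sizes depend on the platform and the full struct contents.

#include <cstdio>

struct doc_mptr;                 // stand-in for TRI_doc_mptr_t
struct shaped_sub;               // stand-in for TRI_shaped_sub_t
struct element_overflow;

struct element_multi {           // mirrors TRI_hash_index_element_multi_t
  doc_mptr*         _document;
  shaped_sub*       _subObjects;
  element_overflow* _next;
};

struct element_overflow {        // mirrors TRI_hash_index_element_overflow_t
  doc_mptr*         _document;
  element_overflow* _next;
};

int main () {
  // on a typical 64-bit platform: 24 bytes vs. 16 bytes per entry
  std::printf("multi: %zu, overflow: %zu\n",
              sizeof(element_multi), sizeof(element_overflow));
  return 0;
}

For indexes with many duplicate values this saves one pointer per overflow entry, and TRI_MemoryUsageHashArrayMulti in the first file no longer counts sub-object storage for overflow entries.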

View File

@ -1,4 +1,4 @@
/*global require, db, assertEqual, assertTrue, ArangoCollection */
/*global require, db, assertEqual, assertTrue */
////////////////////////////////////////////////////////////////////////////////
/// @brief test the unique constraint
@ -356,6 +356,143 @@ function HashIndexSuite() {
var doc2 = collection.save({ a : "test3", b : 1});
assertTrue(doc2._key !== "");
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test multi hash querying
////////////////////////////////////////////////////////////////////////////////
testMultiHashQuery1 : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 1000; ++i) {
collection.save({ _key: "test" + i, value: 1 });
}
assertEqual(1000, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length);
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test multi hash querying
////////////////////////////////////////////////////////////////////////////////
testMultiHashQuery2 : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 1000; ++i) {
collection.save({ _key: "test" + i, value: i });
}
for (i = 0; i < 1000; ++i) {
var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray();
assertEqual(1, docs.length);
assertEqual("test" + i, docs[0]._key);
assertEqual(i, docs[0].value);
}
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test multi hash querying
////////////////////////////////////////////////////////////////////////////////
testMultiHashQuery3 : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 1000; ++i) {
collection.save({ _key: "test" + i, value: i % 4 });
}
for (i = 0; i < 4; ++i) {
var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray();
assertEqual(250, docs.length);
docs.forEach(function(doc) {
assertEqual(i, doc.value);
});
}
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test collisions
////////////////////////////////////////////////////////////////////////////////
testMultiHashQuery4 : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 10000; ++i) {
collection.save({ _key: "test" + i, value: "testvalue" + i });
}
for (i = 0; i < 10000; ++i) {
var docs = collection.byExampleHash(collection.getIndexes()[1], { value: "testvalue" + i }).toArray();
assertEqual(1, docs.length);
assertEqual("test" + i, docs[0]._key);
assertEqual("testvalue" + i, docs[0].value);
}
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test multi hash updating
////////////////////////////////////////////////////////////////////////////////
testMultiHashUpdate : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 1000; ++i) {
collection.save({ _key: "test" + i, value: i % 4 });
}
for (i = 0; i < 4; ++i) {
var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray();
assertEqual(250, docs.length);
docs.forEach(function(doc) {
assertEqual(i, doc.value);
});
}
for (i = 500; i < 1000; ++i) {
collection.update("test" + i, { value: (i % 4) + 4 });
}
for (i = 0; i < 8; ++i) {
var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray();
assertEqual(125, docs.length);
docs.forEach(function(doc) {
assertEqual(i, doc.value);
});
}
},
////////////////////////////////////////////////////////////////////////////////
/// @brief multi hash index removal
////////////////////////////////////////////////////////////////////////////////
testMultiHashRemoval : function () {
var i;
collection.ensureHashIndex("value");
for (i = 0; i < 1000; ++i) {
collection.save({ _key: "test" + i, value: 1 });
}
assertEqual(1000, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length);
// remove head of list
collection.remove("test0");
assertEqual(999, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length);
// remove new head of list
collection.remove("test999");
assertEqual(998, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length);
for (i = 1; i < 998; ++i) {
collection.remove("test" + i);
assertEqual(998 - i, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length);
}
}
};