From ce8ac10f59170aed25bb55c59bda56abf2db30b5 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Sat, 11 Oct 2014 18:38:22 +0200 Subject: [PATCH] optimized memory usage of multi hash index --- arangod/HashIndex/hash-array-multi.cpp | 162 +++++++++++++++---------- arangod/HashIndex/hash-array-multi.h | 3 +- arangod/HashIndex/hash-index.cpp | 2 +- arangod/HashIndex/hash-index.h | 8 +- js/common/tests/shell-hash-index.js | 139 ++++++++++++++++++++- 5 files changed, 249 insertions(+), 65 deletions(-) diff --git a/arangod/HashIndex/hash-array-multi.cpp b/arangod/HashIndex/hash-array-multi.cpp index df9355f8de..a30fc4489e 100644 --- a/arangod/HashIndex/hash-array-multi.cpp +++ b/arangod/HashIndex/hash-array-multi.cpp @@ -126,16 +126,32 @@ static inline size_t GetBlockSize (size_t blockNumber) { return (size_t) (BLOCK_SIZE_UNIT << blockNumber); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief return the size of a single entry +//////////////////////////////////////////////////////////////////////////////// + +static inline size_t TableEntrySize () { + return sizeof(TRI_hash_index_element_multi_t); +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief return the size of a single overflow entry +//////////////////////////////////////////////////////////////////////////////// + +static inline size_t OverflowEntrySize () { + return sizeof(TRI_hash_index_element_overflow_t); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief get a storage location from the freelist //////////////////////////////////////////////////////////////////////////////// -static TRI_hash_index_element_multi_t* GetFromFreelist (TRI_hash_array_multi_t* array) { +static TRI_hash_index_element_overflow_t* GetFromFreelist (TRI_hash_array_multi_t* array) { if (array->_freelist == nullptr) { size_t blockSize = GetBlockSize(array->_blocks._length); TRI_ASSERT(blockSize > 0); - 
auto begin = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, blockSize * sizeof(TRI_hash_index_element_multi_t), true)); + auto begin = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, blockSize * OverflowEntrySize(), true)); if (begin == nullptr) { return nullptr; @@ -168,7 +184,7 @@ static TRI_hash_index_element_multi_t* GetFromFreelist (TRI_hash_array_multi_t* //////////////////////////////////////////////////////////////////////////////// static void ReturnToFreelist (TRI_hash_array_multi_t* array, - TRI_hash_index_element_multi_t* element) { + TRI_hash_index_element_overflow_t* element) { element->_document = nullptr; element->_next = array->_freelist; array->_freelist = element; @@ -189,6 +205,19 @@ static void DestroyElement (TRI_hash_array_multi_t* array, element->_next = nullptr; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief destroys an element, removing any allocated memory +//////////////////////////////////////////////////////////////////////////////// + +static void DestroyElement (TRI_hash_array_multi_t* array, + TRI_hash_index_element_overflow_t* element) { + TRI_ASSERT_EXPENSIVE(element != nullptr); + TRI_ASSERT_EXPENSIVE(element->_document != nullptr); + + element->_document = nullptr; + element->_next = nullptr; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief initial preallocation size of the hash table when the table is /// first created @@ -200,18 +229,6 @@ static inline uint64_t InitialSize () { return 251; } -// ----------------------------------------------------------------------------- -// --SECTION-- private functions -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @brief return the size of a single entry -//////////////////////////////////////////////////////////////////////////////// - -static inline size_t TableEntrySize 
() { - return sizeof(TRI_hash_index_element_multi_t); -} - //////////////////////////////////////////////////////////////////////////////// /// @brief allocate memory for the hash table /// @@ -351,12 +368,17 @@ void TRI_DestroyHashArrayMulti (TRI_hash_array_multi_t* array) { e = p + array->_nrAlloc; for (; p < e; ++p) { - auto current = p; + if (p->_document != nullptr) { + // destroy overflow elements + auto current = p->_next; + while (current != nullptr) { + auto ptr = current->_next; + DestroyElement(array, current); + current = ptr; + } - while (current != nullptr && current->_document != nullptr) { - auto ptr = current->_next; - DestroyElement(array, current); - current = ptr; + // destroy the element itself + DestroyElement(array, p); } } @@ -397,10 +419,9 @@ size_t TRI_MemoryUsageHashArrayMulti (TRI_hash_array_multi_t const* array) { size_t tableSize = (size_t) (array->_nrAlloc * TableEntrySize() + 64); size_t memberSize = (size_t) (array->_nrUsed * array->_numFields * sizeof(TRI_shaped_sub_t)); - size_t overflowUsed = (size_t) (array->_nrOverflowUsed * array->_numFields * sizeof(TRI_shaped_sub_t)); - size_t overflowAlloc = (size_t) (array->_nrOverflowAlloc * TableEntrySize()); + size_t overflowAlloc = (size_t) (array->_nrOverflowAlloc * OverflowEntrySize()); - return (size_t) (tableSize + memberSize + overflowUsed + overflowAlloc); + return (size_t) (tableSize + memberSize + overflowAlloc); } //////////////////////////////////////////////////////////////////////////////// @@ -409,7 +430,15 @@ size_t TRI_MemoryUsageHashArrayMulti (TRI_hash_array_multi_t const* array) { int TRI_ResizeHashArrayMulti (TRI_hash_array_multi_t* array, size_t size) { - return ResizeHashArray(array, (uint64_t) (2 * size + 1), false); + // use less than 1 element per number of documents + // we do this because we expect duplicate values, which are stored in the overflow + // items (which are allocated separately) + size_t targetSize = 0.75 * size; + if ((targetSize & 1) == 0) { + //
make odd + targetSize++; + } + return ResizeHashArray(array, (uint64_t) targetSize, false); } // ----------------------------------------------------------------------------- @@ -444,20 +473,17 @@ TRI_vector_pointer_t TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const TRI_ASSERT_EXPENSIVE(i < n); if (array->_table[i]._document != nullptr) { - auto current = &array->_table[i]; + // add the element itself + TRI_PushBackVectorPointer(&result, array->_table[i]._document); + + // add the overflow elements + auto current = array->_table[i]._next; while (current != nullptr) { - if (IsEqualKeyElement(array, key, current)) { - TRI_PushBackVectorPointer(&result, current); - } + TRI_PushBackVectorPointer(&result, current->_document); current = current->_next; } } - // ........................................................................... - // return whatever we found -- which could be an empty vector list if nothing - // matches. - // ........................................................................... 
- return result; } @@ -476,8 +502,6 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array, return TRI_ERROR_OUT_OF_MEMORY; } - element->_next = nullptr; - uint64_t const n = array->_nrAlloc; uint64_t i, k; @@ -504,7 +528,13 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array, if (found) { if (isRollback) { - auto current = arrayElement; + if (arrayElement->_document == element->_document) { + DestroyElement(array, element); + + return TRI_RESULT_ELEMENT_EXISTS; + } + + auto current = arrayElement->_next; while (current != nullptr) { if (current->_document == element->_document) { DestroyElement(array, element); @@ -521,14 +551,22 @@ int TRI_InsertElementHashArrayMulti (TRI_hash_array_multi_t* array, return TRI_ERROR_OUT_OF_MEMORY; } - element->_next = arrayElement->_next; - *ptr = *element; + // link our element at the list head + ptr->_document = element->_document; + ptr->_next = arrayElement->_next; arrayElement->_next = ptr; + + // it is ok to destroy the element here, because we have copied its internals before! + DestroyElement(array, element); return TRI_ERROR_NO_ERROR; } + + TRI_ASSERT(arrayElement->_next == nullptr); - *arrayElement = *element; + // not found in list, now insert it + element->_next = nullptr; + *arrayElement = *element; array->_nrUsed++; return TRI_ERROR_NO_ERROR; @@ -555,10 +593,6 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array, TRI_hash_index_element_multi_t* arrayElement = &array->_table[i]; - // ........................................................................... - // if we did not find such an item return false - // ........................................................................... - bool found = (arrayElement->_document != nullptr); if (! 
found) { @@ -566,34 +600,39 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array, } if (arrayElement->_document != element->_document) { - auto current = arrayElement; - while (current->_next != nullptr) { - if (current->_next->_document == element->_document) { - auto ptr = current->_next->_next; - DestroyElement(array, current->_next); - ReturnToFreelist(array, current->_next); + // look in the overflow list for the sought document + auto next = &(arrayElement->_next); + while (*next != nullptr) { + if ((*next)->_document == element->_document) { + auto ptr = (*next)->_next; + DestroyElement(array, *next); + ReturnToFreelist(array, *next); + *next = ptr; - current->_next = ptr; - return TRI_ERROR_NO_ERROR; } - current = current->_next; + next = &((*next)->_next); } + + return TRI_RESULT_ELEMENT_NOT_FOUND; } + // the element itself is the document to remove if (arrayElement->_next != nullptr) { - auto ptr = arrayElement->_next; - DestroyElement(array, arrayElement); - - *arrayElement = *ptr; - ReturnToFreelist(array, ptr); + auto next = arrayElement->_next; + // copy data from first overflow element into ourselves + arrayElement->_document = next->_document; + arrayElement->_next = next->_next; + + // and remove the first overflow element + DestroyElement(array, next); + ReturnToFreelist(array, next); + return TRI_ERROR_NO_ERROR; } - - // ........................................................................... - // remove item - // ........................................................................... 
+ + TRI_ASSERT(arrayElement->_next == nullptr); DestroyElement(array, arrayElement); array->_nrUsed--; @@ -617,6 +656,7 @@ int TRI_RemoveElementHashArrayMulti (TRI_hash_array_multi_t* array, } if (array->_nrUsed == 0) { + TRI_ASSERT(array->_nrOverflowUsed == 0); ResizeHashArray(array, InitialSize(), true); } diff --git a/arangod/HashIndex/hash-array-multi.h b/arangod/HashIndex/hash-array-multi.h index 2e49279efc..66d7eab0da 100644 --- a/arangod/HashIndex/hash-array-multi.h +++ b/arangod/HashIndex/hash-array-multi.h @@ -40,6 +40,7 @@ // --SECTION-- forward declarations // ----------------------------------------------------------------------------- +struct TRI_hash_index_element_overflow_s; struct TRI_hash_index_element_multi_s; struct TRI_index_search_value_s; @@ -62,7 +63,7 @@ typedef struct TRI_hash_array_multi_s { struct TRI_hash_index_element_multi_s* _table; // the table itself, aligned to a cache line boundary struct TRI_hash_index_element_multi_s* _tablePtr; // the table itself - struct TRI_hash_index_element_multi_s* _freelist; + struct TRI_hash_index_element_overflow_s* _freelist; TRI_vector_pointer_t _blocks; } diff --git a/arangod/HashIndex/hash-index.cpp b/arangod/HashIndex/hash-index.cpp index de5eac327c..eaa986a8a0 100644 --- a/arangod/HashIndex/hash-index.cpp +++ b/arangod/HashIndex/hash-index.cpp @@ -411,7 +411,7 @@ static TRI_index_result_t MultiHashIndex_find (TRI_hash_index_t* hashIndex, } for (size_t j = 0; j < result._length; ++j) { - results._documents[j] = ((TRI_hash_index_element_multi_t*)(result._buffer[j]))->_document; + results._documents[j] = ((TRI_doc_mptr_t*) result._buffer[j]); } } diff --git a/arangod/HashIndex/hash-index.h b/arangod/HashIndex/hash-index.h index e00a483745..79547161d8 100644 --- a/arangod/HashIndex/hash-index.h +++ b/arangod/HashIndex/hash-index.h @@ -64,10 +64,16 @@ typedef struct TRI_hash_index_element_s { } TRI_hash_index_element_t; +typedef struct TRI_hash_index_element_overflow_s { + struct TRI_doc_mptr_t* 
_document; + struct TRI_hash_index_element_overflow_s* _next; +} +TRI_hash_index_element_overflow_t; + typedef struct TRI_hash_index_element_multi_s { struct TRI_doc_mptr_t* _document; struct TRI_shaped_sub_s* _subObjects; - struct TRI_hash_index_element_multi_s* _next; + struct TRI_hash_index_element_overflow_s* _next; } TRI_hash_index_element_multi_t; diff --git a/js/common/tests/shell-hash-index.js b/js/common/tests/shell-hash-index.js index 961d52c2c0..b2a023a1d0 100644 --- a/js/common/tests/shell-hash-index.js +++ b/js/common/tests/shell-hash-index.js @@ -1,4 +1,4 @@ -/*global require, db, assertEqual, assertTrue, ArangoCollection */ +/*global require, db, assertEqual, assertTrue */ //////////////////////////////////////////////////////////////////////////////// /// @brief test the unique constraint @@ -356,6 +356,143 @@ function HashIndexSuite() { var doc2 = collection.save({ a : "test3", b : 1}); assertTrue(doc2._key !== ""); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test multi hash querying +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashQuery1 : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 1000; ++i) { + collection.save({ _key: "test" + i, value: 1 }); + } + + assertEqual(1000, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test multi hash querying +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashQuery2 : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 1000; ++i) { + collection.save({ _key: "test" + i, value: i }); + } + + for (i = 0; i < 1000; ++i) { + var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray(); + assertEqual(1, docs.length); + assertEqual("test" 
+ i, docs[0]._key); + assertEqual(i, docs[0].value); + } + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test multi hash querying +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashQuery3 : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 1000; ++i) { + collection.save({ _key: "test" + i, value: i % 4 }); + } + + for (i = 0; i < 4; ++i) { + var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray(); + assertEqual(250, docs.length); + docs.forEach(function(doc) { + assertEqual(i, doc.value); + }); + } + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test collisions +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashQuery4 : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 10000; ++i) { + collection.save({ _key: "test" + i, value: "testvalue" + i }); + } + + for (i = 0; i < 10000; ++i) { + var docs = collection.byExampleHash(collection.getIndexes()[1], { value: "testvalue" + i }).toArray(); + assertEqual(1, docs.length); + assertEqual("test" + i, docs[0]._key); + assertEqual("testvalue" + i, docs[0].value); + } + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test multi hash updating +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashUpdate : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 1000; ++i) { + collection.save({ _key: "test" + i, value: i % 4 }); + } + + for (i = 0; i < 4; ++i) { + var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray(); + assertEqual(250, docs.length); + docs.forEach(function(doc) { + assertEqual(i, doc.value); + }); + } + + for (i = 500; i < 1000; ++i) { + collection.update("test" + i, 
{ value: (i % 4) + 4 }); + } + + for (i = 0; i < 8; ++i) { + var docs = collection.byExampleHash(collection.getIndexes()[1], { value: i }).toArray(); + assertEqual(125, docs.length); + docs.forEach(function(doc) { + assertEqual(i, doc.value); + }); + } + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief multi hash index removal +//////////////////////////////////////////////////////////////////////////////// + + testMultiHashRemoval : function () { + var i; + + collection.ensureHashIndex("value"); + for (i = 0; i < 1000; ++i) { + collection.save({ _key: "test" + i, value: 1 }); + } + + assertEqual(1000, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length); + + // remove head of list + collection.remove("test0"); + assertEqual(999, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length); + + // remove new head of list + collection.remove("test999"); + assertEqual(998, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length); + + for (i = 1; i < 998; ++i) { + collection.remove("test" + i); + assertEqual(998 - i, collection.byExampleHash(collection.getIndexes()[1], { value: 1 }).toArray().length); + } } };