1
0
Fork 0

lazy hash index iteration

This commit is contained in:
Jan Steemann 2015-01-18 03:25:06 +01:00
parent cacaddd5e0
commit a7538ca6e3
6 changed files with 286 additions and 130 deletions

View File

@ -886,6 +886,8 @@ IndexRangeBlock::IndexRangeBlock (ExecutionEngine* engine,
_posInDocs(0),
_anyBoundVariable(false),
_skiplistIterator(nullptr),
_hashIndexSearchValue({ 0, nullptr }),
_hashNextElement(nullptr),
_condition(new IndexOrCondition()),
_posInRanges(0),
_sortCoords(),
@ -1223,58 +1225,63 @@ bool IndexRangeBlock::initRanges () {
removeOverlapsIndexOr(*_condition);
}
if (en->_index->type == TRI_IDX_TYPE_PRIMARY_INDEX) {
if (en->_index->type == TRI_IDX_TYPE_PRIMARY_INDEX ||
en->_index->type == TRI_IDX_TYPE_EDGE_INDEX) {
return true; //no initialization here!
}
else if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) {
return true; //no initialization here!
if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) {
if (_condition->empty()) {
return false;
}
_posInRanges = 0;
getHashIndexIterator(_condition->at(_posInRanges));
return (_hashIndexSearchValue._values != nullptr);
}
if (en->_index->type == TRI_IDX_TYPE_SKIPLIST_INDEX) {
if (! _condition->empty()) {
// sort the conditions!
if (_condition->empty()) {
return false;
}
// TODO this should also be done for hash indexes when
// they are lazy too.
// sort the conditions!
// first sort by the prefix of the index
std::vector<std::vector<size_t>> prefix;
if (! _sortCoords.empty()) {
_sortCoords.clear();
_sortCoords.reserve(_condition->size());
}
for (size_t s = 0; s < _condition->size(); s++) {
_sortCoords.push_back(s);
std::vector<size_t> next;
next.reserve(en->_index->fields.size());
prefix.emplace_back(next);
// prefix[s][t] = position in _condition[s] corresponding to the <t>th index
// field
for (size_t t = 0; t < en->_index->fields.size(); t++) {
for (size_t u = 0; u < _condition->at(s).size(); u++) {
auto ri = _condition->at(s)[u];
if (en->_index->fields[t].compare(ri._attr) == 0) {
prefix.at(s).insert(prefix.at(s).begin() + t, u);
break;
}
// TODO this should also be done for hash indexes when
// they are lazy too.
// first sort by the prefix of the index
std::vector<std::vector<size_t>> prefix;
if (! _sortCoords.empty()) {
_sortCoords.clear();
_sortCoords.reserve(_condition->size());
}
for (size_t s = 0; s < _condition->size(); s++) {
_sortCoords.push_back(s);
std::vector<size_t> next;
next.reserve(en->_index->fields.size());
prefix.emplace_back(next);
// prefix[s][t] = position in _condition[s] corresponding to the <t>th index
// field
for (size_t t = 0; t < en->_index->fields.size(); t++) {
for (size_t u = 0; u < _condition->at(s).size(); u++) {
auto ri = _condition->at(s)[u];
if (en->_index->fields[t].compare(ri._attr) == 0) {
prefix.at(s).insert(prefix.at(s).begin() + t, u);
break;
}
}
}
SortFunc sortFunc(prefix, _condition, en->_reverse);
// then sort by the values of the bounds
std::sort(_sortCoords.begin(), _sortCoords.end(), sortFunc);
_posInRanges = 0;
getSkiplistIterator(_condition->at(_sortCoords[_posInRanges]));
return (_skiplistIterator != nullptr);
}
else {
return false;
}
}
else if (en->_index->type == TRI_IDX_TYPE_EDGE_INDEX) {
return true; //no initialization here!
SortFunc sortFunc(prefix, _condition, en->_reverse);
// then sort by the values of the bounds
std::sort(_sortCoords.begin(), _sortCoords.end(), sortFunc);
_posInRanges = 0;
getSkiplistIterator(_condition->at(_sortCoords[_posInRanges]));
return (_skiplistIterator != nullptr);
}
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unexpected index type");
@ -1431,9 +1438,7 @@ bool IndexRangeBlock::readIndex (size_t atMost) {
}
}
else if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) {
if (_flag) {
readHashIndex(*_condition);
}
readHashIndex(atMost);
}
else if (en->_index->type == TRI_IDX_TYPE_SKIPLIST_INDEX) {
readSkiplistIndex(atMost);
@ -1715,87 +1720,6 @@ void IndexRangeBlock::readPrimaryIndex (IndexOrCondition const& ranges) {
LEAVE_BLOCK;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief read documents using a hash index
///
/// Eagerly performs one hash index lookup per entry of `ranges` and appends
/// all matching documents to `_documents`. An entry for which no search value
/// can be built is skipped. Exceptions raised while building the search value
/// or during the lookup are re-thrown after the search value has been freed.
////////////////////////////////////////////////////////////////////////////////

void IndexRangeBlock::readHashIndex (IndexOrCondition const& ranges) {
  ENTER_BLOCK;
  auto en = static_cast<IndexRangeNode const*>(getPlanNode());
  TRI_index_t* idx = en->_index->data;
  TRI_ASSERT(idx != nullptr);

  auto hashIndex = reinterpret_cast<TRI_hash_index_t*>(idx);
  TRI_shaper_t* shaper = _collection->documentCollection()->getShaper();
  TRI_ASSERT(shaper != nullptr);

  // start with an empty search value so that cleanup is always well-defined
  TRI_index_search_value_t searchValue = { 0, nullptr };

  // frees the shaped values and the array holding them (idempotent)
  auto destroySearchValue = [&]() {
    if (searchValue._values != nullptr) {
      for (size_t i = 0; i < searchValue._length; ++i) {
        TRI_DestroyShapedJson(shaper->_memoryZone, &searchValue._values[i]);
      }
      TRI_Free(TRI_UNKNOWN_MEM_ZONE, searchValue._values);
    }
    searchValue._values = nullptr;
  };

  // builds the search value for ranges[pos]; returns false if one of the
  // bounds could not be converted into a shaped value
  auto setupSearchValue = [&](size_t pos) -> bool {
    size_t const n = hashIndex->_paths._length;
    searchValue._length = 0;

    // initialize the whole range of shapes with zeros
    searchValue._values = static_cast<TRI_shaped_json_t*>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE,
                          n * sizeof(TRI_shaped_json_t), true));

    if (searchValue._values == nullptr) {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
    }

    searchValue._length = n;

    for (size_t i = 0; i < n; ++i) {
      TRI_shape_pid_t pid = *(static_cast<TRI_shape_pid_t*>(TRI_AtVector(&hashIndex->_paths, i)));
      TRI_ASSERT(pid != 0);

      char const* name = TRI_AttributeNameShapePid(shaper, pid);
      std::string const lookFor(name);

      // iterate by reference: the entries are only inspected, never modified
      for (auto const& x : ranges[pos]) {
        if (x._attr == lookFor) { // found attribute
          // here x->_low->_bound = x->_high->_bound
          auto shaped = TRI_ShapedJsonJson(shaper, x._lowConst.bound().json(), false);

          if (shaped == nullptr) {
            return false;
          }

          // copy the struct into the slot, then free only the outer wrapper
          searchValue._values[i] = *shaped;
          TRI_Free(shaper->_memoryZone, shaped);
          break;
        }
      }
    }

    return true;
  };

  for (size_t i = 0; i < ranges.size(); i++) {
    try {
      // setup is inside the try block so that a throw while the search value
      // is only partially built does not leak the already allocated parts
      if (setupSearchValue(i)) {
        size_t const n = _documents.size();
        TRI_LookupHashIndex(idx, &searchValue, _documents);
        _engine->_stats.scannedIndex += static_cast<int64_t>(_documents.size() - n);
      }
    }
    catch (...) {
      destroySearchValue();
      throw;
    }

    destroySearchValue();
  }
  LEAVE_BLOCK;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief read documents using the edges index
////////////////////////////////////////////////////////////////////////////////
@ -1853,6 +1777,119 @@ void IndexRangeBlock::readEdgeIndex (IndexOrCondition const& ranges) {
LEAVE_BLOCK;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the hash index search value
///
/// Destroys every shaped value stored in `_hashIndexSearchValue`, frees the
/// array holding them and resets the pointer to nullptr. Does nothing if no
/// search value is currently set, so it is safe to call repeatedly.
////////////////////////////////////////////////////////////////////////////////

void IndexRangeBlock::destroyHashIndexSearchValues () {
  if (_hashIndexSearchValue._values == nullptr) {
    // nothing allocated, nothing to do
    return;
  }

  TRI_shaper_t* shaper = _collection->documentCollection()->getShaper();

  size_t pos = 0;
  while (pos < _hashIndexSearchValue._length) {
    TRI_DestroyShapedJson(shaper->_memoryZone, &_hashIndexSearchValue._values[pos]);
    ++pos;
  }

  TRI_Free(TRI_UNKNOWN_MEM_ZONE, _hashIndexSearchValue._values);
  _hashIndexSearchValue._values = nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief set up a hash index search value
///
/// Allocates `_hashIndexSearchValue` with one slot per indexed attribute path
/// and fills each slot with the shaped version of the matching bound found in
/// `range`.
///
/// Returns true on success. Returns false if a bound could not be converted
/// into a shaped value; in that case the partially filled search value is
/// left in place and the caller has to release it via
/// destroyHashIndexSearchValues(). Throws TRI_ERROR_OUT_OF_MEMORY if the
/// slot array cannot be allocated.
///
/// The previous contents of `_hashIndexSearchValue` are overwritten without
/// being freed, so the caller must have destroyed them beforehand.
////////////////////////////////////////////////////////////////////////////////

bool IndexRangeBlock::setupHashIndexSearchValue (IndexAndCondition const& range) {
  auto en = static_cast<IndexRangeNode const*>(getPlanNode());
  TRI_index_t* idx = en->_index->data;
  TRI_ASSERT(idx != nullptr);

  auto hashIndex = reinterpret_cast<TRI_hash_index_t*>(idx);
  TRI_shaper_t* shaper = _collection->documentCollection()->getShaper();
  TRI_ASSERT(shaper != nullptr);

  size_t const n = hashIndex->_paths._length;

  // keep the length at zero until the allocation is known to have succeeded
  _hashIndexSearchValue._length = 0;

  // initialize the whole range of shapes with zeros
  _hashIndexSearchValue._values = static_cast<TRI_shaped_json_t*>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE,
                                  n * sizeof(TRI_shaped_json_t), true));

  if (_hashIndexSearchValue._values == nullptr) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
  }

  _hashIndexSearchValue._length = n;

  for (size_t i = 0; i < n; ++i) {
    TRI_shape_pid_t pid = *(static_cast<TRI_shape_pid_t*>(TRI_AtVector(&hashIndex->_paths, i)));
    TRI_ASSERT(pid != 0);

    char const* name = TRI_AttributeNameShapePid(shaper, pid);
    std::string const lookFor(name);

    // iterate by reference: the entries are only inspected, so there is no
    // need to copy each of them for every index attribute
    for (auto const& x : range) {
      if (x._attr == lookFor) { // found attribute
        // here x->_low->_bound = x->_high->_bound
        auto shaped = TRI_ShapedJsonJson(shaper, x._lowConst.bound().json(), false);

        if (shaped == nullptr) {
          return false;
        }

        // copy the struct into the slot, then free only the outer wrapper
        _hashIndexSearchValue._values[i] = *shaped;
        TRI_Free(shaper->_memoryZone, shaped);
        break;
      }
    }
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief build search values for hash index lookup
///
/// Resets the iteration state and replaces the current search value with one
/// built from `ranges`. If building fails, `_hashIndexSearchValue._values` is
/// nullptr afterwards, which callers use to detect the failure.
////////////////////////////////////////////////////////////////////////////////

void IndexRangeBlock::getHashIndexIterator (IndexAndCondition const& ranges) {
  ENTER_BLOCK;

  // forget any position inside a previous lookup and drop its search value
  _hashNextElement = nullptr;
  destroyHashIndexSearchValues();

  bool const built = setupHashIndexSearchValue(ranges);

  if (! built) {
    // release whatever was set up before the failure occurred
    destroyHashIndexSearchValues();
  }

  LEAVE_BLOCK;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief read documents using a hash index
///
/// Appends up to `atMost` documents to `_documents`, continuing from the
/// position remembered in `_hashNextElement`. Once the current condition is
/// exhausted, the search value for the next usable entry of `_condition` is
/// built. Does nothing if no search value is currently set.
////////////////////////////////////////////////////////////////////////////////

void IndexRangeBlock::readHashIndex (size_t atMost) {
  ENTER_BLOCK;

  if (_hashIndexSearchValue._values == nullptr) {
    // no search value available, nothing to read
    return;
  }

  auto en = static_cast<IndexRangeNode const*>(getPlanNode());
  TRI_index_t* idx = en->_index->data;
  TRI_ASSERT(idx != nullptr);

  size_t nrSent = 0;
  while (nrSent < atMost) {
    size_t const n = _documents.size();

    // only request what is still missing, so that the total number of
    // documents produced by this call does not exceed atMost
    TRI_LookupHashIndex(idx, &_hashIndexSearchValue, _documents, _hashNextElement, atMost - nrSent);
    size_t const numRead = _documents.size() - n;

    _engine->_stats.scannedIndex += static_cast<int64_t>(numRead);
    nrSent += numRead;

    if (_hashNextElement == nullptr) {
      // current condition is exhausted
      destroyHashIndexSearchValues();

      // move on to the next condition for which a search value can be built.
      // a condition whose search value cannot be built is skipped instead of
      // ending the whole iteration, so later conditions are still evaluated
      while (++_posInRanges < _condition->size()) {
        getHashIndexIterator(_condition->at(_posInRanges));

        if (_hashIndexSearchValue._values != nullptr) {
          break;
        }
      }

      if (_hashIndexSearchValue._values == nullptr) {
        // all conditions processed
        _hashNextElement = nullptr;
        break;
      }
    }
  }
  LEAVE_BLOCK;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief read documents using a skiplist index
////////////////////////////////////////////////////////////////////////////////

View File

@ -43,6 +43,8 @@
#include "Utils/V8TransactionContext.h"
#include "Cluster/ClusterComm.h"
struct TRI_hash_index_element_multi_s;
namespace triagens {
namespace aql {
@ -605,10 +607,28 @@ namespace triagens {
void readEdgeIndex (IndexOrCondition const&);
////////////////////////////////////////////////////////////////////////////////
/// @brief read using a skiplist index
/// @brief destroy the hash index search value
////////////////////////////////////////////////////////////////////////////////
void readSkiplistIndex (size_t atMost);
void destroyHashIndexSearchValues ();
////////////////////////////////////////////////////////////////////////////////
/// @brief set up a hash index search value
////////////////////////////////////////////////////////////////////////////////
bool setupHashIndexSearchValue (IndexAndCondition const&);
////////////////////////////////////////////////////////////////////////////////
/// @brief produce a reentrant hash index iterator
////////////////////////////////////////////////////////////////////////////////
void getHashIndexIterator (IndexAndCondition const&);
////////////////////////////////////////////////////////////////////////////////
/// @brief read using a hash index
////////////////////////////////////////////////////////////////////////////////
void readHashIndex (size_t);
////////////////////////////////////////////////////////////////////////////////
/// @brief this tries to create a skiplistIterator to read from the index.
@ -617,10 +637,10 @@ namespace triagens {
void getSkiplistIterator (IndexAndCondition const&);
////////////////////////////////////////////////////////////////////////////////
/// @brief read using a hash index
/// @brief read using a skiplist index
////////////////////////////////////////////////////////////////////////////////
void readHashIndex (IndexOrCondition const&);
void readSkiplistIndex (size_t atMost);
////////////////////////////////////////////////////////////////////////////////
/// @brief andCombineRangeInfoVecs: combine the arguments into a single vector,
@ -724,6 +744,18 @@ namespace triagens {
TRI_skiplist_iterator_t* _skiplistIterator;
////////////////////////////////////////////////////////////////////////////////
/// @brief current search value for hash index lookup
////////////////////////////////////////////////////////////////////////////////
TRI_index_search_value_t _hashIndexSearchValue;
////////////////////////////////////////////////////////////////////////////////
/// @brief reentrant hash index iterator state
////////////////////////////////////////////////////////////////////////////////
struct TRI_hash_index_element_multi_s* _hashNextElement;
////////////////////////////////////////////////////////////////////////////////
/// @brief _condition: holds the IndexAndCondition for the current incoming block,
/// this is just the _ranges[_rangesPos] member of the plan node if _allBoundsConstant

View File

@ -516,6 +516,53 @@ int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const* array,
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief looks up elements given a key and a state
///
/// If `next` is nullptr, the table is probed for `key` and the document of
/// the matching slot (if any) is appended to `result`. Further documents are
/// then appended by following the `_next` pointers, until either the chain
/// ends or this call has appended `batchSize` documents. On return, `next`
/// holds the position to continue from, or nullptr if there is nothing left.
////////////////////////////////////////////////////////////////////////////////

int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const* array,
                                   TRI_index_search_value_t const* key,
                                   std::vector<TRI_doc_mptr_copy_t>& result,
                                   TRI_hash_index_element_multi_t*& next,
                                   size_t batchSize) {
  size_t const sizeBefore = result.size();

  TRI_ASSERT_EXPENSIVE(array->_nrUsed < array->_nrAlloc);
  TRI_ASSERT(batchSize > 0);

  if (next == nullptr) {
    // no previous state. start at the beginning
    uint64_t const slots = array->_nrAlloc;
    uint64_t const start = HashKey(array, key) % slots;
    uint64_t pos = start;

    // probe from the hashed position up to the end of the table, stopping at
    // the first empty slot or the first slot holding an equal key
    while (pos < slots &&
           array->_table[pos]._document != nullptr &&
           ! IsEqualKeyElement(array, key, &array->_table[pos])) {
      ++pos;
    }

    if (pos == slots) {
      // reached the end of the table, wrap around and probe the front part
      pos = 0;
      while (pos < start &&
             array->_table[pos]._document != nullptr &&
             ! IsEqualKeyElement(array, key, &array->_table[pos])) {
        ++pos;
      }
    }

    TRI_ASSERT_EXPENSIVE(pos < slots);

    if (array->_table[pos]._document != nullptr) {
      result.emplace_back(*(array->_table[pos]._document));
    }

    next = array->_table[pos]._next;
  }

  // follow the chain, either from the slot just found or from the state
  // handed in by the caller; documents appended above count towards the batch
  for (size_t appended = result.size() - sizeBefore;
       next != nullptr && appended < batchSize;
       ++appended) {
    result.emplace_back(*(next->_document));
    next = next->_next;
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adds an element to the array
///

View File

@ -129,6 +129,16 @@ int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const*,
struct TRI_index_search_value_s const*,
std::vector<TRI_doc_mptr_copy_t>&);
////////////////////////////////////////////////////////////////////////////////
/// @brief looks up an element given a key and a state
////////////////////////////////////////////////////////////////////////////////
int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const*,
struct TRI_index_search_value_s const*,
std::vector<TRI_doc_mptr_copy_t>&,
struct TRI_hash_index_element_multi_s*&,
size_t);
////////////////////////////////////////////////////////////////////////////////
/// @brief adds an element to the array
////////////////////////////////////////////////////////////////////////////////

View File

@ -35,6 +35,8 @@
#include "ShapedJson/shaped-json.h"
#include "VocBase/document-collection.h"
#include "VocBase/voc-shaper.h"
struct TRI_hash_index_element_multi_s;
// -----------------------------------------------------------------------------
// --SECTION-- private functions
@ -713,6 +715,25 @@ int TRI_LookupHashIndex (TRI_index_t* idx,
return TRI_LookupByKeyHashArrayMulti(&hashIndex->_hashArrayMulti, searchValue, documents);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
///
/// Variant that takes an iteration state and a batch size. For a unique index
/// the state `next` is reset to nullptr and the regular lookup is performed;
/// otherwise the call is forwarded to the stateful multi hash array lookup.
////////////////////////////////////////////////////////////////////////////////

int TRI_LookupHashIndex (TRI_index_t* idx,
                         TRI_index_search_value_t* searchValue,
                         std::vector<TRI_doc_mptr_copy_t>& documents,
                         struct TRI_hash_index_element_multi_s*& next,
                         size_t batchSize) {
  auto hashIndex = reinterpret_cast<TRI_hash_index_t*>(idx);

  if (! hashIndex->base._unique) {
    // non-unique index: resumable lookup in the multi hash array
    return TRI_LookupByKeyHashArrayMulti(&hashIndex->_hashArrayMulti, searchValue, documents, next, batchSize);
  }

  // unique index: there is nothing to continue from afterwards
  next = nullptr;
  return HashIndex_find(hashIndex, searchValue, documents);
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------

View File

@ -133,13 +133,22 @@ TRI_vector_pointer_t TRI_LookupHashIndex (TRI_index_t*,
////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
/// it is the caller's responsibility to destroy the result
////////////////////////////////////////////////////////////////////////////////
int TRI_LookupHashIndex (TRI_index_t*,
struct TRI_index_search_value_s*,
std::vector<TRI_doc_mptr_copy_t>&);
////////////////////////////////////////////////////////////////////////////////
/// @brief locates entries in the hash index given shaped json objects
////////////////////////////////////////////////////////////////////////////////
int TRI_LookupHashIndex (TRI_index_t*,
struct TRI_index_search_value_s*,
std::vector<TRI_doc_mptr_copy_t>&,
struct TRI_hash_index_element_multi_s*&,
size_t);
#endif
// -----------------------------------------------------------------------------