////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Daniel H. Larkin
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////

#include "RocksDBVPackIndex.h"
|
|
#include "Aql/AstNode.h"
|
|
#include "Aql/SortCondition.h"
|
|
#include "Basics/StaticStrings.h"
|
|
#include "Basics/VelocyPackHelper.h"
|
|
#include "RocksDBEngine/RocksDBCollection.h"
|
|
#include "RocksDBEngine/RocksDBColumnFamily.h"
|
|
#include "RocksDBEngine/RocksDBCommon.h"
|
|
#include "RocksDBEngine/RocksDBComparator.h"
|
|
#include "RocksDBEngine/RocksDBCounterManager.h"
|
|
#include "RocksDBEngine/RocksDBKeyBounds.h"
|
|
#include "RocksDBEngine/RocksDBMethods.h"
|
|
#include "RocksDBEngine/RocksDBPrimaryIndex.h"
|
|
#include "RocksDBEngine/RocksDBToken.h"
|
|
#include "RocksDBEngine/RocksDBTransactionState.h"
|
|
#include "Transaction/Helpers.h"
|
|
#include "Transaction/Methods.h"
|
|
#include "VocBase/LogicalCollection.h"
|
|
|
|
#include <rocksdb/iterator.h>
|
|
#include <rocksdb/options.h>
|
|
#include <rocksdb/utilities/transaction.h>
|
|
#include <rocksdb/utilities/transaction_db.h>
|
|
#include <rocksdb/utilities/write_batch_with_index.h>
|
|
|
|
#include <velocypack/Iterator.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
using namespace arangodb;

/// @brief the _key attribute, which, when used in an index, will implicitly
/// make it unique
static std::vector<arangodb::basics::AttributeName> const KeyAttribute{
    arangodb::basics::AttributeName("_key", false)};

// .............................................................................
// recall for all of the following comparison functions:
//
// left < right  return -1
// left > right  return 1
// left == right return 0
//
// furthermore:
//
// the following order is currently defined for placing an order on documents:
// undef < null < boolean < number < strings < lists < hash arrays
// note: undefined will be treated as a NULL pointer, not as a NULL JSON object
//
// within each type class we have the following order:
// boolean: false < true
// number: natural order
// strings: lexicographical
// lists: lexicographically, and within each slot according to these rules
// .............................................................................

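// Illustrative example (not part of the original sources): under these rules
// the values false, true, 0, 1, "a", [0] sort as
//   false < true < 0 < 1 < "a" < [0]
// i.e. every boolean sorts before every number, every number before every
// string, and every string before every list.
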
RocksDBVPackIndexIterator::RocksDBVPackIndexIterator(
    LogicalCollection* collection, transaction::Methods* trx,
    ManagedDocumentResult* mmdr, arangodb::RocksDBVPackIndex const* index,
    bool reverse, VPackSlice const& left, VPackSlice const& right)
    : IndexIterator(collection, trx, mmdr, index),
      _index(index),
      _cmp(index->comparator()),
      _reverse(reverse),
      _bounds(index->_unique ? RocksDBKeyBounds::UniqueIndexRange(
                                   index->objectId(), left, right)
                             : RocksDBKeyBounds::IndexRange(index->objectId(),
                                                            left, right)) {
  RocksDBMethods* mthds = rocksutils::toRocksMethods(trx);
  rocksdb::ReadOptions options = mthds->readOptions();
  if (!reverse) {
    // we need to have a pointer to a slice for the upper bound,
    // so we need to assign the slice to an instance variable here
    _upperBound = _bounds.end();
    options.iterate_upper_bound = &_upperBound;
  }

  _iterator = mthds->NewIterator(options, index->columnFamily());
  if (reverse) {
    _iterator->SeekForPrev(_bounds.end());
  } else {
    _iterator->Seek(_bounds.start());
  }
}

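// Note (illustration, not from the original sources): RocksDB's
// ReadOptions::iterate_upper_bound holds a raw pointer, so the bound slice
// must outlive the iterator -- hence the copy into _upperBound above. For a
// forward scan this effectively lets
//   for (it->Seek(start); it->Valid(); it->Next()) { ... }
// stop before reaching _bounds.end() without an explicit per-step key
// comparison; the reverse scan does not set it and relies on outOfRange()
// instead.
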
/// @brief Reset the cursor
void RocksDBVPackIndexIterator::reset() {
  TRI_ASSERT(_trx->state()->isRunning());

  if (_reverse) {
    _iterator->SeekForPrev(_bounds.end());
  } else {
    _iterator->Seek(_bounds.start());
  }
}

bool RocksDBVPackIndexIterator::outOfRange() const {
  TRI_ASSERT(_trx->state()->isRunning());
  if (_reverse) {
    return (_cmp->Compare(_iterator->key(), _bounds.start()) < 0);
  } else {
    return (_cmp->Compare(_iterator->key(), _bounds.end()) > 0);
  }
}

bool RocksDBVPackIndexIterator::next(TokenCallback const& cb, size_t limit) {
  TRI_ASSERT(_trx->state()->isRunning());

  if (limit == 0 || !_iterator->Valid() || outOfRange()) {
    // No limit, no data, or we are actually done. The last call should have
    // returned false
    TRI_ASSERT(limit > 0);  // Someone called with limit == 0. API broken
    return false;
  }

  while (limit > 0) {
    TRI_ASSERT(_index->objectId() == RocksDBKey::objectId(_iterator->key()));

    TRI_voc_rid_t revisionId =
        _index->_unique ? RocksDBValue::revisionId(_iterator->value())
                        : RocksDBKey::revisionId(_iterator->key());
    cb(RocksDBToken(revisionId));

    --limit;
    if (_reverse) {
      _iterator->Prev();
    } else {
      _iterator->Next();
    }

    if (!_iterator->Valid() || outOfRange()) {
      return false;
    }
  }

  return true;
}

uint64_t RocksDBVPackIndex::HashForKey(const rocksdb::Slice& key) {
  // NOTE: This function needs to use the same hashing on the
  // indexed VPack as the initial inserter does
  VPackSlice tmp = RocksDBKey::indexedVPack(key);
  return tmp.normalizedHash();
}

/// @brief create the index
RocksDBVPackIndex::RocksDBVPackIndex(TRI_idx_iid_t iid,
                                     arangodb::LogicalCollection* collection,
                                     arangodb::velocypack::Slice const& info)
    : RocksDBIndex(iid, collection, info, RocksDBColumnFamily::index()),
      _useExpansion(false),
      _allowPartialIndex(true),
      _estimator(nullptr) {
  _cf = _unique ? RocksDBColumnFamily::uniqueIndex()
                : RocksDBColumnFamily::index();
  if (!_unique && !ServerState::instance()->isCoordinator()) {
    // We activate the estimator only for non-unique indexes,
    // and only on DB servers
    _estimator = std::make_unique<RocksDBCuckooIndexEstimator<uint64_t>>(
        RocksDBIndex::ESTIMATOR_SIZE);
    TRI_ASSERT(_estimator != nullptr);
  }
  TRI_ASSERT(!_fields.empty());

  TRI_ASSERT(iid != 0);

  fillPaths(_paths, _expanding);

  for (auto const& it : _fields) {
    if (TRI_AttributeNamesHaveExpansion(it)) {
      _useExpansion = true;
      break;
    }
  }
}

/// @brief destroy the index
RocksDBVPackIndex::~RocksDBVPackIndex() {}

double RocksDBVPackIndex::selectivityEstimate(
    arangodb::StringRef const*) const {
  if (_unique) {
    return 1.0;  // only valid if unique
  }
  return _estimator->computeEstimate();
}

size_t RocksDBVPackIndex::memory() const {
  rocksdb::TransactionDB* db = rocksutils::globalRocksDB();
  RocksDBKeyBounds bounds = _unique ? RocksDBKeyBounds::UniqueIndex(_objectId)
                                    : RocksDBKeyBounds::IndexEntries(_objectId);
  rocksdb::Range r(bounds.start(), bounds.end());
  uint64_t out;
  db->GetApproximateSizes(&r, 1, &out, true);
  return (size_t)out;
}

/// @brief return a VelocyPack representation of the index
void RocksDBVPackIndex::toVelocyPack(VPackBuilder& builder, bool withFigures,
                                     bool forPersistence) const {
  TRI_ASSERT(builder.isOpenArray() || builder.isEmpty());
  builder.openObject();
  RocksDBIndex::toVelocyPack(builder, withFigures, forPersistence);
  builder.add("unique", VPackValue(_unique));
  builder.add("sparse", VPackValue(_sparse));
  builder.close();
}

/// @brief whether or not the index is implicitly unique
/// this can be the case if the index is not declared as unique, but contains
/// a unique attribute such as _key
bool RocksDBVPackIndex::implicitlyUnique() const {
  if (_unique) {
    // a unique index is always unique
    return true;
  }
  if (_useExpansion) {
    // when an expansion such as a[*] is used, the index may not be unique,
    // even if it contains attributes that are guaranteed to be unique
    return false;
  }

  for (auto const& it : _fields) {
    // if _key is contained in the index fields definition, then the index is
    // implicitly unique
    if (it == KeyAttribute) {
      return true;
    }
  }

  // _key not contained
  return false;
}

/// @brief helper function to insert a document into any index type
/// Should result in an elements vector filled with the new index entries
/// uses the _unique field to determine the kind of key structure
int RocksDBVPackIndex::fillElement(VPackBuilder& leased,
                                   TRI_voc_rid_t revisionId,
                                   VPackSlice const& doc,
                                   std::vector<RocksDBKey>& elements,
                                   std::vector<uint64_t>& hashes) {
  if (doc.isNone()) {
    LOG_TOPIC(ERR, arangodb::Logger::FIXME)
        << "encountered invalid marker with slice of type None";
    return TRI_ERROR_INTERNAL;
  }

  TRI_IF_FAILURE("FillElementIllegalSlice") { return TRI_ERROR_INTERNAL; }

  TRI_ASSERT(leased.isEmpty());
  if (!_useExpansion) {
    // fast path for inserts... no array elements used
    leased.openArray();

    size_t const n = _paths.size();
    for (size_t i = 0; i < n; ++i) {
      TRI_ASSERT(!_paths[i].empty());

      VPackSlice slice = doc.get(_paths[i]);
      if (slice.isNone() || slice.isNull()) {
        // attribute not found
        if (_sparse) {
          // if sparse, we do not have to index; this is indicated by the
          // result being shorter than n
          return TRI_ERROR_NO_ERROR;
        }
        // null, note that this will be copied later!
        leased.add(VPackSlice::nullSlice());
      } else {
        leased.add(slice);
      }
    }
    leased.close();

    TRI_IF_FAILURE("FillElementOOM") { return TRI_ERROR_OUT_OF_MEMORY; }
    TRI_IF_FAILURE("FillElementOOM2") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
    }

    if (_unique) {
      // Unique VPack index values are stored as follows:
      // - Key: 7 + 8-byte object ID of index + VPack array with index
      //        value(s) + separator (NUL) byte
      // - Value: primary key
      elements.emplace_back(
          RocksDBKey::UniqueIndexValue(_objectId, leased.slice()));
    } else {
      // Non-unique VPack index values are stored as follows:
      // - Key: 6 + 8-byte object ID of index + VPack array with index
      //        value(s) + separator (NUL) byte + primary key
      // - Value: empty
      elements.emplace_back(
          RocksDBKey::IndexValue(_objectId, leased.slice(), revisionId));
      hashes.push_back(leased.slice().normalizedHash());
    }
  } else {
    // other path for handling array elements, too
    std::vector<VPackSlice> sliceStack;
    buildIndexValues(leased, revisionId, doc, 0, elements, sliceStack, hashes);
  }

  return TRI_ERROR_NO_ERROR;
}

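// Worked example (illustration only, not from the original sources): for an
// index on ["a", "b"] and a document { "_key": "k1", "a": 1, "b": "x" },
// fillElement() builds the VPack array [1, "x"] and produces
// - unique index:     key = <7><objectId, 8 bytes>[1,"x"]<NUL>,
//                     value = the document's revision ID
// - non-unique index: key = <6><objectId, 8 bytes>[1,"x"]<NUL><revision ID>,
//                     value = empty
// so non-unique entries for different documents with equal index values
// still map to distinct RocksDB keys.
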
void RocksDBVPackIndex::addIndexValue(VPackBuilder& leased,
                                      TRI_voc_rid_t revisionId,
                                      VPackSlice const& document,
                                      std::vector<RocksDBKey>& elements,
                                      std::vector<VPackSlice>& sliceStack,
                                      std::vector<uint64_t>& hashes) {
  leased.clear();
  leased.openArray(true);  // unindexed
  for (VPackSlice const& s : sliceStack) {
    leased.add(s);
  }
  leased.close();

  if (_unique) {
    // Unique VPack index values are stored as follows:
    // - Key: 7 + 8-byte object ID of index + VPack array with index value(s)
    // - Value: primary key
    elements.emplace_back(
        RocksDBKey::UniqueIndexValue(_objectId, leased.slice()));
  } else {
    // Non-unique VPack index values are stored as follows:
    // - Key: 6 + 8-byte object ID of index + VPack array with index value(s)
    //        + primary key
    // - Value: empty
    elements.emplace_back(
        RocksDBKey::IndexValue(_objectId, leased.slice(), revisionId));
    hashes.push_back(leased.slice().normalizedHash());
  }
}

/// @brief helper function to create a set of index combinations to insert
void RocksDBVPackIndex::buildIndexValues(VPackBuilder& leased,
                                         TRI_voc_rid_t revisionId,
                                         VPackSlice const doc, size_t level,
                                         std::vector<RocksDBKey>& elements,
                                         std::vector<VPackSlice>& sliceStack,
                                         std::vector<uint64_t>& hashes) {
  // Invariant: level == sliceStack.size()

  // Stop the recursion:
  if (level == _paths.size()) {
    addIndexValue(leased, revisionId, doc, elements, sliceStack, hashes);
    return;
  }

  if (_expanding[level] == -1) {  // the trivial, non-expanding case
    VPackSlice slice = doc.get(_paths[level]);
    if (slice.isNone() || slice.isNull()) {
      if (_sparse) {
        return;
      }
      sliceStack.emplace_back(arangodb::basics::VelocyPackHelper::NullValue());
    } else {
      sliceStack.emplace_back(slice);
    }
    buildIndexValues(leased, revisionId, doc, level + 1, elements, sliceStack,
                     hashes);
    sliceStack.pop_back();
    return;
  }

  // Finally, the complex case, where we have to expand one entry.
  // Note again that at most one step in the attribute path can be
  // an array step. Furthermore, if _allowPartialIndex is true and
  // anything goes wrong with this attribute path, we have to bottom out
  // with None values to be able to use the index for a prefix match.

  // Trivial case to bottom out with Illegal types.
  VPackSlice illegalSlice = arangodb::basics::VelocyPackHelper::IllegalValue();

  auto finishWithNones = [&]() -> void {
    if (!_allowPartialIndex || level == 0) {
      return;
    }
    for (size_t i = level; i < _paths.size(); i++) {
      sliceStack.emplace_back(illegalSlice);
    }
    addIndexValue(leased, revisionId, doc, elements, sliceStack, hashes);
    for (size_t i = level; i < _paths.size(); i++) {
      sliceStack.pop_back();
    }
  };
  size_t const n = _paths[level].size();
  // We have 0 <= _expanding[level] < n.
  VPackSlice current(doc);
  for (size_t i = 0; i <= static_cast<size_t>(_expanding[level]); i++) {
    if (!current.isObject()) {
      finishWithNones();
      return;
    }
    current = current.get(_paths[level][i]);
    if (current.isNone()) {
      finishWithNones();
      return;
    }
  }
  // Now the expansion:
  if (!current.isArray() || current.length() == 0) {
    finishWithNones();
    return;
  }

  std::unordered_set<VPackSlice, arangodb::basics::VelocyPackHelper::VPackHash,
                     arangodb::basics::VelocyPackHelper::VPackEqual>
      seen(2, arangodb::basics::VelocyPackHelper::VPackHash(),
           arangodb::basics::VelocyPackHelper::VPackEqual());

  auto moveOn = [&](VPackSlice something) -> void {
    auto it = seen.find(something);
    if (it == seen.end()) {
      seen.insert(something);
      sliceStack.emplace_back(something);
      buildIndexValues(leased, revisionId, doc, level + 1, elements,
                       sliceStack, hashes);
      sliceStack.pop_back();
    }
  };
  for (auto const& member : VPackArrayIterator(current)) {
    VPackSlice current2(member);
    bool doneNull = false;
    for (size_t i = _expanding[level] + 1; i < n; i++) {
      if (!current2.isObject()) {
        if (!_sparse) {
          moveOn(arangodb::basics::VelocyPackHelper::NullValue());
        }
        doneNull = true;
        break;
      }
      current2 = current2.get(_paths[level][i]);
      if (current2.isNone()) {
        if (!_sparse) {
          moveOn(arangodb::basics::VelocyPackHelper::NullValue());
        }
        doneNull = true;
        break;
      }
    }
    if (!doneNull) {
      moveOn(current2);
    }
    // Finally, if, because of sparsity, we have not inserted anything by now,
    // we need to play the above trick with None because of the above
    // mentioned reasons:
    if (seen.empty()) {
      finishWithNones();
    }
  }
}

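// Worked example (illustration only, not from the original sources): for an
// index on ["tags[*].name"] and a document
//   { "tags": [ { "name": "red" }, { "name": "blue" }, { "name": "red" } ] }
// the expansion step iterates over "tags", descends into "name" for each
// member, and -- thanks to the `seen` set -- emits exactly two index entries,
// ["red"] and ["blue"]; the duplicate "red" is skipped.
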
/// @brief helper function to transform AttributeNames into strings.
void RocksDBVPackIndex::fillPaths(std::vector<std::vector<std::string>>& paths,
                                  std::vector<int>& expanding) {
  paths.clear();
  expanding.clear();
  for (std::vector<arangodb::basics::AttributeName> const& list : _fields) {
    paths.emplace_back();
    std::vector<std::string>& interior(paths.back());
    int expands = -1;
    int count = 0;
    for (auto const& att : list) {
      interior.emplace_back(att.name);
      if (att.shouldExpand) {
        expands = count;
      }
      ++count;
    }
    expanding.emplace_back(expands);
  }
}

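// Worked example (illustration only, not from the original sources): for
// _fields = [ ["a", "b[*]", "c"] ], fillPaths() produces
//   paths     = [ ["a", "b", "c"] ]   (attribute names without the [*])
//   expanding = [ 1 ]                 (the array step sits at position 1)
// and for a field without expansion, e.g. ["x", "y"], the corresponding
// expanding entry is -1.
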
/// @brief inserts a document into the index
int RocksDBVPackIndex::insert(transaction::Methods* trx,
                              TRI_voc_rid_t revisionId, VPackSlice const& doc,
                              bool isRollback) {
  std::vector<RocksDBKey> elements;
  std::vector<uint64_t> hashes;
  int res;
  try {
    transaction::BuilderLeaser leased(trx);
    res = fillElement(*(leased.get()), revisionId, doc, elements, hashes);
  } catch (...) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    return res;
  }

  // now we are going to construct the value to insert into rocksdb
  // unique indexes have a different key structure
  RocksDBValue value = _unique ? RocksDBValue::UniqueIndexValue(revisionId)
                               : RocksDBValue::IndexValue();

  RocksDBMethods* mthds = rocksutils::toRocksMethods(trx);
  size_t const count = elements.size();

  for (size_t i = 0; i < count; ++i) {
    RocksDBKey& key = elements[i];
    if (_unique) {
      RocksDBValue existing =
          RocksDBValue::Empty(RocksDBEntryType::UniqueIndexValue);
      if (mthds->Exists(_cf, key)) {
        res = TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED;
      }
    }

    if (res == TRI_ERROR_NO_ERROR) {
      arangodb::Result r =
          mthds->Put(_cf, key, value.string(), rocksutils::index);
      if (!r.ok()) {
        // auto status =
        //     rocksutils::convertStatus(s, rocksutils::StatusHint::index);
        res = r.errorNumber();
      }
    }

    if (res != TRI_ERROR_NO_ERROR) {
      for (size_t j = 0; j < i; ++j) {
        mthds->Delete(_cf, elements[j]);
      }

      if (res == TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED && !_unique) {
        // We ignore unique constraint violations if we are not unique
        res = TRI_ERROR_NO_ERROR;
        // TODO: remove this? seems dangerous...
      }
      break;
    }
  }

  for (auto& it : hashes) {
    // The estimator is only useful for non-unique indexes
    TRI_ASSERT(!_unique);
    _estimator->insert(it);
  }

  return res;
}

int RocksDBVPackIndex::insertRaw(RocksDBMethods* batch,
                                 TRI_voc_rid_t revisionId,
                                 VPackSlice const& doc) {
  std::vector<RocksDBKey> elements;
  std::vector<uint64_t> hashes;
  int res;
  try {
    VPackBuilder leased;
    res = fillElement(leased, revisionId, doc, elements, hashes);
  } catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }
  if (res != TRI_ERROR_NO_ERROR) {
    return res;
  }

  // now we are going to construct the value to insert into rocksdb
  // unique indexes have a different key structure
  RocksDBValue value = _unique ? RocksDBValue::UniqueIndexValue(revisionId)
                               : RocksDBValue::IndexValue();

  for (RocksDBKey const& key : elements) {
    if (_unique) {
      rocksdb::ReadOptions readOpts;
      if (batch->Exists(_cf, key)) {
        res = TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED;
      }
    }
    if (res == TRI_ERROR_NO_ERROR) {
      batch->Put(_cf, key, value.string(), rocksutils::index);
    }
  }

  for (auto& it : hashes) {
    // The estimator is only useful for non-unique indexes
    TRI_ASSERT(!_unique);
    _estimator->insert(it);
  }

  return res;
}

/// @brief removes a document from the index
int RocksDBVPackIndex::remove(transaction::Methods* trx,
                              TRI_voc_rid_t revisionId, VPackSlice const& doc,
                              bool isRollback) {
  std::vector<RocksDBKey> elements;
  std::vector<uint64_t> hashes;

  int res;
  try {
    transaction::BuilderLeaser leased(trx);
    res = fillElement(*(leased.get()), revisionId, doc, elements, hashes);
  } catch (...) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    return res;
  }

  RocksDBMethods* mthds = rocksutils::toRocksMethods(trx);

  size_t const count = elements.size();
  for (size_t i = 0; i < count; ++i) {
    arangodb::Result r = mthds->Delete(_cf, elements[i]);
    if (!r.ok()) {
      res = r.errorNumber();
    }
  }

  for (auto& it : hashes) {
    // The estimator is only useful for non-unique indexes
    TRI_ASSERT(!_unique);
    _estimator->remove(it);
  }

  return res;
}

int RocksDBVPackIndex::removeRaw(RocksDBMethods* writeBatch,
                                 TRI_voc_rid_t revisionId,
                                 VPackSlice const& doc) {
  std::vector<RocksDBKey> elements;
  std::vector<uint64_t> hashes;

  int res;
  try {
    VPackBuilder leased;
    res = fillElement(leased, revisionId, doc, elements, hashes);
  } catch (...) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    return res;
  }

  size_t const count = elements.size();
  for (size_t i = 0; i < count; ++i) {
    writeBatch->Delete(_cf, elements[i]);
  }

  for (auto& it : hashes) {
    // The estimator is only useful for non-unique indexes
    TRI_ASSERT(!_unique);
    _estimator->remove(it);
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief called when the index is dropped
int RocksDBVPackIndex::drop() {
  // First drop the cache; all indexes can work without it.
  RocksDBIndex::drop();
  if (_unique) {
    return rocksutils::removeLargeRange(
               rocksutils::globalRocksDB(),
               RocksDBKeyBounds::UniqueIndex(_objectId))
        .errorNumber();
  } else {
    return rocksutils::removeLargeRange(
               rocksutils::globalRocksDB(),
               RocksDBKeyBounds::IndexEntries(_objectId))
        .errorNumber();
  }
}

/// @brief attempts to locate an entry in the index
/// Warning: whoever calls this function is responsible for destroying
/// the RocksDBVPackIndexIterator* results
RocksDBVPackIndexIterator* RocksDBVPackIndex::lookup(
    transaction::Methods* trx, ManagedDocumentResult* mmdr,
    VPackSlice const searchValues, bool reverse) const {
  TRI_ASSERT(searchValues.isArray());
  TRI_ASSERT(searchValues.length() <= _fields.size());

  VPackBuilder leftSearch;
  VPackBuilder rightSearch;

  VPackSlice lastNonEq;
  leftSearch.openArray();
  for (auto const& it : VPackArrayIterator(searchValues)) {
    TRI_ASSERT(it.isObject());
    VPackSlice eq = it.get(StaticStrings::IndexEq);
    if (eq.isNone()) {
      lastNonEq = it;
      break;
    }
    leftSearch.add(eq);
  }

  VPackSlice leftBorder;
  VPackSlice rightBorder;

  if (lastNonEq.isNone()) {
    // We only have equality!
    rightSearch = leftSearch;

    leftSearch.add(VPackSlice::minKeySlice());
    leftSearch.close();

    rightSearch.add(VPackSlice::maxKeySlice());
    rightSearch.close();

    leftBorder = leftSearch.slice();
    rightBorder = rightSearch.slice();
  } else {
    // Copy rightSearch = leftSearch for the right border
    rightSearch = leftSearch;

    // Define lower bound
    VPackSlice lastLeft = lastNonEq.get(StaticStrings::IndexGe);
    if (!lastLeft.isNone()) {
      TRI_ASSERT(!lastNonEq.hasKey(StaticStrings::IndexGt));
      leftSearch.add(lastLeft);
      leftSearch.add(VPackSlice::minKeySlice());
      leftSearch.close();
      VPackSlice search = leftSearch.slice();
      leftBorder = search;
    } else {
      lastLeft = lastNonEq.get(StaticStrings::IndexGt);
      if (!lastLeft.isNone()) {
        leftSearch.add(lastLeft);
        leftSearch.add(VPackSlice::maxKeySlice());
        leftSearch.close();
        VPackSlice search = leftSearch.slice();
        leftBorder = search;
      } else {
        // No lower bound set; default to (null <= x)
        leftSearch.add(VPackSlice::minKeySlice());
        leftSearch.close();
        VPackSlice search = leftSearch.slice();
        leftBorder = search;
      }
    }

    // Define upper bound
    VPackSlice lastRight = lastNonEq.get(StaticStrings::IndexLe);
    if (!lastRight.isNone()) {
      TRI_ASSERT(!lastNonEq.hasKey(StaticStrings::IndexLt));
      rightSearch.add(lastRight);
      rightSearch.add(VPackSlice::maxKeySlice());
      rightSearch.close();
      VPackSlice search = rightSearch.slice();
      rightBorder = search;
    } else {
      lastRight = lastNonEq.get(StaticStrings::IndexLt);
      if (!lastRight.isNone()) {
        rightSearch.add(lastRight);
        rightSearch.add(VPackSlice::minKeySlice());
        rightSearch.close();
        VPackSlice search = rightSearch.slice();
        rightBorder = search;
      } else {
        // No upper bound set; default to (x <= INFINITY)
        rightSearch.add(VPackSlice::maxKeySlice());
        rightSearch.close();
        VPackSlice search = rightSearch.slice();
        rightBorder = search;
      }
    }
  }

  // Secured by trx. The shared_ptr index stays valid in
  // _collection at least as long as trx is running.
  // Same for the iterator.
  auto physical = static_cast<RocksDBCollection*>(_collection->getPhysical());
  return new RocksDBVPackIndexIterator(_collection, trx, mmdr, this, reverse,
                                       leftBorder, rightBorder);
}

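// Worked example (illustration only, not from the original sources): for an
// index on ["a", "b"] and searchValues of the form
//   [ { IndexEq: 1 }, { IndexGe: 5 } ]
// lookup() builds the borders
//   leftBorder  = [ 1, 5, _minKey ]
//   rightBorder = [ 1, _maxKey ]
// so the iterator scans exactly the entries with a == 1 and b >= 5. If all
// attributes use IndexEq, the borders are the equality prefix extended by
// _minKey and _maxKey respectively.
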
bool RocksDBVPackIndex::accessFitsIndex(
    arangodb::aql::AstNode const* access, arangodb::aql::AstNode const* other,
    arangodb::aql::AstNode const* op, arangodb::aql::Variable const* reference,
    std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>>&
        found,
    std::unordered_set<std::string>& nonNullAttributes,
    bool isExecution) const {
  if (!canUseConditionPart(access, other, op, reference, nonNullAttributes,
                           isExecution)) {
    return false;
  }

  arangodb::aql::AstNode const* what = access;
  std::pair<arangodb::aql::Variable const*,
            std::vector<arangodb::basics::AttributeName>>
      attributeData;

  if (op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
    if (!what->isAttributeAccessForVariable(attributeData) ||
        attributeData.first != reference) {
      // this access is not referencing this collection
      return false;
    }
    if (arangodb::basics::TRI_AttributeNamesHaveExpansion(
            attributeData.second)) {
      // doc.value[*] == 'value'
      return false;
    }
    if (isAttributeExpanded(attributeData.second)) {
      // doc.value == 'value' (with an array index)
      return false;
    }
  } else {
    // ok, we do have an IN here... check if it's something like
    // 'value' IN doc.value[*]
    TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN);
    bool canUse = false;

    if (what->isAttributeAccessForVariable(attributeData) &&
        attributeData.first == reference &&
        !arangodb::basics::TRI_AttributeNamesHaveExpansion(
            attributeData.second) &&
        attributeMatches(attributeData.second)) {
      // doc.value IN 'value'
      // can use this index
      canUse = true;
    } else {
      // check for 'value' IN doc.value AND 'value' IN doc.value[*]
      what = other;
      if (what->isAttributeAccessForVariable(attributeData) &&
          attributeData.first == reference &&
          isAttributeExpanded(attributeData.second) &&
          attributeMatches(attributeData.second)) {
        canUse = true;
      }
    }

    if (!canUse) {
      return false;
    }
  }

  std::vector<arangodb::basics::AttributeName> const& fieldNames =
      attributeData.second;

  for (size_t i = 0; i < _fields.size(); ++i) {
    if (_fields[i].size() != fieldNames.size()) {
      // attribute path length differs
      continue;
    }

    if (this->isAttributeExpanded(i) &&
        op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
      // whether or not this attribute matches, an expanded attribute
      // can only be used with IN
      continue;
    }

    bool match = arangodb::basics::AttributeName::isIdentical(_fields[i],
                                                              fieldNames, true);

    if (match) {
      // mark ith attribute as being covered
      auto it = found.find(i);

      if (it == found.end()) {
        found.emplace(i, std::vector<arangodb::aql::AstNode const*>{op});
      } else {
        (*it).second.emplace_back(op);
      }
      TRI_IF_FAILURE("PersistentIndex::accessFitsIndex") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }
      TRI_IF_FAILURE("SkiplistIndex::accessFitsIndex") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }
      TRI_IF_FAILURE("HashIndex::accessFitsIndex") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }

      return true;
    }
  }

  return false;
}

void RocksDBVPackIndex::matchAttributes(
    arangodb::aql::AstNode const* node,
    arangodb::aql::Variable const* reference,
    std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>>&
        found,
    size_t& values, std::unordered_set<std::string>& nonNullAttributes,
    bool isExecution) const {
  for (size_t i = 0; i < node->numMembers(); ++i) {
    auto op = node->getMember(i);

    switch (op->type) {
      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ:
      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT:
      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE:
      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT:
      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE:
        TRI_ASSERT(op->numMembers() == 2);
        accessFitsIndex(op->getMember(0), op->getMember(1), op, reference,
                        found, nonNullAttributes, isExecution);
        accessFitsIndex(op->getMember(1), op->getMember(0), op, reference,
                        found, nonNullAttributes, isExecution);
        break;

      case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN:
        if (accessFitsIndex(op->getMember(0), op->getMember(1), op, reference,
                            found, nonNullAttributes, isExecution)) {
          auto m = op->getMember(1);
          if (m->isArray() && m->numMembers() > 1) {
            // attr IN [ a, b, c ] => this will produce multiple items, so
            // count them!
            values += m->numMembers() - 1;
          }
        }
        break;

      default:
        break;
    }
  }
}

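// Illustration (not from the original sources): for the condition
//   doc.a == 1 && doc.b IN [10, 20, 30]
// matchAttributes() records both operator nodes in `found` (keyed by the
// position of "a" resp. "b" in _fields) and adds 2 to `values`, since the
// IN list will fan out into three separate index lookups.
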
bool RocksDBVPackIndex::supportsFilterCondition(
    arangodb::aql::AstNode const* node,
    arangodb::aql::Variable const* reference, size_t itemsInIndex,
    size_t& estimatedItems, double& estimatedCost) const {
  // mmfiles failure point compat
  if (this->type() == Index::TRI_IDX_TYPE_HASH_INDEX) {
    TRI_IF_FAILURE("SimpleAttributeMatcher::accessFitsIndex") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
  }

  std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
  std::unordered_set<std::string> nonNullAttributes;
  size_t values = 0;
  matchAttributes(node, reference, found, values, nonNullAttributes, false);

  bool lastContainsEquality = true;
  size_t attributesCovered = 0;
  size_t attributesCoveredByEquality = 0;
  double equalityReductionFactor = 20.0;
  estimatedCost = static_cast<double>(itemsInIndex);

  for (size_t i = 0; i < _fields.size(); ++i) {
    auto it = found.find(i);

    if (it == found.end()) {
      // index attribute not covered by condition
      break;
    }

    // check if the current condition contains an equality condition
    auto const& nodes = (*it).second;
    bool containsEquality = false;
    for (size_t j = 0; j < nodes.size(); ++j) {
      if (nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ ||
          nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
        containsEquality = true;
        break;
      }
    }

    if (!lastContainsEquality) {
      // unsupported condition. must abort
      break;
    }

    ++attributesCovered;
    if (containsEquality) {
      ++attributesCoveredByEquality;
      estimatedCost /= equalityReductionFactor;

      // decrease the effect of the equality reduction factor
      equalityReductionFactor *= 0.25;
      if (equalityReductionFactor < 2.0) {
        // equalityReductionFactor shouldn't get too low
        equalityReductionFactor = 2.0;
      }
    } else {
      // quick estimate for the potential reductions caused by the conditions
      if (nodes.size() >= 2) {
        // at least two (non-equality) conditions. probably a range with lower
        // and upper bound defined
        estimatedCost /= 7.5;
      } else {
        // one (non-equality) condition. this is either a lower or an upper
        // bound
        estimatedCost /= 2.0;
      }
    }

    lastContainsEquality = containsEquality;
  }

  if (values == 0) {
    values = 1;
  }

  if (attributesCoveredByEquality == _fields.size() && unique()) {
    // index is unique and condition covers all attributes by equality
    if (estimatedItems >= values) {
      // reduce costs due to uniqueness
      estimatedItems = values;
      estimatedCost = static_cast<double>(estimatedItems);
    } else {
      // cost is already low... now slightly prioritize the unique index
      estimatedCost *= 0.995;
    }
    return true;
  }

  if (attributesCovered > 0 &&
      (!_sparse || attributesCovered == _fields.size())) {
    // if the condition contains at least one index attribute and the index is
    // not sparse, or the index is sparse and all attributes are covered by the
    // condition, then it can be used (note: additional checks for condition
    // parts in sparse indexes are contained in Index::canUseConditionPart)
    estimatedItems = static_cast<size_t>((std::max)(
        static_cast<size_t>(estimatedCost * values), static_cast<size_t>(1)));
    estimatedCost *= static_cast<double>(values);
    return true;
  }

  // no condition
  estimatedItems = itemsInIndex;
  estimatedCost = static_cast<double>(estimatedItems);
  return false;
}

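// Worked cost example (illustration only, not from the original sources):
// with itemsInIndex = 10000 and a condition covering two attributes by
// equality, the loop divides the cost by 20 for the first attribute and by
// 5 (= 20 * 0.25) for the second:
//   10000 / 20 / 5 = 100
// A trailing range over a third attribute with both bounds would divide by
// another 7.5, giving roughly 13.3 as the final estimatedCost.
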
bool RocksDBVPackIndex::supportsSortCondition(
    arangodb::aql::SortCondition const* sortCondition,
    arangodb::aql::Variable const* reference, size_t itemsInIndex,
    double& estimatedCost, size_t& coveredAttributes) const {
  TRI_ASSERT(sortCondition != nullptr);

  if (!_sparse) {
    // only non-sparse indexes can be used for sorting
    if (!_useExpansion && sortCondition->isUnidirectional() &&
        sortCondition->isOnlyAttributeAccess()) {
      coveredAttributes = sortCondition->coveredAttributes(reference, _fields);

      if (coveredAttributes >= sortCondition->numAttributes()) {
        // sort is fully covered by index. no additional sort costs!
        // forward iteration does not have high costs
        estimatedCost = itemsInIndex * 0.001;
        if (sortCondition->isDescending()) {
          // reverse iteration has higher costs than forward iteration
          estimatedCost *= 4;
        }
        return true;
      } else if (coveredAttributes > 0) {
        estimatedCost = (itemsInIndex / coveredAttributes) *
                        std::log2(static_cast<double>(itemsInIndex));
        if (sortCondition->isAscending()) {
          // reverse iteration is more expensive
          estimatedCost *= 4;
        }
        return true;
      }
    }
  }

  coveredAttributes = 0;
  // by default no sort conditions are supported
  if (itemsInIndex > 0) {
    estimatedCost = itemsInIndex * std::log2(static_cast<double>(itemsInIndex));
    // slightly penalize this type of index against other indexes which
    // are in memory
    estimatedCost *= 1.05;
  } else {
    estimatedCost = 0.0;
  }
  return false;
}

IndexIterator* RocksDBVPackIndex::iteratorForCondition(
    transaction::Methods* trx, ManagedDocumentResult* mmdr,
    arangodb::aql::AstNode const* node,
    arangodb::aql::Variable const* reference, bool reverse) {
  VPackBuilder searchValues;
  searchValues.openArray();
  bool needNormalize = false;
  if (node == nullptr) {
    // We only use this index for sort. Empty searchValue
    VPackArrayBuilder guard(&searchValues);

    TRI_IF_FAILURE("PersistentIndex::noSortIterator") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
    TRI_IF_FAILURE("SkiplistIndex::noSortIterator") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
    TRI_IF_FAILURE("HashIndex::noSortIterator") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
  } else {
    // Create the search values for the lookup
    VPackArrayBuilder guard(&searchValues);

    std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>>
        found;
    std::unordered_set<std::string> nonNullAttributes;
    size_t unused = 0;
    matchAttributes(node, reference, found, unused, nonNullAttributes, true);

    // found contains all attributes that are relevant for this node.
    // It might be less than fields().
    //
    // Handle the first attributes. They can only be == or IN and only
    // one node per attribute

    auto getValueAccess = [&](arangodb::aql::AstNode const* comp,
                              arangodb::aql::AstNode const*& access,
                              arangodb::aql::AstNode const*& value) -> bool {
      access = comp->getMember(0);
      value = comp->getMember(1);
      std::pair<arangodb::aql::Variable const*,
                std::vector<arangodb::basics::AttributeName>>
          paramPair;
      if (!(access->isAttributeAccessForVariable(paramPair) &&
            paramPair.first == reference)) {
        access = comp->getMember(1);
        value = comp->getMember(0);
        if (!(access->isAttributeAccessForVariable(paramPair) &&
              paramPair.first == reference)) {
          // Neither side is a correct attribute access; this should not
          // happen and indicates an error in the optimizer
          TRI_ASSERT(false);
        }
        return true;
      }
      return false;
    };

    size_t usedFields = 0;
    for (; usedFields < _fields.size(); ++usedFields) {
      auto it = found.find(usedFields);
      if (it == found.end()) {
        // We are either done, or this is a range.
        // Continue with the more complicated loop below
        break;
      }

      auto comp = it->second[0];
      TRI_ASSERT(comp->numMembers() == 2);
      arangodb::aql::AstNode const* access = nullptr;
      arangodb::aql::AstNode const* value = nullptr;
      getValueAccess(comp, access, value);
      // We found an access for this field

      if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ) {
        searchValues.openObject();
        searchValues.add(VPackValue(StaticStrings::IndexEq));
        TRI_IF_FAILURE("PersistentIndex::permutationEQ") {
          THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
        }
        TRI_IF_FAILURE("SkiplistIndex::permutationEQ") {
          THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
        }
        TRI_IF_FAILURE("HashIndex::permutationEQ") {
          THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
        }
      } else if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
        if (isAttributeExpanded(usedFields)) {
          searchValues.openObject();
          searchValues.add(VPackValue(StaticStrings::IndexEq));
          TRI_IF_FAILURE("PersistentIndex::permutationArrayIN") {
            THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
          }
          TRI_IF_FAILURE("SkiplistIndex::permutationArrayIN") {
            THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
          }
          TRI_IF_FAILURE("HashIndex::permutationArrayIN") {
            THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
          }
        } else {
          needNormalize = true;
          searchValues.openObject();
          searchValues.add(VPackValue(StaticStrings::IndexIn));
        }
      } else {
        // This is a one-sided range
        break;
      }
      // We have to add the value always, the key was added before
      value->toVelocyPackValue(searchValues);
      searchValues.close();
    }

    // Now handle the next element, which might be a range
    if (usedFields < _fields.size()) {
      auto it = found.find(usedFields);
      if (it != found.end()) {
        auto rangeConditions = it->second;
        TRI_ASSERT(rangeConditions.size() <= 2);

        VPackObjectBuilder searchElement(&searchValues);
        for (auto& comp : rangeConditions) {
          TRI_ASSERT(comp->numMembers() == 2);
          arangodb::aql::AstNode const* access = nullptr;
          arangodb::aql::AstNode const* value = nullptr;
          bool isReverseOrder = getValueAccess(comp, access, value);
          // Add the key
          switch (comp->type) {
            case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT:
              if (isReverseOrder) {
                searchValues.add(VPackValue(StaticStrings::IndexGt));
              } else {
                searchValues.add(VPackValue(StaticStrings::IndexLt));
              }
              break;
            case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE:
              if (isReverseOrder) {
                searchValues.add(VPackValue(StaticStrings::IndexGe));
              } else {
                searchValues.add(VPackValue(StaticStrings::IndexLe));
              }
              break;
            case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT:
              if (isReverseOrder) {
                searchValues.add(VPackValue(StaticStrings::IndexLt));
              } else {
                searchValues.add(VPackValue(StaticStrings::IndexGt));
              }
              break;
            case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE:
              if (isReverseOrder) {
                searchValues.add(VPackValue(StaticStrings::IndexLe));
              } else {
                searchValues.add(VPackValue(StaticStrings::IndexGe));
              }
              break;
            default:
              // unsupported right now. Should have been rejected by
              // supportsFilterCondition
              TRI_ASSERT(false);
              return new EmptyIndexIterator(_collection, trx, mmdr, this);
          }
          value->toVelocyPackValue(searchValues);
        }
      }
    }
  }
  searchValues.close();

  TRI_IF_FAILURE("PersistentIndex::noIterator") {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }
  TRI_IF_FAILURE("SkiplistIndex::noIterator") {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }
  TRI_IF_FAILURE("HashIndex::noIterator") {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }

  if (needNormalize) {
    VPackBuilder expandedSearchValues;
    expandInSearchValues(searchValues.slice(), expandedSearchValues);
    VPackSlice expandedSlice = expandedSearchValues.slice();
    std::vector<IndexIterator*> iterators;
    try {
      for (auto const& val : VPackArrayIterator(expandedSlice)) {
        auto iterator = lookup(trx, mmdr, val, reverse);
        try {
          iterators.push_back(iterator);
        } catch (...) {
          // avoid a leak
          delete iterator;
          throw;
        }
      }
      if (reverse) {
        std::reverse(iterators.begin(), iterators.end());
      }
    } catch (...) {
      for (auto& it : iterators) {
        delete it;
      }
      throw;
    }
    return new MultiIndexIterator(_collection, trx, mmdr, this, iterators);
  }

  VPackSlice searchSlice = searchValues.slice();
  TRI_ASSERT(searchSlice.length() == 1);
  searchSlice = searchSlice.at(0);
  return lookup(trx, mmdr, searchSlice, reverse);
}

/// @brief specializes the condition for use with the index
arangodb::aql::AstNode* RocksDBVPackIndex::specializeCondition(
    arangodb::aql::AstNode* node,
    arangodb::aql::Variable const* reference) const {
  // mmfiles failure compat
  if (this->type() == Index::TRI_IDX_TYPE_HASH_INDEX) {
    TRI_IF_FAILURE("SimpleAttributeMatcher::specializeAllChildrenEQ") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
    TRI_IF_FAILURE("SimpleAttributeMatcher::specializeAllChildrenIN") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }
  }

  std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
  std::unordered_set<std::string> nonNullAttributes;
  size_t values = 0;
  matchAttributes(node, reference, found, values, nonNullAttributes, false);

  std::vector<arangodb::aql::AstNode const*> children;
  bool lastContainsEquality = true;

  for (size_t i = 0; i < _fields.size(); ++i) {
    auto it = found.find(i);

    if (it == found.end()) {
      // index attribute not covered by condition
      break;
    }

    // check if the current condition contains an equality condition
    auto& nodes = (*it).second;
    bool containsEquality = false;
    for (size_t j = 0; j < nodes.size(); ++j) {
      if (nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ ||
          nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
        containsEquality = true;
        break;
      }
    }

    if (!lastContainsEquality) {
      // unsupported condition. must abort
      break;
    }

    std::sort(nodes.begin(), nodes.end(),
              [](arangodb::aql::AstNode const* lhs,
                 arangodb::aql::AstNode const* rhs) -> bool {
                return sortWeight(lhs) < sortWeight(rhs);
              });

    lastContainsEquality = containsEquality;
    std::unordered_set<int> operatorsFound;
    for (auto& it : nodes) {
      // do not let duplicate or related operators pass
      if (isDuplicateOperator(it, operatorsFound)) {
        continue;
      }

      operatorsFound.emplace(static_cast<int>(it->type));
      children.emplace_back(it);
    }
  }

  while (node->numMembers() > 0) {
    node->removeMemberUnchecked(0);
  }

  for (auto& it : children) {
    node->addMember(it);
  }
  return node;
}

bool RocksDBVPackIndex::isDuplicateOperator(
    arangodb::aql::AstNode const* node,
    std::unordered_set<int> const& operatorsFound) const {
  auto type = node->type;
  if (operatorsFound.find(static_cast<int>(type)) != operatorsFound.end()) {
    // duplicate operator
    return true;
  }

  if (operatorsFound.find(
          static_cast<int>(arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) !=
          operatorsFound.end() ||
      operatorsFound.find(
          static_cast<int>(arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN)) !=
          operatorsFound.end()) {
    return true;
  }

  bool duplicate = false;
  switch (type) {
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE)) !=
                  operatorsFound.end();
      break;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT)) !=
                  operatorsFound.end();
      break;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE)) !=
                  operatorsFound.end();
      break;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT)) !=
                  operatorsFound.end();
      break;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN)) !=
                  operatorsFound.end();
      break;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN:
      duplicate = operatorsFound.find(static_cast<int>(
                      arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) !=
                  operatorsFound.end();
      break;
    default: {
      // ignore
    }
  }

  return duplicate;
}

int RocksDBVPackIndex::cleanup() {
  rocksdb::TransactionDB* db = rocksutils::globalRocksDB();
  rocksdb::CompactRangeOptions opts;
  RocksDBKeyBounds bounds = _unique ? RocksDBKeyBounds::UniqueIndex(_objectId)
                                    : RocksDBKeyBounds::IndexEntries(_objectId);
  rocksdb::Slice b = bounds.start(), e = bounds.end();
  LOG_TOPIC(DEBUG, Logger::FIXME) << "compacting index range " << bounds;
  db->CompactRange(opts, &b, &e);
  return TRI_ERROR_NO_ERROR;
}

void RocksDBVPackIndex::serializeEstimate(std::string& output) const {
  TRI_ASSERT(!ServerState::instance()->isCoordinator());
  if (!_unique) {
    TRI_ASSERT(_estimator != nullptr);
    _estimator->serialize(output);
  }
}

bool RocksDBVPackIndex::deserializeEstimate(RocksDBCounterManager* mgr) {
  TRI_ASSERT(!ServerState::instance()->isCoordinator());
  if (_unique) {
    return true;
  }
  // We simply drop the current estimator and steal the one from recovery.
  // We are then safe from resizing issues with our _estimator format
  // and will use the old size.

  TRI_ASSERT(mgr != nullptr);
  auto tmp = mgr->stealIndexEstimator(_objectId);
  if (tmp == nullptr) {
    // We expected to receive a stored index estimate, but got none.
    // We use the freshly created estimator but have to recompute it.
    return false;
  }
  _estimator.swap(tmp);
  TRI_ASSERT(_estimator != nullptr);
  return true;
}

void RocksDBVPackIndex::recalculateEstimates() {
  if (unique()) {
    return;
  }
  TRI_ASSERT(_estimator != nullptr);
  _estimator->clear();

  auto bounds = RocksDBKeyBounds::IndexEntries(_objectId);
  rocksutils::iterateBounds(bounds,
                            [&](rocksdb::Iterator* it) {
                              uint64_t hash =
                                  RocksDBVPackIndex::HashForKey(it->key());
                              _estimator->insert(hash);
                            },
                            arangodb::RocksDBColumnFamily::index());
}

Result RocksDBVPackIndex::postprocessRemove(transaction::Methods* trx,
                                            rocksdb::Slice const& key,
                                            rocksdb::Slice const& value) {
  if (!unique()) {
    uint64_t hash = RocksDBVPackIndex::HashForKey(key);
    _estimator->remove(hash);
  }
  return {TRI_ERROR_NO_ERROR};
}