mirror of https://gitee.com/bigwinds/arangodb
1078 lines
36 KiB
C++
1078 lines
36 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Jan Steemann
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "Aql/Ast.h"
|
|
#include "Aql/AstNode.h"
|
|
#include "Aql/Variable.h"
|
|
#include "Basics/Exceptions.h"
|
|
#include "Basics/HashSet.h"
|
|
#include "Basics/StaticStrings.h"
|
|
#include "Basics/StringUtils.h"
|
|
#include "Basics/VelocyPackHelper.h"
|
|
#include "Basics/datetime.h"
|
|
#include "Cluster/ServerState.h"
|
|
#include "Index.h"
|
|
#include "IResearch/IResearchCommon.h"
|
|
#include "StorageEngine/EngineSelectorFeature.h"
|
|
#include "StorageEngine/StorageEngine.h"
|
|
#include "VocBase/LogicalCollection.h"
|
|
#include "VocBase/ticks.h"
|
|
|
|
#include <date/date.h>
|
|
#include <velocypack/Iterator.h>
|
|
#include <velocypack/StringRef.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
#include <iostream>
|
|
|
|
using namespace arangodb;
|
|
using namespace std::chrono;
|
|
using namespace date;
|
|
|
|
namespace {
|
|
|
|
/// @brief the _key attribute, which, when used in an index, will implictly make it unique
|
|
/// (note that we must not refer to StaticStrings::KeyString here to avoid an init-order-fiasco
|
|
std::vector<arangodb::basics::AttributeName> const KeyAttribute{
|
|
arangodb::basics::AttributeName("_key", false)};
|
|
|
|
/// @brief whether at least one of the given attribute paths contains an
/// expansion, as reported by TRI_AttributeNamesHaveExpansion
/// @param fields the attribute paths to inspect
/// @return true as soon as one path with an expansion is found, false otherwise
bool hasExpansion(std::vector<std::vector<arangodb::basics::AttributeName>> const& fields) {
  return std::any_of(fields.begin(), fields.end(),
                     [](std::vector<arangodb::basics::AttributeName> const& path) {
                       return TRI_AttributeNamesHaveExpansion(path);
                     });
}
|
|
|
|
/// @brief set fields from slice
|
|
std::vector<std::vector<arangodb::basics::AttributeName>> parseFields(VPackSlice const& fields,
|
|
bool allowExpansion) {
|
|
std::vector<std::vector<arangodb::basics::AttributeName>> result;
|
|
if (!fields.isArray()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_ATTRIBUTE_PARSER_FAILED,
|
|
"invalid index description");
|
|
}
|
|
|
|
size_t const n = static_cast<size_t>(fields.length());
|
|
result.reserve(n);
|
|
|
|
for (auto const& name : VPackArrayIterator(fields)) {
|
|
if (!name.isString()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_ATTRIBUTE_PARSER_FAILED,
|
|
"invalid index description");
|
|
}
|
|
|
|
std::vector<arangodb::basics::AttributeName> parsedAttributes;
|
|
TRI_ParseAttributeString(name.copyString(), parsedAttributes, allowExpansion);
|
|
result.emplace_back(std::move(parsedAttributes));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/// @brief check whether the accessed attribute may evaluate to null
/// @param op the comparison operator node (must not be a nullptr)
/// @param access the attribute access node (must not be a nullptr)
/// @param nonNullAttributes stringified accesses already known to be non-null
/// @return false if the attribute is proven to be non-null, true if unsure
bool canBeNull(arangodb::aql::AstNode const* op, arangodb::aql::AstNode const* access,
               std::unordered_set<std::string> const& nonNullAttributes) {
  TRI_ASSERT(op != nullptr);
  TRI_ASSERT(access != nullptr);

  // "a.b": attribute access directly on a reference
  bool const isDirectAttributeAccess =
      access->type == arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS &&
      access->getMemberUnchecked(0)->type == arangodb::aql::NODE_TYPE_REFERENCE;

  if (isDirectAttributeAccess) {
    // _key, _id and _rev can never be null
    auto const attributeName = access->getStringRef();
    if (attributeName == StaticStrings::KeyString ||
        attributeName == StaticStrings::IdString ||
        attributeName == StaticStrings::RevString) {
      return false;
    }
  }

  switch (op->type) {
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT:
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE:
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ:
      if (op->getExcludesNull()) {
        // the operator was already flagged as excluding "null"
        return false;
      }
      break;
    default:
      break;
  }

  try {
    if (nonNullAttributes.count(access->toString()) > 0) {
      // this access was marked as non-null before
      return false;
    }
  } catch (...) {
    // stringification may throw; in that case we remain unsure
  }

  // nothing proves that the attribute is non-null
  return true;
}
|
|
|
|
/// @brief record that the accessed attribute cannot be null
/// @param op the comparison operator node; for <, <= and == it additionally
///        gets its "excludes null" flag set (its constness is cast away)
/// @param access the attribute access node whose string form is remembered
/// @param nonNullAttributes receives the stringified access
void markAsNonNull(arangodb::aql::AstNode const* op, arangodb::aql::AstNode const* access,
                   std::unordered_set<std::string>& nonNullAttributes) {
  TRI_ASSERT(op != nullptr);
  TRI_ASSERT(access != nullptr);

  switch (op->type) {
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT:
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE:
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ:
      // flagging the node itself is only supported for these operators
      const_cast<arangodb::aql::AstNode*>(op)->setExcludesNull(true);
      break;
    default:
      // all other operators are left untouched
      break;
  }

  try {
    nonNullAttributes.emplace(access->toString());
  } catch (...) {
    // stringification may throw; the attribute then simply stays unrecorded
  }
}
|
|
|
|
/// @brief compare a length-delimited type name with a NUL-terminated literal
/// @param type start of the candidate name (need not be NUL-terminated)
/// @param len number of bytes of `type` to consider
/// @param expected NUL-terminated name to compare against
/// @return true if both have the same length and identical bytes
bool typeMatch(char const* type, size_t len, char const* expected) {
  if (::strlen(expected) != len) {
    return false;
  }
  return ::memcmp(type, expected, len) == 0;
}
|
|
|
|
/// @brief compute the name an index gets when its definition carries none
/// @param slice the index definition; its type attribute must be a string
/// @return the fixed primary/edge index names for those index types (for edge
///         indexes on RocksDB depending on whether the first field is the
///         "from" attribute), otherwise "idx_" followed by the definition's
///         id attribute or, if that is missing, a freshly generated tick
std::string defaultIndexName(VPackSlice const& slice) {
  auto const indexType =
      arangodb::Index::type(slice.get(arangodb::StaticStrings::IndexType).copyString());

  if (indexType == arangodb::Index::IndexType::TRI_IDX_TYPE_PRIMARY_INDEX) {
    return arangodb::StaticStrings::IndexNamePrimary;
  }

  if (indexType == arangodb::Index::IndexType::TRI_IDX_TYPE_EDGE_INDEX) {
    if (!EngineSelectorFeature::isRocksDB()) {
      return arangodb::StaticStrings::IndexNameEdge;
    }

    // RocksDB: the name depends on the attribute the edge index covers
    auto const fields = slice.get(arangodb::StaticStrings::IndexFields);
    TRI_ASSERT(fields.isArray());
    auto const firstField = fields.at(0);
    TRI_ASSERT(firstField.isString());

    if (firstField.isEqualString(arangodb::StaticStrings::FromString)) {
      return arangodb::StaticStrings::IndexNameEdgeFrom;
    }
    return arangodb::StaticStrings::IndexNameEdgeTo;
  }

  // note: the fallback tick is generated even if the definition has an id
  std::string const idString = arangodb::basics::VelocyPackHelper::getStringValue(
      slice, arangodb::StaticStrings::IndexId.c_str(),
      std::to_string(TRI_NewTickServer()));

  std::string name("idx_");
  name.append(idString);
  return name;
}
|
|
|
|
} // namespace
|
|
|
|
/// @brief filter costs for a supported condition that costs nothing
/// @return costs with supportsCondition set and all numeric values zero
Index::FilterCosts Index::FilterCosts::zeroCosts() {
  Index::FilterCosts zero;
  zero.estimatedCosts = 0;
  zero.estimatedItems = 0;
  zero.coveredAttributes = 0;
  zero.supportsCondition = true;
  return zero;
}
|
|
|
|
/// @brief filter costs for an unsupported condition
/// @param itemsInIndex number of items in the index
/// @return costs with supportsCondition unset, no covered attributes, and
///         both the item and cost estimate equal to `itemsInIndex`
Index::FilterCosts Index::FilterCosts::defaultCosts(size_t itemsInIndex) {
  Index::FilterCosts fallback;
  fallback.estimatedCosts = static_cast<double>(itemsInIndex);
  fallback.estimatedItems = itemsInIndex;
  fallback.coveredAttributes = 0;
  fallback.supportsCondition = false;
  return fallback;
}
|
|
|
|
/// @brief sort costs for a supported sort condition that costs nothing
/// @param coveredAttributes number of covered attributes to report
/// @return costs with supportsCondition set and zero estimated costs
Index::SortCosts Index::SortCosts::zeroCosts(size_t coveredAttributes) {
  Index::SortCosts zero;
  zero.estimatedCosts = 0;
  zero.supportsCondition = true;
  zero.coveredAttributes = coveredAttributes;
  return zero;
}
|
|
|
|
/// @brief sort costs for an unsupported sort condition
/// @param itemsInIndex number of items in the index
/// @param isPersistent whether the index is a persistent one
/// @return costs of n * log2(n) for n > 0 items (0 otherwise), multiplied by
///         1.05 for persistent indexes; supportsCondition keeps its default
Index::SortCosts Index::SortCosts::defaultCosts(size_t itemsInIndex, bool isPersistent) {
  Index::SortCosts costs;
  // a default-constructed SortCosts is expected to not support the condition
  TRI_ASSERT(!costs.supportsCondition);
  costs.coveredAttributes = 0;

  if (itemsInIndex > 0) {
    costs.estimatedCosts = itemsInIndex * std::log2(static_cast<double>(itemsInIndex));
  } else {
    costs.estimatedCosts = 0.0;
  }

  if (isPersistent) {
    // slight penalty in favor of indexes that are kept in memory
    costs.estimatedCosts *= 1.05;
  }

  return costs;
}
|
|
|
|
// If the Index is on a coordinator instance the index may not access the
|
|
// logical collection because it could be gone!
|
|
|
|
Index::Index(TRI_idx_iid_t iid, arangodb::LogicalCollection& collection,
|
|
std::string const& name,
|
|
std::vector<std::vector<arangodb::basics::AttributeName>> const& fields,
|
|
bool unique, bool sparse)
|
|
: _iid(iid),
|
|
_collection(collection),
|
|
_name(name),
|
|
_fields(fields),
|
|
_useExpansion(::hasExpansion(_fields)),
|
|
_unique(unique),
|
|
_sparse(sparse) {
|
|
// note: _collection can be a nullptr in the cluster coordinator case!!
|
|
}
|
|
|
|
/// @brief construct an index from a VelocyPack index definition
/// @param iid the index id
/// @param collection the collection the index belongs to
/// @param slice the definition; name, fields, unique and sparse are read from
///        it. A missing name is replaced by ::defaultIndexName(slice), missing
///        unique/sparse values default to false. Field parsing may throw.
Index::Index(TRI_idx_iid_t iid, arangodb::LogicalCollection& collection, VPackSlice const& slice)
    : _iid(iid),
      _collection(collection),
      _name(arangodb::basics::VelocyPackHelper::getStringValue(
          slice, arangodb::StaticStrings::IndexName, ::defaultIndexName(slice))),
      // whether expansions are allowed depends on the index type
      _fields(::parseFields(
          slice.get(arangodb::StaticStrings::IndexFields),
          Index::allowExpansion(Index::type(
              slice.get(arangodb::StaticStrings::IndexType).copyString())))),
      // relies on _fields being initialized before _useExpansion
      _useExpansion(::hasExpansion(_fields)),
      _unique(arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, arangodb::StaticStrings::IndexUnique, false)),
      _sparse(arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, arangodb::StaticStrings::IndexSparse, false)) {}
|
|
|
|
/// @brief destructor; the base class has nothing to release explicitly
Index::~Index() = default;
|
|
|
|
void Index::name(std::string const& newName) {
|
|
if (_name.empty()) {
|
|
_name = newName;
|
|
}
|
|
}
|
|
|
|
/// @brief return a fixed weight for the comparison operator of a node
/// @param node the operator node (dereferenced without a null check)
/// @return 1..7 for ==, IN, >, >=, <, <=, != (in this order), 42 for any
///         other node type
size_t Index::sortWeight(arangodb::aql::AstNode const* node) {
  // clang-format off
  switch (node->type) {
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: return 1;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: return 2;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: return 3;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: return 4;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: return 5;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: return 6;
    case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NE: return 7;
    default:                                          return 42; /* OPST_CIRCUS */
  }
  // clang-format on
}
|
|
|
|
/// @brief validate fields from slice
|
|
void Index::validateFields(VPackSlice const& slice) {
|
|
VPackValueLength len;
|
|
const char* idxStr = slice.get(arangodb::StaticStrings::IndexType).getString(len);
|
|
auto allowExpansion = Index::allowExpansion(Index::type(idxStr, len));
|
|
|
|
auto fields = slice.get(arangodb::StaticStrings::IndexFields);
|
|
if (!fields.isArray()) {
|
|
return;
|
|
}
|
|
|
|
for (auto const& name : VPackArrayIterator(fields)) {
|
|
if (!name.isString()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_ATTRIBUTE_PARSER_FAILED,
|
|
"invalid index description");
|
|
}
|
|
|
|
std::vector<arangodb::basics::AttributeName> parsedAttributes;
|
|
TRI_ParseAttributeString(name.copyString(), parsedAttributes, allowExpansion);
|
|
}
|
|
}
|
|
|
|
/// @brief return the index type based on a type name
|
|
Index::IndexType Index::type(char const* type, size_t len) {
|
|
if (::typeMatch(type, len, "primary")) {
|
|
return TRI_IDX_TYPE_PRIMARY_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "edge")) {
|
|
return TRI_IDX_TYPE_EDGE_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "hash")) {
|
|
return TRI_IDX_TYPE_HASH_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "skiplist")) {
|
|
return TRI_IDX_TYPE_SKIPLIST_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "ttl")) {
|
|
return TRI_IDX_TYPE_TTL_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "persistent") ||
|
|
::typeMatch(type, len, "rocksdb")) {
|
|
return TRI_IDX_TYPE_PERSISTENT_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "fulltext")) {
|
|
return TRI_IDX_TYPE_FULLTEXT_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "geo")) {
|
|
return TRI_IDX_TYPE_GEO_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "geo1")) {
|
|
return TRI_IDX_TYPE_GEO1_INDEX;
|
|
}
|
|
if (::typeMatch(type, len, "geo2")) {
|
|
return TRI_IDX_TYPE_GEO2_INDEX;
|
|
}
|
|
std::string const& tmp = arangodb::iresearch::DATA_SOURCE_TYPE.name();
|
|
if (::typeMatch(type, len, tmp.c_str())) {
|
|
return TRI_IDX_TYPE_IRESEARCH_LINK;
|
|
}
|
|
if (::typeMatch(type, len, "noaccess")) {
|
|
return TRI_IDX_TYPE_NO_ACCESS_INDEX;
|
|
}
|
|
|
|
return TRI_IDX_TYPE_UNKNOWN;
|
|
}
|
|
|
|
/// @brief return the index type based on a type name
/// @param type the type name
/// @return result of the (pointer, length) overload for the string's contents
Index::IndexType Index::type(std::string const& type) {
  char const* begin = type.c_str();
  size_t const length = type.size();
  return Index::type(begin, length);
}
|
|
|
|
/// @brief return the name of an index type
|
|
char const* Index::oldtypeName(Index::IndexType type) {
|
|
switch (type) {
|
|
case TRI_IDX_TYPE_PRIMARY_INDEX:
|
|
return "primary";
|
|
case TRI_IDX_TYPE_EDGE_INDEX:
|
|
return "edge";
|
|
case TRI_IDX_TYPE_HASH_INDEX:
|
|
return "hash";
|
|
case TRI_IDX_TYPE_SKIPLIST_INDEX:
|
|
return "skiplist";
|
|
case TRI_IDX_TYPE_TTL_INDEX:
|
|
return "ttl";
|
|
case TRI_IDX_TYPE_PERSISTENT_INDEX:
|
|
return "persistent";
|
|
case TRI_IDX_TYPE_FULLTEXT_INDEX:
|
|
return "fulltext";
|
|
case TRI_IDX_TYPE_GEO1_INDEX:
|
|
return "geo1";
|
|
case TRI_IDX_TYPE_GEO2_INDEX:
|
|
return "geo2";
|
|
case TRI_IDX_TYPE_GEO_INDEX:
|
|
return "geo";
|
|
case TRI_IDX_TYPE_IRESEARCH_LINK:
|
|
return arangodb::iresearch::DATA_SOURCE_TYPE.name().c_str();
|
|
case TRI_IDX_TYPE_NO_ACCESS_INDEX:
|
|
return "noaccess";
|
|
case TRI_IDX_TYPE_UNKNOWN: {
|
|
}
|
|
}
|
|
|
|
return "";
|
|
}
|
|
|
|
/// @brief validate an index id
|
|
bool Index::validateId(char const* key) {
|
|
char const* p = key;
|
|
|
|
while (1) {
|
|
char const c = *p;
|
|
|
|
if (c == '\0') {
|
|
return (p - key) > 0;
|
|
}
|
|
if (c >= '0' && c <= '9') {
|
|
++p;
|
|
continue;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/// @brief validate an index name
|
|
bool Index::validateName(char const* key) {
|
|
return TRI_vocbase_t::IsAllowedName(false, arangodb::velocypack::StringRef(key, strlen(key)));
|
|
}
|
|
|
|
namespace {
|
|
bool validatePrefix(char const* key, size_t* split) {
|
|
char const* p = key;
|
|
char c = *p;
|
|
|
|
// find divider
|
|
|
|
while (1) {
|
|
c = *p;
|
|
|
|
if (c == '\0') {
|
|
return false;
|
|
}
|
|
|
|
if (c == '/') {
|
|
break;
|
|
}
|
|
|
|
p++;
|
|
}
|
|
|
|
// store split position
|
|
*split = p - key;
|
|
|
|
return TRI_vocbase_t::IsAllowedName(true, arangodb::velocypack::StringRef(key, *split));
|
|
}
|
|
} // namespace
|
|
|
|
/// @brief validate an index handle (collection name + / + index id)
|
|
bool Index::validateHandle(char const* key, size_t* split) {
|
|
bool ok = validatePrefix(key, split);
|
|
// validate index id
|
|
return ok && validateId(key + *split + 1);
|
|
}
|
|
|
|
/// @brief validate an index handle (collection name + / + index name)
|
|
bool Index::validateHandleName(char const* key, size_t* split) {
|
|
bool ok = validatePrefix(key, split);
|
|
// validate index id
|
|
return ok && validateName(key + *split + 1);
|
|
}
|
|
|
|
/// @brief generate a new index id
|
|
TRI_idx_iid_t Index::generateId() { return TRI_NewTickServer(); }
|
|
|
|
/// @brief check if two index definitions share any identifiers (_id, name)
|
|
bool Index::CompareIdentifiers(velocypack::Slice const& lhs, velocypack::Slice const& rhs) {
|
|
VPackSlice lhsId = lhs.get(arangodb::StaticStrings::IndexId);
|
|
VPackSlice rhsId = rhs.get(arangodb::StaticStrings::IndexId);
|
|
if (lhsId.isString() && rhsId.isString() &&
|
|
arangodb::basics::VelocyPackHelper::equal(lhsId, rhsId, true)) {
|
|
return true;
|
|
}
|
|
|
|
VPackSlice lhsName = lhs.get(arangodb::StaticStrings::IndexName);
|
|
VPackSlice rhsName = rhs.get(arangodb::StaticStrings::IndexName);
|
|
if (lhsName.isString() && rhsName.isString() &&
|
|
arangodb::basics::VelocyPackHelper::equal(lhsName, rhsName, true)) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// @brief index comparator, used by the coordinator to detect if two index
|
|
/// contents are the same
|
|
bool Index::Compare(VPackSlice const& lhs, VPackSlice const& rhs) {
|
|
auto lhsType = lhs.get(arangodb::StaticStrings::IndexType);
|
|
TRI_ASSERT(lhsType.isString());
|
|
|
|
// type must be identical
|
|
if (!arangodb::basics::VelocyPackHelper::equal(lhsType, rhs.get(arangodb::StaticStrings::IndexType), false)) {
|
|
return false;
|
|
}
|
|
|
|
auto* engine = EngineSelectorFeature::ENGINE;
|
|
|
|
return engine && engine->indexFactory().factory(lhsType.copyString()).equal(lhs, rhs);
|
|
}
|
|
|
|
/// @brief return a contextual string for logging
|
|
std::string Index::context() const {
|
|
std::ostringstream result;
|
|
|
|
result << "index { id: " << id() << ", type: " << oldtypeName()
|
|
<< ", collection: " << _collection.vocbase().name() << "/"
|
|
<< _collection.name() << ", unique: " << (_unique ? "true" : "false")
|
|
<< ", fields: ";
|
|
result << "[";
|
|
|
|
for (size_t i = 0; i < _fields.size(); ++i) {
|
|
if (i > 0) {
|
|
result << ", ";
|
|
}
|
|
|
|
result << _fields[i];
|
|
}
|
|
|
|
result << "] }";
|
|
|
|
return result.str();
|
|
}
|
|
|
|
/// @brief create a VelocyPack representation of the index
|
|
/// base functionality (called from derived classes)
|
|
std::shared_ptr<VPackBuilder> Index::toVelocyPack(std::underlying_type<Index::Serialize>::type flags) const {
|
|
auto builder = std::make_shared<VPackBuilder>();
|
|
toVelocyPack(*builder, flags);
|
|
return builder;
|
|
}
|
|
|
|
/// @brief create a VelocyPack representation of the index
|
|
/// base functionality (called from derived classes)
|
|
/// note: needs an already-opened object as its input!
|
|
void Index::toVelocyPack(VPackBuilder& builder,
|
|
std::underlying_type<Index::Serialize>::type flags) const {
|
|
TRI_ASSERT(builder.isOpenObject());
|
|
builder.add(arangodb::StaticStrings::IndexId,
|
|
arangodb::velocypack::Value(std::to_string(_iid)));
|
|
builder.add(arangodb::StaticStrings::IndexType,
|
|
arangodb::velocypack::Value(oldtypeName(type())));
|
|
builder.add(arangodb::StaticStrings::IndexName, arangodb::velocypack::Value(name()));
|
|
|
|
builder.add(arangodb::velocypack::Value(arangodb::StaticStrings::IndexFields));
|
|
builder.openArray();
|
|
|
|
for (auto const& field : fields()) {
|
|
std::string fieldString;
|
|
TRI_AttributeNamesToString(field, fieldString);
|
|
builder.add(VPackValue(fieldString));
|
|
}
|
|
|
|
builder.close();
|
|
|
|
if (hasSelectivityEstimate() && Index::hasFlag(flags, Index::Serialize::Estimates)) {
|
|
builder.add("selectivityEstimate", VPackValue(selectivityEstimate()));
|
|
}
|
|
|
|
if (Index::hasFlag(flags, Index::Serialize::Figures)) {
|
|
builder.add("figures", VPackValue(VPackValueType::Object));
|
|
toVelocyPackFigures(builder);
|
|
builder.close();
|
|
}
|
|
}
|
|
|
|
/// @brief create a VelocyPack representation of the index figures
|
|
/// base functionality (called from derived classes)
|
|
std::shared_ptr<VPackBuilder> Index::toVelocyPackFigures() const {
|
|
auto builder = std::make_shared<VPackBuilder>();
|
|
builder->openObject();
|
|
toVelocyPackFigures(*builder);
|
|
builder->close();
|
|
return builder;
|
|
}
|
|
|
|
/// @brief create a VelocyPack representation of the index figures
|
|
/// base functionality (called from derived classes)
|
|
void Index::toVelocyPackFigures(VPackBuilder& builder) const {
|
|
TRI_ASSERT(builder.isOpenObject());
|
|
builder.add("memory", VPackValue(memory()));
|
|
}
|
|
|
|
/// @brief default implementation for matchesDefinition
|
|
bool Index::matchesDefinition(VPackSlice const& info) const {
|
|
TRI_ASSERT(info.isObject());
|
|
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
|
auto typeSlice = info.get(arangodb::StaticStrings::IndexType);
|
|
TRI_ASSERT(typeSlice.isString());
|
|
arangodb::velocypack::StringRef typeStr(typeSlice);
|
|
TRI_ASSERT(typeStr == oldtypeName());
|
|
#endif
|
|
auto value = info.get(arangodb::StaticStrings::IndexId);
|
|
|
|
if (!value.isNone()) {
|
|
// We already have an id.
|
|
if (!value.isString()) {
|
|
// Invalid ID
|
|
return false;
|
|
}
|
|
// Short circuit. If id is correct the index is identical.
|
|
arangodb::velocypack::StringRef idRef(value);
|
|
return idRef == std::to_string(_iid);
|
|
}
|
|
|
|
value = info.get(arangodb::StaticStrings::IndexFields);
|
|
|
|
if (!value.isArray()) {
|
|
return false;
|
|
}
|
|
|
|
size_t const n = static_cast<size_t>(value.length());
|
|
if (n != _fields.size()) {
|
|
return false;
|
|
}
|
|
|
|
if (_unique != arangodb::basics::VelocyPackHelper::getBooleanValue(
|
|
info, arangodb::StaticStrings::IndexUnique, false)) {
|
|
return false;
|
|
}
|
|
|
|
if (_sparse != arangodb::basics::VelocyPackHelper::getBooleanValue(
|
|
info, arangodb::StaticStrings::IndexSparse, false)) {
|
|
return false;
|
|
}
|
|
|
|
// This check takes ordering of attributes into account.
|
|
std::vector<arangodb::basics::AttributeName> translate;
|
|
for (size_t i = 0; i < n; ++i) {
|
|
translate.clear();
|
|
VPackSlice f = value.at(i);
|
|
if (!f.isString()) {
|
|
// Invalid field definition!
|
|
return false;
|
|
}
|
|
arangodb::velocypack::StringRef in(f);
|
|
TRI_ParseAttributeString(in, translate, true);
|
|
if (!arangodb::basics::AttributeName::isIdentical(_fields[i], translate, false)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/// @brief default implementation for selectivityEstimate
|
|
double Index::selectivityEstimate(arangodb::velocypack::StringRef const&) const {
|
|
if (_unique) {
|
|
return 1.0;
|
|
}
|
|
THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED);
|
|
}
|
|
|
|
/// @brief whether or not the index is implicitly unique
|
|
/// this can be the case if the index is not declared as unique, but contains a
|
|
/// unique attribute such as _key
|
|
bool Index::implicitlyUnique() const {
|
|
if (_unique) {
|
|
// a unique index is always unique
|
|
return true;
|
|
}
|
|
if (_useExpansion) {
|
|
// when an expansion such as a[*] is used, the index may not be unique, even
|
|
// if it contains attributes that are guaranteed to be unique
|
|
return false;
|
|
}
|
|
|
|
for (auto const& it : _fields) {
|
|
// if _key is contained in the index fields definition, then the index is
|
|
// implicitly unique
|
|
if (it == KeyAttribute) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// _key not contained
|
|
return false;
|
|
}
|
|
|
|
/// @brief default implementation for drop
|
|
Result Index::drop() {
|
|
return Result(); // do nothing
|
|
}
|
|
|
|
/// @brief default implementation for supportsFilterCondition
|
|
Index::FilterCosts Index::supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const&,
|
|
arangodb::aql::AstNode const* /* node */,
|
|
arangodb::aql::Variable const* /* reference */,
|
|
size_t itemsInIndex) const {
|
|
// by default no filter conditions are supported
|
|
return Index::FilterCosts::defaultCosts(itemsInIndex);
|
|
}
|
|
|
|
/// @brief default implementation for supportsSortCondition
|
|
Index::SortCosts Index::supportsSortCondition(arangodb::aql::SortCondition const* /* sortCondition */,
|
|
arangodb::aql::Variable const* /* node */,
|
|
size_t itemsInIndex) const {
|
|
// by default no sort conditions are supported
|
|
return Index::SortCosts::defaultCosts(itemsInIndex, this->isPersistent());
|
|
}
|
|
|
|
/// @brief default implementation for specializeCondition
/// must never be called: asserts and then always throws
/// @throws TRI_ERROR_INTERNAL with a message that names the index type
arangodb::aql::AstNode* Index::specializeCondition(
    arangodb::aql::AstNode* /* node */,
    arangodb::aql::Variable const* /* reference */) const {
  TRI_ASSERT(false);

  std::string message("no default implementation for specializeCondition. index type: ");
  message += typeName();
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message);
}
|
|
|
|
/// @brief default implementation for iteratorForCondition
/// must never be called: asserts and then always throws
/// @throws TRI_ERROR_INTERNAL with a message that names the index type
std::unique_ptr<IndexIterator> Index::iteratorForCondition(
    transaction::Methods* /* trx */, aql::AstNode const* /* node */,
    aql::Variable const* /* reference */, IndexIteratorOptions const& /* opts */) {
  TRI_ASSERT(false);

  std::string message("no default implementation for iteratorForCondition. index type: ");
  message += typeName();
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message);
}
|
|
|
|
/// @brief perform some base checks for an index condition part
|
|
bool Index::canUseConditionPart(arangodb::aql::AstNode const* access,
|
|
arangodb::aql::AstNode const* other,
|
|
arangodb::aql::AstNode const* op,
|
|
arangodb::aql::Variable const* reference,
|
|
std::unordered_set<std::string>& nonNullAttributes,
|
|
bool isExecution) const {
|
|
if (_sparse) {
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NIN) {
|
|
return false;
|
|
}
|
|
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN &&
|
|
(other->type == arangodb::aql::NODE_TYPE_EXPANSION ||
|
|
other->type == arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS)) {
|
|
// value IN a.b OR value IN a.b[*]
|
|
if (!access->isConstant()) {
|
|
return false;
|
|
}
|
|
|
|
/* A sparse index will store null in Array
|
|
if (access->isNullValue()) {
|
|
return false;
|
|
}
|
|
*/
|
|
} else if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN &&
|
|
access->type == arangodb::aql::NODE_TYPE_EXPANSION) {
|
|
// value[*] IN a.b
|
|
if (!other->isConstant()) {
|
|
return false;
|
|
}
|
|
|
|
/* A sparse index will store null in Array
|
|
if (other->isNullValue()) {
|
|
return false;
|
|
}
|
|
*/
|
|
} else if (access->type == arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
// a.b == value OR a.b IN values
|
|
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT) {
|
|
// > anything also excludes "null". now note that this attribute cannot
|
|
// become null range definitely exludes the "null" value
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
} else if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT ||
|
|
op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE) {
|
|
// < and <= are not supported with sparse indexes as this may include
|
|
// null values
|
|
if (::canBeNull(op, access, nonNullAttributes)) {
|
|
return false;
|
|
}
|
|
|
|
// range definitely exludes the "null" value
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
}
|
|
|
|
if (other->isConstant()) {
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NE && other->isNullValue()) {
|
|
// != null. now note that a certain attribute cannot become null
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
return true;
|
|
} else if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE &&
|
|
!other->isNullValue()) {
|
|
// >= non-null. now note that a certain attribute cannot become null
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
return true;
|
|
}
|
|
|
|
if (other->isNullValue() &&
|
|
(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ ||
|
|
op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE)) {
|
|
// == and >= null are not supported with sparse indexes for the same
|
|
// reason
|
|
if (::canBeNull(op, access, nonNullAttributes)) {
|
|
return false;
|
|
}
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
return true;
|
|
}
|
|
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN &&
|
|
other->type == arangodb::aql::NODE_TYPE_ARRAY) {
|
|
size_t const n = other->numMembers();
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
if (other->getMemberUnchecked(i)->isNullValue()) {
|
|
return false;
|
|
}
|
|
}
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
return true;
|
|
}
|
|
} else {
|
|
// !other->isConstant()
|
|
if (::canBeNull(op, access, nonNullAttributes)) {
|
|
return false;
|
|
}
|
|
|
|
// range definitely exludes the "null" value
|
|
::markAsNonNull(op, access, nonNullAttributes);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (isExecution) {
|
|
// in execution phase, we do not need to check the variable usage again
|
|
return true;
|
|
}
|
|
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NE) {
|
|
// none of the indexes can use !=, so we can exit here
|
|
// note that this function may have been called for operator !=. this is
|
|
// necessary to track the non-null attributes, e.g. attr != null, so we can
|
|
// note which attributes cannot be null and still use sparse indexes for
|
|
// these attributes
|
|
return false;
|
|
}
|
|
|
|
// test if the reference variable is contained on both sides of the expression
|
|
arangodb::HashSet<aql::Variable const*> variables;
|
|
if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN &&
|
|
(other->type == arangodb::aql::NODE_TYPE_EXPANSION ||
|
|
other->type == arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS)) {
|
|
// value IN a.b OR value IN a.b[*]
|
|
arangodb::aql::Ast::getReferencedVariables(access, variables);
|
|
if (other->type == arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS &&
|
|
variables.find(reference) != variables.end()) {
|
|
variables.clear();
|
|
arangodb::aql::Ast::getReferencedVariables(other, variables);
|
|
}
|
|
} else {
|
|
// a.b == value OR a.b IN values
|
|
if (!other->isConstant()) {
|
|
// don't look for referenced variables if we only access a
|
|
// constant value (there will be no variables then...)
|
|
arangodb::aql::Ast::getReferencedVariables(other, variables);
|
|
}
|
|
}
|
|
|
|
if (variables.find(reference) != variables.end()) {
|
|
// yes. then we cannot use an index here
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// @brief Transform the list of search slices to search values.
|
|
/// Always expects a list of lists as input.
|
|
/// Outer list represents the single lookups, inner list represents the
|
|
/// index field values.
|
|
/// This will multiply all IN entries and simply return all other
|
|
/// entries.
|
|
/// Example: Index on (a, b)
|
|
/// Input: [ [{=: 1}, {in: 2,3}], [{=:2}, {=:3}]
|
|
/// Result: [ [{=: 1}, {=: 2}],[{=:1}, {=:3}], [{=:2}, {=:3}]]
|
|
void Index::expandInSearchValues(VPackSlice const base, VPackBuilder& result) const {
|
|
TRI_ASSERT(base.isArray());
|
|
|
|
VPackArrayBuilder baseGuard(&result);
|
|
for (auto const& oneLookup : VPackArrayIterator(base)) {
|
|
TRI_ASSERT(oneLookup.isArray());
|
|
|
|
bool usesIn = false;
|
|
for (auto const& it : VPackArrayIterator(oneLookup)) {
|
|
if (it.hasKey(StaticStrings::IndexIn)) {
|
|
usesIn = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!usesIn) {
|
|
// Shortcut, no multiply
|
|
// Just copy over base
|
|
result.add(oneLookup);
|
|
return;
|
|
}
|
|
|
|
std::unordered_map<size_t, std::vector<VPackSlice>> elements;
|
|
arangodb::basics::VelocyPackHelper::VPackLess<true> sorter;
|
|
size_t n = static_cast<size_t>(oneLookup.length());
|
|
for (VPackValueLength i = 0; i < n; ++i) {
|
|
VPackSlice current = oneLookup.at(i);
|
|
if (current.hasKey(StaticStrings::IndexIn)) {
|
|
VPackSlice inList = current.get(StaticStrings::IndexIn);
|
|
if (!inList.isArray()) {
|
|
// IN value is a non-array
|
|
result.clear();
|
|
result.openArray();
|
|
return;
|
|
}
|
|
|
|
TRI_ASSERT(inList.isArray());
|
|
VPackValueLength nList = inList.length();
|
|
|
|
if (nList == 0) {
|
|
// Empty Array. short circuit, no matches possible
|
|
result.clear();
|
|
result.openArray();
|
|
return;
|
|
}
|
|
|
|
std::unordered_set<VPackSlice, arangodb::basics::VelocyPackHelper::VPackHash, arangodb::basics::VelocyPackHelper::VPackEqual>
|
|
tmp(static_cast<size_t>(nList),
|
|
arangodb::basics::VelocyPackHelper::VPackHash(),
|
|
arangodb::basics::VelocyPackHelper::VPackEqual());
|
|
|
|
for (auto const& el : VPackArrayIterator(inList)) {
|
|
tmp.emplace(el);
|
|
}
|
|
auto& vector = elements[i];
|
|
vector.insert(vector.end(), tmp.begin(), tmp.end());
|
|
std::sort(vector.begin(), vector.end(), sorter);
|
|
}
|
|
}
|
|
// If there is an entry in elements for one depth it was an in,
|
|
// all of them are now unique so we simply have to multiply
|
|
|
|
size_t level = n - 1;
|
|
std::vector<size_t> positions(n, 0);
|
|
bool done = false;
|
|
while (!done) {
|
|
TRI_IF_FAILURE("Index::permutationIN") {
|
|
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
|
|
}
|
|
VPackArrayBuilder guard(&result);
|
|
for (size_t i = 0; i < n; ++i) {
|
|
auto list = elements.find(i);
|
|
if (list == elements.end()) {
|
|
// Insert
|
|
result.add(oneLookup.at(i));
|
|
} else {
|
|
VPackObjectBuilder objGuard(&result);
|
|
result.add(StaticStrings::IndexEq, list->second.at(positions[i]));
|
|
}
|
|
}
|
|
while (true) {
|
|
auto list = elements.find(level);
|
|
if (list != elements.end() && ++positions[level] < list->second.size()) {
|
|
level = n - 1;
|
|
// abort inner iteration
|
|
break;
|
|
}
|
|
positions[level] = 0;
|
|
if (level == 0) {
|
|
done = true;
|
|
break;
|
|
}
|
|
--level;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Index::covers(std::unordered_set<std::string> const& attributes) const {
|
|
// check if we can use covering indexes
|
|
if (_fields.size() < attributes.size()) {
|
|
// we will not be able to satisfy all requested projections with this index
|
|
return false;
|
|
}
|
|
|
|
std::string result;
|
|
for (size_t i = 0; i < _fields.size(); ++i) {
|
|
result.clear();
|
|
TRI_AttributeNamesToString(_fields[i], result, false);
|
|
if (std::find(attributes.begin(), attributes.end(), result) == attributes.end()) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/// @brief warmup implementation that performs no work; both arguments
/// are ignored
void Index::warmup(arangodb::transaction::Methods*,
                   std::shared_ptr<basics::LocalTaskQueue>) {
  // Intentionally empty. An index that needs some warmup
  // has to explicitly implement it.
}
|
|
|
|
/// @brief generate error message
|
|
/// @param key the conflicting key
|
|
Result& Index::addErrorMsg(Result& r, std::string const& key) {
|
|
// now provide more context based on index
|
|
r.appendErrorMessage(" - in index ");
|
|
r.appendErrorMessage(name());
|
|
r.appendErrorMessage(" of type ");
|
|
r.appendErrorMessage(oldtypeName());
|
|
|
|
// build fields string
|
|
r.appendErrorMessage(" over '");
|
|
|
|
for (size_t i = 0; i < _fields.size(); i++) {
|
|
std::string msg;
|
|
TRI_AttributeNamesToString(_fields[i], msg);
|
|
r.appendErrorMessage(msg);
|
|
if (i != _fields.size() - 1) {
|
|
r.appendErrorMessage(", ");
|
|
}
|
|
}
|
|
r.appendErrorMessage("'");
|
|
|
|
// provide conflicting key
|
|
if (!key.empty()) {
|
|
r.appendErrorMessage("; conflicting key: ");
|
|
r.appendErrorMessage(key);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
/// @brief append the index description to an output stream
|
|
std::ostream& operator<<(std::ostream& stream, arangodb::Index const* index) {
|
|
stream << index->context();
|
|
return stream;
|
|
}
|
|
|
|
/// @brief append the index description to an output stream
|
|
std::ostream& operator<<(std::ostream& stream, arangodb::Index const& index) {
|
|
stream << index.context();
|
|
return stream;
|
|
}
|
|
|
|
/// @brief extract a timestamp from an attribute of a document
/// @param doc the document to read from
/// @param attributeName the name of the attribute to look up in `doc`
/// @return the numeric attribute value as a double if the attribute is a
/// number; the number of whole seconds since the epoch if the attribute is
/// a string accepted by basics::parseDateTime; -1.0 in all other cases
double Index::getTimestamp(arangodb::velocypack::Slice const& doc,
                           std::string const& attributeName) const {
  VPackSlice const value = doc.get(attributeName);

  if (value.isNumber()) {
    // numeric value. we take it as it is
    return value.getNumericValue<double>();
  }

  if (value.isString()) {
    // string value. we expect it to be YYYY-MM-DD etc.
    tp_sys_clock_ms tp;
    if (basics::parseDateTime(value.copyString(), tp)) {
      auto const sinceEpoch =
          std::chrono::duration_cast<std::chrono::seconds>(tp.time_since_epoch());
      return static_cast<double>(sinceEpoch.count());
    }
    // invalid date format: continue to the error value below
  }

  // attribute not found in document, or invalid type
  return -1.0;
}
|
|
|
|
/// @brief return the name of the (sole) index attribute
|
|
/// it is only allowed to call this method if the index contains a
|
|
/// single attribute
|
|
std::string const& Index::getAttribute() const {
|
|
TRI_ASSERT(_fields.size() == 1);
|
|
auto const& fields = _fields[0];
|
|
TRI_ASSERT(fields.size() == 1);
|
|
auto const& field = fields[0];
|
|
TRI_ASSERT(!field.shouldExpand);
|
|
return field.name;
|
|
}
|
|
|
|
/// @brief split a binary comparison node into its attribute access side,
/// its value side and the operator type
/// @param comparison the comparison node; its members 0 and 1 are read
/// @param variable the variable the attribute access is asserted to be for
AttributeAccessParts::AttributeAccessParts(arangodb::aql::AstNode const* comparison,
                                           arangodb::aql::Variable const* variable)
    : comparison(comparison),
      // start out assuming the form a.b == value
      attribute(comparison->getMember(0)),
      value(comparison->getMember(1)),
      opType(comparison->type) {
  if (attribute->type != arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
    // the comparison is written as value == a.b, so exchange both sides
    // and reverse the operator accordingly
    attribute = comparison->getMember(1);
    value = comparison->getMember(0);
    opType = aql::Ast::ReverseOperator(opType);
  }

  // whichever side was picked must be an attribute access on `variable`
  TRI_ASSERT(attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS);
  TRI_ASSERT(attribute->isAttributeAccessForVariable(variable, true));
}
|