1
0
Fork 0
arangodb/arangod/Indexes/Index.h

503 lines
18 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_INDEXES_INDEX_H
#define ARANGOD_INDEXES_INDEX_H 1
#include "Aql/AstNode.h"
#include "Basics/AttributeNameParser.h"
#include "Basics/Common.h"
#include "Basics/Exceptions.h"
#include "Basics/Result.h"
#include "Basics/StaticStrings.h"
#include "VocBase/LocalDocumentId.h"
#include "VocBase/voc-types.h"
#include "VocBase/vocbase.h"
#include <velocypack/StringRef.h>
#include <iosfwd>
namespace arangodb {
namespace basics {
class LocalTaskQueue;
}
class IndexIterator;
class LogicalCollection;
struct IndexIteratorOptions;
namespace velocypack {
class Builder;
class Slice;
} // namespace velocypack
namespace aql {
class SortCondition;
struct Variable;
} // namespace aql
namespace transaction {
class Methods;
}
/// @brief static limits for fulltext index
struct FulltextIndexLimits {
/// @brief maximum length of an indexed word in characters
static constexpr int maxWordLength = 40;
/// @brief default minimum word length for a fulltext index
static constexpr int minWordLengthDefault = 2;
/// @brief maximum number of search words in a query
static constexpr int maxSearchWords = 32;
};
class Index {
public:
Index() = delete;
Index(Index const&) = delete;
Index& operator=(Index const&) = delete;
Index(TRI_idx_iid_t iid, LogicalCollection& collection, std::string const& name,
std::vector<std::vector<arangodb::basics::AttributeName>> const& fields,
bool unique, bool sparse);
Index(TRI_idx_iid_t iid, LogicalCollection& collection,
arangodb::velocypack::Slice const& slice);
virtual ~Index();
public:
/// @brief index types
enum IndexType {
TRI_IDX_TYPE_UNKNOWN = 0,
TRI_IDX_TYPE_PRIMARY_INDEX,
TRI_IDX_TYPE_GEO_INDEX,
TRI_IDX_TYPE_GEO1_INDEX,
TRI_IDX_TYPE_GEO2_INDEX,
TRI_IDX_TYPE_HASH_INDEX,
TRI_IDX_TYPE_EDGE_INDEX,
TRI_IDX_TYPE_FULLTEXT_INDEX,
TRI_IDX_TYPE_SKIPLIST_INDEX,
TRI_IDX_TYPE_TTL_INDEX,
TRI_IDX_TYPE_PERSISTENT_INDEX,
TRI_IDX_TYPE_IRESEARCH_LINK,
TRI_IDX_TYPE_NO_ACCESS_INDEX
};
/// @brief: mode to signal how operation should behave
enum OperationMode { normal, internal, rollback };
/// @brief: helper struct returned by index methods that determine the costs
/// of index usage for filtering
struct FilterCosts {
/// @brief whether or not the index supports the filter condition
bool supportsCondition = false;
/// @brief number of attributes of filter condition covered by this index
size_t coveredAttributes = 0;
/// @brief estimated items to be returned for this condition.
size_t estimatedItems = 0;
/// @brief estimated costs for this filter condition
double estimatedCosts = 0.0;
static FilterCosts zeroCosts();
static FilterCosts defaultCosts(size_t itemsInIndex);
};
/// @brief: helper struct returned by index methods that determine the costs
/// of index usage
struct SortCosts {
/// @brief whether or not the index supports the sort clause
bool supportsCondition = false;
/// @brief number of attributes of the sort clause covered by this index
size_t coveredAttributes = 0;
/// @brief estimated costs for this sort clause
double estimatedCosts = 0.0;
static SortCosts zeroCosts(size_t coveredAttributes);
static SortCosts defaultCosts(size_t itemsInIndex, bool isPersistent);
};
public:
/// @brief return the index id
inline TRI_idx_iid_t id() const { return _iid; }
/// @brief return the index name
inline std::string const& name() const {
if (_name == StaticStrings::IndexNameEdgeFrom || _name == StaticStrings::IndexNameEdgeTo) {
return StaticStrings::IndexNameEdge;
}
return _name;
}
/// @brief set the name, if it is currently unset
void name(std::string const&);
/// @brief return the index fields
inline std::vector<std::vector<arangodb::basics::AttributeName>> const& fields() const {
return _fields;
}
/// @brief return the fields covered by this index.
/// Typically just the fields, but e.g. EdgeIndex on _from also covers
/// _to
virtual std::vector<std::vector<arangodb::basics::AttributeName>> const& coveredFields() const {
return fields();
}
/// @brief return the index fields names
inline std::vector<std::vector<std::string>> fieldNames() const {
std::vector<std::vector<std::string>> result;
for (auto const& it : _fields) {
std::vector<std::string> parts;
parts.reserve(it.size());
for (auto const& it2 : it) {
parts.emplace_back(it2.name);
}
result.emplace_back(std::move(parts));
}
return result;
}
/// @brief whether or not the ith attribute is expanded (somewhere)
inline bool isAttributeExpanded(size_t i) const {
if (i >= _fields.size()) {
return false;
}
return TRI_AttributeNamesHaveExpansion(_fields[i]);
}
/// @brief whether or not any attribute is expanded
inline bool isAttributeExpanded(std::vector<arangodb::basics::AttributeName> const& attribute) const {
for (auto const& it : _fields) {
if (!arangodb::basics::AttributeName::namesMatch(attribute, it)) {
continue;
}
return TRI_AttributeNamesHaveExpansion(it);
}
return false;
}
/// @brief whether or not any attribute is expanded
inline bool attributeMatches(std::vector<arangodb::basics::AttributeName> const& attribute,
bool isPrimary = false) const {
for (auto const& it : _fields) {
if (arangodb::basics::AttributeName::isIdentical(attribute, it, true)) {
return true;
}
}
if (isPrimary) {
static std::vector<arangodb::basics::AttributeName> const vec_id{
{StaticStrings::IdString, false}};
return arangodb::basics::AttributeName::isIdentical(attribute, vec_id, true);
}
return false;
}
/// @brief whether or not any attribute is expanded
inline bool hasExpansion() const { return _useExpansion; }
/// @brief if index needs explicit reversal and wouldn`t be reverted by storage rollback
virtual bool needsReversal() const { return false; }
/// @brief whether or not the index covers all the attributes passed in
virtual bool covers(std::unordered_set<std::string> const& attributes) const;
/// @brief return the underlying collection
inline LogicalCollection& collection() const { return _collection; }
/// @brief return a contextual string for logging
std::string context() const;
/// @brief whether or not the index is sparse
inline bool sparse() const { return _sparse; }
/// @brief whether or not the index is unique
inline bool unique() const { return _unique; }
/// @brief validate fields from slice
static void validateFields(velocypack::Slice const& slice);
/// @brief return the name of the index
char const* oldtypeName() const { return oldtypeName(type()); }
/// @brief return the index type based on a type name
static IndexType type(char const* type, size_t len);
static IndexType type(std::string const& type);
public:
virtual char const* typeName() const = 0;
static bool allowExpansion(IndexType type) {
return (type == TRI_IDX_TYPE_HASH_INDEX || type == TRI_IDX_TYPE_SKIPLIST_INDEX ||
type == TRI_IDX_TYPE_PERSISTENT_INDEX);
}
virtual IndexType type() const = 0;
/// @brief return the name of an index type
static char const* oldtypeName(IndexType);
/// @brief validate an index id
static bool validateId(char const*);
/// @brief validate an index name
static bool validateName(char const*);
/// @brief validate an index handle (collection name + / + index id)
static bool validateHandle(char const*, size_t*);
/// @brief validate an index handle (by name) (collection name + / + index name)
static bool validateHandleName(char const*, size_t*);
/// @brief generate a new index id
static TRI_idx_iid_t generateId();
/// @brief check if two index definitions share any identifiers (_id, name)
static bool CompareIdentifiers(velocypack::Slice const& lhs, velocypack::Slice const& rhs);
/// @brief index comparator, used by the coordinator to detect if two index
/// contents are the same
static bool Compare(velocypack::Slice const& lhs, velocypack::Slice const& rhs);
/// @brief whether or not the index is persistent (storage on durable media)
/// or not (RAM only)
virtual bool isPersistent() const = 0;
virtual bool canBeDropped() const = 0;
/// @brief whether or not the index provides an iterator that can extract
/// attribute values from the index data, without having to refer to the
/// actual document data
/// By default, indexes do not have this type of iterator, but they can
/// add it as a performance optimization
virtual bool hasCoveringIterator() const { return false; }
/// @brief Checks if this index is identical to the given definition
virtual bool matchesDefinition(arangodb::velocypack::Slice const&) const;
/// @brief whether or not the index is sorted
virtual bool isSorted() const = 0;
/// @brief if true this index should not be shown externally
virtual bool isHidden() const = 0;
/// @brief if true this index should not be shown externally
virtual bool inProgress() const { return false; }
/// @brief whether or not the index has a selectivity estimate
virtual bool hasSelectivityEstimate() const = 0;
/// @brief return the selectivity estimate of the index
/// must only be called if hasSelectivityEstimate() returns true
///
/// The extra arangodb::velocypack::StringRef is only used in the edge index
/// as direction attribute attribute, a Slice would be more flexible.
virtual double selectivityEstimate(arangodb::velocypack::StringRef const& extra =
arangodb::velocypack::StringRef()) const;
/// @brief update the cluster selectivity estimate
virtual void updateClusterSelectivityEstimate(double /*estimate*/) {
TRI_ASSERT(false); // should never be called except on Coordinator
}
/// @brief whether or not the index is implicitly unique
/// this can be the case if the index is not declared as unique,
/// but contains a unique attribute such as _key
bool implicitlyUnique() const;
virtual size_t memory() const = 0;
/// @brief serialization flags for indexes.
/// note that these must be mutually exclusive when bit-ORed
enum class Serialize : uint8_t {
/// @brief serialize figures for index
Basics = 0,
/// @brief serialize figures for index
Figures = 2,
/// @brief serialize selectivity estimates
Estimates = 4,
/// @brief serialize object ids for persistence
Internals = 8
};
/// @brief helper for building flags
template <typename... Args>
static inline constexpr std::underlying_type<Serialize>::type makeFlags(Serialize flag,
Args... args) {
return static_cast<std::underlying_type<Serialize>::type>(flag) + makeFlags(args...);
}
static inline constexpr std::underlying_type<Serialize>::type makeFlags() {
return static_cast<std::underlying_type<Serialize>::type>(Serialize::Basics);
}
static inline constexpr bool hasFlag(std::underlying_type<Serialize>::type flags,
Serialize aflag) {
return (flags & static_cast<std::underlying_type<Serialize>::type>(aflag)) != 0;
}
/// serialize an index to velocypack, using the serialization flags above
virtual void toVelocyPack(arangodb::velocypack::Builder&,
std::underlying_type<Index::Serialize>::type flags) const;
std::shared_ptr<arangodb::velocypack::Builder> toVelocyPack(
std::underlying_type<Serialize>::type flags) const;
virtual void toVelocyPackFigures(arangodb::velocypack::Builder&) const;
std::shared_ptr<arangodb::velocypack::Builder> toVelocyPackFigures() const;
virtual void load() = 0;
virtual void unload() = 0;
// called when the index is dropped
virtual Result drop();
/// @brief called after the collection was truncated
/// @param tick at which truncate was applied
virtual void afterTruncate(TRI_voc_tick_t tick) {}
/// @brief whether or not the filter condition is supported by the index
/// returns detailed information about the costs associated with using this index
virtual FilterCosts supportsFilterCondition(std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
arangodb::aql::AstNode const* node,
arangodb::aql::Variable const* reference,
size_t itemsInIndex) const;
/// @brief whether or not the sort condition is supported by the index
/// returns detailed information about the costs associated with using this index
virtual SortCosts supportsSortCondition(arangodb::aql::SortCondition const* sortCondition,
arangodb::aql::Variable const* reference,
size_t itemsInIndex) const;
/// @brief specialize the condition for use with this index. this will remove all
/// elements from the condition that are not supported by the index.
/// for example, if the condition is `doc.value1 == 38 && doc.value2 > 9`, but the index is
/// only on `doc.value1`, this will return a new AstNode that points to just the condition
/// `doc.value1 == 38`.
/// must only be called if supportsFilterCondition has indicated that the index supports
/// at least a part of the filter condition
virtual arangodb::aql::AstNode* specializeCondition(arangodb::aql::AstNode* node,
arangodb::aql::Variable const* reference) const;
/// @brief create a new index iterator for the (specialized) condition
virtual std::unique_ptr<IndexIterator> iteratorForCondition(transaction::Methods* trx,
aql::AstNode const* node,
aql::Variable const* reference,
IndexIteratorOptions const& opts);
bool canUseConditionPart(arangodb::aql::AstNode const* access,
arangodb::aql::AstNode const* other,
arangodb::aql::AstNode const* op,
arangodb::aql::Variable const* reference,
std::unordered_set<std::string>& nonNullAttributes, bool) const;
/// @brief Transform the list of search slices to search values.
/// This will multiply all IN entries and simply return all other
/// entries.
void expandInSearchValues(arangodb::velocypack::Slice const,
arangodb::velocypack::Builder&) const;
virtual void warmup(arangodb::transaction::Methods* trx,
std::shared_ptr<basics::LocalTaskQueue> queue);
static size_t sortWeight(arangodb::aql::AstNode const* node);
protected:
/// @brief return the name of the (sole) index attribute
/// it is only allowed to call this method if the index contains a
/// single attribute
std::string const& getAttribute() const;
/// @brief generate error result
/// @param code the error key
/// @param key the conflicting key
arangodb::Result& addErrorMsg(Result& r, int code,
std::string const& key = "") {
if (code != TRI_ERROR_NO_ERROR) {
r.reset(code);
return addErrorMsg(r, key);
}
return r;
}
/// @brief generate error result
/// @param key the conflicting key
arangodb::Result& addErrorMsg(Result& r, std::string const& key = "");
/// @brief extracts a timestamp value from a document
/// returns a negative value if the document does not contain the specified
/// attribute, or the attribute does not contain a valid timestamp or date string
double getTimestamp(arangodb::velocypack::Slice const& doc,
std::string const& attributeName) const;
TRI_idx_iid_t const _iid;
LogicalCollection& _collection;
std::string _name;
std::vector<std::vector<arangodb::basics::AttributeName>> const _fields;
bool const _useExpansion;
mutable bool _unique;
mutable bool _sparse;
// use this with c++17 -- attributeMatches
// static inline std::vector<arangodb::basics::AttributeName> const vec_id {{ StaticStrings::IdString, false }};
};
/// @brief simple struct that takes an AstNode of type comparison and
/// splits it into the comparison operator, the attribute access and the
/// lookup value parts
/// only works for conditions such as a.b == 2 or 45 < a.xx.c
/// the collection variable (a in the above examples) is passed in "variable"
struct AttributeAccessParts {
AttributeAccessParts(arangodb::aql::AstNode const* comparison,
arangodb::aql::Variable const* variable);
/// @brief comparison operation, e.g. NODE_TYPE_OPERATOR_BINARY_EQ
arangodb::aql::AstNode const* comparison;
/// @brief attribute access node
arangodb::aql::AstNode const* attribute;
/// @brief lookup value
arangodb::aql::AstNode const* value;
/// @brief operation type
arangodb::aql::AstNodeType opType;
};
} // namespace arangodb
std::ostream& operator<<(std::ostream&, arangodb::Index const*);
std::ostream& operator<<(std::ostream&, arangodb::Index const&);
#endif