mirror of https://gitee.com/bigwinds/arangodb
466 lines
21 KiB
C
466 lines
21 KiB
C
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief data feeders for selects
|
|
///
|
|
/// @file
|
|
///
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// @author Jan Steemann
|
|
/// @author Copyright 2012, triagens GmbH, Cologne, Germany
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H
|
|
#define TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H 1
|
|
|
|
#include <BasicsC/strings.h>
|
|
#include <BasicsC/string-buffer.h>
|
|
#include <BasicsC/json.h>
|
|
|
|
#include "VocBase/simple-collection.h"
|
|
#include "VocBase/query-result-types.h"
|
|
#include "VocBase/query-context.h"
|
|
#include "VocBase/query-base.h"
|
|
#include "QL/ast-query.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- documentation
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @page IndexUsage Index usage
|
|
///
|
|
/// When executing a query, the database will automatically check if it can use
|
|
/// an index to speed up the query. It will check all available indexes for the
|
|
/// collections used in the query and will pick the ones that are (most)
|
|
/// appropriate. This process is called index selection.
|
|
///
|
|
/// The index selection is done for each collection used in a query. If a
|
|
/// collection is used multiple times in a query (e.g.
|
|
/// @LIT{users u1 INNER JOIN users u2 ON (u1.id == u2.id)}), then there will be
|
|
/// a separate index selection per collection instance.
|
|
///
|
|
/// @section IndexRequirement Requirements
|
|
///
|
|
/// Which index is used depends on which indexes are available for the collections
|
|
/// used and what is contained in the query's WHERE and JOIN conditions.
|
|
///
|
|
/// An index can only be used if the WHERE/JOIN conditions refer to indexed
|
|
/// attributes. It depends on the index type what kinds of comparisons are allowed
|
|
/// in order to use the index. For example, the primary index and hash indexes
|
|
/// only support equality comparisons whereas other index types might allow
|
|
/// range queries as well. It also depends on the index type whether just a
|
|
/// subset of the indexed attributes is sufficient in order to use an index.
|
|
///
|
|
/// The query optimizer needs to detect that an index can actually be used, and
|
|
/// it will only allow using indexes if the indexed attributes are not used in
|
|
/// combination with logical @LIT{||} or logical @LIT{!}. Furthermore, the
|
|
/// optimizer currently cannot make use of indexes if the same attribute is
|
|
/// compared to multiple values at the same time (i.e. a so-called in-list
|
|
/// comparison). For example, the following condition would not allow to use
|
|
/// an index: @LIT{WHERE users.id == 3 || users.id == 4 || users.id == 9}
|
|
///
|
|
/// There is no way to explicitly specify which index to use/prefer/reject in a
|
|
/// query as there sometimes is in other database products.
|
|
///
|
|
/// @section IndexTypes Index types
|
|
///
|
|
/// There are the following index types:
|
|
/// - primary index (automatically created for the "_id" attribute of a collection)
|
|
/// - hash index (user-defined index on one or many attributes of a collection)
|
|
/// - geo index (user-defined index on two attributes of a collection)
|
|
///
|
|
/// @subsection PrimaryIndex Primary index
|
|
///
|
|
/// The collection's primary index will only be used to access the documents of a
|
|
/// collection if the WHERE/JOIN condition for the collection contains an equality
|
|
/// predicate for the @LIT{_id} attribute. The compare value must either be a
|
|
/// string constant (e.g. @LIT{u._id == "345055525:346693925"}) or a reference to
|
|
/// another attribute (e.g. @LIT{u._id == x.value}).
|
|
///
|
|
/// A collection's primary index will not be used for any comparison other than
|
|
/// equality comparisons or for multi-attribute predicates.
|
|
///
|
|
/// @subsection HashIndex Hash index
|
|
///
|
|
/// Hash indexes for collections can be used if all of the indexed attributes are
|
|
/// specified in the WHERE/JOIN condition. It is not sufficient to use just a subset
|
|
/// of the indexed attributes in a query. The condition for each attribute must
|
|
/// also be an equality predicate. The compare value must be a string or numeric
|
|
/// constant or a reference to another attribute.
|
|
///
|
|
/// Provided there is an index on @LIT{u.first} and @LIT{u.last}, the index could
|
|
/// be used for the following predicates:
|
|
/// - @LIT{u.first == 'Jack' && u.last == 'Sparrow'}
|
|
/// - @LIT{u.last == 'Sparrow' && u.first == 'Jack'}
|
|
///
|
|
/// A hash index will not be used for any comparison other than equality comparisons
|
|
/// or for conditions that do not contain all indexed attributes.
|
|
///
|
|
/// @subsection GeoIndex Geo index
|
|
///
|
|
/// Geo indexes are automatically used when a geo restriction is specified for a
|
|
/// collection in the FROM clause of a query. Geo indexes are ignored for all other
|
|
/// conditions specified in the ON or WHERE clauses of a query.
|
|
///
|
|
/// @section IndexPreference Index preference
|
|
///
|
|
/// As mentioned before, the index selection process will pick the most appropriate
|
|
/// index for each collection. The definition of "appropriate" in this context is:
|
|
///
|
|
/// - If a geo restriction is specified for a collection, the most appropriate geo
|
|
/// index for the collection will be used. If there is no geo index defined for
|
|
/// the collection, the query will fail.
|
|
/// - If no geo restriction is specified and the primary index can be used, the
|
|
/// primary index will be used. The reason for this is that the primary index is
|
|
/// unique and guaranteed to return at most one document.
|
|
/// Furthermore, the primary index is present in memory anyway and access to it is
|
|
/// fast.
|
|
/// - If the primary index cannot be used, all candidate hash indexes will be
|
|
/// checked. If there are multiple candidates, the hash index with the most
|
|
/// indexed attributes is picked. The assumption behind this is that the more
|
|
/// attributes are indexed, the more selective the index is expected to be and
|
|
/// the fewer documents it is supposed to return for each compare value. If there
|
|
/// is only one candidate hash index, it will be used.
|
|
/// - If no index can be used to access the documents in a collection, a full
|
|
/// collection scan will be done.
|
|
///
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @addtogroup VocBase
|
|
/// @{
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief forward declaration to resolve header inclusion issues
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Opaque stand-ins for the join and part types: both are aliases of void so
// that this header can carry TRI_join_t* / TRI_part_t* without including the
// headers that define the real structures.
typedef void TRI_join_t;
typedef void TRI_part_t;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- general feeder attributes
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief data feeder access types
|
|
///
|
|
/// - ACCESS_ALL: full table scan, no index used
|
|
/// - ACCESS_CONST: index usage, index is queried with const value(s)
|
|
/// - ACCESS_REF: index usage, index is queried with values from other tables
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// How a data feeder obtains its documents. Every access type carries its own
// distinct value so that a full scan can never be confused with a const index
// lookup when _accessType is compared.
typedef enum {
  ACCESS_ALL   = 0,  // full table scan, no index used
  ACCESS_CONST = 1,  // index usage, index is queried with const value(s)
  ACCESS_REF   = 2   // index usage, index is queried with values from other tables
}
TRI_index_access_type_e;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief data feeder types
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Discriminator for the feeder variants declared in this header; one value
// per feeder implementation.
typedef enum {
  FEEDER_TABLE_SCAN      = 1,  // sequential scan over the collection
  FEEDER_PRIMARY_LOOKUP  = 2,  // lookup via the primary index
  FEEDER_HASH_LOOKUP     = 3,  // lookup via a hash index
  FEEDER_SKIPLIST_LOOKUP = 4,  // lookup via a skiplist
  FEEDER_GEO_LOOKUP      = 5   // lookup via a geo index
} TRI_data_feeder_type_e;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief general data feeder interface (used by all variants)
|
|
///
|
|
/// A data feeder is a means of accessing the documents in a collection in a
|
|
/// select query.
|
|
///
|
|
/// For each collection in a query, one data feeder will be used. If a collection
|
|
/// is invoked multiple times in a select (e.g. A INNER JOIN A) then there will
|
|
/// be multiple data feeders (in this case for collection A). This is because
|
|
/// the data feeder also contains state information (current position) that is
|
|
/// distinct for multiple instances of one collection in the same join.
|
|
///
|
|
/// The data feeder's internal state depends on the data feeder type (@ref
|
|
/// TRI_data_feeder_type_e).
|
|
///
|
|
/// Index-based data feeders might access the index values using constants or
|
|
/// references to other fields. Using constants (e.g. a.id == 5) is of course
|
|
/// the fastest way because the compare value is constant for the complete join
|
|
/// process. The compare value can be set up once at the start and will simply
|
|
/// be reused.
|
|
/// If the compare value is not constant but a reference to another field
|
|
/// (e.g. a.id == b.id), then the compare value is dynamic and will be determined
|
|
/// by a Javascript function for each iteration. The Javascript function is
|
|
/// set up once only.
|
|
///
|
|
/// Data feeders are first initialized by calling their init() function. This
|
|
/// function must set up all internal structures. Const access data feeders
|
|
/// can initialize their compare value(s) with the constants here already so
|
|
/// they do not need to be initialized in each join comparison. Ref access data
|
|
/// feeders can initialize their Javascript function here.
|
|
///
|
|
/// The rewind() function will be called at the start of the join execution to
|
|
/// reset the data feeder position to the beginning of the data. The rewind
|
|
/// function is called multiple times for inner collections in a join (once for
|
|
/// each combination of documents in outer scope).
|
|
///
|
|
/// The current() function is called during join execution to return the current
|
|
/// document. It might return a nil pointer if there are no more documents.
|
|
/// The current() function is expected to move the position pointer forward by
|
|
/// one document.
|
|
///
|
|
/// The free() function is finally called after join processing is done and is
|
|
/// expected to free all internal structures.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
typedef struct TRI_data_feeder_s {
|
|
TRI_data_feeder_type_e _type;
|
|
TRI_index_access_type_e _accessType;
|
|
TRI_idx_iid_t _indexId;
|
|
TRI_vector_pointer_t* _ranges;
|
|
TRI_join_t* _join;
|
|
TRI_part_t* _part;
|
|
TRI_query_instance_t* _instance;
|
|
size_t _level;
|
|
void* _state;
|
|
const TRI_doc_collection_t* _collection;
|
|
|
|
void (*init) (struct TRI_data_feeder_s*);
|
|
void (*rewind) (struct TRI_data_feeder_s*);
|
|
bool (*current) (struct TRI_data_feeder_s*);
|
|
void (*free) (struct TRI_data_feeder_s*);
|
|
}
|
|
TRI_data_feeder_t;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- table scan
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internals/guts of table scan data feeder
|
|
///
|
|
/// A table scanner is used to access the documents in a collection sequentially.
|
|
/// The documents are accessed in order of definition in the collection's hash
|
|
/// table. The hash table might also contain empty entries (nil pointers) or
|
|
/// deleted documents. The data feeder abstracts all this and provides easy
|
|
/// access to all (relevant) documents in the hash table.
|
|
///
|
|
/// The table scanner does not have any other internal state than positioning
|
|
/// information. As it will return all documents anyway, it does not have any
|
|
/// distinction between const and ref access types.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Table scan state: nothing but positioning information over an array of
// slot pointers.
typedef struct TRI_data_feeder_table_scan_s {
  void **_start;    // first slot of the scanned range
  void **_end;      // end of the scanned range (NOTE(review): inclusive vs. one-past - confirm in the implementation)
  void **_current;  // slot the scan is currently positioned on
} TRI_data_feeder_table_scan_t;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (table scan) - DEPRECATED
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederTableScanX (const TRI_doc_collection_t*,
|
|
TRI_join_t*,
|
|
size_t);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (table scan)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederTableScan (TRI_query_instance_t* const,
|
|
const TRI_doc_collection_t*,
|
|
const size_t);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- primary index
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internals/guts of primary index data feeder
|
|
///
|
|
/// The primary index data feeder will always use the (unique) primary index of
|
|
/// a collection to find exactly one (or zero) documents. It supports const and
|
|
/// ref access.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
typedef struct TRI_data_feeder_primary_lookup_s {
|
|
bool _hasCompared;
|
|
bool _isEmpty;
|
|
TRI_voc_did_t _didValue;
|
|
TRI_js_exec_context_t _context;
|
|
}
|
|
TRI_data_feeder_primary_lookup_t;
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (primary index lookup) - DEPRECATED
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookupX (const TRI_doc_collection_t*,
|
|
TRI_join_t*,
|
|
size_t);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (primary index lookup)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookup (TRI_query_instance_t* const,
|
|
const TRI_doc_collection_t*,
|
|
const size_t,
|
|
const TRI_vector_pointer_t*);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- hash index
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internals/guts of hash lookup data feeder
|
|
///
|
|
/// The hash index data feeder will use a unique or non-unique hash index
|
|
/// defined for a collection. It will return any documents available in the hash
|
|
/// for the compare values. It supports const and ref access.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
typedef struct TRI_data_feeder_hash_lookup_s {
|
|
bool _isEmpty;
|
|
TRI_index_t* _index;
|
|
HashIndexElements* _hashElements;
|
|
TRI_js_exec_context_t _context;
|
|
size_t _position;
|
|
}
|
|
TRI_data_feeder_hash_lookup_t;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (hash index lookup) - DEPRECATED
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederHashLookupX (const TRI_doc_collection_t*,
|
|
TRI_join_t*,
|
|
size_t);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (hash index lookup)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederHashLookup (TRI_query_instance_t* const,
|
|
const TRI_doc_collection_t*,
|
|
const size_t,
|
|
const TRI_idx_iid_t,
|
|
const TRI_vector_pointer_t*);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- skiplists
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internals/guts of skiplist data feeder
|
|
///
|
|
/// The skiplist data feeder will use a unique or non-unique skiplist
|
|
/// defined for a collection. It will return any documents available in the list
|
|
/// for the compare values. It supports const and ref access.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
typedef struct TRI_data_feeder_skiplist_lookup_s {
|
|
bool _isEmpty;
|
|
TRI_index_t* _index;
|
|
TRI_skiplist_iterator_t* _skiplistIterator;
|
|
TRI_js_exec_context_t _context;
|
|
size_t _position;
|
|
}
|
|
TRI_data_feeder_skiplist_lookup_t;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (skiplist lookup) - DEPRECATED
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookupX (const TRI_doc_collection_t*,
|
|
TRI_join_t*,
|
|
size_t);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (skiplist lookup)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookup (TRI_query_instance_t* const,
|
|
const TRI_doc_collection_t*,
|
|
const size_t,
|
|
const TRI_idx_iid_t,
|
|
const TRI_vector_pointer_t*);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- geo index
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief internals/guts of geo index data feeder
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
typedef struct TRI_data_feeder_geo_lookup_s {
|
|
bool _isEmpty;
|
|
TRI_index_t* _index;
|
|
QL_ast_query_geo_restriction_t* _restriction;
|
|
GeoCoordinates* _coordinates;
|
|
size_t _position;
|
|
}
|
|
TRI_data_feeder_geo_lookup_t;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (geo index lookup) - DEPRECATED
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederGeoLookupX (const TRI_doc_collection_t*,
|
|
TRI_join_t*,
|
|
size_t,
|
|
QL_ast_query_geo_restriction_t*);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief Create a new data feeder (geo index lookup)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_data_feeder_t* TRI_CreateDataFeederGeoLookup (TRI_query_instance_t* const,
|
|
const TRI_doc_collection_t*,
|
|
const size_t,
|
|
const TRI_idx_iid_t,
|
|
const QL_ast_query_geo_restriction_t*);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @}
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|
|
|
|
// Local Variables:
|
|
// mode: outline-minor
|
|
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
|
|
// End:
|
|
|