//////////////////////////////////////////////////////////////////////////////// /// @brief data feeders for selects /// /// @file /// /// DISCLAIMER /// /// Copyright 2010-2012 triagens GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is triAGENS GmbH, Cologne, Germany /// /// @author Jan Steemann /// @author Copyright 2012, triagens GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #ifndef TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H #define TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H 1 #include #include #include #include "VocBase/simple-collection.h" #include "VocBase/query-result-types.h" #include "VocBase/query-context.h" #include "VocBase/query-base.h" #include "QL/ast-query.h" #ifdef __cplusplus extern "C" { #endif // ----------------------------------------------------------------------------- // --SECTION-- documentation // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @page IndexUsage Index usage /// /// When executing a query, the database will automatically check if it can use /// an index to speed up the query. It will check all available indexes for the /// collections used in the query and will pick the ones that are (most) /// appropriate. This process is called index selection.
/// /// The index selection is done for each collection used in a query. If a /// collection is used multiple times in a query (e.g. /// @LIT{users u1 INNER JOIN users u2 ON (u1.id == u2.id)}), then there will be /// a separate index selection per collection instance. /// /// @section IndexRequirement Requirements /// /// Which index is used depends on which indexes are available for the collections /// used and what is contained in the query's WHERE and JOIN conditions. /// /// An index can only be used if the WHERE/JOIN conditions refer to indexed /// attributes. It depends on the index type what kinds of comparisons are allowed /// in order to use the index. For example, the primary index and hash indexes /// only support equality comparisons whereas other index types might allow /// range queries as well. It also depends on the index type whether just a /// subset of the indexed attributes is sufficient in order to use an index. /// /// The query optimizer needs to detect that an index can actually be used, and /// it will only allow using indexes if the indexed attributes are not used in /// combination with logical @LIT{||} or logical @LIT{!}. Furthermore, the /// optimizer currently cannot make use of indexes if the same attribute is /// compared to multiple values at the same time (i.e. a so-called in-list /// comparison). For example, the following condition would not allow /// an index to be used: @LIT{WHERE users.id == 3 || users.id == 4 || users.id == 9} /// /// There is no way to explicitly specify which index to use/prefer/reject in a /// query as there sometimes is in other database products.
/// /// @section IndexTypes Index types /// /// There are the following index types: /// - primary index (automatically created for the "_id" attribute of a collection) /// - hash index (user-defined index on one or many attributes of a collection) /// - geo index (user-defined index on two attributes of a collection) /// /// @subsection PrimaryIndex Primary index /// /// The collection's primary index will only be used to access the documents of a /// collection if the WHERE/JOIN condition for the collection contains an equality /// predicate for the @LIT{_id} attribute. The compare value must either be a /// string constant (e.g. @LIT{u._id == "345055525:346693925"}) or a reference to /// another attribute (e.g. @LIT{u._id == x.value}). /// /// A collection's primary index will not be used for any comparison other than /// equality comparisons or for multi-attribute predicates. /// /// @subsection HashIndex Hash index /// /// Hash indexes for collections can be used if all of the indexed attributes are /// specified in the WHERE/JOIN condition. It is not sufficient to use just a subset /// of the indexed attributes in a query. The condition for each attribute must /// also be an equality predicate. The compare value must be a string or numeric /// constant or a reference to another attribute. /// /// Provided there is an index on @LIT{u.first} and @LIT{u.last}, the index could /// be used for the following predicates: /// - @LIT{u.first == 'Jack' && u.last == 'Sparrow'} /// - @LIT{u.last == 'Sparrow' && u.first == 'Jack'} /// /// A hash index will not be used for any comparison other than equality comparisons /// or for conditions that do not contain all indexed attributes. /// /// @subsection GeoIndex Geo index /// /// Geo indexes are automatically used when a geo restriction is specified for a /// collection in the FROM clause of a query. Geo indexes are ignored for all other /// conditions specified in the ON or WHERE clauses of a query.
/// /// @section IndexPreference Index preference /// /// As mentioned before, the index selection process will pick the most appropriate /// index for each collection. The definition of "appropriate" in this context is: /// /// - If a geo restriction is specified for a collection, the most appropriate geo /// index for the collection will be used. If there is no geo index defined for /// the collection, the query will fail. /// - If no geo restriction is specified and the primary index can be used, the /// primary index will be used. The reason for this is that the primary index is /// unique and guaranteed to return at most one document. /// Furthermore, the primary index is present in memory anyway and access to it is /// fast. /// - If the primary index cannot be used, all candidate hash indexes will be /// checked. If there are multiple candidates, the hash index with the most /// indexed attributes is picked. The assumption behind this is that the more /// attributes are indexed, the more selective the index is expected to be and /// the fewer documents it is supposed to return for each compare value. If there /// is only one candidate hash index, it will be used. /// - If no index can be used to access the documents in a collection, a full /// collection scan will be done.
///
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
/// @addtogroup VocBase
/// @{
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
/// @brief forward declaration to resolve header inclusion issues
///
/// Both names are declared as aliases of void, so a TRI_join_t* or TRI_part_t*
/// obtained through this header is a plain void pointer.
////////////////////////////////////////////////////////////////////////////////

typedef void TRI_join_t;
typedef void TRI_part_t;

// -----------------------------------------------------------------------------
// --SECTION--                                         general feeder attributes
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief data feeder access types
///
/// - ACCESS_ALL: full table scan, no index used
/// - ACCESS_CONST: index usage, index is queried with const value(s)
/// - ACCESS_REF: index usage, index is queried with values from other tables
///
/// Each access type carries its own numeric value. ACCESS_ALL and ACCESS_CONST
/// must not share a value: otherwise a table scan could not be told apart from
/// a const index access when the stored value is compared, and a switch over
/// both enumerators would contain duplicate case labels.
////////////////////////////////////////////////////////////////////////////////

typedef enum {
  ACCESS_ALL   = 1,
  ACCESS_CONST = 2,
  ACCESS_REF   = 3
}
TRI_index_access_type_e;

////////////////////////////////////////////////////////////////////////////////
/// @brief data feeder types
///
/// One enumerator per feeder variant declared in this header: table scan,
/// primary index lookup, hash index lookup, skiplist lookup and geo index
/// lookup.
////////////////////////////////////////////////////////////////////////////////

typedef enum {
  FEEDER_TABLE_SCAN      = 1,
  FEEDER_PRIMARY_LOOKUP  = 2,
  FEEDER_HASH_LOOKUP     = 3,
  FEEDER_SKIPLIST_LOOKUP = 4,
  FEEDER_GEO_LOOKUP      = 5
}
TRI_data_feeder_type_e;

////////////////////////////////////////////////////////////////////////////////
/// @brief general data feeder interface (used by all variants)
///
/// A data feeder is a means of accessing the documents in a collection in a
/// select query.
///
/// For each collection in a query, one data feeder will be used. If a collection
/// is invoked multiple times in a select (e.g.
A INNER JOIN A) then there will /// be multiple data feeders (in this case for collection A). This is because /// the data feeder also contains state information (current position) that is /// distinct for multiple instances of one collection in the same join. /// /// The data feeder's internal state depends on the data feeder type (@ref /// TRI_data_feeder_type_e). /// /// Index-based data feeders might access the index values using constants or /// references to other fields. Using constants (e.g. a.id == 5) is of course /// the fastest way because the compare value is constant for the complete join /// process. The compare value can be set up once at the start and will simply /// be reused. /// If the compare value is not constant but a reference to another field /// (e.g. a.id == b.id), then the compare value is dynamic and will be determined /// by a Javascript function for each iteration. The Javascript function is /// set up once only. /// /// Data feeders are first initialized by calling their init() function. This /// function must set up all internal structures. Const access data feeders /// can initialize their compare value(s) with the constants here already so /// they do not need to be initialized in each join comparison. Ref access data /// feeders can initialize their Javascript function here. /// /// The rewind() function will be called at the start of the join execution to /// reset the data feeder position to the beginning of the data. The rewind /// function is called multiple times for inner collections in a join (once for /// each combination of documents in outer scope). /// /// The current() function is called during join execution to return the current /// document. It might return a nil pointer if there are no more documents. /// The current() function is expected to move the position pointer forward by /// one document. 
/// /// The free() function is finally called after join processing is done and is /// expected to free all internal structures. //////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_s { TRI_data_feeder_type_e _type; TRI_index_access_type_e _accessType; TRI_idx_iid_t _indexId; TRI_vector_pointer_t* _ranges; TRI_join_t* _join; TRI_part_t* _part; TRI_query_instance_t* _instance; size_t _level; void* _state; const TRI_doc_collection_t* _collection; void (*init) (struct TRI_data_feeder_s*); void (*rewind) (struct TRI_data_feeder_s*); bool (*current) (struct TRI_data_feeder_s*); void (*free) (struct TRI_data_feeder_s*); } TRI_data_feeder_t; // ----------------------------------------------------------------------------- // --SECTION-- table scan // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief internals/guts of table scan data feeder /// /// A table scanner is used to access the documents in a collection sequentially. /// The documents are accessed in order of definition in the collection's hash /// table. The hash table might also contain empty entries (nil pointers) or /// deleted documents. The data feeder abstracts all this and provides easy /// access to all (relevant) documents in the hash table. /// /// The table scanner does not have any other internal state than positioning /// information. As it will return all documents anyway, it does not have any /// distinction between const and ref access types. 
//////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_table_scan_s { void **_start; void **_end; void **_current; } TRI_data_feeder_table_scan_t; //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (table scan) - DEPRECATED //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederTableScanX (const TRI_doc_collection_t*, TRI_join_t*, size_t); //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (table scan) //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederTableScan (TRI_query_instance_t* const, const TRI_doc_collection_t*, const size_t); // ----------------------------------------------------------------------------- // --SECTION-- primary index // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief internals/guts of primary index data feeder /// /// The primary index data feeder will always use the (unique) primary index of /// a collection to find exactly one (or zero) documents. It supports const and /// ref access. 
//////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_primary_lookup_s { bool _hasCompared; bool _isEmpty; TRI_voc_did_t _didValue; TRI_js_exec_context_t _context; } TRI_data_feeder_primary_lookup_t; //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (primary index lookup) - DEPRECATED //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookupX (const TRI_doc_collection_t*, TRI_join_t*, size_t); //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (primary index lookup) //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookup (TRI_query_instance_t* const, const TRI_doc_collection_t*, const size_t, const TRI_vector_pointer_t*); // ----------------------------------------------------------------------------- // --SECTION-- hash index // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief internals/guts of hash lookup data feeder /// /// The hash index data feeder will use a unique or non-unique hash index /// defined for a collection. It will return any documents available in the hash /// for the compare values. It supports const and ref access. 
//////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_hash_lookup_s { bool _isEmpty; TRI_index_t* _index; HashIndexElements* _hashElements; TRI_js_exec_context_t _context; size_t _position; } TRI_data_feeder_hash_lookup_t; //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (hash index lookup) - DEPRECATED //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederHashLookupX (const TRI_doc_collection_t*, TRI_join_t*, size_t); //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (hash index lookup) //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederHashLookup (TRI_query_instance_t* const, const TRI_doc_collection_t*, const size_t, const TRI_idx_iid_t, const TRI_vector_pointer_t*); // ----------------------------------------------------------------------------- // --SECTION-- skiplists // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief internals/guts of skiplist data feeder /// /// The skiplist data feeder will use a unique or non-unique skiplist /// defined for a collection. It will return any documents available in the list /// for the compare values. It supports const and ref access. 
//////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_skiplist_lookup_s { bool _isEmpty; TRI_index_t* _index; TRI_skiplist_iterator_t* _skiplistIterator; TRI_js_exec_context_t _context; size_t _position; } TRI_data_feeder_skiplist_lookup_t; //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (skiplist lookup) - DEPRECATED //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookupX (const TRI_doc_collection_t*, TRI_join_t*, size_t); //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (skiplist lookup) //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookup (TRI_query_instance_t* const, const TRI_doc_collection_t*, const size_t, const TRI_idx_iid_t, const TRI_vector_pointer_t*); // ----------------------------------------------------------------------------- // --SECTION-- geo index // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief internals/guts of geo index data feeder //////////////////////////////////////////////////////////////////////////////// typedef struct TRI_data_feeder_geo_lookup_s { bool _isEmpty; TRI_index_t* _index; QL_ast_query_geo_restriction_t* _restriction; GeoCoordinates* _coordinates; size_t _position; } TRI_data_feeder_geo_lookup_t; //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (geo index lookup) - DEPRECATED //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederGeoLookupX (const TRI_doc_collection_t*, TRI_join_t*, size_t, 
QL_ast_query_geo_restriction_t*); //////////////////////////////////////////////////////////////////////////////// /// @brief Create a new data feeder (geo index lookup) //////////////////////////////////////////////////////////////////////////////// TRI_data_feeder_t* TRI_CreateDataFeederGeoLookup (TRI_query_instance_t* const, const TRI_doc_collection_t*, const size_t, const TRI_idx_iid_t, const QL_ast_query_geo_restriction_t*); //////////////////////////////////////////////////////////////////////////////// /// @} //////////////////////////////////////////////////////////////////////////////// #ifdef __cplusplus } #endif #endif // Local Variables: // mode: outline-minor // outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)" // End: