arangodb/VocBase/query-data-feeder.h

////////////////////////////////////////////////////////////////////////////////
/// @brief data feeders for selects
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2012, triagens GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////

#ifndef TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H
#define TRIAGENS_DURHAM_VOC_BASE_DATA_FEEDER_H 1

#include <BasicsC/strings.h>
#include <BasicsC/string-buffer.h>
#include <BasicsC/json.h>

#include "VocBase/simple-collection.h"
#include "VocBase/query-result-types.h"
#include "VocBase/query-context.h"
#include "VocBase/query-base.h"
#include "QL/ast-query.h"

#ifdef __cplusplus
extern "C" {
#endif

// -----------------------------------------------------------------------------
// --SECTION--                                                     documentation
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @page IndexUsage Index usage
///
/// When executing a query, the database will automatically check if it can use
/// an index to speed up the query. It will check all available indexes for the
/// collections used in the query and will pick the ones that are (most)
/// appropriate. This process is called index selection.
///
/// The index selection is done for each collection used in a query. If a
/// collection is used multiple times in a query (e.g.
/// @LIT{users u1 INNER JOIN users u2 ON (u1.id == u2.id)}), then there will be
/// a separate index selection per collection instance.
///
/// @section IndexRequirement Requirements
///
/// Which index is used depends on which indexes are available for the collections
/// used and what is contained in the query's WHERE and JOIN conditions.
///
/// An index can only be used if the WHERE/JOIN conditions refer to indexed
/// attributes. It depends on the index type what kinds of comparisons are allowed
/// in order to use the index. For example, the primary index and hash indexes
/// only support equality comparisons whereas other index types might allow
/// range queries as well. It also depends on the index type whether just a
/// subset of the indexed attributes is sufficient in order to use an index.
///
/// The query optimizer needs to detect that an index can actually be used, and
/// it will only allow using indexes if the indexed attributes are not used in
/// combination with logical @LIT{||} or logical @LIT{!}. Furthermore, the
/// optimizer currently cannot make use of indexes if the same attribute is
/// compared to multiple values at the same time (i.e. a so-called in-list
/// comparison). For example, the following condition would prevent the use of
/// an index: @LIT{WHERE users.id == 3 || users.id == 4 || users.id == 9}
///
/// There is no way to explicitly specify which index to use/prefer/reject in a
/// query as there sometimes is in other database products.
///
/// @section IndexTypes Index types
///
/// There are the following index types:
/// - primary index (automatically created for the "_id" attribute of a collection)
/// - hash index (user-defined index on one or many attributes of a collection)
/// - geo index (user-defined index on two attributes of a collection)
///
/// @subsection PrimaryIndex Primary index
///
/// The collection's primary index will only be used to access the documents of a
/// collection if the WHERE/JOIN condition for the collection contains an equality
/// predicate for the @LIT{_id} attribute. The compare value must either be a
/// string constant (e.g. @LIT{u._id == "345055525:346693925"}) or a reference to
/// another attribute (e.g. @LIT{u._id == x.value}).
///
/// A collection's primary index will not be used for any comparison other than
/// equality comparisons or for multi-attribute predicates.
///
/// @subsection HashIndex Hash index
///
/// Hash indexes for collections can be used if all of the indexed attributes are
/// specified in the WHERE/JOIN condition. It is not sufficient to use just a subset
/// of the indexed attributes in a query. The condition for each attribute must
/// also be an equality predicate. The compare value must be a string or numeric
/// constant or a reference to another attribute.
///
/// Provided there is an index on @LIT{u.first} and @LIT{u.last}, the index could
/// be used for the following predicates:
/// - @LIT{u.first == 'Jack' && u.last == 'Sparrow'}
/// - @LIT{u.last == 'Sparrow' && u.first == 'Jack'}
///
/// A hash index will not be used for any comparison other than equality comparisons
/// or for conditions that do not contain all indexed attributes.
///
/// @subsection GeoIndex Geo index
///
/// Geo indexes are automatically used when a geo restriction is specified for a
/// collection in the FROM clause of a query. Geo indexes are ignored for all other
/// conditions specified in the ON or WHERE clauses of a query.
///
/// @section IndexPreference Index preference
///
/// As mentioned before, the index selection process will pick the most appropriate
/// index for each collection. The definition of "appropriate" in this context is:
///
/// - If a geo restriction is specified for a collection, the most appropriate geo
///   index for the collection will be used. If there is no geo index defined for
///   the collection, the query will fail.
/// - If no geo restriction is specified and the primary index can be used, the
///   primary index will be used. The reason for this is that the primary index is
///   unique and guaranteed to return at most one document.
///   Furthermore, the primary index is present in memory anyway and access to it is
///   fast.
/// - If the primary index cannot be used, all candidate hash indexes will be
///   checked. If there are multiple candidates, the hash index with the most
///   indexed attributes is picked. The assumption behind this is that the more
///   attributes are indexed, the more selective the index is expected to be and
///   the fewer documents it is supposed to return for each compare value. If there
///   is only one candidate hash index, it will be used.
/// - If no index can be used to access the documents in a collection, a full
///   collection scan will be done.
///
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
/// @addtogroup VocBase
/// @{
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
/// @brief forward declaration to resolve header inclusion issues
////////////////////////////////////////////////////////////////////////////////

// TRI_join_t is aliased to void so that this header can declare TRI_join_t*
// members and parameters as opaque pointers without needing the real type's
// definition (see the "header inclusion issues" remark above)
typedef void TRI_join_t;

// TRI_part_t is made opaque in the same way; within this header a TRI_part_t*
// is therefore just a void*
typedef void TRI_part_t;

// -----------------------------------------------------------------------------
// --SECTION--                                         general feeder attributes
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief data feeder access types
///
/// - ACCESS_ALL: full table scan, no index used
/// - ACCESS_CONST: index usage, index is queried with const value(s)
/// - ACCESS_REF: index usage, index is queried with values from other tables
////////////////////////////////////////////////////////////////////////////////

// Each access type must have its own value: if ACCESS_ALL and ACCESS_CONST
// share a value, a table scan cannot be told apart from a const index lookup
// and a switch naming both constants does not compile.
typedef enum {
  ACCESS_ALL    = 0,  // full table scan, no index used
  ACCESS_CONST  = 1,  // index usage, index is queried with const value(s)
  ACCESS_REF    = 2   // index usage, index is queried with values from other tables
}
TRI_index_access_type_e;

////////////////////////////////////////////////////////////////////////////////
/// @brief data feeder types
////////////////////////////////////////////////////////////////////////////////

// One enumerator per data feeder variant. Numbering starts at 1 and each
// following enumerator takes the next integer (resulting value in the comment).
typedef enum {
  FEEDER_TABLE_SCAN = 1,     // 1: table scan
  FEEDER_PRIMARY_LOOKUP,     // 2: primary index lookup
  FEEDER_HASH_LOOKUP,        // 3: hash index lookup
  FEEDER_SKIPLIST_LOOKUP,    // 4: skiplist lookup
  FEEDER_GEO_LOOKUP          // 5: geo index lookup
}
TRI_data_feeder_type_e;

////////////////////////////////////////////////////////////////////////////////
/// @brief general data feeder interface (used by all variants)
///
/// A data feeder is a means of accessing the documents in a collection in a
/// select query.
///
/// For each collection in a query, one data feeder will be used. If a collection
/// is invoked multiple times in a select (e.g. A INNER JOIN A) then there will
/// be multiple data feeders (in this case for collection A). This is because
/// the data feeder also contains state information (current position) that is
/// distinct for multiple instances of one collection in the same join.
///
/// The data feeder's internal state depends on the data feeder type (@ref
/// TRI_data_feeder_type_e).
///
/// Index-based data feeders might access the index values using constants or
/// references to other fields. Using constants (e.g. a.id == 5) is of course
/// the fastest way because the compare value is constant for the complete join
/// process. The compare value can be set up once at the start and will simply
/// be reused.
/// If the compare value is not constant but a reference to another field
/// (e.g. a.id == b.id), then the compare value is dynamic and will be determined
/// by a Javascript function for each iteration. The Javascript function is
/// set up once only.
///
/// Data feeders are first initialized by calling their init() function. This
/// function must set up all internal structures. Const access data feeders
/// can initialize their compare value(s) with the constants here already so
/// they do not need to be initialized in each join comparison. Ref access data
/// feeders can initialize their Javascript function here.
///
/// The rewind() function will be called at the start of the join execution to
/// reset the data feeder position to the beginning of the data. The rewind
/// function is called multiple times for inner collections in a join (once for
/// each combination of documents in outer scope).
///
/// The current() function is called during join execution to return the current
/// document. It might return a nil pointer if there are no more documents.
/// The current() function is expected to move the position pointer forward by
/// one document.
///
/// The free() function is finally called after join processing is done and is
/// expected to free all internal structures.
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_s {
  // feeder variant (table scan or one of the index lookups)
  TRI_data_feeder_type_e _type;
  // how compare values are obtained for index-based feeders (const vs. ref)
  TRI_index_access_type_e _accessType;
  // NOTE(review): presumably the id of the index used by index-based feeders
  TRI_idx_iid_t _indexId;
  // NOTE(review): presumably the ranges passed to the lookup creators - confirm
  TRI_vector_pointer_t* _ranges;
  // opaque pointers: TRI_join_t and TRI_part_t are typedef'd to void in this
  // header
  TRI_join_t* _join;
  TRI_part_t* _part;
  // query instance this feeder belongs to
  TRI_query_instance_t* _instance;
  // NOTE(review): presumably the feeder's position (nesting level) in the
  // join - confirm against the implementation
  size_t _level;
  // feeder-type specific internal state; NOTE(review): presumably points to
  // one of the TRI_data_feeder_*_t "guts" structs declared in this header
  void* _state;
  // collection whose documents are accessed through this feeder
  const TRI_doc_collection_t* _collection;

  // callbacks, each receiving the feeder itself (see the description above):
  // init    - sets up all internal structures
  // rewind  - resets the feeder position to the beginning of the data
  // current - provides the current document and advances the position;
  //           NOTE(review): the description above speaks of a nil pointer
  //           being returned, but the callback returns bool - confirm the
  //           actual contract against the implementation
  // free    - releases all internal structures after join processing
  void (*init) (struct TRI_data_feeder_s*);
  void (*rewind) (struct TRI_data_feeder_s*);
  bool (*current) (struct TRI_data_feeder_s*);
  void (*free) (struct TRI_data_feeder_s*);
}
TRI_data_feeder_t;

// -----------------------------------------------------------------------------
// --SECTION--                                                        table scan
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief internals/guts of table scan data feeder
///
/// A table scanner is used to access the documents in a collection sequentially.
/// The documents are accessed in order of definition in the collection's hash
/// table. The hash table might also contain empty entries (nil pointers) or
/// deleted documents. The data feeder abstracts all this and provides easy
/// access to all (relevant) documents in the hash table.
///
/// The table scanner does not have any other internal state than positioning
/// information. As it will return all documents anyway, it does not have any
/// distinction between const and ref access types.
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_table_scan_s {
  // positioning information only; NOTE(review): presumably _start and _end
  // delimit the slots of the collection's hash table and _current is the scan
  // cursor between them - confirm against the implementation
  void **_start;
  void **_end;
  void **_current;
}
TRI_data_feeder_table_scan_t;

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (table scan) - DEPRECATED
///
/// Join-based variant of TRI_CreateDataFeederTableScan().
///
/// @param collection  collection whose documents the feeder accesses
/// @param join        opaque join handle (TRI_join_t is a void typedef here)
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederTableScanX (const TRI_doc_collection_t* collection,
                                                   TRI_join_t* join,
                                                   size_t level);

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (table scan)
///
/// @param instance    query instance the feeder is created for
/// @param collection  collection whose documents the feeder accesses
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederTableScan (TRI_query_instance_t* const instance,
                                                  const TRI_doc_collection_t* collection,
                                                  const size_t level);

// -----------------------------------------------------------------------------
// --SECTION--                                                     primary index
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief internals/guts of primary index data feeder
///
/// The primary index data feeder will always use the (unique) primary index of
/// a collection to find exactly one (or zero) documents. It supports const and
/// ref access.
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_primary_lookup_s {
  // NOTE(review): presumably set once the (single) primary index lookup has
  // been performed since the last rewind - confirm
  bool _hasCompared;
  // NOTE(review): presumably marks that the lookup cannot yield a document
  bool _isEmpty;
  // NOTE(review): presumably the document id the primary index is queried with
  TRI_voc_did_t _didValue;
  // Javascript execution context; NOTE(review): presumably used to compute the
  // compare value for ref access, as described for index-based feeders above
  TRI_js_exec_context_t _context;
}
TRI_data_feeder_primary_lookup_t;


////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (primary index lookup) - DEPRECATED
///
/// Join-based variant of TRI_CreateDataFeederPrimaryLookup().
///
/// @param collection  collection whose documents the feeder accesses
/// @param join        opaque join handle (TRI_join_t is a void typedef here)
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookupX (const TRI_doc_collection_t* collection,
                                                       TRI_join_t* join,
                                                       size_t level);

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (primary index lookup)
///
/// @param instance    query instance the feeder is created for
/// @param collection  collection whose documents the feeder accesses
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @param ranges      NOTE(review): presumably the conditions the compare
///                    value is taken from (cf. TRI_data_feeder_t::_ranges)
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederPrimaryLookup (TRI_query_instance_t* const instance,
                                                      const TRI_doc_collection_t* collection,
                                                      const size_t level,
                                                      const TRI_vector_pointer_t* ranges);

// -----------------------------------------------------------------------------
// --SECTION--                                                        hash index
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief internals/guts of hash lookup data feeder
///
/// The hash index data feeder will use a unique or non-unique hash index
/// defined for a collection. It will return any documents available in the hash
/// for the compare values. It supports const and ref access.
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_hash_lookup_s {
  // NOTE(review): presumably marks that the lookup cannot yield any document
  bool _isEmpty;
  // the index this feeder works on (a hash index, per the description above)
  TRI_index_t* _index;
  // NOTE(review): presumably the elements returned by the last hash lookup
  HashIndexElements* _hashElements;
  // Javascript execution context; NOTE(review): presumably used to compute the
  // compare values for ref access
  TRI_js_exec_context_t _context;
  // NOTE(review): presumably the read position within _hashElements
  size_t _position;
}
TRI_data_feeder_hash_lookup_t;

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (hash index lookup) - DEPRECATED
///
/// Join-based variant of TRI_CreateDataFeederHashLookup().
///
/// @param collection  collection whose documents the feeder accesses
/// @param join        opaque join handle (TRI_join_t is a void typedef here)
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederHashLookupX (const TRI_doc_collection_t* collection,
                                                    TRI_join_t* join,
                                                    size_t level);

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (hash index lookup)
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederHashLookup (TRI_query_instance_t* const,
                                                   const TRI_doc_collection_t*,
                                                   const size_t,
                                                   const TRI_idx_iid_t,
                                                   const TRI_vector_pointer_t*);

// -----------------------------------------------------------------------------
// --SECTION--                                                         skiplists
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief internals/guts of skiplist data feeder
///
/// The skiplist data feeder will use a unique or non-unique skiplist
/// defined for a collection. It will return any documents available in the list
/// for the compare values. It supports const and ref access.
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_skiplist_lookup_s {
  // NOTE(review): presumably marks that the lookup cannot yield any document
  bool _isEmpty;
  // the index this feeder works on (a skiplist, per the description above)
  TRI_index_t* _index;
  // NOTE(review): presumably the iterator over the skiplist entries matching
  // the compare values
  TRI_skiplist_iterator_t* _skiplistIterator;
  // Javascript execution context; NOTE(review): presumably used to compute the
  // compare values for ref access
  TRI_js_exec_context_t _context;
  // NOTE(review): presumably the current read position - confirm
  size_t _position;
}
TRI_data_feeder_skiplist_lookup_t;

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (skiplist lookup) - DEPRECATED
///
/// Join-based variant of TRI_CreateDataFeederSkiplistLookup().
///
/// @param collection  collection whose documents the feeder accesses
/// @param join        opaque join handle (TRI_join_t is a void typedef here)
/// @param level       NOTE(review): presumably the feeder's level within the
///                    join (cf. TRI_data_feeder_t::_level) - confirm
/// @return            the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookupX (const TRI_doc_collection_t* collection,
                                                        TRI_join_t* join,
                                                        size_t level);

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (skiplist lookup)
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederSkiplistLookup (TRI_query_instance_t* const,
                                                       const TRI_doc_collection_t*,
                                                       const size_t,
                                                       const TRI_idx_iid_t,
                                                       const TRI_vector_pointer_t*);

// -----------------------------------------------------------------------------
// --SECTION--                                                         geo index
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief internals/guts of geo index data feeder
////////////////////////////////////////////////////////////////////////////////

typedef struct TRI_data_feeder_geo_lookup_s {
  // NOTE(review): presumably marks that the lookup cannot yield any document
  bool _isEmpty;
  // the index this feeder works on (a geo index, per the @brief above)
  TRI_index_t* _index;
  // geo restriction of the query that this feeder evaluates
  QL_ast_query_geo_restriction_t* _restriction;
  // NOTE(review): presumably the coordinates returned by the geo index for
  // the restriction - confirm
  GeoCoordinates* _coordinates;
  // NOTE(review): presumably the read position within _coordinates
  size_t _position;
}
TRI_data_feeder_geo_lookup_t;

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (geo index lookup) - DEPRECATED
///
/// Join-based variant of TRI_CreateDataFeederGeoLookup().
///
/// @param collection   collection whose documents the feeder accesses
/// @param join         opaque join handle (TRI_join_t is a void typedef here)
/// @param level        NOTE(review): presumably the feeder's level within the
///                     join (cf. TRI_data_feeder_t::_level) - confirm
/// @param restriction  geo restriction to evaluate
/// @return             the newly created data feeder
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederGeoLookupX (const TRI_doc_collection_t* collection,
                                                   TRI_join_t* join,
                                                   size_t level,
                                                   QL_ast_query_geo_restriction_t* restriction);

////////////////////////////////////////////////////////////////////////////////
/// @brief Create a new data feeder (geo index lookup)
////////////////////////////////////////////////////////////////////////////////

TRI_data_feeder_t* TRI_CreateDataFeederGeoLookup (TRI_query_instance_t* const,
                                                  const TRI_doc_collection_t*,
                                                  const size_t,
                                                  const TRI_idx_iid_t,
                                                  const QL_ast_query_geo_restriction_t*);

////////////////////////////////////////////////////////////////////////////////
/// @}
////////////////////////////////////////////////////////////////////////////////

#ifdef __cplusplus
}
#endif

#endif

// Local Variables:
// mode: outline-minor
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
// End: