1
0
Fork 0
arangodb/arangod/MMFiles/mmfiles-fulltext-handles.h

139 lines
6.5 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_MMFILES_MMFILES_FULLTEXT_HANDLES_H
#define ARANGOD_MMFILES_MMFILES_FULLTEXT_HANDLES_H 1
#include "mmfiles-fulltext-common.h"
#include "VocBase/voc-types.h"
/// @brief typedef for a fulltext handle entry
/// a handle is an integer number that is mapped to a specific document;
/// index nodes store handles instead of document ids
typedef uint32_t TRI_fulltext_handle_t;
// forward declaration only: this header just names the type as the return
// type of TRI_GetDocumentMMFilesFulltextIndex, so its full definition is not
// needed here (code that defines or calls that function needs it)
namespace arangodb {
struct DocumentIdentifierToken;
}
/// @brief a slot that contains _numUsed handles and keeps some statistics
/// about itself
///
/// the fulltext index will not store document ids in its nodes, because that
/// will be complicated in the case of deleting a document. in this case, all
/// nodes would need to be traversed to find where the document was referenced.
/// this would be too slow. instead of storing document ids, a node stores
/// handles. handles are increasing integer numbers that are each mapped to a
/// specific document. when a document is deleted from the index, its handle is
/// marked as deleted, but the handle value may remain stored in one or many
/// index nodes. handles of deleted documents are removed from result sets at
/// the end of each index query on-the-fly, so query results are still correct.
/// To finally get rid of handles of deleted documents, the index can perform
/// a compaction. The compaction writes a new, dense handle list consisting
/// only of handles that point to existing documents. The old handles used in
/// nodes become invalid by this, so the handles stored in the nodes have to
/// be rewritten. When the rewrite is done, the old handle list is freed and
/// the new one is put in place.
///
/// Inserting a new document will simply allocate a new handle, and the handle
/// will be stored for the node. We simply assign the next handle number for
/// the document. After that, we can quickly look up the document id for a
/// handle value. It's more tricky the other way around, because there is no
/// simple mapping from document ids to handles. To find the handle for a
/// document id, we have to check all handles already used.
/// As this would mean traversing over all handles used and comparing their
/// document values with the sought document id, there is some optimisation:
/// handles are stored in slots of fixed sizes. Each slot has some statistics
/// about the number of used and deleted documents/handles in it, as well as
/// its min and max document values.
/// When looking for a specific document id in all handles in the case of
/// deletion, the slot statistics are used to prune non-relevant slots early
/// from the further search. The simple min/max document id check implemented
/// is sufficient because normally document memory is contiguous so the
/// pointers to documents are just adjacent (second pointer is higher than
/// first pointer).
/// This is only true for documents that are created on the same memory page
/// but this should be the common case to optimize for.
typedef struct TRI_fulltext_handle_slot_s {
uint32_t _numUsed; // number of handles used in slot
uint32_t _numDeleted; // number of deleted handles in slot
// _min and _max are document values (TRI_voc_rid_t), not handle values; they
// are the per-slot statistics used to prune this slot when searching for the
// handle that belongs to a given document
TRI_voc_rid_t _min; // minimum document value in slot
TRI_voc_rid_t _max; // maximum document value in slot
TRI_voc_rid_t* _documents; // document ids for the handles in this slot
uint8_t* _deleted; // deleted flags for the handles in this slot
} TRI_fulltext_handle_slot_t;
/// @brief typedef for a fulltext handles instance
/// bundles the list of slots with the counters used for handing out new
/// handles and for tracking deleted documents
typedef struct TRI_fulltext_handles_s {
TRI_fulltext_handle_t _next; // next handle to use
uint32_t _numSlots; // current number of slots
TRI_fulltext_handle_slot_t** _slots; // pointers to slots
// NOTE(review): presumably counted in handles per slot, not bytes - confirm
uint32_t _slotSize; // the size of each slot
uint32_t _numDeleted; // total number of deleted documents
TRI_fulltext_handle_t* _map; // a temporary map for remapping existing
// handles to new handles during compaction
} TRI_fulltext_handles_t;
/// @brief create a handles instance
/// the uint32_t parameter is unnamed in this declaration; presumably it is the
/// slot size (cf. TRI_fulltext_handles_t::_slotSize) - confirm against the
/// definition
/// @return pointer to the new instance; how failure is reported is not
/// visible from this header
TRI_fulltext_handles_t* TRI_CreateHandlesMMFilesFulltextIndex(const uint32_t);
/// @brief free a handles instance
/// NOTE(review): presumably the counterpart of
/// TRI_CreateHandlesMMFilesFulltextIndex; the instance must not be used
/// after this call - confirm whether a nullptr argument is tolerated
void TRI_FreeHandlesMMFilesFulltextIndex(TRI_fulltext_handles_t*);
/// @brief get number of documents (including deleted)
/// @return the count as uint32_t; documents marked as deleted are still
/// counted
uint32_t TRI_NumHandlesHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
/// @brief get number of deleted documents
/// NOTE(review): presumably reports TRI_fulltext_handles_t::_numDeleted -
/// confirm against the definition
uint32_t TRI_NumDeletedHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
/// @brief get handle list deletion grade
/// NOTE(review): this comment previously said "fill grade" while the function
/// name says deletion grade; presumably the result is the share of deleted
/// handles among all handles - confirm range and meaning against the
/// definition
double TRI_DeletionGradeHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
/// @brief whether or not the handle list should be compacted
/// @return true if a compaction is advised; the criteria are part of the
/// definition and not visible from this header
bool TRI_ShouldCompactHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
/// @brief compact the handle list
/// compaction produces a new, dense handle list that only contains handles
/// of existing documents
/// NOTE(review): presumably the returned pointer is that new list and the
/// argument (the old list) stays valid until the caller frees it - confirm
/// ownership and the failure return value against the definition
TRI_fulltext_handles_t* TRI_CompactHandleMMFilesFulltextIndex(
TRI_fulltext_handles_t* const);
/// @brief insert a document and return a handle for it
/// the TRI_voc_rid_t argument identifies the document to insert
/// NOTE(review): the return value used to signal failure (if any) is not
/// visible from this header - confirm against the definition
TRI_fulltext_handle_t TRI_InsertHandleMMFilesFulltextIndex(
TRI_fulltext_handles_t* const, const TRI_voc_rid_t);
/// @brief mark a document as deleted in the handle list
/// the document is only flagged; its handle value may remain stored in index
/// nodes until the next compaction
/// NOTE(review): the bool result presumably tells whether the document was
/// found - confirm against the definition
bool TRI_DeleteDocumentHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const,
const TRI_voc_rid_t);
/// @brief get the document id for a handle
/// the result is returned by value as an arangodb::DocumentIdentifierToken;
/// that type is only forward-declared in this header, so callers must include
/// its full definition themselves
/// NOTE(review): the result for a deleted or out-of-range handle is not
/// visible from this header
arangodb::DocumentIdentifierToken TRI_GetDocumentMMFilesFulltextIndex(
const TRI_fulltext_handles_t* const, const TRI_fulltext_handle_t);
/// @brief dump all handles
/// only declared when TRI_FULLTEXT_DEBUG evaluates to non-zero, so callers
/// must guard their calls with the same condition
#if TRI_FULLTEXT_DEBUG
void TRI_DumpHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
#endif
/// @brief return the memory usage for the handles
/// NOTE(review): presumably in bytes - confirm against the definition
size_t TRI_MemoryHandleMMFilesFulltextIndex(const TRI_fulltext_handles_t* const);
#endif