1
0
Fork 0
arangodb/arangod/FulltextIndex/fulltext-index.cpp

1777 lines
56 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// @brief full text search
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2012-2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "fulltext-index.h"
#include "Basics/locks.h"
#include "Basics/logging.h"
#include "fulltext-handles.h"
#include "fulltext-list.h"
#include "fulltext-query.h"
#include "fulltext-result.h"
#include "fulltext-wordlist.h"
// -----------------------------------------------------------------------------
// --SECTION-- private defines
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief use padding for pointers in binary data
////////////////////////////////////////////////////////////////////////////////
#undef FULLTEXT_PADDING
////////////////////////////////////////////////////////////////////////////////
/// @brief maximum length of an indexed word in bytes
/// a UTF-8 character can contain up to 4 bytes
////////////////////////////////////////////////////////////////////////////////
#define MAX_WORD_BYTES ((TRI_FULLTEXT_MAX_WORD_LENGTH) * 4)
// -----------------------------------------------------------------------------
// --SECTION-- private types
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief the type of characters indexed. should be one byte long
////////////////////////////////////////////////////////////////////////////////
// key type of the index: one raw byte per node level. the sub-node list
// accessors step over the key array in uint8_t units, so this type must stay
// exactly one byte wide
typedef uint8_t node_char_t;
////////////////////////////////////////////////////////////////////////////////
/// @brief typedef for follower nodes. this is just void because for the
/// compiler it is a sequence of binary data with no internal structure
////////////////////////////////////////////////////////////////////////////////
// opaque type of a packed sub-node list. never dereference it directly; decode
// it through the accessor functions in this file (NodeNumFollowers,
// NodeNumAllocated, FollowersKeys, FollowersNodes, ...)
typedef void followers_t;
////////////////////////////////////////////////////////////////////////////////
/// @brief a node in the fulltext index
///
/// the _followers property is a pointer to dynamic memory. If it is NULL, then
/// the node does not have any followers/sub-nodes. if the _followers property
/// is non-NULL, it contains a byte stream consisting of the following values:
/// - uint8_t numAllocated: number of sub-nodes we have allocated memory for
/// - uint8_t numFollowers: the actual number of sub-nodes for the node
/// - node_char_t* keys: keys of sub-nodes, sorted binary
/// - node_t** sub-nodes: pointers to sub-nodes, in the same order as keys
/// this structure is fiddly, but saves a lot of memory and malloc calls when
/// compared to a "proper" structure.
/// As the "properties" inside _followers are just binary data for the compiler,
/// it is not wise to access them directly, but use the access functions this
/// file provides. There is no need to calculate the offsets of the different
/// sub-properties directly, as this is all done by special functions which
/// provide the offsets at relatively low costs.
///
/// The _handles property is a pointer to dynamic memory, too. If it is NULL,
/// then the node does not have any handles attached. If it is non-NULL, it
/// contains a byte stream consisting of the following values:
/// - uint32_t numAllocated: number of handles allocated for the node
/// - uint32_t numEntries: number of handles currently in use
/// - TRI_fulltext_handle_t* handles: all the handle values subsequently
/// Note that the highest bit of the numAllocated value contains a flag whether
/// the handles list is sorted or not. It is therefore not safe to access the
/// properties directly, but instead always the special functions provided in
/// fulltext-list.c must be used. These provide access to the individual values
/// at relatively low cost
////////////////////////////////////////////////////////////////////////////////
typedef struct node_s {
// packed sub-node list (two counter bytes, keys, sub-node pointers);
// nullptr when the node has no sub-node list
followers_t* _followers;
// list of handles attached to this node; nullptr when the node has none
TRI_fulltext_list_t* _handles;
}
node_t;
////////////////////////////////////////////////////////////////////////////////
/// @brief the actual fulltext index
////////////////////////////////////////////////////////////////////////////////
// bookkeeping record for one fulltext index: the root node, the handle
// manager, memory statistics and the allocation tuning parameters
typedef struct {
node_t* _root; // root node of the index
TRI_fulltext_handles_t* _handles; // handles management instance
// read-write lock of the index; it is not taken by any function visible in
// this part of the file - NOTE(review): presumably used by the public API
TRI_read_write_lock_t _lock;
size_t _memoryAllocated; // total memory used by index
#if TRI_FULLTEXT_DEBUG
// the following counters only exist in debug builds
size_t _memoryBase; // base memory
size_t _memoryNodes; // total memory used by nodes (node_t only)
size_t _memoryFollowers; // total memory used for followers (no documents)
uint32_t _nodesAllocated; // number of nodes currently in use
#endif
// ExtendSubNodeList grows a sub-node list by this many slots at a time
uint32_t _nodeChunkSize; // how many sub-nodes to allocate per chunk
// InsertHandle passes this to TRI_CreateListFulltextIndex for a node's first list
uint32_t _initialNodeHandles; // how many handles to allocate per node
}
index_t;
// -----------------------------------------------------------------------------
// --SECTION-- forwards
// -----------------------------------------------------------------------------
// forward declarations for helpers that are called before they are defined
// (the debug dump routines, SetNodeNumFollowers and the mutually recursive
// FreeFollowers / FreeNode pair)
static uint32_t NodeNumFollowers (const node_t* const);
static uint32_t NodeNumAllocated (const node_t* const);
static node_char_t* NodeFollowersKeys (const node_t* const);
static node_t** NodeFollowersNodes (const node_t* const);
static void FreeFollowers (index_t* const, node_t*);
static void FreeNode (index_t* const, node_t*);
static size_t MemorySubNodeList (const uint32_t);
// -----------------------------------------------------------------------------
// --SECTION-- private functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief print some indentation
////////////////////////////////////////////////////////////////////////////////
#if TRI_FULLTEXT_DEBUG
/// debug-only helper: writes one blank to stdout per indentation level
/// @param level  number of blanks to print
void Indent (const uint32_t level) {
  uint32_t remaining = level;

  while (remaining > 0) {
    printf(" ");
    --remaining;
  }
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief dump the contents of a node
////////////////////////////////////////////////////////////////////////////////
#if TRI_FULLTEXT_DEBUG
/// debug-only helper: prints a node, then recursively all of its sub-nodes
/// (each prefixed by its key byte), then the node's handle list, to stdout
/// @param node   node to print; it is dereferenced, so it must not be nullptr
/// @param level  depth of the node below the dump's starting point, used for
///               indentation
void DumpNode (const node_t* const node, uint32_t level) {
  uint32_t numFollowers;
  uint32_t numHandles;
  uint32_t i;

  numFollowers = NodeNumFollowers(node);

  if (node->_handles != nullptr) {
    numHandles = TRI_NumEntriesListFulltextIndex(node->_handles);
  }
  else {
    numHandles = 0;
  }

  // nodes without any sub-nodes get an "(x)" marker
  if (numFollowers == 0) {
    printf(" (x) ");
  }
  else {
    printf(" ");
  }

  // pad so that the node descriptions line up for the first 20 levels
  if (level < 20) {
    Indent(20 - level);
  }

  // %p requires a pointer to void, so convert the node pointer explicitly
  printf("node %p (%lu followers, %lu handles)\n",
         (const void*) node,
         (unsigned long) numFollowers,
         (unsigned long) numHandles);

  if (numFollowers > 0) {
    node_char_t* followerKeys = NodeFollowersKeys(node);
    node_t** followerNodes = NodeFollowersNodes(node);

    for (i = 0; i < numFollowers; ++i) {
      node_char_t followerKey = followerKeys[i];
      node_t* followerNode = followerNodes[i];

      Indent(level);
      printf("%c", (char) followerKey);
      DumpNode(followerNode, level + 1);
    }
  }

  if (numHandles > 0) {
    Indent(level);
    if (level < 20) {
      Indent(20 - level);
    }
    printf("(");
    TRI_DumpListFulltextIndex(node->_handles);
    printf(")\n");
  }
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief return the padding to be applied when allocating memory for the
/// sub-node list. the padding is done between the (uint8_t) keys and the
/// (node_t*) pointers. padding can be used to align the pointers to some
/// "good" boundary
////////////////////////////////////////////////////////////////////////////////
/// @param numEntries  number of slots in the sub-node list
/// @return number of filler bytes between the key array and the pointer array;
///         always 0 unless FULLTEXT_PADDING is defined
static inline size_t Padding (const uint32_t numEntries) {
#ifdef FULLTEXT_PADDING
  // bytes in front of the pointer array: the two counter bytes plus one key
  // per slot
  const size_t leadingBytes = sizeof(uint8_t)
                            + sizeof(uint8_t)
                            + (sizeof(node_char_t) * numEntries);
  const size_t excess = leadingBytes % PAD;

  // no excess means the pointer array already starts on a PAD boundary
  return (excess == 0) ? 0 : (PAD - excess);
#else
  // padding is compiled out: the pointers directly follow the keys
  return 0;
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// @brief re-allocate memory for the index and update memory usage statistics
////////////////////////////////////////////////////////////////////////////////
/// @param idx      index whose _memoryAllocated counter is adjusted
/// @param old      block to resize
/// @param newSize  requested size in bytes
/// @param oldSize  size in bytes the block was booked with so far
/// @return the resized block, or nullptr if TRI_Reallocate returned nullptr
///         (in that case the counter is left untouched)
static inline void* ReallocateMemory (index_t* const idx,
                                      void* old,
                                      const size_t newSize,
                                      const size_t oldSize) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(old != nullptr);
  TRI_ASSERT(newSize > 0);
  TRI_ASSERT(oldSize > 0);
#endif

  void* const resized = TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, old, newSize);

  if (resized == nullptr) {
    // nothing changed, so nothing is booked
    return nullptr;
  }

  // swap the old size for the new one in the statistics
  idx->_memoryAllocated += newSize;
  idx->_memoryAllocated -= oldSize;

  return resized;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief allocate memory for the index and update memory usage statistics
////////////////////////////////////////////////////////////////////////////////
/// @param idx   index whose _memoryAllocated counter is increased
/// @param size  number of bytes to allocate
/// @return the new block, or nullptr if TRI_Allocate returned nullptr (the
///         counter is only increased on success)
static inline void* AllocateMemory (index_t* const idx, const size_t size) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(size > 0);
#endif

  void* const block = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, size, false);

  if (block == nullptr) {
    return nullptr;
  }

  idx->_memoryAllocated += size;
  return block;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief free memory and update memory usage statistics
////////////////////////////////////////////////////////////////////////////////
/// @param idx   index whose _memoryAllocated counter is decreased
/// @param data  block to release
/// @param size  number of bytes the block was booked with
static inline void FreeMemory (index_t* const idx,
                               void* data,
                               const size_t size) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(size > 0);
  TRI_ASSERT(idx->_memoryAllocated >= size);
#endif

  // take the block out of the statistics, then hand it back to the allocator
  idx->_memoryAllocated = idx->_memoryAllocated - size;
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, data);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adjust the number of followers for a node
/// note: if the value is set to 0, this might free the sub-nodes list
////////////////////////////////////////////////////////////////////////////////
/// within this file the function is used both to shrink the count
/// (CleanupNodes) and to grow it by one (InsertSubNode)
/// @param idx    index owning the node (needed for memory accounting)
/// @param node   node whose sub-node list is modified; must have a list
/// @param value  new number of sub-nodes; it is stored in a single byte. a
///               value of 0 releases the whole list and resets _followers
static inline void SetNodeNumFollowers (index_t* const idx,
                                        node_t* const node,
                                        uint32_t value) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
  TRI_ASSERT(node->_followers != nullptr);
  TRI_ASSERT(value <= 255);
#endif

  if (value != 0) {
    // the follower count lives in the second byte of the list header
    ((uint8_t*) node->_followers)[1] = (uint8_t) value;
    return;
  }

  // the list became empty: release it, booked with its allocated size
  const uint32_t numAllocated = NodeNumAllocated(node);

#if TRI_FULLTEXT_DEBUG
  idx->_memoryFollowers -= MemorySubNodeList(numAllocated);
#endif

  FreeMemory(idx, node->_followers, MemorySubNodeList(numAllocated));
  node->_followers = nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get the number of used sub-nodes in a sub node list
////////////////////////////////////////////////////////////////////////////////
/// @param node  node to inspect
/// @return number of sub-nodes in use; 0 if the node is nullptr or has no
///         sub-node list
static inline uint32_t NodeNumFollowers (node_t const* node) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const bool hasList = (node != nullptr && node->_followers != nullptr);

  if (! hasList) {
    return 0;
  }

  // header layout: byte 0 = slots allocated, byte 1 = slots in use
  uint8_t* const header = (uint8_t*) node->_followers;
  return (uint32_t) header[1];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get the number of allocated (not necessarily used) sub-nodes in a
/// sub node list
////////////////////////////////////////////////////////////////////////////////
static uint32_t NodeNumAllocated (const node_t* const node) {
uint8_t* head;
#if TRI_FULLTEXT_DEBUG
TRI_ASSERT(node != nullptr);
#endif
if (node->_followers == nullptr) {
return 0;
}
head = (uint8_t*) node->_followers;
return (uint32_t) *head;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief initialize a sub-node list with length information
////////////////////////////////////////////////////////////////////////////////
/// writes the two counter bytes at the start of a sub-node list; everything
/// behind them is left untouched
/// @param data          start of the sub-node list (at least two bytes)
/// @param numAllocated  number of slots in the list, truncated to one byte
/// @param numFollowers  number of slots in use, truncated to one byte
static void InitializeSubNodeList (void* data,
                                   const uint32_t numAllocated,
                                   const uint32_t numFollowers) {
  uint8_t* const header = (uint8_t*) data;

  header[0] = (uint8_t) numAllocated;
  header[1] = (uint8_t) numFollowers;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a pointer to the start of the keys in a sub-node list
/// the caller must make sure the node actually has sub-nodes
////////////////////////////////////////////////////////////////////////////////
/// @param data  start of a sub-node list; must not be nullptr
/// @return pointer to the first key, which directly follows the two counter
///         bytes
static inline node_char_t* FollowersKeys (void* data) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(data != nullptr);
#endif

  uint8_t* const bytes = (uint8_t*) data;

  // skip numAllocated + numEntries
  return (node_char_t*) &bytes[2];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a pointer to the start of the keys in a sub-node list
/// the caller must make sure the node actually has sub-nodes
////////////////////////////////////////////////////////////////////////////////
/// convenience wrapper around FollowersKeys for a node
/// @param node  node with a non-nullptr sub-node list
/// @return pointer to the first key of the node's sub-node list
static inline node_char_t* NodeFollowersKeys (const node_t* const node) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node->_followers != nullptr);
#endif

  followers_t* const list = node->_followers;
  return FollowersKeys(list);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a pointer to the start of the node-list in a sub-node list
/// the caller must make sure the node actually has sub-nodes
////////////////////////////////////////////////////////////////////////////////
/// the position depends on the allocated count stored in the list header,
/// because the pointer array starts behind ALL key slots (used or not)
/// @param data  start of a sub-node list with a valid header
/// @return pointer to the first sub-node pointer
static inline node_t** FollowersNodes (void* data) {
  uint8_t* const bytes = (uint8_t*) data;
  const uint8_t capacity = bytes[0];

  // two counter bytes, then one key byte per allocated slot
  uint8_t* const firstKey = bytes + 2;
  uint8_t* const behindKeys = firstKey + capacity;

  return (node_t**) (behindKeys + Padding(capacity));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a pointer to the start of the node-list in a sub-node list
/// the caller must make sure the node actually has sub-nodes
////////////////////////////////////////////////////////////////////////////////
/// convenience wrapper around FollowersNodes for a node
/// @param node  node with a non-nullptr sub-node list
/// @return pointer to the first sub-node pointer of the node's list
static inline node_t** NodeFollowersNodes (const node_t* const node) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
  TRI_ASSERT(node->_followers != nullptr);
#endif

  followers_t* const list = node->_followers;
  return FollowersNodes(list);
}
////////////////////////////////////////////////////////////////////////////////
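/// @brief get a pointer to the start of the node-list in a sub-node list,
/// computed for an explicitly given number of allocated slots instead of the
/// number stored in the list header
/// the caller must make sure the node actually has sub-nodes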
////////////////////////////////////////////////////////////////////////////////
/// unlike FollowersNodes, the header of the list is not read; ExtendSubNodeList
/// uses this to locate the pointers of the previous layout after the header
/// has already been rewritten
/// @param data          start of a sub-node list
/// @param numAllocated  slot count to compute the pointer position for
/// @return position of the first sub-node pointer for that slot count
static inline node_t** FollowersNodesPos (void* data, const uint32_t numAllocated) {
  uint8_t* const bytes = (uint8_t*) data;

  // two counter bytes, then one key byte per slot
  uint8_t* const firstKey = bytes + 2;
  uint8_t* const behindKeys = firstKey + numAllocated;

  return (node_t**) (behindKeys + Padding(numAllocated));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the memory required to store a sub-node list of the
/// specific length
////////////////////////////////////////////////////////////////////////////////
/// @param numEntries  number of slots the list should have room for
/// @return size in bytes of a sub-node list with that many slots: two counter
///         bytes, one key and one pointer per slot, plus padding
static size_t MemorySubNodeList (const uint32_t numEntries) {
  const size_t counterBytes = sizeof(uint8_t) + sizeof(uint8_t);   // numAllocated, numEntries
  const size_t bytesPerSlot = sizeof(node_char_t) + sizeof(node_t*); // key + pointer

  return counterBytes + (bytesPerSlot * numEntries) + Padding(numEntries);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief check the current list of sub-nodes for a node and increase its
/// size if it is too small to hold another node
////////////////////////////////////////////////////////////////////////////////
/// the list grows by idx->_nodeChunkSize slots, but never beyond 255 slots,
/// because the slot count is stored in a single header byte
/// @param idx           index owning the node; supplies the chunk size and the
///                      memory counters
/// @param node          node whose sub-node list is grown; it may not have a
///                      list yet
/// @param numFollowers  number of sub-nodes currently stored in the list
/// @param numAllocated  number of slots the current list has room for
/// @return true if the list was grown; false if no memory was available or the
///         list already has the maximum number of slots. on failure the
///         node's _followers pointer is left as it was
static bool ExtendSubNodeList (index_t* const idx,
                               node_t* const node,
                               const uint32_t numFollowers,
                               const uint32_t numAllocated) {
  // largest slot count the one-byte header field can describe
  const uint32_t maxSlots = 255;
  size_t nextSize;
  uint32_t nextAllocated;

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  if (numAllocated >= maxSlots) {
    // the list cannot grow any further. writing a larger count into the
    // header would silently truncate it and corrupt all offset calculations
    return false;
  }

  // current list has reached its limit, we must increase it
  nextAllocated = numAllocated + idx->_nodeChunkSize;

  if (nextAllocated > maxSlots || nextAllocated < numAllocated) {
    // too big for the header byte (or the addition wrapped around)
    nextAllocated = maxSlots;
  }

  nextSize = MemorySubNodeList(nextAllocated);

  if (node->_followers == nullptr) {
    // allocate a new list
    node->_followers = AllocateMemory(idx, nextSize);

    if (node->_followers == nullptr) {
      // out of memory
      return false;
    }

    // initialize the chunk of memory we just got
    InitializeSubNodeList(node->_followers, nextAllocated, numFollowers);

#if TRI_FULLTEXT_DEBUG
    idx->_memoryFollowers += nextSize;
#endif

    return true;
  }
  else {
    // re-allocate an existing list
    followers_t* followers;
    size_t oldSize;

    oldSize = MemorySubNodeList(numAllocated);
    followers = ReallocateMemory(idx, node->_followers, nextSize, oldSize);

    if (followers == nullptr) {
      // out of memory
      return false;
    }

    // write the new slot count into the header. this must happen before the
    // pointers are moved, because FollowersNodes() reads the count from the
    // header to find the new position of the pointer array
    InitializeSubNodeList(followers, nextAllocated, numFollowers);

#if TRI_FULLTEXT_DEBUG
    idx->_memoryFollowers += nextSize;
    idx->_memoryFollowers -= oldSize;
#endif

    // note the new pointer
    node->_followers = followers;

    if (numFollowers > 0) {
      // the keys stay where they are, but the pointer array must move behind
      // the enlarged key array. source and target may overlap, so memmove is
      // required
      memmove(FollowersNodes(followers),
              FollowersNodesPos(followers, numAllocated),
              sizeof(node_t*) * numFollowers);
    }

    return true;
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create a new, empty node
////////////////////////////////////////////////////////////////////////////////
/// @param idx  index the node is booked on
/// @return a node without sub-nodes and without handles, or nullptr if the
///         allocation failed
static node_t* CreateNode (index_t* const idx) {
  void* const raw = AllocateMemory(idx, sizeof(node_t));

  if (raw == nullptr) {
    return nullptr;
  }

  node_t* const created = static_cast<node_t*>(raw);
  created->_followers = nullptr;
  created->_handles = nullptr;

#if TRI_FULLTEXT_DEBUG
  idx->_nodesAllocated++;
  idx->_memoryNodes += sizeof(node_t);
#endif

  return created;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief free a node's follower nodes
////////////////////////////////////////////////////////////////////////////////
/// frees all sub-nodes of a node (recursively, via FreeNode) and then the
/// sub-node list itself; afterwards node->_followers is nullptr
/// @param idx   index owning the node (needed for memory accounting)
/// @param node  node whose sub-nodes are freed; must not be nullptr
static void FreeFollowers (index_t* const idx, node_t* node) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  if (node->_followers == nullptr) {
    // nothing to free
    return;
  }

  const uint32_t numFollowers = NodeNumFollowers(node);

  if (numFollowers > 0) {
    node_t** cursor = NodeFollowersNodes(node);
    node_t** const end = cursor + numFollowers;

    for (; cursor != end; ++cursor) {
      FreeNode(idx, *cursor);
    }
  }

  // the list is booked with its allocated size, not with its used size
  const uint32_t numAllocated = NodeNumAllocated(node);

#if TRI_FULLTEXT_DEBUG
  idx->_memoryFollowers -= MemorySubNodeList(numAllocated);
#endif

  FreeMemory(idx, node->_followers, MemorySubNodeList(numAllocated));
  node->_followers = nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief free a node in the index
////////////////////////////////////////////////////////////////////////////////
/// frees a node together with its handle list and all of its sub-nodes
/// @param idx   index owning the node (needed for memory accounting)
/// @param node  node to free; nullptr is accepted and ignored
static void FreeNode (index_t* const idx, node_t* node) {
  if (node == nullptr) {
    return;
  }

  TRI_fulltext_list_t* const handles = node->_handles;

  if (handles != nullptr) {
    // the handle list is booked on the index, so take it out of the
    // statistics before releasing it
    idx->_memoryAllocated -= TRI_MemoryListFulltextIndex(handles);
    TRI_FreeListFulltextIndex(handles);
  }

  if (node->_followers != nullptr) {
    // releases the sub-nodes recursively
    FreeFollowers(idx, node);
  }

  // finally release the node itself
  FreeMemory(idx, node, sizeof(node_t));

#if TRI_FULLTEXT_DEBUG
  idx->_memoryNodes -= sizeof(node_t);
  idx->_nodesAllocated--;
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// @brief recursively cleanup nodes (used during compaction)
/// the map contains a rewrite-map of document handles
////////////////////////////////////////////////////////////////////////////////
/// @param idx   index owning the node (needed for memory accounting)
/// @param node  node to clean up; must not be nullptr
/// @param map   rewrite-map, passed through unchanged to
///              TRI_RewriteListFulltextIndex
/// @return true if the node still has at least one sub-node or at least one
///         handle after the cleanup, false if the caller may free it
static bool CleanupNodes (index_t* idx,
node_t* node,
void* map) {
bool isActive;
// assume we can delete the node we are processing
// we may set this flag to true further down below if we find the node is
// still useful
isActive = false;
#if TRI_FULLTEXT_DEBUG
TRI_ASSERT(node != nullptr);
#endif
if (node->_followers != nullptr) {
// recurse into sub-nodes
node_char_t* followerKeys = NodeFollowersKeys(node);
node_t** followerNodes = NodeFollowersNodes(node);
uint32_t numFollowers;
// i is the read position, j the write position (j <= i at all times)
uint32_t i, j;
numFollowers = NodeNumFollowers(node);
j = 0;
// traverse over all follower nodes and during that rewrite the
// node's follower list with only the followers that are still in
// use. this will delete all unused subnodes from the node's follower
// list, leaving the node's follower list potentially empty
for (i = 0; i < numFollowers; ++i) {
node_t* follower;
follower = followerNodes[i];
#if TRI_FULLTEXT_DEBUG
TRI_ASSERT(follower != nullptr);
#endif
// recursively clean up sub-nodes
if (! CleanupNodes(idx, follower, map)) {
// the sub-node is empty, kill it!
FreeNode(idx, follower);
// and go to next follower
// (j is not advanced, so the slot will be overwritten or dropped)
continue;
}
// sub-node is still relevant
isActive = true;
if (i != j) {
#if TRI_FULLTEXT_DEBUG
TRI_ASSERT(i > j);
#endif
// move nodes
// (close the gap left by previously removed sub-nodes)
followerKeys[j] = followerKeys[i];
followerNodes[j] = followerNodes[i];
}
++j;
}
// after the loop i equals numFollowers and j is the number of survivors
if (i != j) {
// number of followers has changed
// this might delete the memory for the followers!
// (followerKeys and followerNodes must not be used after this call)
SetNodeNumFollowers(idx, node, j);
}
}
// rewrite the node's handle list if present
if (node->_handles != nullptr) {
uint32_t remain;
remain = TRI_RewriteListFulltextIndex(node->_handles, map);
if (remain > 0) {
// there are still handles left in the rewritten handles list
// we must keep this node
isActive = true;
}
else {
// no handles left, we can delete the node's handle list
// (and take its memory out of the index statistics first)
idx->_memoryAllocated -= TRI_MemoryListFulltextIndex(node->_handles);
TRI_FreeListFulltextIndex(node->_handles);
node->_handles = nullptr;
}
}
return isActive;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find a sub-node of a node with only one sub-node
/// the caller must make sure the node actually has exactly one sub-node!
////////////////////////////////////////////////////////////////////////////////
/// @param node  node with exactly one sub-node
/// @param c     key byte to look for
/// @return the sub-node if its key equals c, nullptr otherwise
static inline node_t* FindDirectSubNodeSingle (const node_t* const node,
                                               const node_char_t c) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
  TRI_ASSERT(NodeNumFollowers(node) == 1);
#endif

  node_char_t* const keys = NodeFollowersKeys(node);

  if (keys[0] != c) {
    // the only sub-node has a different key
    return nullptr;
  }

  return NodeFollowersNodes(node)[0];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find a sub-node of a node using a linear search
/// this will compare the node's follower characters with the character passed
/// followers are sorted so it will stop at the first character that is higher
/// than the character passed
/// the caller must make sure the node actually has sub-nodes!
////////////////////////////////////////////////////////////////////////////////
/// @param node  node with at least one sub-node
/// @param c     key byte to look for
/// @return the sub-node stored under key c, or nullptr if there is none
static inline node_t* FindDirectSubNodeLinear (const node_t* const node,
                                               const node_char_t c) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const uint32_t numFollowers = NodeNumFollowers(node);

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(numFollowers >= 1);
#endif

  node_char_t* const keys = NodeFollowersKeys(node);

  for (uint32_t pos = 0; pos < numFollowers; ++pos) {
    const node_char_t candidate = keys[pos];

    if (candidate == c) {
      // hit: the sub-node pointer is stored at the same position as the key
      return NodeFollowersNodes(node)[pos];
    }

    if (candidate > c) {
      // keys are sorted, so c cannot show up any more
      break;
    }
  }

  return nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find a sub-node of a node using a binary search
/// the caller must ensure the node actually has sub-nodes!
////////////////////////////////////////////////////////////////////////////////
/// @param node  node with at least one sub-node
/// @param c     key byte to look for
/// @return the sub-node stored under key c, or nullptr if there is none
static node_t* FindDirectSubNodeBinary (const node_t* const node,
                                        const node_char_t c) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const uint32_t numFollowers = NodeNumFollowers(node);

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(numFollowers >= 1);
#endif

  node_char_t* const keys = NodeFollowersKeys(node);
  node_t** const nodes = NodeFollowersNodes(node);

  // closed search interval [low, high]. the subtraction cannot wrap because
  // the caller guarantees at least one sub-node
  uint32_t low = 0;
  uint32_t high = numFollowers - 1;

  do {
    const uint32_t mid = low + ((high - low) / 2);
    const node_char_t probe = keys[mid];

    if (probe == c) {
      return nodes[mid];
    }

    if (probe < c) {
      // continue in the upper half
      low = mid + 1;
    }
    else {
      if (mid == 0) {
        // mid - 1 would wrap around to UINT32_MAX, and there is nothing
        // below position 0 anyway
        return nullptr;
      }
      // continue in the lower half
      high = mid - 1;
    }
  }
  while (low <= high);

  return nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find a node's sub-node, identified by its start character
////////////////////////////////////////////////////////////////////////////////
/// picks the lookup strategy by the number of sub-nodes: direct comparison
/// for one, linear scan for 2 to 7, binary search for 8 or more
/// @param node  node to search below
/// @param c     key byte to look for
/// @return the sub-node stored under key c, or nullptr if there is none
static inline node_t* FindDirectSubNode (const node_t* const node,
                                         const node_char_t c) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const uint32_t numFollowers = NodeNumFollowers(node);

  if (numFollowers == 0) {
    return nullptr;
  }

  if (numFollowers == 1) {
    return FindDirectSubNodeSingle(node, c);
  }

  if (numFollowers < 8) {
    return FindDirectSubNodeLinear(node, c);
  }

  return FindDirectSubNodeBinary(node, c);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find a node by its key, starting from the index root
////////////////////////////////////////////////////////////////////////////////
/// walks down from the root, one key byte per level
/// @param idx        index to search in
/// @param key        bytes of the key (a terminating NUL is not required)
/// @param keyLength  number of bytes in key
/// @return the node reached after consuming all key bytes (the root for an
///         empty key), or nullptr if some byte has no matching sub-node
static node_t* FindNode (const index_t* idx,
                         const char* const key,
                         const size_t keyLength) {
  node_t* current = (node_t*) idx->_root;

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(current != nullptr);
#endif

  const node_char_t* cursor = (const node_char_t*) key;
  const node_char_t* const end = cursor + keyLength;

  while (cursor != end) {
    current = FindDirectSubNode(current, *cursor);
    ++cursor;

    if (current == nullptr) {
      // the path ends here
      return nullptr;
    }
  }

  return current;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create a list with the handles of a node
////////////////////////////////////////////////////////////////////////////////
/// @param node  node whose handle list is passed to TRI_CloneListFulltextIndex;
///              must not be nullptr
/// @return whatever TRI_CloneListFulltextIndex returns for node->_handles
///         NOTE(review): presumably a separate copy owned by the caller -
///         confirm against fulltext-list
static TRI_fulltext_list_t* GetDirectNodeHandles (const node_t* const node) {
  TRI_fulltext_list_t* const cloned = TRI_CloneListFulltextIndex(node->_handles);

  return cloned;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief recursively merge node and sub-node handles into the result list
////////////////////////////////////////////////////////////////////////////////
/// visits all sub-nodes depth-first and merges their handle lists into list
/// @param node  node whose sub-nodes are visited; its own handles are NOT
///              merged here
/// @param list  result list collected so far
/// @return the merged list; nullptr as soon as a merge step or a recursive
///         call yields nullptr
static TRI_fulltext_list_t* MergeSubNodeHandles (const node_t* const node,
                                                 TRI_fulltext_list_t* list) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const uint32_t numFollowers = NodeNumFollowers(node);

  if (numFollowers == 0) {
    // leaf: nothing to add
    return list;
  }

  node_t** const children = NodeFollowersNodes(node);

  for (uint32_t pos = 0; pos < numFollowers; ++pos) {
    node_t* const child = children[pos];

#if TRI_FULLTEXT_DEBUG
    TRI_ASSERT(child != nullptr);
#endif

    if (child->_handles != nullptr) {
      // OR-merge the child's own handles into the result
      list = TRI_UnioniseListFulltextIndex(list, GetDirectNodeHandles(child));

      if (list == nullptr) {
        return nullptr;
      }
    }

    // then everything below the child
    list = MergeSubNodeHandles(child, list);

    if (list == nullptr) {
      return nullptr;
    }
  }

  return list;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief recursively create a result list with the handles of a node and
/// all of its sub-nodes
////////////////////////////////////////////////////////////////////////////////
/// starts with the result of GetDirectNodeHandles for the node itself and
/// merges the handles of all nodes below it into that list
/// @param node  root of the sub-tree to collect handles from
/// @return the result of MergeSubNodeHandles
static inline TRI_fulltext_list_t* GetSubNodeHandles (const node_t* const node) {
  return MergeSubNodeHandles(node, GetDirectNodeHandles(node));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief insert a new sub-node underneath an existing node
/// the caller must make sure that the node already has memory allocated for
/// the _followers property
////////////////////////////////////////////////////////////////////////////////
/// @param idx       index owning the node
/// @param node      parent node; its sub-node list must have a free slot
/// @param position  slot at which the new sub-node is inserted; existing
///                  entries from this slot on are shifted up by one
/// @param key       key byte of the new sub-node
/// @return the new sub-node, or nullptr if it could not be allocated (the
///         parent is left unchanged in that case)
static node_t* InsertSubNode (index_t* const idx,
                              node_t* const node,
                              const uint32_t position,
                              const node_char_t key) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  // allocate first, so a failure cannot leave a half-updated list behind
  node_t* const created = CreateNode(idx);

  if (created == nullptr) {
    // out of memory
    return nullptr;
  }

  const uint32_t numFollowers = NodeNumFollowers(node);
  node_char_t* const keys = NodeFollowersKeys(node);
  node_t** const nodes = NodeFollowersNodes(node);

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(numFollowers >= position);
#endif

  // number of entries behind the insert position
  const uint32_t tail = numFollowers - position;

  if (tail != 0) {
    // open a gap at the insert position in both arrays
    memmove(&keys[position + 1], &keys[position], tail * sizeof(node_char_t));
    memmove(&nodes[position + 1], &nodes[position], tail * sizeof(node_t*));
  }

  keys[position] = key;
  nodes[position] = created;

  SetNodeNumFollowers(idx, node, numFollowers + 1);

  return created;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief ensure that a specific sub-node (with a specific key) is there
/// if it is not there, it will be created by this function
////////////////////////////////////////////////////////////////////////////////
/// @param idx   index owning the node
/// @param node  parent node
/// @param c     key byte of the wanted sub-node
/// @return the existing or newly created sub-node for key c, or nullptr if
///         the sub-node list could not be extended or the sub-node could not
///         be created
static node_t* EnsureSubNode (index_t* const idx,
                              node_t* node,
                              const node_char_t c) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  const uint32_t numFollowers = NodeNumFollowers(node);

  // position at which a new sub-node would have to be inserted. it stays 0
  // if the node has no sub-nodes yet
  uint32_t insertAt = 0;

  if (numFollowers > 0) {
    node_char_t* const keys = NodeFollowersKeys(node);

    // for bigger lists, skip the lower half if the middle key is already
    // smaller than the key we are looking for
    if (numFollowers >= 8 && keys[numFollowers / 2] < c) {
      insertAt = numFollowers / 2;
    }

    // linear scan over the sorted keys
    while (insertAt < numFollowers) {
      const node_char_t candidate = keys[insertAt];

      if (candidate == c) {
        // the sub-node exists already
        return NodeFollowersNodes(node)[insertAt];
      }

      if (candidate > c) {
        // first key beyond c: this is the insert position
        break;
      }

      ++insertAt;
    }
  }

  // the sub-node does not exist. make sure the list has room for one more
  const uint32_t numAllocated = NodeNumAllocated(node);

  if (numFollowers >= numAllocated &&
      ! ExtendSubNodeList(idx, node, numFollowers, numAllocated)) {
    // out of memory
    return nullptr;
  }

#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node->_followers != nullptr);
#endif

  return InsertSubNode(idx, node, insertAt, c);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief insert a handle for a node
////////////////////////////////////////////////////////////////////////////////
/// @param idx     index owning the node (needed for memory accounting and the
///                initial list size)
/// @param node    node the handle is attached to
/// @param handle  handle to add
/// @return true on success, false if the handle list could not be created or
///         the insert returned nullptr
static bool InsertHandle (index_t* const idx,
                          node_t* const node,
                          const TRI_fulltext_handle_t handle) {
#if TRI_FULLTEXT_DEBUG
  TRI_ASSERT(node != nullptr);
#endif

  if (node->_handles == nullptr) {
    // first handle for this node: create its list
    TRI_fulltext_list_t* const fresh = TRI_CreateListFulltextIndex(idx->_initialNodeHandles);

    if (fresh == nullptr) {
      // out of memory
      return false;
    }

    node->_handles = fresh;
    idx->_memoryAllocated += TRI_MemoryListFulltextIndex(fresh);
  }

  // remember pointer and size of the list as it is before the insert
  TRI_fulltext_list_t* const before = node->_handles;
  const size_t bytesBefore = TRI_MemoryListFulltextIndex(before);

  // the insert may hand back a different list pointer
  TRI_fulltext_list_t* const after = TRI_InsertListFulltextIndex(before, handle);

  if (after == nullptr) {
    // out of memory
    return false;
  }

  if (after != before) {
    // the list was replaced: adopt it and re-book its memory
    node->_handles = after;
    idx->_memoryAllocated += TRI_MemoryListFulltextIndex(after);
    idx->_memoryAllocated -= bytesBefore;
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
/// turn a handle list into a proper document list result
/// this will also exclude all deleted documents
////////////////////////////////////////////////////////////////////////////////
static TRI_fulltext_result_t* MakeListResult (index_t* const idx,
                                              TRI_fulltext_list_t* list,
                                              size_t maxResults) {
  // converts a list of handles into a result of documents, leaving out
  // handles for which the handle lookup returns 0 (deleted documents).
  // `maxResults` caps the number of documents returned, 0 means "no cap".
  // the function frees `list` in all cases in which `list` is not a nullptr.
  // returns nullptr if `list` is a nullptr or the result cannot be allocated
  if (list == nullptr) {
    return nullptr;
  }

  // number of handles in the list. some of them may belong to deleted documents
  const uint32_t numEntries = TRI_NumEntriesListFulltextIndex(list);

  // maximum number of documents to put into the result
  uint32_t limit = numEntries;

  if (maxResults > 0 && static_cast<size_t>(numEntries) > maxResults) {
    // cap the number of results
    limit = static_cast<uint32_t>(maxResults);
  }

  TRI_fulltext_result_t* result = TRI_CreateResultFulltextIndex(limit);

  if (result == nullptr) {
    TRI_FreeListFulltextIndex(list);
    return nullptr;
  }

  TRI_fulltext_list_entry_t* listEntries = TRI_StartListFulltextIndex(list);
  uint32_t pos = 0;

  // walk over all handles (not just the first `limit` ones) and stop as soon
  // as the result is full. this way handles of deleted documents do not use up
  // result slots that could be filled by later handles in the list
  for (uint32_t i = 0; i < numEntries && pos < limit; ++i) {
    TRI_fulltext_handle_t handle = listEntries[i];
    TRI_fulltext_doc_t doc = TRI_GetDocumentFulltextIndex(idx->_handles, handle);

    if (doc == 0) {
      // deleted document
      continue;
    }

    result->_documents[pos++] = doc;
  }

  result->_numDocuments = pos;

  // don't need the list anymore
  TRI_FreeListFulltextIndex(list);

  return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief find all documents from the index that match the key
////////////////////////////////////////////////////////////////////////////////
#if 0
// NOTE: this function is compiled out by the surrounding "#if 0".
// it looks up the node for `key` via FindNode and returns an empty result if
// there is no such node. otherwise it collects the handles with
// GetSubNodeHandles (recursive == true) or GetDirectNodeHandles
// (recursive == false) and converts them with MakeListResult.
// the 0 passed as maxResults means that the number of results is not capped
TRI_fulltext_result_t* FindDocuments (index_t* const idx,
const char* const key,
const size_t keyLength,
const bool recursive) {
node_t* node;
TRI_fulltext_list_t* list;
node = FindNode(idx, key, keyLength);
if (node == nullptr) {
// not found, create empty result
return TRI_CreateResultFulltextIndex(0);
}
if (recursive) {
// prefix matching
list = GetSubNodeHandles(node);
}
else {
// complete match
list = GetDirectNodeHandles(node);
}
// MakeListResult frees the list
return MakeListResult(idx, list, 0);
}
#endif
// -----------------------------------------------------------------------------
// --SECTION-- string functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief determine the common prefix length of two words
////////////////////////////////////////////////////////////////////////////////
static inline size_t CommonPrefixLength (const char* const lhs,
                                         const char* const rhs) {
  // counts the number of leading bytes that are identical in both
  // NUL-terminated strings. the comparison stops at the first difference or at
  // the end of the shorter string
  size_t matched = 0;

  // if lhs[matched] is not NUL and equal to rhs[matched], then rhs[matched]
  // cannot be NUL either, so no separate check for the end of rhs is needed
  while (lhs[matched] != '\0' && lhs[matched] == rhs[matched]) {
    ++matched;
  }

  return matched;
}
// -----------------------------------------------------------------------------
// --SECTION-- constructors / destructors
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief create the fulltext index
////////////////////////////////////////////////////////////////////////////////
TRI_fts_index_t* TRI_CreateFtsIndex (uint32_t handleChunkSize,
                                     uint32_t nodeChunkSize,
                                     uint32_t initialNodeHandles) {
  // allocates an index together with its root node and its handle management
  // instance. returns nullptr if any of the allocations fails. everything
  // allocated up to that point is freed again in this case
  index_t* index = static_cast<index_t*>(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(index_t), false));

  if (index == nullptr) {
    return nullptr;
  }

  // memory accounting starts with the index structure itself
  index->_memoryAllocated = sizeof(index_t);
#if TRI_FULLTEXT_DEBUG
  index->_memoryBase      = sizeof(index_t);
  index->_memoryNodes     = 0;
  index->_memoryFollowers = 0;
  index->_nodesAllocated  = 0;
#endif

  // how many followers to allocate at once
  index->_nodeChunkSize = nodeChunkSize;
  // how many handles to create per node by default
  index->_initialNodeHandles = initialNodeHandles;

  // create the root node
  index->_root = CreateNode(index);

  if (index->_root != nullptr) {
    // create an instance for managing document handles
    index->_handles = TRI_CreateHandlesFulltextIndex(handleChunkSize);

    if (index->_handles != nullptr) {
      // all allocations have succeeded
      index->_memoryAllocated += sizeof(TRI_fulltext_handles_t);
#if TRI_FULLTEXT_DEBUG
      index->_memoryBase += sizeof(TRI_fulltext_handles_t);
#endif

      TRI_InitReadWriteLock(&index->_lock);

      return (TRI_fts_index_t*) index;
    }

    // out of memory while creating the handles. give up the root node
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, index->_root);
  }

  // out of memory
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, index);

  return nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief free the fulltext index
////////////////////////////////////////////////////////////////////////////////
void TRI_FreeFtsIndex (TRI_fts_index_t* ftx) {
  // releases the node tree, the handle management instance, the lock and
  // finally the index structure itself
  index_t* index = (index_t*) ftx;

  // freeing the root node recursively frees all other nodes as well
  FreeNode(index, index->_root);

  // release the handles and take them out of the memory accounting
  TRI_FreeHandlesFulltextIndex(index->_handles);
  index->_handles = nullptr;
  index->_memoryAllocated -= sizeof(TRI_fulltext_handles_t);

#if TRI_FULLTEXT_DEBUG
  // at this point only the index structure itself must be left in the books
  index->_memoryBase -= sizeof(TRI_fulltext_handles_t);
  TRI_ASSERT(index->_memoryBase == sizeof(index_t));
  TRI_ASSERT(index->_memoryFollowers == 0);
  TRI_ASSERT(index->_memoryNodes == 0);
  TRI_ASSERT(index->_memoryAllocated == sizeof(index_t));
#endif

  TRI_DestroyReadWriteLock(&index->_lock);

  // free index itself
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, index);
}
// -----------------------------------------------------------------------------
// --SECTION-- document addition / removal functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief delete a document from the index
////////////////////////////////////////////////////////////////////////////////
void TRI_DeleteDocumentFulltextIndex (TRI_fts_index_t* const ftx,
                                      const TRI_fulltext_doc_t document) {
  // the deletion is delegated to the handle management instance and is
  // carried out while holding the write lock of the index
  index_t* index = (index_t*) ftx;

  TRI_WriteLockReadWriteLock(&index->_lock);
  TRI_DeleteDocumentHandleFulltextIndex(index->_handles, document);
  TRI_WriteUnlockReadWriteLock(&index->_lock);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief insert a list of words into the index
/// calling this function requires a wordlist whose words all have valid
/// lengths. in particular, words in the wordlist must not be longer than
/// MAX_WORD_BYTES. the caller must check this before calling this function
///
/// The function will sort the wordlist in place to
/// - filter out duplicates on insertion
/// - save redundant lookups of prefix nodes for adjacent words with shared
/// prefixes
////////////////////////////////////////////////////////////////////////////////
bool TRI_InsertWordsFulltextIndex (TRI_fts_index_t* const ftx,
                                   const TRI_fulltext_doc_t document,
                                   TRI_fulltext_wordlist_t* wordlist) {
  // inserts all words of `wordlist` for `document` into the index.
  // the wordlist is sorted in place. returns true on success (also for an empty
  // wordlist) and false if no handle could be created for the document or if a
  // node or handle list could not be allocated. if the insertion fails after
  // the handle for the document was created, the document is marked as deleted
  // so that a partially indexed document is not reported by queries
  if (wordlist->_numWords == 0) {
    return true;
  }

  // paths[n] is the node reached after the first n characters of the word
  // that was inserted last
  node_t* paths[MAX_WORD_BYTES + 4];

  // initialize to satisfy scan-build
  paths[0] = nullptr;
  paths[MAX_WORD_BYTES] = nullptr;

  // the words must be sorted so we can avoid duplicate words and use an optimisation
  // for words with common prefixes (which will be adjacent in the sorted list of words)
  TRI_SortWordlistFulltextIndex(wordlist);

  index_t* idx = (index_t*) ftx;

  TRI_WriteLockReadWriteLock(&idx->_lock);

  // get a new handle for the document
  TRI_fulltext_handle_t handle = TRI_InsertHandleFulltextIndex(idx->_handles, document);

  if (handle == 0) {
    TRI_WriteUnlockReadWriteLock(&idx->_lock);
    return false;
  }

  // if words are all different, we must start from the root node. the root node is also the
  // start for the 1st word inserted
  paths[0] = idx->_root;

  // length of the word that was inserted last
  size_t lastLength = 0;

  for (size_t w = 0; w < wordlist->_numWords; ++w) {
    size_t start = 0;

    if (w > 0) {
      // check if current word has a shared/common prefix with the previous word inserted
      // in case this is true, we can use an optimisation and do not need to traverse the
      // tree from the root again. instead, we just start at the node at the end of the
      // shared/common prefix. this will save us a lot of tree lookups
      start = CommonPrefixLength(wordlist->_words[w - 1], wordlist->_words[w]);

      if (start > MAX_WORD_BYTES) {
        start = MAX_WORD_BYTES;
      }

      // check if current word is the same as the last word. we do not want to insert the
      // same word multiple times for the same document
      if (start > 0 && start == lastLength && start == strlen(wordlist->_words[w])) {
        // duplicate word, skip it and continue with next word
        continue;
      }
    }

    // for words with common prefixes, use the most appropriate start node we
    // do not need to traverse the tree from the root again
    node_t* node = paths[start];
#if TRI_FULLTEXT_DEBUG
    TRI_ASSERT(node != nullptr);
#endif

    // now insert into the tree, starting at the next character after the common prefix
    char* p = wordlist->_words[w] + start;
    size_t i;

    for (i = start; *p && i <= MAX_WORD_BYTES; ++i) {
      node_char_t c = (node_char_t) *(p++);

      node = EnsureSubNode(idx, node, c);

      if (node == nullptr) {
        // out of memory. handled below, together with a failed handle insert
        break;
      }

      paths[i + 1] = node;
    }

    if (node == nullptr || ! InsertHandle(idx, node, handle)) {
      // the handle for the document already exists and may have been added to
      // the nodes of previous words. mark the document as deleted, regardless
      // of whether creating the node or inserting the handle has failed
      TRI_DeleteDocumentHandleFulltextIndex(idx->_handles, document);
      TRI_WriteUnlockReadWriteLock(&idx->_lock);
      return false;
    }

    // store length of word just inserted
    // we'll use that to compare with the next word for duplicate removal
    lastLength = i;
  }

  TRI_WriteUnlockReadWriteLock(&idx->_lock);

  return true;
}
// -----------------------------------------------------------------------------
// --SECTION-- query functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief find all documents that contain a word (exact match)
////////////////////////////////////////////////////////////////////////////////
#if 0
// NOTE: this function is compiled out by the surrounding "#if 0".
// it forwards to FindDocuments with recursive == false, which makes
// FindDocuments collect the handles with GetDirectNodeHandles
TRI_fulltext_result_t* TRI_FindExactFulltextIndex (TRI_fts_index_t* const ftx,
const char* const key,
const size_t keyLength) {
return FindDocuments((index_t*) ftx, key, keyLength, false);
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief find all documents that contain a word (prefix match)
////////////////////////////////////////////////////////////////////////////////
#if 0
// NOTE: this function is compiled out by the surrounding "#if 0".
// it forwards to FindDocuments with recursive == true, which makes
// FindDocuments collect the handles with GetSubNodeHandles
TRI_fulltext_result_t* TRI_FindPrefixFulltextIndex (TRI_fts_index_t* const ftx,
const char* key,
const size_t keyLength) {
return FindDocuments((index_t*) ftx, key, keyLength, true);
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief execute a query on the fulltext index
/// note: this will free the query
////////////////////////////////////////////////////////////////////////////////
TRI_fulltext_result_t* TRI_QueryFulltextIndex (TRI_fts_index_t* const ftx,
                                               TRI_fulltext_query_t* query) {
  // evaluates the words of `query` one after another and combines the handle
  // list of each word with the result so far, using the operation (AND, OR,
  // EXCLUDE) given for the word. the final handle list is then converted into
  // a document result, capped by query->_maxResults.
  // the query is freed, except when `query` is a nullptr, in which case nullptr
  // is returned. an empty result is returned if the query does not contain
  // any words or if no handle list was produced
  if (query == nullptr) {
    return nullptr;
  }

  if (query->_numWords == 0) {
    // query is empty
    TRI_FreeQueryFulltextIndex(query);
    return TRI_CreateResultFulltextIndex(0);
  }

  auto maxResults = query->_maxResults;

  index_t* idx = (index_t*) ftx;

  TRI_ReadLockReadWriteLock(&idx->_lock);

  // initial result is empty
  TRI_fulltext_list_t* result = nullptr;

  // iterate over all words in query
  for (size_t i = 0; i < query->_numWords; ++i) {
    char* word = query->_words[i];

    if (word == nullptr) {
      break;
    }

    TRI_fulltext_query_match_e match = query->_matches[i];
    TRI_fulltext_query_operation_e operation = query->_operations[i];

    LOG_DEBUG("searching for word: '%s'", word);

    if ((operation == TRI_FULLTEXT_AND || operation == TRI_FULLTEXT_EXCLUDE) &&
        i > 0 &&
        TRI_NumEntriesListFulltextIndex(result) == 0) {
      // current result set is empty so logical AND or EXCLUDE will not have any result either
      continue;
    }

    TRI_fulltext_list_t* list = nullptr;
    node_t* node = FindNode(idx, word, strlen(word));

    if (node != nullptr) {
      if (match == TRI_FULLTEXT_COMPLETE) {
        // complete matching
        list = GetDirectNodeHandles(node);
      }
      else if (match == TRI_FULLTEXT_PREFIX) {
        // prefix matching
        list = GetSubNodeHandles(node);
      }
      else {
        LOG_WARNING("invalid matching option for fulltext index query");
        list = TRI_CreateListFulltextIndex(0);
      }
    }
    else {
      list = TRI_CreateListFulltextIndex(0);
    }

    if (operation == TRI_FULLTEXT_AND) {
      // perform a logical AND of current and previous result (if any)
      result = TRI_IntersectListFulltextIndex(result, list);
    }
    else if (operation == TRI_FULLTEXT_OR) {
      // perform a logical OR of current and previous result (if any)
      result = TRI_UnioniseListFulltextIndex(result, list);
    }
    else if (operation == TRI_FULLTEXT_EXCLUDE) {
      // perform a logical exclusion of current from previous result (if any)
      result = TRI_ExcludeListFulltextIndex(result, list);
    }

    if (result == nullptr) {
      // out of memory
      break;
    }
  }

  const bool haveList = (result != nullptr);
  TRI_fulltext_result_t* documents = nullptr;

  if (haveList) {
    // now convert the handle list into a result (this will also filter out
    // deleted documents).
    // this must be done while still holding the read lock: the conversion looks
    // up the handles in idx->_handles, and TRI_CompactFulltextIndex replaces
    // idx->_handles while holding the write lock. MakeListResult frees the list
    documents = MakeListResult(idx, result, maxResults);
  }

  TRI_ReadUnlockReadWriteLock(&idx->_lock);

  TRI_FreeQueryFulltextIndex(query);

  if (! haveList) {
    // if we haven't found anything...
    return TRI_CreateResultFulltextIndex(0);
  }

  return documents;
}
// -----------------------------------------------------------------------------
// --SECTION-- public functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief dump index tree
////////////////////////////////////////////////////////////////////////////////
#if TRI_FULLTEXT_DEBUG
void TRI_DumpTreeFtsIndex (const TRI_fts_index_t* const ftx) {
  // debugging aid: dumps the handles first, then the node tree, starting at
  // the root node with a level of 0
  index_t* index = (index_t*) ftx;

  TRI_DumpHandleFulltextIndex(index->_handles);
  DumpNode(index->_root, 0);
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief dump index statistics
////////////////////////////////////////////////////////////////////////////////
#if TRI_FULLTEXT_DEBUG
void TRI_DumpStatsFtsIndex (const TRI_fts_index_t* const ftx) {
  // debugging aid: fetches the statistics via TRI_StatsFulltextIndex and
  // prints them to stdout, one value per line
  index_t* index = (index_t*) ftx;
  const TRI_fulltext_stats_t stats = TRI_StatsFulltextIndex(ftx);

  printf("memoryTotal %llu\n", (unsigned long long) stats._memoryTotal);

  // the following values only exist in debug builds. the whole function is
  // only compiled in debug builds, so no further guard is needed here
  printf("memoryOwn %llu\n", (unsigned long long) stats._memoryOwn);
  printf("memoryBase %llu\n", (unsigned long long) stats._memoryBase);
  printf("memoryNodes %llu\n", (unsigned long long) stats._memoryNodes);
  printf("memoryFollowers %llu\n", (unsigned long long) stats._memoryFollowers);
  printf("memoryDocuments %llu\n", (unsigned long long) stats._memoryDocuments);
  printf("numNodes %llu\n", (unsigned long long) stats._numNodes);

  if (index->_handles == nullptr) {
    // no handles, nothing more to print
    return;
  }

  printf("memoryHandles %llu\n", (unsigned long long) stats._memoryHandles);
  printf("numDocuments %llu\n", (unsigned long long) stats._numDocuments);
  printf("numDeleted %llu\n", (unsigned long long) stats._numDeleted);
  printf("deletionGrade %f\n", stats._handleDeletionGrade);
  printf("should compact %d\n", (int) stats._shouldCompact);
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief return stats about the index
////////////////////////////////////////////////////////////////////////////////
TRI_fulltext_stats_t TRI_StatsFulltextIndex (const TRI_fts_index_t* const ftx) {
  // collects the statistics of the index while holding the read lock.
  // the handle-related values are all set to zero/false if the index does not
  // have a handle management instance
  index_t* index = (index_t*) ftx;
  TRI_fulltext_stats_t stats;

  TRI_ReadLockReadWriteLock(&index->_lock);

  stats._memoryTotal = TRI_MemoryFulltextIndex(index);

#if TRI_FULLTEXT_DEBUG
  // detailed memory accounting is only available in debug builds
  stats._memoryOwn       = index->_memoryAllocated;
  stats._memoryBase      = index->_memoryBase;
  stats._memoryNodes     = index->_memoryNodes;
  stats._memoryFollowers = index->_memoryFollowers;
  stats._memoryDocuments = index->_memoryAllocated - index->_memoryNodes - index->_memoryBase;
  stats._numNodes        = index->_nodesAllocated;
#endif

  if (index->_handles == nullptr) {
    stats._memoryHandles       = 0;
    stats._numDocuments        = 0;
    stats._numDeleted          = 0;
    stats._handleDeletionGrade = 0.0;
    stats._shouldCompact       = false;
  }
  else {
    stats._memoryHandles       = TRI_MemoryHandleFulltextIndex(index->_handles);
    stats._numDocuments        = TRI_NumHandlesHandleFulltextIndex(index->_handles);
    stats._numDeleted          = TRI_NumDeletedHandleFulltextIndex(index->_handles);
    stats._handleDeletionGrade = TRI_DeletionGradeHandleFulltextIndex(index->_handles);
    stats._shouldCompact       = TRI_ShouldCompactHandleFulltextIndex(index->_handles);
  }

  TRI_ReadUnlockReadWriteLock(&index->_lock);

  return stats;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the total memory used by the index
////////////////////////////////////////////////////////////////////////////////
size_t TRI_MemoryFulltextIndex (const TRI_fts_index_t* const ftx) {
index_t* idx = (index_t*) ftx;
if (idx->_handles != nullptr) {
return idx->_memoryAllocated + TRI_MemoryHandleFulltextIndex(idx->_handles);
}
return idx->_memoryAllocated;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compact the fulltext index
/// note: the caller must hold a lock on the index before calling this
////////////////////////////////////////////////////////////////////////////////
bool TRI_CompactFulltextIndex (TRI_fts_index_t* const ftx) {
  // replaces the handles of the index with a compacted copy and rewrites the
  // handles stored in the nodes accordingly. returns true if the compaction
  // succeeded, was not necessary, or was skipped because the write lock could
  // not be acquired. returns false only if the compacted copy of the handles
  // could not be created
  index_t* index = (index_t*) ftx;

  // try to acquire the write lock to clean up,
  // but don't block if the index is busy
  if (! TRI_TryWriteLockReadWriteLock(&index->_lock)) {
    return true;
  }

  bool success = true;

  // if there is not enough cleanup work to do, the index is left untouched
  if (TRI_ShouldCompactHandleFulltextIndex(index->_handles)) {
    // this will create a copy of the handles from the existing index, but will
    // re-align the handle numbers consecutively, starting at 1.
    // this will also populate the _map property, which can be used to clean up
    // handles of existing nodes
    TRI_fulltext_handles_t* compacted = TRI_CompactHandleFulltextIndex(index->_handles);

    if (compacted == nullptr) {
      success = false;
    }
    else {
      CleanupNodes(index, index->_root, compacted->_map);

      // delete the original handle list
      TRI_FreeHandlesFulltextIndex(index->_handles);

      // free the rewrite map
      TRI_Free(TRI_UNKNOWN_MEM_ZONE, compacted->_map);
      compacted->_map = nullptr;

      // cleanup finished, now switch over
      index->_handles = compacted;
    }
  }

  TRI_WriteUnlockReadWriteLock(&index->_lock);

  return success;
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End: