mirror of https://gitee.com/bigwinds/arangodb
596 lines
15 KiB
C++
596 lines
15 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Jan Steemann
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "mmfiles-fulltext-list.h"
|
|
|
|
/// @brief flag bit marking a list as sorted
/// The flag occupies the highest bit of the first uint32_t of a list, i.e. it
/// shares a word with the number of allocated entries. The bit is set while
/// the list is known to be sorted and cleared otherwise.
/// Keeping the flag inside an existing header word is a space optimisation:
/// a big index contains a lot of document lists, and a separate boolean member
/// would likely cost an extra 4 or 8 bytes per list due to padding.
#define SORTED_BIT (0x80000000UL)
|
|
|
|
/// @brief factor by which the entry count is multiplied to compute the new
/// capacity when a list has to grow
#define GROWTH_FACTOR (1.2)
|
|
|
|
/// @brief compare two entries in a list
|
|
static int CompareEntries(const void* lhs, const void* rhs) {
|
|
TRI_fulltext_list_entry_t l = (*(TRI_fulltext_list_entry_t*)lhs);
|
|
TRI_fulltext_list_entry_t r = (*(TRI_fulltext_list_entry_t*)rhs);
|
|
|
|
if (l < r) {
|
|
return -1;
|
|
}
|
|
|
|
if (l > r) {
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/// @brief return whether the list is sorted
|
|
/// this will check the sorted bit at the start of the list
|
|
static inline bool IsSorted(const TRI_fulltext_list_t* const list) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
|
|
return ((*head & SORTED_BIT) != 0);
|
|
}
|
|
|
|
/// @brief return whether the list is sorted
|
|
static inline void SetIsSorted(TRI_fulltext_list_t* const list,
|
|
bool const value) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
|
|
// yes, we could also do this without branching and with more bit twiddling
|
|
// but this is not the critical path in the code
|
|
if (value) {
|
|
(*head) |= SORTED_BIT;
|
|
} else {
|
|
(*head) &= ~SORTED_BIT;
|
|
}
|
|
}
|
|
|
|
/// @brief return the pointer to the start of the list entries
|
|
static inline TRI_fulltext_list_entry_t* GetStart(
|
|
const TRI_fulltext_list_t* const list) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
++head; // numAllocated
|
|
++head; // numEntries
|
|
|
|
return (TRI_fulltext_list_entry_t*)head;
|
|
}
|
|
|
|
/// @brief return the number of entries
|
|
static inline uint32_t GetNumEntries(const TRI_fulltext_list_t* const list) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
return *(++head);
|
|
}
|
|
|
|
/// @brief set the number of entries
|
|
static inline void SetNumEntries(TRI_fulltext_list_t* const list,
|
|
uint32_t value) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
|
|
*(++head) = value;
|
|
}
|
|
|
|
/// @brief return the number of allocated entries
|
|
static inline uint32_t GetNumAllocated(TRI_fulltext_list_t const* list) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
|
|
return (*head & ~SORTED_BIT);
|
|
}
|
|
|
|
/// @brief initialize a new list
|
|
static void InitList(TRI_fulltext_list_t* list, uint32_t size) {
|
|
uint32_t* head = (uint32_t*)list;
|
|
|
|
*(head++) = size;
|
|
*(head) = 0;
|
|
}
|
|
|
|
/// @brief sort a list in place
|
|
static void SortList(TRI_fulltext_list_t* list) {
|
|
if (IsSorted(list)) {
|
|
// nothing to do
|
|
return;
|
|
}
|
|
|
|
uint32_t numEntries = GetNumEntries(list);
|
|
|
|
if (numEntries > 1) {
|
|
// only sort if more than one elements
|
|
qsort(GetStart(list), numEntries, sizeof(TRI_fulltext_list_entry_t),
|
|
&CompareEntries);
|
|
}
|
|
|
|
SetIsSorted(list, true);
|
|
}
|
|
|
|
/// @brief get the memory usage for a list of the specified size
|
|
static inline size_t MemoryList(uint32_t size) {
|
|
return sizeof(uint32_t) + // numAllocated
|
|
sizeof(uint32_t) + // numEntries
|
|
size * sizeof(TRI_fulltext_list_entry_t); // entries
|
|
}
|
|
|
|
/// @brief increase an existing list
|
|
static TRI_fulltext_list_t* IncreaseList(TRI_fulltext_list_t* list,
|
|
uint32_t size) {
|
|
TRI_fulltext_list_t* copy =
|
|
TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, list, MemoryList(size));
|
|
|
|
if (copy != nullptr) {
|
|
InitList(copy, size);
|
|
}
|
|
|
|
return copy;
|
|
}
|
|
|
|
/// @brief clone a list by copying an existing one
|
|
TRI_fulltext_list_t* TRI_CloneListMMFilesFulltextIndex(
|
|
TRI_fulltext_list_t const* source) {
|
|
uint32_t numEntries;
|
|
|
|
if (source == nullptr) {
|
|
numEntries = 0;
|
|
} else {
|
|
numEntries = GetNumEntries(source);
|
|
}
|
|
|
|
TRI_fulltext_list_t* list = TRI_CreateListMMFilesFulltextIndex(numEntries);
|
|
|
|
if (list != nullptr) {
|
|
if (numEntries > 0) {
|
|
memcpy(GetStart(list), GetStart(source),
|
|
numEntries * sizeof(TRI_fulltext_list_entry_t));
|
|
SetNumEntries(list, numEntries);
|
|
}
|
|
}
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief create a new list
|
|
TRI_fulltext_list_t* TRI_CreateListMMFilesFulltextIndex(uint32_t size) {
|
|
TRI_fulltext_list_t* list =
|
|
TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, MemoryList(size), false);
|
|
|
|
if (list == nullptr) {
|
|
// out of memory
|
|
return nullptr;
|
|
}
|
|
|
|
InitList(list, size);
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief free a list
|
|
void TRI_FreeListMMFilesFulltextIndex(TRI_fulltext_list_t* list) {
|
|
TRI_Free(TRI_UNKNOWN_MEM_ZONE, list);
|
|
}
|
|
|
|
/// @brief get the memory usage of a list
|
|
size_t TRI_MemoryListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
|
|
uint32_t size = GetNumAllocated(list);
|
|
return MemoryList(size);
|
|
}
|
|
|
|
/// @brief unionise two lists (a.k.a. logical OR)
|
|
/// this will create a new list and free both lhs & rhs
|
|
TRI_fulltext_list_t* TRI_UnioniseListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs,
|
|
TRI_fulltext_list_t* rhs) {
|
|
TRI_fulltext_list_t* list;
|
|
TRI_fulltext_list_entry_t last;
|
|
TRI_fulltext_list_entry_t* lhsEntries;
|
|
TRI_fulltext_list_entry_t* rhsEntries;
|
|
TRI_fulltext_list_entry_t* listEntries;
|
|
uint32_t l, r;
|
|
uint32_t numLhs, numRhs;
|
|
uint32_t listPos;
|
|
|
|
if (lhs == nullptr) {
|
|
return rhs;
|
|
}
|
|
if (rhs == nullptr) {
|
|
return lhs;
|
|
}
|
|
|
|
numLhs = GetNumEntries(lhs);
|
|
numRhs = GetNumEntries(rhs);
|
|
|
|
// check the easy cases when one of the lists is empty
|
|
if (numLhs == 0) {
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
return rhs;
|
|
}
|
|
|
|
if (numRhs == 0) {
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
return lhs;
|
|
}
|
|
|
|
list = TRI_CreateListMMFilesFulltextIndex(numLhs + numRhs);
|
|
if (list == nullptr) {
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
return nullptr;
|
|
}
|
|
|
|
SortList(lhs);
|
|
lhsEntries = GetStart(lhs);
|
|
l = 0;
|
|
|
|
SortList(rhs);
|
|
rhsEntries = GetStart(rhs);
|
|
r = 0;
|
|
|
|
listPos = 0;
|
|
listEntries = GetStart(list);
|
|
last = 0;
|
|
|
|
while (true) {
|
|
while (l < numLhs && lhsEntries[l] <= last) {
|
|
++l;
|
|
}
|
|
|
|
while (r < numRhs && rhsEntries[r] <= last) {
|
|
++r;
|
|
}
|
|
|
|
if (l >= numLhs && r >= numRhs) {
|
|
break;
|
|
}
|
|
|
|
if (l >= numLhs && r < numRhs) {
|
|
listEntries[listPos++] = last = rhsEntries[r++];
|
|
} else if (l < numLhs && r >= numRhs) {
|
|
listEntries[listPos++] = last = lhsEntries[l++];
|
|
} else if (lhsEntries[l] < rhsEntries[r]) {
|
|
listEntries[listPos++] = last = lhsEntries[l++];
|
|
} else {
|
|
listEntries[listPos++] = last = rhsEntries[r++];
|
|
}
|
|
}
|
|
|
|
SetNumEntries(list, listPos);
|
|
SetIsSorted(list, true);
|
|
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief intersect two lists (a.k.a. logical AND)
|
|
/// this will create a new list and free both lhs & rhs
|
|
TRI_fulltext_list_t* TRI_IntersectListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs,
|
|
TRI_fulltext_list_t* rhs) {
|
|
TRI_fulltext_list_t* list;
|
|
TRI_fulltext_list_entry_t last;
|
|
TRI_fulltext_list_entry_t* lhsEntries;
|
|
TRI_fulltext_list_entry_t* rhsEntries;
|
|
TRI_fulltext_list_entry_t* listEntries;
|
|
uint32_t l, r;
|
|
uint32_t numLhs, numRhs;
|
|
uint32_t listPos;
|
|
|
|
// check if one of the pointers is NULL
|
|
if (lhs == nullptr) {
|
|
return rhs;
|
|
}
|
|
|
|
if (rhs == nullptr) {
|
|
return lhs;
|
|
}
|
|
|
|
numLhs = GetNumEntries(lhs);
|
|
numRhs = GetNumEntries(rhs);
|
|
|
|
// printf("list intersection lhs: %lu rhs: %lu\n\n", (unsigned long) numLhs,
|
|
// (unsigned long) numRhs);
|
|
|
|
// check the easy cases when one of the lists is empty
|
|
if (numLhs == 0 || numRhs == 0) {
|
|
if (lhs != nullptr) {
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
}
|
|
if (rhs != nullptr) {
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
}
|
|
|
|
return TRI_CreateListMMFilesFulltextIndex(0);
|
|
}
|
|
|
|
// we have at least one entry in each list
|
|
list = TRI_CreateListMMFilesFulltextIndex(numLhs < numRhs ? numLhs : numRhs);
|
|
if (list == nullptr) {
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
return nullptr;
|
|
}
|
|
|
|
SortList(lhs);
|
|
lhsEntries = GetStart(lhs);
|
|
l = 0;
|
|
|
|
SortList(rhs);
|
|
rhsEntries = GetStart(rhs);
|
|
r = 0;
|
|
|
|
listPos = 0;
|
|
listEntries = GetStart(list);
|
|
last = 0;
|
|
|
|
while (true) {
|
|
while (l < numLhs && lhsEntries[l] <= last) {
|
|
++l;
|
|
}
|
|
|
|
while (r < numRhs && rhsEntries[r] <= last) {
|
|
++r;
|
|
}
|
|
|
|
again:
|
|
if (l >= numLhs || r >= numRhs) {
|
|
break;
|
|
}
|
|
|
|
if (lhsEntries[l] < rhsEntries[r]) {
|
|
++l;
|
|
goto again;
|
|
} else if (lhsEntries[l] > rhsEntries[r]) {
|
|
++r;
|
|
goto again;
|
|
}
|
|
|
|
// match
|
|
listEntries[listPos++] = last = lhsEntries[l];
|
|
++l;
|
|
++r;
|
|
}
|
|
|
|
SetNumEntries(list, listPos);
|
|
SetIsSorted(list, true);
|
|
|
|
TRI_FreeListMMFilesFulltextIndex(lhs);
|
|
TRI_FreeListMMFilesFulltextIndex(rhs);
|
|
|
|
// printf("result list has %lu\n\n", (unsigned long) listPos);
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief exclude values from a list
|
|
/// this will modify list in place
|
|
TRI_fulltext_list_t* TRI_ExcludeListMMFilesFulltextIndex(
|
|
TRI_fulltext_list_t* list, TRI_fulltext_list_t* exclude) {
|
|
TRI_fulltext_list_entry_t* listEntries;
|
|
TRI_fulltext_list_entry_t* excludeEntries;
|
|
uint32_t numEntries;
|
|
uint32_t numExclude;
|
|
uint32_t i, j, listPos;
|
|
|
|
if (list == nullptr) {
|
|
TRI_FreeListMMFilesFulltextIndex(exclude);
|
|
return list;
|
|
}
|
|
|
|
if (exclude == nullptr) {
|
|
return list;
|
|
}
|
|
|
|
numEntries = GetNumEntries(list);
|
|
numExclude = GetNumEntries(exclude);
|
|
|
|
if (numEntries == 0 || numExclude == 0) {
|
|
// original list or exclusion list are empty
|
|
TRI_FreeListMMFilesFulltextIndex(exclude);
|
|
return list;
|
|
}
|
|
|
|
SortList(list);
|
|
|
|
listEntries = GetStart(list);
|
|
excludeEntries = GetStart(exclude);
|
|
|
|
j = 0;
|
|
listPos = 0;
|
|
for (i = 0; i < numEntries; ++i) {
|
|
TRI_fulltext_list_entry_t entry;
|
|
|
|
entry = listEntries[i];
|
|
while (j < numExclude && excludeEntries[j] < entry) {
|
|
++j;
|
|
}
|
|
|
|
if (j < numExclude && excludeEntries[j] == entry) {
|
|
// entry is contained in exclusion list
|
|
continue;
|
|
}
|
|
|
|
if (listPos != i) {
|
|
listEntries[listPos] = listEntries[i];
|
|
}
|
|
++listPos;
|
|
}
|
|
|
|
// we may have less results in the list of exclusion
|
|
SetNumEntries(list, listPos);
|
|
TRI_FreeListMMFilesFulltextIndex(exclude);
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief insert an element into a list
|
|
/// this might free the old list and allocate a new, bigger one
|
|
TRI_fulltext_list_t* TRI_InsertListMMFilesFulltextIndex(
|
|
TRI_fulltext_list_t* list, const TRI_fulltext_list_entry_t entry) {
|
|
TRI_fulltext_list_entry_t* listEntries;
|
|
uint32_t numAllocated;
|
|
uint32_t numEntries;
|
|
bool unsort;
|
|
|
|
numAllocated = GetNumAllocated(list);
|
|
numEntries = GetNumEntries(list);
|
|
listEntries = GetStart(list);
|
|
unsort = false;
|
|
|
|
if (numEntries > 0) {
|
|
TRI_fulltext_list_entry_t lastEntry;
|
|
|
|
// check whether the entry is already contained in the list
|
|
lastEntry = listEntries[numEntries - 1];
|
|
if (entry == lastEntry) {
|
|
// entry is already contained. no need to insert the same value again
|
|
return list;
|
|
}
|
|
|
|
if (entry < lastEntry) {
|
|
// we're adding at the end. we must update the sorted property if
|
|
// the list is not sorted anymore
|
|
unsort = true;
|
|
}
|
|
}
|
|
|
|
if (numEntries + 1 >= numAllocated) {
|
|
// must allocate more memory
|
|
TRI_fulltext_list_t* clone;
|
|
uint32_t newSize;
|
|
|
|
newSize = (uint32_t)(numEntries * GROWTH_FACTOR);
|
|
|
|
if (newSize == numEntries) {
|
|
// 0 * something might not be enough...
|
|
newSize = numEntries + 1;
|
|
}
|
|
|
|
// increase the existing list
|
|
clone = IncreaseList(list, newSize);
|
|
if (clone == nullptr) {
|
|
return nullptr;
|
|
}
|
|
|
|
// switch over
|
|
if (list != clone) {
|
|
list = clone;
|
|
listEntries = GetStart(list);
|
|
}
|
|
}
|
|
|
|
if (unsort) {
|
|
SetIsSorted(list, false);
|
|
}
|
|
|
|
// insert at the end
|
|
listEntries[numEntries] = entry;
|
|
SetNumEntries(list, numEntries + 1);
|
|
|
|
return list;
|
|
}
|
|
|
|
/// @brief rewrites the list of entries using a map of handles
|
|
/// returns the number of entries remaining in the list after rewrite
|
|
/// the map is provided by the routines that handle the compaction
|
|
uint32_t TRI_RewriteListMMFilesFulltextIndex(TRI_fulltext_list_t* list,
|
|
void const* data) {
|
|
TRI_fulltext_list_entry_t* listEntries;
|
|
TRI_fulltext_list_entry_t* map;
|
|
uint32_t numEntries;
|
|
uint32_t i, j;
|
|
|
|
numEntries = GetNumEntries(list);
|
|
if (numEntries == 0) {
|
|
return 0;
|
|
}
|
|
|
|
map = (TRI_fulltext_list_entry_t*)data;
|
|
listEntries = GetStart(list);
|
|
j = 0;
|
|
|
|
for (i = 0; i < numEntries; ++i) {
|
|
TRI_fulltext_list_entry_t entry;
|
|
TRI_fulltext_list_entry_t mapped;
|
|
|
|
entry = listEntries[i];
|
|
if (entry == 0) {
|
|
continue;
|
|
}
|
|
|
|
mapped = map[entry];
|
|
if (mapped == 0) {
|
|
// original value has been deleted
|
|
continue;
|
|
}
|
|
|
|
listEntries[j++] = mapped;
|
|
}
|
|
|
|
if (j != numEntries) {
|
|
SetNumEntries(list, j);
|
|
}
|
|
|
|
return j;
|
|
}
|
|
|
|
/// @brief print the contents of a list to stdout
/// the output has the form "(1, 2, 3)", without a trailing newline.
/// only available if TRI_FULLTEXT_DEBUG is enabled
#if TRI_FULLTEXT_DEBUG
void TRI_DumpListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
  uint32_t const count = GetNumEntries(list);
  TRI_fulltext_list_entry_t const* entries = GetStart(list);

  printf("(");

  for (uint32_t pos = 0; pos < count; ++pos) {
    if (pos != 0) {
      // separator between two entries
      printf(", ");
    }

    printf("%lu", (unsigned long)entries[pos]);
  }

  printf(")");
}
#endif
|
|
|
|
/// @brief return the number of entries
|
|
uint32_t TRI_NumEntriesListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
|
|
return GetNumEntries(list);
|
|
}
|
|
|
|
/// @brief return a pointer to the first list entry
|
|
TRI_fulltext_list_entry_t* TRI_StartListMMFilesFulltextIndex(
|
|
TRI_fulltext_list_t const* list) {
|
|
return GetStart(list);
|
|
}
|