//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Jan Steemann //////////////////////////////////////////////////////////////////////////////// #include "mmfiles-fulltext-list.h" /// @brief we'll set this bit (the highest of a uint32_t) if the list is sorted /// if the list is not sorted, this bit is cleared /// This is done as a space optimisation. A big index will contain a lot of /// document lists, and saving an extra boolean value will likely cost an extra /// 4 or 8 bytes due to padding. Avoiding saving the sorted flag in an extra /// member greatly reduces the index sizes #define SORTED_BIT 2147483648UL /// @brief growth factor for lists #define GROWTH_FACTOR 1.2 /// @brief compare two entries in a list static int CompareEntries(const void* lhs, const void* rhs) { TRI_fulltext_list_entry_t l = (*(TRI_fulltext_list_entry_t*)lhs); TRI_fulltext_list_entry_t r = (*(TRI_fulltext_list_entry_t*)rhs); if (l < r) { return -1; } if (l > r) { return 1; } return 0; } /// @brief return whether the list is sorted /// this will check the sorted bit at the start of the list static inline bool IsSorted(const TRI_fulltext_list_t* const list) { uint32_t* head = (uint32_t*)list; return ((*head & SORTED_BIT) != 0); } /// @brief return whether the list is sorted static inline void SetIsSorted(TRI_fulltext_list_t* const list, bool const value) { uint32_t* head = (uint32_t*)list; // yes, we could also do this without branching and with more bit twiddling // but this is not the critical path in the code if (value) { (*head) |= SORTED_BIT; } else { (*head) &= ~SORTED_BIT; } } /// @brief return the pointer to the start of the list entries static inline TRI_fulltext_list_entry_t* GetStart( const TRI_fulltext_list_t* const list) { uint32_t* head = (uint32_t*)list; ++head; // numAllocated ++head; // numEntries return (TRI_fulltext_list_entry_t*)head; } /// @brief return the number of entries static inline uint32_t GetNumEntries(const TRI_fulltext_list_t* const list) { uint32_t* head = (uint32_t*)list; return *(++head); } /// @brief set the number of entries static inline void SetNumEntries(TRI_fulltext_list_t* const list, uint32_t value) { uint32_t* head = (uint32_t*)list; *(++head) = value; } /// @brief return the number of allocated entries static inline uint32_t GetNumAllocated(TRI_fulltext_list_t const* list) { uint32_t* head = (uint32_t*)list; return (*head & ~SORTED_BIT); } /// @brief initialize a new list static void InitList(TRI_fulltext_list_t* list, uint32_t size) { uint32_t* head = (uint32_t*)list; *(head++) = size; *(head) = 0; } /// @brief sort a list in place static void SortList(TRI_fulltext_list_t* list) { if (IsSorted(list)) { // nothing to do return; } uint32_t numEntries = GetNumEntries(list); if (numEntries > 1) { // only sort if more than one elements qsort(GetStart(list), numEntries, sizeof(TRI_fulltext_list_entry_t), &CompareEntries); } SetIsSorted(list, true); } /// @brief get the memory usage for a list of the specified size static inline size_t MemoryList(uint32_t size) { return sizeof(uint32_t) + // numAllocated sizeof(uint32_t) + // numEntries size * sizeof(TRI_fulltext_list_entry_t); // entries } /// @brief increase an existing list static TRI_fulltext_list_t* IncreaseList(TRI_fulltext_list_t* list, uint32_t size) { TRI_fulltext_list_t* copy = TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, list, MemoryList(size)); if (copy != nullptr) { InitList(copy, size); } return copy; } /// @brief clone a list by copying an existing one TRI_fulltext_list_t* TRI_CloneListMMFilesFulltextIndex( TRI_fulltext_list_t const* source) { uint32_t numEntries; if (source == nullptr) { numEntries = 0; } else { numEntries = GetNumEntries(source); } TRI_fulltext_list_t* list = TRI_CreateListMMFilesFulltextIndex(numEntries); if (list != nullptr) { if (numEntries > 0) { memcpy(GetStart(list), GetStart(source), numEntries * sizeof(TRI_fulltext_list_entry_t)); SetNumEntries(list, numEntries); } } return list; } /// @brief create a new list TRI_fulltext_list_t* TRI_CreateListMMFilesFulltextIndex(uint32_t size) { TRI_fulltext_list_t* list = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, MemoryList(size), false); if (list == nullptr) { // out of memory return nullptr; } InitList(list, size); return list; } /// @brief free a list void TRI_FreeListMMFilesFulltextIndex(TRI_fulltext_list_t* list) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, list); } /// @brief get the memory usage of a list size_t TRI_MemoryListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) { uint32_t size = GetNumAllocated(list); return MemoryList(size); } /// @brief unionise two lists (a.k.a. logical OR) /// this will create a new list and free both lhs & rhs TRI_fulltext_list_t* TRI_UnioniseListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs, TRI_fulltext_list_t* rhs) { TRI_fulltext_list_t* list; TRI_fulltext_list_entry_t last; TRI_fulltext_list_entry_t* lhsEntries; TRI_fulltext_list_entry_t* rhsEntries; TRI_fulltext_list_entry_t* listEntries; uint32_t l, r; uint32_t numLhs, numRhs; uint32_t listPos; if (lhs == nullptr) { return rhs; } if (rhs == nullptr) { return lhs; } numLhs = GetNumEntries(lhs); numRhs = GetNumEntries(rhs); // check the easy cases when one of the lists is empty if (numLhs == 0) { TRI_FreeListMMFilesFulltextIndex(lhs); return rhs; } if (numRhs == 0) { TRI_FreeListMMFilesFulltextIndex(rhs); return lhs; } list = TRI_CreateListMMFilesFulltextIndex(numLhs + numRhs); if (list == nullptr) { TRI_FreeListMMFilesFulltextIndex(lhs); TRI_FreeListMMFilesFulltextIndex(rhs); return nullptr; } SortList(lhs); lhsEntries = GetStart(lhs); l = 0; SortList(rhs); rhsEntries = GetStart(rhs); r = 0; listPos = 0; listEntries = GetStart(list); last = 0; while (true) { while (l < numLhs && lhsEntries[l] <= last) { ++l; } while (r < numRhs && rhsEntries[r] <= last) { ++r; } if (l >= numLhs && r >= numRhs) { break; } if (l >= numLhs && r < numRhs) { listEntries[listPos++] = last = rhsEntries[r++]; } else if (l < numLhs && r >= numRhs) { listEntries[listPos++] = last = lhsEntries[l++]; } else if (lhsEntries[l] < rhsEntries[r]) { listEntries[listPos++] = last = lhsEntries[l++]; } else { listEntries[listPos++] = last = rhsEntries[r++]; } } SetNumEntries(list, listPos); SetIsSorted(list, true); TRI_FreeListMMFilesFulltextIndex(lhs); TRI_FreeListMMFilesFulltextIndex(rhs); return list; } /// @brief intersect two lists (a.k.a. logical AND) /// this will create a new list and free both lhs & rhs TRI_fulltext_list_t* TRI_IntersectListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs, TRI_fulltext_list_t* rhs) { TRI_fulltext_list_t* list; TRI_fulltext_list_entry_t last; TRI_fulltext_list_entry_t* lhsEntries; TRI_fulltext_list_entry_t* rhsEntries; TRI_fulltext_list_entry_t* listEntries; uint32_t l, r; uint32_t numLhs, numRhs; uint32_t listPos; // check if one of the pointers is NULL if (lhs == nullptr) { return rhs; } if (rhs == nullptr) { return lhs; } numLhs = GetNumEntries(lhs); numRhs = GetNumEntries(rhs); // printf("list intersection lhs: %lu rhs: %lu\n\n", (unsigned long) numLhs, // (unsigned long) numRhs); // check the easy cases when one of the lists is empty if (numLhs == 0 || numRhs == 0) { if (lhs != nullptr) { TRI_FreeListMMFilesFulltextIndex(lhs); } if (rhs != nullptr) { TRI_FreeListMMFilesFulltextIndex(rhs); } return TRI_CreateListMMFilesFulltextIndex(0); } // we have at least one entry in each list list = TRI_CreateListMMFilesFulltextIndex(numLhs < numRhs ? numLhs : numRhs); if (list == nullptr) { TRI_FreeListMMFilesFulltextIndex(lhs); TRI_FreeListMMFilesFulltextIndex(rhs); return nullptr; } SortList(lhs); lhsEntries = GetStart(lhs); l = 0; SortList(rhs); rhsEntries = GetStart(rhs); r = 0; listPos = 0; listEntries = GetStart(list); last = 0; while (true) { while (l < numLhs && lhsEntries[l] <= last) { ++l; } while (r < numRhs && rhsEntries[r] <= last) { ++r; } again: if (l >= numLhs || r >= numRhs) { break; } if (lhsEntries[l] < rhsEntries[r]) { ++l; goto again; } else if (lhsEntries[l] > rhsEntries[r]) { ++r; goto again; } // match listEntries[listPos++] = last = lhsEntries[l]; ++l; ++r; } SetNumEntries(list, listPos); SetIsSorted(list, true); TRI_FreeListMMFilesFulltextIndex(lhs); TRI_FreeListMMFilesFulltextIndex(rhs); // printf("result list has %lu\n\n", (unsigned long) listPos); return list; } /// @brief exclude values from a list /// this will modify list in place TRI_fulltext_list_t* TRI_ExcludeListMMFilesFulltextIndex( TRI_fulltext_list_t* list, TRI_fulltext_list_t* exclude) { TRI_fulltext_list_entry_t* listEntries; TRI_fulltext_list_entry_t* excludeEntries; uint32_t numEntries; uint32_t numExclude; uint32_t i, j, listPos; if (list == nullptr) { TRI_FreeListMMFilesFulltextIndex(exclude); return list; } if (exclude == nullptr) { return list; } numEntries = GetNumEntries(list); numExclude = GetNumEntries(exclude); if (numEntries == 0 || numExclude == 0) { // original list or exclusion list are empty TRI_FreeListMMFilesFulltextIndex(exclude); return list; } SortList(list); listEntries = GetStart(list); excludeEntries = GetStart(exclude); j = 0; listPos = 0; for (i = 0; i < numEntries; ++i) { TRI_fulltext_list_entry_t entry; entry = listEntries[i]; while (j < numExclude && excludeEntries[j] < entry) { ++j; } if (j < numExclude && excludeEntries[j] == entry) { // entry is contained in exclusion list continue; } if (listPos != i) { listEntries[listPos] = listEntries[i]; } ++listPos; } // we may have less results in the list of exclusion SetNumEntries(list, listPos); TRI_FreeListMMFilesFulltextIndex(exclude); return list; } /// @brief insert an element into a list /// this might free the old list and allocate a new, bigger one TRI_fulltext_list_t* TRI_InsertListMMFilesFulltextIndex( TRI_fulltext_list_t* list, const TRI_fulltext_list_entry_t entry) { TRI_fulltext_list_entry_t* listEntries; uint32_t numAllocated; uint32_t numEntries; bool unsort; numAllocated = GetNumAllocated(list); numEntries = GetNumEntries(list); listEntries = GetStart(list); unsort = false; if (numEntries > 0) { TRI_fulltext_list_entry_t lastEntry; // check whether the entry is already contained in the list lastEntry = listEntries[numEntries - 1]; if (entry == lastEntry) { // entry is already contained. no need to insert the same value again return list; } if (entry < lastEntry) { // we're adding at the end. we must update the sorted property if // the list is not sorted anymore unsort = true; } } if (numEntries + 1 >= numAllocated) { // must allocate more memory TRI_fulltext_list_t* clone; uint32_t newSize; newSize = (uint32_t)(numEntries * GROWTH_FACTOR); if (newSize == numEntries) { // 0 * something might not be enough... newSize = numEntries + 1; } // increase the existing list clone = IncreaseList(list, newSize); if (clone == nullptr) { return nullptr; } // switch over if (list != clone) { list = clone; listEntries = GetStart(list); } } if (unsort) { SetIsSorted(list, false); } // insert at the end listEntries[numEntries] = entry; SetNumEntries(list, numEntries + 1); return list; } /// @brief rewrites the list of entries using a map of handles /// returns the number of entries remaining in the list after rewrite /// the map is provided by the routines that handle the compaction uint32_t TRI_RewriteListMMFilesFulltextIndex(TRI_fulltext_list_t* list, void const* data) { TRI_fulltext_list_entry_t* listEntries; TRI_fulltext_list_entry_t* map; uint32_t numEntries; uint32_t i, j; numEntries = GetNumEntries(list); if (numEntries == 0) { return 0; } map = (TRI_fulltext_list_entry_t*)data; listEntries = GetStart(list); j = 0; for (i = 0; i < numEntries; ++i) { TRI_fulltext_list_entry_t entry; TRI_fulltext_list_entry_t mapped; entry = listEntries[i]; if (entry == 0) { continue; } mapped = map[entry]; if (mapped == 0) { // original value has been deleted continue; } listEntries[j++] = mapped; } if (j != numEntries) { SetNumEntries(list, j); } return j; } /// @brief dump the contents of a list #if TRI_FULLTEXT_DEBUG void TRI_DumpListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) { TRI_fulltext_list_entry_t* listEntries; uint32_t numEntries; uint32_t i; numEntries = GetNumEntries(list); listEntries = GetStart(list); printf("("); for (i = 0; i < numEntries; ++i) { TRI_fulltext_list_entry_t entry; if (i > 0) { printf(", "); } entry = listEntries[i]; printf("%lu", (unsigned long)entry); } printf(")"); } #endif /// @brief return the number of entries uint32_t TRI_NumEntriesListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) { return GetNumEntries(list); } /// @brief return a pointer to the first list entry TRI_fulltext_list_entry_t* TRI_StartListMMFilesFulltextIndex( TRI_fulltext_list_t const* list) { return GetStart(list); }