//////////////////////////////////////////////////////////////////////////////// /// @brief index /// /// @file /// /// DISCLAIMER /// /// Copyright 2014 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Dr. Frank Celler /// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany /// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #include "index.h" #include "Basics/conversions.h" #include "Basics/Exceptions.h" #include "Basics/fasthash.h" #include "Basics/files.h" #include "Basics/json.h" #include "Basics/logging.h" #include "Basics/string-buffer.h" #include "Basics/tri-strings.h" #include "Basics/json-utilities.h" #include "Basics/JsonHelper.h" #include "Basics/Utf8Helper.h" #include "CapConstraint/cap-constraint.h" #include "FulltextIndex/fulltext-index.h" #include "FulltextIndex/fulltext-wordlist.h" #include "GeoIndex/geo-index.h" #include "HashIndex/hash-index.h" #include "ShapedJson/shape-accessor.h" #include "ShapedJson/shaped-json.h" #include "VocBase/document-collection.h" #include "VocBase/edge-collection.h" #include "VocBase/server.h" #include "VocBase/voc-shaper.h" #include "Wal/LogfileManager.h" #include "Wal/Marker.h" // ----------------------------------------------------------------------------- // --SECTION-- INDEX // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- constructors and destructors // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief initialise basic index properties //////////////////////////////////////////////////////////////////////////////// void TRI_InitIndex (TRI_index_t* idx, TRI_idx_iid_t iid, TRI_idx_type_e type, TRI_document_collection_t* document, bool sparse, bool unique) { TRI_ASSERT(idx != nullptr); if (iid > 0) { // use iid if specified idx->_iid = iid; } else if (type == TRI_IDX_TYPE_PRIMARY_INDEX) { // override iid idx->_iid = 0; } else { idx->_iid = TRI_NewTickServer(); } idx->_type = type; idx->_collection = document; idx->_unique = unique; idx->_sparse = sparse; idx->_hasSelectivityEstimate = false; // init common functions idx->selectivityEstimate = nullptr; idx->memory = nullptr; idx->removeIndex = nullptr; idx->cleanup = nullptr; idx->sizeHint = nullptr; idx->postInsert = nullptr; LOG_TRACE("initialising index of type %s", TRI_TypeNameIndex(idx->_type)); } // ----------------------------------------------------------------------------- // --SECTION-- public functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief return the name of an index type //////////////////////////////////////////////////////////////////////////////// TRI_idx_type_e TRI_TypeIndex (char const* type) { if (TRI_EqualString(type, "primary")) { return TRI_IDX_TYPE_PRIMARY_INDEX; } else if (TRI_EqualString(type, "edge")) { return TRI_IDX_TYPE_EDGE_INDEX; } else if (TRI_EqualString(type, "hash")) { return TRI_IDX_TYPE_HASH_INDEX; } else if (TRI_EqualString(type, "skiplist")) { return TRI_IDX_TYPE_SKIPLIST_INDEX; } else if (TRI_EqualString(type, "fulltext")) { return TRI_IDX_TYPE_FULLTEXT_INDEX; } else if (TRI_EqualString(type, "cap")) { return TRI_IDX_TYPE_CAP_CONSTRAINT; } else if (TRI_EqualString(type, "geo1")) { return TRI_IDX_TYPE_GEO1_INDEX; } else if (TRI_EqualString(type, "geo2")) { return TRI_IDX_TYPE_GEO2_INDEX; } return TRI_IDX_TYPE_UNKNOWN; } //////////////////////////////////////////////////////////////////////////////// /// @brief return the name of an index type //////////////////////////////////////////////////////////////////////////////// char const* TRI_TypeNameIndex (TRI_idx_type_e type) { switch (type) { case TRI_IDX_TYPE_PRIMARY_INDEX: return "primary"; case TRI_IDX_TYPE_GEO1_INDEX: return "geo1"; case TRI_IDX_TYPE_GEO2_INDEX: return "geo2"; case TRI_IDX_TYPE_HASH_INDEX: return "hash"; case TRI_IDX_TYPE_EDGE_INDEX: return "edge"; case TRI_IDX_TYPE_FULLTEXT_INDEX: return "fulltext"; case TRI_IDX_TYPE_SKIPLIST_INDEX: return "skiplist"; case TRI_IDX_TYPE_CAP_CONSTRAINT: return "cap"; case TRI_IDX_TYPE_PRIORITY_QUEUE_INDEX: case TRI_IDX_TYPE_BITARRAY_INDEX: case TRI_IDX_TYPE_UNKNOWN: default: { } } return ""; } //////////////////////////////////////////////////////////////////////////////// /// @brief validate an index id //////////////////////////////////////////////////////////////////////////////// bool TRI_ValidateIdIndex (char const* key) { char const* p = key; while (1) { const char c = *p; if (c == '\0') { return (p - key) > 0; } if (c >= '0' && c <= '9') { ++p; continue; } return false; } } //////////////////////////////////////////////////////////////////////////////// /// @brief validate an index id (collection name + / + index id) //////////////////////////////////////////////////////////////////////////////// bool TRI_ValidateIndexIdIndex (char const* key, size_t* split) { char const* p = key; char c = *p; // extract collection name if (! (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { return false; } ++p; while (1) { c = *p; if ((c == '_') || (c == '-') || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { ++p; continue; } if (c == '/') { break; } return false; } if (p - key > TRI_COL_NAME_LENGTH) { return false; } // store split position *split = p - key; ++p; // validate index id return TRI_ValidateIdIndex(p); } //////////////////////////////////////////////////////////////////////////////// /// @brief free an index //////////////////////////////////////////////////////////////////////////////// void TRI_FreeIndex (TRI_index_t* idx) { TRI_ASSERT(idx); LOG_TRACE("freeing index"); switch (idx->_type) { case TRI_IDX_TYPE_GEO1_INDEX: case TRI_IDX_TYPE_GEO2_INDEX: TRI_FreeGeoIndex(idx); break; case TRI_IDX_TYPE_HASH_INDEX: TRI_FreeHashIndex(idx); break; case TRI_IDX_TYPE_EDGE_INDEX: TRI_FreeEdgeIndex(idx); break; case TRI_IDX_TYPE_SKIPLIST_INDEX: TRI_FreeSkiplistIndex(idx); break; case TRI_IDX_TYPE_FULLTEXT_INDEX: TRI_FreeFulltextIndex(idx); break; case TRI_IDX_TYPE_CAP_CONSTRAINT: TRI_FreeCapConstraint(idx); break; case TRI_IDX_TYPE_PRIMARY_INDEX: TRI_FreePrimaryIndex(idx); break; default: // no action necessary break; } } //////////////////////////////////////////////////////////////////////////////// /// @brief removes an index file //////////////////////////////////////////////////////////////////////////////// bool TRI_RemoveIndexFile (TRI_document_collection_t* collection, TRI_index_t* idx) { // construct filename char* number = TRI_StringUInt64(idx->_iid); if (number == nullptr) { TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); LOG_ERROR("out of memory when creating index number"); return false; } char* name = TRI_Concatenate3String("index-", number, ".json"); if (name == nullptr) { TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); TRI_FreeString(TRI_CORE_MEM_ZONE, number); LOG_ERROR("out of memory when creating index name"); return false; } char* filename = TRI_Concatenate2File(collection->_directory, name); if (filename == nullptr) { TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); TRI_FreeString(TRI_CORE_MEM_ZONE, number); TRI_FreeString(TRI_CORE_MEM_ZONE, name); LOG_ERROR("out of memory when creating index filename"); return false; } TRI_FreeString(TRI_CORE_MEM_ZONE, name); TRI_FreeString(TRI_CORE_MEM_ZONE, number); int res = TRI_UnlinkFile(filename); TRI_FreeString(TRI_CORE_MEM_ZONE, filename); if (res != TRI_ERROR_NO_ERROR) { LOG_ERROR("cannot remove index definition: %s", TRI_last_error()); return false; } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief saves an index //////////////////////////////////////////////////////////////////////////////// int TRI_SaveIndex (TRI_document_collection_t* document, TRI_index_t* idx, bool writeMarker) { // convert into JSON TRI_json_t* json = idx->json(idx); if (json == nullptr) { LOG_TRACE("cannot save index definition: index cannot be jsonified"); return TRI_set_errno(TRI_ERROR_INTERNAL); } // construct filename char* number = TRI_StringUInt64(idx->_iid); char* name = TRI_Concatenate3String("index-", number, ".json"); char* filename = TRI_Concatenate2File(document->_directory, name); TRI_FreeString(TRI_CORE_MEM_ZONE, name); TRI_FreeString(TRI_CORE_MEM_ZONE, number); TRI_vocbase_t* vocbase = document->_vocbase; // and save bool ok = TRI_SaveJson(filename, json, document->_vocbase->_settings.forceSyncProperties); TRI_FreeString(TRI_CORE_MEM_ZONE, filename); if (! ok) { LOG_ERROR("cannot save index definition: %s", TRI_last_error()); TRI_FreeJson(TRI_CORE_MEM_ZONE, json); return TRI_errno(); } if (! writeMarker) { return TRI_ERROR_NO_ERROR; } int res = TRI_ERROR_NO_ERROR; try { triagens::wal::CreateIndexMarker marker(vocbase->_id, document->_info._cid, idx->_iid, triagens::basics::JsonHelper::toString(json)); triagens::wal::SlotInfoCopy slotInfo = triagens::wal::LogfileManager::instance()->allocateAndWrite(marker, false); if (slotInfo.errorCode != TRI_ERROR_NO_ERROR) { THROW_ARANGO_EXCEPTION(slotInfo.errorCode); } TRI_FreeJson(TRI_CORE_MEM_ZONE, json); return TRI_ERROR_NO_ERROR; } catch (triagens::basics::Exception const& ex) { res = ex.code(); } catch (...) { res = TRI_ERROR_INTERNAL; } TRI_FreeJson(TRI_CORE_MEM_ZONE, json); // TODO: what to do here? return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief looks up an index identifier //////////////////////////////////////////////////////////////////////////////// TRI_index_t* TRI_LookupIndex (TRI_document_collection_t* document, TRI_idx_iid_t iid) { size_t const n = document->_allIndexes._length; for (size_t i = 0; i < n; ++i) { TRI_index_t* idx = static_cast(document->_allIndexes._buffer[i]); if (idx->_iid == iid) { return idx; } } TRI_set_errno(TRI_ERROR_ARANGO_NO_INDEX); return nullptr; } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a basic index description as JSON /// this only contains the common index fields and needs to be extended by the /// specialised index //////////////////////////////////////////////////////////////////////////////// TRI_json_t* TRI_JsonIndex (TRI_memory_zone_t* zone, TRI_index_t const* idx) { TRI_json_t* json = TRI_CreateObjectJson(zone); if (json != nullptr) { char* number = TRI_StringUInt64(idx->_iid); TRI_Insert3ObjectJson(zone, json, "id", TRI_CreateStringCopyJson(zone, number, strlen(number))); TRI_Insert3ObjectJson(zone, json, "type", TRI_CreateStringCopyJson(zone, TRI_TypeNameIndex(idx->_type), strlen(TRI_TypeNameIndex(idx->_type)))); TRI_Insert3ObjectJson(zone, json, "unique", TRI_CreateBooleanJson(zone, idx->_unique)); if (idx->_type != TRI_IDX_TYPE_CAP_CONSTRAINT) { // only show sparse flag for these index types, as it can't be set on others TRI_Insert3ObjectJson(zone, json, "sparse", TRI_CreateBooleanJson(zone, idx->_sparse)); } if (idx->_hasSelectivityEstimate) { TRI_Insert3ObjectJson(zone, json, "selectivityEstimate", TRI_CreateNumberJson(zone, idx->selectivityEstimate(idx))); } TRI_FreeString(TRI_CORE_MEM_ZONE, number); } return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief copies a path vector //////////////////////////////////////////////////////////////////////////////// void TRI_CopyPathVector (TRI_vector_t* dst, TRI_vector_t* src) { TRI_InitVector(dst, TRI_CORE_MEM_ZONE, sizeof(TRI_shape_pid_t)); size_t const n = TRI_LengthVector(src); for (size_t j = 0; j < n; ++j) { TRI_shape_pid_t shape = *((TRI_shape_pid_t*) (TRI_AtVector(src, j))); TRI_PushBackVector(dst, &shape); } } // ----------------------------------------------------------------------------- // --SECTION-- PRIMARY INDEX // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief insert methods does nothing //////////////////////////////////////////////////////////////////////////////// static int InsertPrimary (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief remove methods does nothing //////////////////////////////////////////////////////////////////////////////// static int RemovePrimary (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief return the memory used by the index //////////////////////////////////////////////////////////////////////////////// static size_t MemoryPrimary (TRI_index_t const* idx) { return static_cast(idx->_collection->_primaryIndex._nrAlloc) * sizeof(void*); } //////////////////////////////////////////////////////////////////////////////// /// @brief return the selectivity estimate for the index //////////////////////////////////////////////////////////////////////////////// static double SelectivityEstimatePrimary (TRI_index_t const* idx) { return 1.0; } //////////////////////////////////////////////////////////////////////////////// /// @brief JSON description of a primary index //////////////////////////////////////////////////////////////////////////////// static TRI_json_t* JsonPrimary (TRI_index_t const* idx) { TRI_json_t* json = TRI_JsonIndex(TRI_CORE_MEM_ZONE, idx); if (json == nullptr) { return nullptr; } TRI_json_t* fields = TRI_CreateArrayJson(TRI_CORE_MEM_ZONE); TRI_PushBack3ArrayJson(TRI_CORE_MEM_ZONE, fields, TRI_CreateStringCopyJson(TRI_CORE_MEM_ZONE, TRI_VOC_ATTRIBUTE_KEY, strlen(TRI_VOC_ATTRIBUTE_KEY))); TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, json, "fields", fields); return json; } // ----------------------------------------------------------------------------- // --SECTION-- constructors / destructors // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief create the primary index //////////////////////////////////////////////////////////////////////////////// TRI_index_t* TRI_CreatePrimaryIndex (TRI_document_collection_t* document) { // create primary index TRI_index_t* idx = static_cast(TRI_Allocate(TRI_CORE_MEM_ZONE, sizeof(TRI_index_t), false)); if (idx == nullptr) { return nullptr; } char* id = TRI_DuplicateStringZ(TRI_CORE_MEM_ZONE, TRI_VOC_ATTRIBUTE_KEY); TRI_InitVectorString(&idx->_fields, TRI_CORE_MEM_ZONE); TRI_PushBackVectorString(&idx->_fields, id); TRI_InitIndex(idx, 0, TRI_IDX_TYPE_PRIMARY_INDEX, document, false, true); idx->_hasSelectivityEstimate = true; idx->selectivityEstimate = &SelectivityEstimatePrimary; idx->memory = MemoryPrimary; idx->json = JsonPrimary; idx->insert = InsertPrimary; idx->remove = RemovePrimary; return idx; } //////////////////////////////////////////////////////////////////////////////// /// @brief free a primary index //////////////////////////////////////////////////////////////////////////////// void TRI_FreePrimaryIndex (TRI_index_t* idx) { TRI_DestroyVectorString(&idx->_fields); TRI_Free(TRI_CORE_MEM_ZONE, idx); } // ----------------------------------------------------------------------------- // --SECTION-- EDGE INDEX // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge key //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementKey (void const* data) { TRI_edge_header_t const* h = static_cast(data); char const* key = h->_key; uint64_t hash = h->_cid; hash ^= (uint64_t) fasthash64(key, strlen(key), 0x87654321); return fasthash64(&hash, sizeof(hash), 0x56781234); } //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge (_from case) //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementEdgeFrom (void const* data, bool byKey) { uint64_t hash; if (! byKey) { hash = (uint64_t) data; } else { TRI_doc_mptr_t const* mptr = static_cast(data); TRI_df_marker_t const* marker = static_cast(mptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* edge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* key = (char const*) edge + edge->_offsetFromKey; // LOG_TRACE("HASH FROM: COLLECTION: %llu, KEY: %s", (unsigned long long) edge->_fromCid, key); hash = edge->_fromCid; hash ^= (uint64_t) fasthash64(key, strlen(key), 0x87654321); } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* edge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* key = (char const*) edge + edge->_offsetFromKey; // LOG_TRACE("HASH FROM: COLLECTION: %llu, KEY: %s", (unsigned long long) edge->_fromCid, key); hash = edge->_fromCid; hash ^= (uint64_t) fasthash64(key, strlen(key), 0x87654321); } } return fasthash64(&hash, sizeof(hash), 0x56781234); } //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge (_to case) //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementEdgeTo (void const* data, bool byKey) { uint64_t hash; if (! byKey) { hash = (uint64_t) data; } else { TRI_doc_mptr_t const* mptr = static_cast(data); TRI_df_marker_t const* marker = static_cast(mptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* edge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* key = (char const*) edge + edge->_offsetToKey; // LOG_TRACE("HASH TO: COLLECTION: %llu, KEY: %s", (unsigned long long) edge->_toCid, key); hash = edge->_toCid; hash ^= (uint64_t) fasthash64(key, strlen(key), 0x87654321); } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* edge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* key = (char const*) edge + edge->_offsetToKey; // LOG_TRACE("HASH TO: COLLECTION: %llu, KEY: %s", (unsigned long long) edge->_toCid, key); hash = edge->_toCid; hash ^= (uint64_t) fasthash64(key, strlen(key), 0x87654321); } } return fasthash64(&hash, sizeof(hash), 0x56781234); } //////////////////////////////////////////////////////////////////////////////// /// @brief checks if key and element match (_from case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualKeyEdgeFrom (void const* left, void const* right) { // left is a key // right is an element, that is a master pointer TRI_edge_header_t const* l = static_cast(left); char const* lKey = l->_key; TRI_doc_mptr_t const* rMptr = static_cast(right); TRI_df_marker_t const* marker = static_cast(rMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* rKey = (char const*) rEdge + rEdge->_offsetFromKey; // LOG_TRACE("ISEQUAL FROM: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) l->_cid, lKey, (unsigned long long) rEdge->_fromCid, rKey); return (l->_cid == rEdge->_fromCid) && (strcmp(lKey, rKey) == 0); } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* rKey = (char const*) rEdge + rEdge->_offsetFromKey; // LOG_TRACE("ISEQUAL FROM: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) l->_cid, lKey, (unsigned long long) rEdge->_fromCid, rKey); return (l->_cid == rEdge->_fromCid) && (strcmp(lKey, rKey) == 0); } return false; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks if key and element match (_to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualKeyEdgeTo (void const* left, void const* right) { // left is a key // right is an element, that is a master pointer TRI_edge_header_t const* l = static_cast(left); char const* lKey = l->_key; TRI_doc_mptr_t const* rMptr = static_cast(right); TRI_df_marker_t const* marker = static_cast(rMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* rKey = (char const*) rEdge + rEdge->_offsetToKey; // LOG_TRACE("ISEQUAL TO: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) l->_cid, lKey, (unsigned long long) rEdge->_toCid, rKey); return (l->_cid == rEdge->_toCid) && (strcmp(lKey, rKey) == 0); } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME char const* rKey = (char const*) rEdge + rEdge->_offsetToKey; // LOG_TRACE("ISEQUAL TO: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) l->_cid, lKey, (unsigned long long) rEdge->_toCid, rKey); return (l->_cid == rEdge->_toCid) && (strcmp(lKey, rKey) == 0); } return false; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_from and _to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdge (void const* left, void const* right) { return left == right; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_from case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdgeFromByKey (void const* left, void const* right) { char const* lKey = nullptr; char const* rKey = nullptr; TRI_voc_cid_t lCid = 0; TRI_voc_cid_t rCid = 0; TRI_df_marker_t const* marker; // left element TRI_doc_mptr_t const* lMptr = static_cast(left); marker = static_cast(lMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* lEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME lKey = (char const*) lEdge + lEdge->_offsetFromKey; lCid = lEdge->_fromCid; } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* lEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME lKey = (char const*) lEdge + lEdge->_offsetFromKey; lCid = lEdge->_fromCid; } // right element TRI_doc_mptr_t const* rMptr = static_cast(right); marker = static_cast(rMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME rKey = (char const*) rEdge + rEdge->_offsetFromKey; rCid = rEdge->_fromCid; } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME rKey = (char const*) rEdge + rEdge->_offsetFromKey; rCid = rEdge->_fromCid; } if (lKey == nullptr || rKey == nullptr) { return false; } // LOG_TRACE("ISEQUALELEMENT FROM: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) lCid, lKey, (unsigned long long) rCid, rKey); return ((lCid == rCid) && (strcmp(lKey, rKey) == 0)); } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdgeToByKey (void const* left, void const* right) { char const* lKey = nullptr; char const* rKey = nullptr; TRI_voc_cid_t lCid = 0; TRI_voc_cid_t rCid = 0; TRI_df_marker_t const* marker; // left element TRI_doc_mptr_t const* lMptr = static_cast(left); marker = static_cast(lMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* lEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME lKey = (char const*) lEdge + lEdge->_offsetToKey; lCid = lEdge->_toCid; } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* lEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME lKey = (char const*) lEdge + lEdge->_offsetToKey; lCid = lEdge->_toCid; } // right element TRI_doc_mptr_t const* rMptr = static_cast(right); marker = static_cast(rMptr->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (marker->_type == TRI_DOC_MARKER_KEY_EDGE) { TRI_doc_edge_key_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME rKey = (char const*) rEdge + rEdge->_offsetToKey; rCid = rEdge->_toCid; } else if (marker->_type == TRI_WAL_MARKER_EDGE) { triagens::wal::edge_marker_t const* rEdge = reinterpret_cast(marker); // ONLY IN INDEX, PROTECTED by RUNTIME rKey = (char const*) rEdge + rEdge->_offsetToKey; rCid = rEdge->_toCid; } if (lKey == nullptr || rKey == nullptr) { return false; } // LOG_TRACE("ISEQUALELEMENT TO: LCOLLECTION: %llu, LKEY: %s, RCOLLECTION: %llu, RKEY: %s", (unsigned long long) lCid, lKey, (unsigned long long) rCid, rKey); return ((lCid == rCid) && (strcmp(lKey, rKey) == 0)); } //////////////////////////////////////////////////////////////////////////////// /// @brief insert method for edges //////////////////////////////////////////////////////////////////////////////// static int InsertEdge (TRI_index_t* idx, TRI_doc_mptr_t const* mptr, bool isRollback) { TRI_EdgeIndexHash_t* edgesIndex; // OUT edgesIndex = ((TRI_edge_index_t*) idx)->_edges_from; edgesIndex->insert(CONST_CAST(mptr), true, isRollback); // IN edgesIndex = ((TRI_edge_index_t*) idx)->_edges_to; edgesIndex->insert(CONST_CAST(mptr), true, isRollback); return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief remove an edge //////////////////////////////////////////////////////////////////////////////// static int RemoveEdge (TRI_index_t* idx, TRI_doc_mptr_t const* mptr, bool isRollback) { TRI_EdgeIndexHash_t* edgesIndex; // OUT edgesIndex = ((TRI_edge_index_t*) idx)->_edges_from; edgesIndex->remove(mptr); // IN edgesIndex = ((TRI_edge_index_t*) idx)->_edges_to; edgesIndex->remove(mptr); return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief return the memory used by the index //////////////////////////////////////////////////////////////////////////////// static size_t MemoryEdge (TRI_index_t const* idx) { return ((TRI_edge_index_t*) idx)->_edges_from->memoryUsage() + ((TRI_edge_index_t*) idx)->_edges_to->memoryUsage(); } //////////////////////////////////////////////////////////////////////////////// /// @brief return a selectivity esimtate for the index //////////////////////////////////////////////////////////////////////////////// static double SelectivityEstimateEdge (TRI_index_t const* idx) { // return average selectivity of the two index parts return ( ((TRI_edge_index_t*) idx)->_edges_from->selectivity() + ((TRI_edge_index_t*) idx)->_edges_to->selectivity() ) * 0.5; } //////////////////////////////////////////////////////////////////////////////// /// @brief JSON description of edge index //////////////////////////////////////////////////////////////////////////////// static TRI_json_t* JsonEdge (TRI_index_t const* idx) { TRI_json_t* json = TRI_JsonIndex(TRI_CORE_MEM_ZONE, idx); if (json == nullptr) { return nullptr; } TRI_json_t* fields = TRI_CreateArrayJson(TRI_CORE_MEM_ZONE); TRI_PushBack3ArrayJson(TRI_CORE_MEM_ZONE, fields, TRI_CreateStringCopyJson(TRI_CORE_MEM_ZONE, TRI_VOC_ATTRIBUTE_FROM, strlen(TRI_VOC_ATTRIBUTE_FROM))); TRI_PushBack3ArrayJson(TRI_CORE_MEM_ZONE, fields, TRI_CreateStringCopyJson(TRI_CORE_MEM_ZONE, TRI_VOC_ATTRIBUTE_TO, strlen(TRI_VOC_ATTRIBUTE_TO))); TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, json, "fields", fields); return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief provides a size hint for the edge index //////////////////////////////////////////////////////////////////////////////// static int SizeHintEdge (TRI_index_t* idx, size_t size) { TRI_EdgeIndexHash_t* edgesIndex = ((TRI_edge_index_t*) idx)->_edges_from; // we assume this is called when setting up the index and the index // is still empty TRI_ASSERT(edgesIndex->size() == 0); // set an initial size for the index for some new nodes to be created // without resizing int err = edgesIndex->resize(static_cast(size + 2049)); if (err != TRI_ERROR_NO_ERROR) { return err; } edgesIndex = ((TRI_edge_index_t*) idx)->_edges_to; // we assume this is called when setting up the index and the index // is still empty TRI_ASSERT(edgesIndex->size() == 0); // set an initial size for the index for some new nodes to be created // without resizing return edgesIndex->resize(static_cast(size + 2049)); } // ----------------------------------------------------------------------------- // --SECTION-- constructors / destructors // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief create the edge index //////////////////////////////////////////////////////////////////////////////// TRI_index_t* TRI_CreateEdgeIndex (TRI_document_collection_t* document, TRI_idx_iid_t iid) { TRI_index_t* idx; char* id; // create index TRI_edge_index_t* edgeIndex; try { edgeIndex = new TRI_edge_index_t(); } catch (...) { return nullptr; } try { edgeIndex->_edges_from = new TRI_EdgeIndexHash_t( HashElementKey, HashElementEdgeFrom, IsEqualKeyEdgeFrom, IsEqualElementEdge, IsEqualElementEdgeFromByKey, document->_info._indexBuckets); } catch (...) { delete edgeIndex; return nullptr; } try { edgeIndex->_edges_to = new TRI_EdgeIndexHash_t( HashElementKey, HashElementEdgeTo, IsEqualKeyEdgeTo, IsEqualElementEdge, IsEqualElementEdgeToByKey, document->_info._indexBuckets); } catch (...) { delete edgeIndex->_edges_from; delete edgeIndex; return nullptr; } idx = static_cast(edgeIndex); TRI_InitVectorString(&idx->_fields, TRI_CORE_MEM_ZONE); id = TRI_DuplicateStringZ(TRI_CORE_MEM_ZONE, TRI_VOC_ATTRIBUTE_FROM); TRI_PushBackVectorString(&idx->_fields, id); TRI_InitIndex(idx, iid, TRI_IDX_TYPE_EDGE_INDEX, document, false, false); idx->_hasSelectivityEstimate = true; idx->selectivityEstimate = SelectivityEstimateEdge; idx->memory = MemoryEdge; idx->json = JsonEdge; idx->insert = InsertEdge; idx->remove = RemoveEdge; idx->sizeHint = SizeHintEdge; return idx; } //////////////////////////////////////////////////////////////////////////////// /// @brief destroys the edge index, but does not free the pointer //////////////////////////////////////////////////////////////////////////////// void TRI_DestroyEdgeIndex (TRI_index_t* idx) { TRI_edge_index_t* edgesIndex = (TRI_edge_index_t*) idx; LOG_TRACE("destroying edge index"); delete edgesIndex->_edges_to; delete edgesIndex->_edges_from; TRI_DestroyVectorString(&idx->_fields); } //////////////////////////////////////////////////////////////////////////////// /// @brief frees the edge index //////////////////////////////////////////////////////////////////////////////// void TRI_FreeEdgeIndex (TRI_index_t* idx) { TRI_edge_index_t* edgesIndex = (TRI_edge_index_t*) idx; TRI_DestroyEdgeIndex(idx); delete edgesIndex; } // ----------------------------------------------------------------------------- // --SECTION-- SKIPLIST INDEX // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- // ............................................................................. // Helper function for TRI_LookupSkiplistIndex // ............................................................................. static int FillLookupSLOperator (TRI_index_operator_t* slOperator, TRI_document_collection_t* document) { if (slOperator == nullptr) { return TRI_ERROR_INTERNAL; } switch (slOperator->_type) { case TRI_AND_INDEX_OPERATOR: case TRI_NOT_INDEX_OPERATOR: case TRI_OR_INDEX_OPERATOR: { TRI_logical_index_operator_t* logicalOperator = (TRI_logical_index_operator_t*) slOperator; int result = FillLookupSLOperator(logicalOperator->_left, document); if (result == TRI_ERROR_NO_ERROR) { result = FillLookupSLOperator(logicalOperator->_right, document); } if (result != TRI_ERROR_NO_ERROR) { return result; } break; } case TRI_EQ_INDEX_OPERATOR: case TRI_GE_INDEX_OPERATOR: case TRI_GT_INDEX_OPERATOR: case TRI_NE_INDEX_OPERATOR: case TRI_LE_INDEX_OPERATOR: case TRI_LT_INDEX_OPERATOR: { TRI_relation_index_operator_t* relationOperator = (TRI_relation_index_operator_t*) slOperator; relationOperator->_numFields = TRI_LengthVector(&relationOperator->_parameters->_value._objects); relationOperator->_fields = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_shaped_json_t) * relationOperator->_numFields, false)); if (relationOperator->_fields != nullptr) { for (size_t j = 0; j < relationOperator->_numFields; ++j) { TRI_json_t const* jsonObject = static_cast(TRI_AtVector(&(relationOperator->_parameters->_value._objects), j)); // find out if the search value is a list or an array if ((TRI_IsArrayJson(jsonObject) || TRI_IsObjectJson(jsonObject)) && slOperator->_type != TRI_EQ_INDEX_OPERATOR) { // non-equality operator used on list or array data type, this is disallowed // because we need to shape these objects first. however, at this place (index lookup) // we never want to create new shapes so we will have a problem if we cannot find an // existing shape for the search value. in this case we would need to raise an error // but then the query results would depend on the state of the shaper and if it had // seen previous such objects // we still allow looking for list or array values using equality. this is safe. TRI_Free(TRI_UNKNOWN_MEM_ZONE, relationOperator->_fields); relationOperator->_fields = nullptr; return TRI_ERROR_BAD_PARAMETER; } // now shape the search object (but never create any new shapes) TRI_shaped_json_t* shapedObject = TRI_ShapedJsonJson(document->getShaper(), jsonObject, false); // ONLY IN INDEX, PROTECTED by RUNTIME if (shapedObject != nullptr) { // found existing shape relationOperator->_fields[j] = *shapedObject; // shallow copy here is ok TRI_Free(TRI_UNKNOWN_MEM_ZONE, shapedObject); // don't require storage anymore } else { // shape not found TRI_Free(TRI_UNKNOWN_MEM_ZONE, relationOperator->_fields); relationOperator->_fields = nullptr; return TRI_RESULT_ELEMENT_NOT_FOUND; } } } else { relationOperator->_numFields = 0; // out of memory? } break; } } return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief attempts to locate an entry in the skip list index //////////////////////////////////////////////////////////////////////////////// // ............................................................................. // Note: this function will not destroy the passed slOperator before it returns // Warning: who ever calls this function is responsible for destroying // the TRI_index_operator_t* and the TRI_skiplist_iterator_t* results // ............................................................................. TRI_skiplist_iterator_t* TRI_LookupSkiplistIndex (TRI_index_t* idx, TRI_index_operator_t* slOperator, bool reverse) { if (slOperator == nullptr) { return nullptr; } // ......................................................................... // fill the relation operators which may be embedded in the slOperator with // additional information. Recall the slOperator is what information was // received from a user for query the skiplist. // ......................................................................... TRI_skiplist_index_t* skiplistIndex = (TRI_skiplist_index_t*) idx; int errorResult = FillLookupSLOperator(slOperator, skiplistIndex->base._collection); if (errorResult != TRI_ERROR_NO_ERROR) { TRI_set_errno(errorResult); return nullptr; } TRI_skiplist_iterator_t* iteratorResult; iteratorResult = SkiplistIndex_find(skiplistIndex->_skiplistIndex, &skiplistIndex->_paths, slOperator, reverse); return iteratorResult; } //////////////////////////////////////////////////////////////////////////////// /// @brief helper for skiplist methods //////////////////////////////////////////////////////////////////////////////// static int SkiplistIndexHelper (TRI_skiplist_index_t const* skiplistIndex, TRI_skiplist_index_element_t* skiplistElement, TRI_doc_mptr_t const* document) { // .......................................................................... // Assign the document to the SkiplistIndexElement structure so that it can // be retrieved later. // .......................................................................... TRI_ASSERT(document != nullptr); TRI_ASSERT(document->getDataPtr() != nullptr); // ONLY IN INDEX, PROTECTED by RUNTIME TRI_shaped_json_t shapedJson; TRI_EXTRACT_SHAPED_JSON_MARKER(shapedJson, document->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME if (shapedJson._sid == TRI_SHAPE_ILLEGAL) { LOG_WARNING("encountered invalid marker with shape id 0"); return TRI_ERROR_INTERNAL; } bool const sparse = skiplistIndex->base._sparse; int res = TRI_ERROR_NO_ERROR; skiplistElement->_document = const_cast(document); char const* ptr = skiplistElement->_document->getShapedJsonPtr(); // ONLY IN INDEX, PROTECTED by RUNTIME auto subObjects = SkiplistIndex_Subobjects(skiplistElement); size_t const n = TRI_LengthVector(&skiplistIndex->_paths); for (size_t j = 0; j < n; ++j) { TRI_shape_pid_t shape = *((TRI_shape_pid_t*) TRI_AtVector(&skiplistIndex->_paths, j)); // .......................................................................... // Determine if document has that particular shape // .......................................................................... TRI_shape_access_t const* acc = TRI_FindAccessorVocShaper(skiplistIndex->base._collection->getShaper(), shapedJson._sid, shape); // ONLY IN INDEX, PROTECTED by RUNTIME if (acc == nullptr || acc->_resultSid == TRI_SHAPE_ILLEGAL) { // OK, the document does not contain the attributed needed by // the index, are we sparse? subObjects[j]._sid = BasicShapes::TRI_SHAPE_SID_NULL; res = TRI_ERROR_ARANGO_INDEX_DOCUMENT_ATTRIBUTE_MISSING; if (sparse) { // no need to continue return res; } continue; } // .......................................................................... // Extract the field // .......................................................................... TRI_shaped_json_t shapedObject; if (! TRI_ExecuteShapeAccessor(acc, &shapedJson, &shapedObject)) { return TRI_ERROR_INTERNAL; } if (shapedObject._sid == BasicShapes::TRI_SHAPE_SID_NULL) { res = TRI_ERROR_ARANGO_INDEX_DOCUMENT_ATTRIBUTE_MISSING; if (sparse) { // no need to continue return res; } } // ......................................................................... // Store the field // ......................................................................... TRI_FillShapedSub(&subObjects[j], &shapedObject, ptr); } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief inserts a document into a skip list index //////////////////////////////////////////////////////////////////////////////// static int InsertSkiplistIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { TRI_skiplist_index_t* skiplistIndex = (TRI_skiplist_index_t*) idx; // ........................................................................... // Allocate storage to shaped json objects stored as a simple list. // These will be used for comparisions // ........................................................................... auto skiplistElement = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, SkiplistIndex_ElementSize(skiplistIndex->_skiplistIndex), false)); if (skiplistElement == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } int res = SkiplistIndexHelper(skiplistIndex, skiplistElement, doc); // ........................................................................... // most likely the cause of this error is that the index is sparse // and not all attributes the index needs are set -- so the document // is ignored. So not really an error at all. Note that this does // not happen in a non-sparse skiplist index, in which empty // attributes are always treated as if they were bound to null, so // TRI_ERROR_ARANGO_INDEX_DOCUMENT_ATTRIBUTE_MISSING cannot happen at // all. // ........................................................................... // ......................................................................... // It may happen that the document does not have the necessary // attributes to be included within the hash index, in this case do // not report back an error. // ......................................................................... if (res == TRI_ERROR_ARANGO_INDEX_DOCUMENT_ATTRIBUTE_MISSING) { if (idx->_sparse) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplistElement); return TRI_ERROR_NO_ERROR; } res = TRI_ERROR_NO_ERROR; } if (res != TRI_ERROR_NO_ERROR) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplistElement); return res; } // insert into the index. the memory for the element will be owned or freed // by the index return SkiplistIndex_insert(skiplistIndex->_skiplistIndex, skiplistElement); } //////////////////////////////////////////////////////////////////////////////// /// @brief return the memory used by the index //////////////////////////////////////////////////////////////////////////////// static size_t MemorySkiplistIndex (TRI_index_t const* idx) { TRI_skiplist_index_t const* skiplistIndex = (TRI_skiplist_index_t const*) idx; if (skiplistIndex == nullptr) { return 0; } return SkiplistIndex_memoryUsage(skiplistIndex->_skiplistIndex); } //////////////////////////////////////////////////////////////////////////////// /// @brief describes a skiplist index as a json object //////////////////////////////////////////////////////////////////////////////// static TRI_json_t* JsonSkiplistIndex (TRI_index_t const* idx) { // .......................................................................... // Recast as a skiplist index // .......................................................................... TRI_skiplist_index_t const* skiplistIndex = (TRI_skiplist_index_t const*) idx; if (skiplistIndex == nullptr) { return nullptr; } TRI_document_collection_t* document = idx->_collection; // .......................................................................... // Allocate sufficent memory for the field list // .......................................................................... size_t const n = TRI_LengthVector(&skiplistIndex->_paths); char const** fieldList = static_cast(TRI_Allocate(TRI_CORE_MEM_ZONE, (sizeof(char*) * n) , false)); // .......................................................................... // Convert the attributes (field list of the skiplist index) into strings // .......................................................................... for (size_t j = 0; j < n; ++j) { TRI_shape_pid_t shape = *((TRI_shape_pid_t*) TRI_AtVector(&skiplistIndex->_paths, j)); TRI_shape_path_t const* path = document->getShaper()->lookupAttributePathByPid(document->getShaper(), shape); // ONLY IN INDEX, PROTECTED by RUNTIME if (path == nullptr) { TRI_Free(TRI_CORE_MEM_ZONE, (void*) fieldList); return nullptr; } fieldList[j] = ((const char*) path) + sizeof(TRI_shape_path_t) + path->_aidLength * sizeof(TRI_shape_aid_t); } // .......................................................................... // create json object and fill it // .......................................................................... TRI_json_t* json = TRI_JsonIndex(TRI_CORE_MEM_ZONE, idx); TRI_json_t* fields = TRI_CreateArrayJson(TRI_CORE_MEM_ZONE); for (size_t j = 0; j < n; ++j) { TRI_PushBack3ArrayJson(TRI_CORE_MEM_ZONE, fields, TRI_CreateStringCopyJson(TRI_CORE_MEM_ZONE, fieldList[j], strlen(fieldList[j]))); } TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, json, "fields", fields); TRI_Free(TRI_CORE_MEM_ZONE, (void*) fieldList); return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief removes a document from a skiplist index //////////////////////////////////////////////////////////////////////////////// static int RemoveSkiplistIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { // ........................................................................... // Obtain the skiplist index structure // ........................................................................... TRI_skiplist_index_t* skiplistIndex = (TRI_skiplist_index_t*) idx; auto skiplistElement = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, SkiplistIndex_ElementSize(skiplistIndex->_skiplistIndex), false)); if (skiplistElement == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } // .......................................................................... // Fill the json field list from the document // .......................................................................... int res = SkiplistIndexHelper(skiplistIndex, skiplistElement, doc); // .......................................................................... // Error returned generally implies that the document never was part of the // skiplist index // .......................................................................... if (res == TRI_ERROR_ARANGO_INDEX_DOCUMENT_ATTRIBUTE_MISSING) { if (idx->_sparse) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplistElement); return TRI_ERROR_NO_ERROR; } res = TRI_ERROR_NO_ERROR; } if (res != TRI_ERROR_NO_ERROR) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplistElement); return res; } // attempt the removal for skiplist indexes // ownership for the index element is transferred to the index return SkiplistIndex_remove(skiplistIndex->_skiplistIndex, skiplistElement); } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a skiplist index //////////////////////////////////////////////////////////////////////////////// TRI_index_t* TRI_CreateSkiplistIndex (TRI_document_collection_t* document, TRI_idx_iid_t iid, TRI_vector_pointer_t* fields, TRI_vector_t* paths, bool sparse, bool unique) { TRI_skiplist_index_t* skiplistIndex = static_cast(TRI_Allocate(TRI_CORE_MEM_ZONE, sizeof(TRI_skiplist_index_t), false)); if (skiplistIndex == nullptr) { return nullptr; } TRI_index_t* idx = &skiplistIndex->base; TRI_InitIndex(idx, iid, TRI_IDX_TYPE_SKIPLIST_INDEX, document, sparse, unique); idx->memory = MemorySkiplistIndex; idx->json = JsonSkiplistIndex; idx->insert = InsertSkiplistIndex; idx->remove = RemoveSkiplistIndex; // ........................................................................... // Copy the contents of the shape list vector into a new vector and store this // ........................................................................... TRI_CopyPathVector(&skiplistIndex->_paths, paths); TRI_InitVectorString(&idx->_fields, TRI_CORE_MEM_ZONE); TRI_CopyDataFromVectorPointerVectorString(TRI_CORE_MEM_ZONE, &idx->_fields, fields); skiplistIndex->_skiplistIndex = SkiplistIndex_new(document, TRI_LengthVector(paths), unique); if (skiplistIndex->_skiplistIndex == nullptr) { TRI_DestroyVector(&skiplistIndex->_paths); TRI_DestroyVectorString(&idx->_fields); TRI_Free(TRI_CORE_MEM_ZONE, skiplistIndex); LOG_WARNING("skiplist index creation failed -- internal error when " "creating skiplist structure"); return nullptr; } return idx; } //////////////////////////////////////////////////////////////////////////////// /// @brief frees the memory allocated, but does not free the pointer //////////////////////////////////////////////////////////////////////////////// void TRI_DestroySkiplistIndex (TRI_index_t* idx) { if (idx == nullptr) { return; } LOG_TRACE("destroying skiplist index"); TRI_DestroyVectorString(&idx->_fields); TRI_skiplist_index_t* sl = (TRI_skiplist_index_t*) idx; TRI_DestroyVector(&sl->_paths); SkiplistIndex_free(sl->_skiplistIndex); } //////////////////////////////////////////////////////////////////////////////// /// @brief frees the memory allocated and frees the pointer //////////////////////////////////////////////////////////////////////////////// void TRI_FreeSkiplistIndex (TRI_index_t* idx) { if (idx == nullptr) { return; } TRI_DestroySkiplistIndex(idx); TRI_Free(TRI_CORE_MEM_ZONE, idx); } // ----------------------------------------------------------------------------- // --SECTION-- FULLTEXT INDEX // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief extraction context //////////////////////////////////////////////////////////////////////////////// struct TextExtractorContext { std::vector>* _positions; TRI_shaper_t* _shaper; }; //////////////////////////////////////////////////////////////////////////////// /// @brief walk over an array shape and extract the string values //////////////////////////////////////////////////////////////////////////////// static bool ArrayTextExtractor (TRI_shaper_t* shaper, TRI_shape_t const* shape, char const*, char const* shapedJson, uint64_t length, void* data) { char* text; size_t textLength; bool ok = TRI_StringValueShapedJson(shape, shapedJson, &text, &textLength); if (ok) { // add string value found try { static_cast(data)->_positions->emplace_back(text, textLength); } catch (...) { } } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief walk over a list shape and extract the string values //////////////////////////////////////////////////////////////////////////////// static bool ListTextExtractor (TRI_shaper_t* shaper, TRI_shape_t const* shape, char const* shapedJson, uint64_t length, void* data) { if (shape->_type == TRI_SHAPE_ARRAY) { // a sub-object TRI_IterateShapeDataArray(static_cast(data)->_shaper, shape, shapedJson, ArrayTextExtractor, data); } else if (shape->_type == TRI_SHAPE_SHORT_STRING || shape->_type == TRI_SHAPE_LONG_STRING) { char* text; size_t textLength; bool ok = TRI_StringValueShapedJson(shape, shapedJson, &text, &textLength); if (ok) { // add string value found try { static_cast(data)->_positions->emplace_back(text, textLength); } catch (...) { } } } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief callback function called by the fulltext index to determine the /// words to index for a specific document //////////////////////////////////////////////////////////////////////////////// static TRI_fulltext_wordlist_t* GetWordlist (TRI_index_t* idx, TRI_doc_mptr_t const* document) { TRI_fulltext_index_t* fulltextIndex; TRI_fulltext_wordlist_t* wordlist; TRI_shaped_json_t shaped; TRI_shaped_json_t shapedJson; TRI_shape_t const* shape; TRI_vector_string_t* words; bool ok; fulltextIndex = (TRI_fulltext_index_t*) idx; // extract the shape TRI_shaper_t* shaper = fulltextIndex->base._collection->getShaper(); TRI_EXTRACT_SHAPED_JSON_MARKER(shaped, document->getDataPtr()); // ONLY IN INDEX, PROTECTED by RUNTIME ok = TRI_ExtractShapedJsonVocShaper(shaper, &shaped, 0, fulltextIndex->_attribute, &shapedJson, &shape); // ONLY IN INDEX, PROTECTED by RUNTIME if (! ok || shape == nullptr) { return nullptr; } // extract the string value for the indexed attribute if (shape->_type == TRI_SHAPE_SHORT_STRING || shape->_type == TRI_SHAPE_LONG_STRING) { char* text; size_t textLength; ok = TRI_StringValueShapedJson(shape, shapedJson._data.data, &text, &textLength); if (! ok) { return nullptr; } // parse the document text words = TRI_get_words(text, textLength, (size_t) fulltextIndex->_minWordLength, (size_t) TRI_FULLTEXT_MAX_WORD_LENGTH, true); } else if (shape->_type == TRI_SHAPE_ARRAY) { std::vector> values; TextExtractorContext context{ &values, shaper }; TRI_IterateShapeDataArray(shaper, shape, shapedJson._data.data, ArrayTextExtractor, &context); words = nullptr; for (auto const& it : values) { if (! TRI_get_words(words, it.first, it.second, (size_t) fulltextIndex->_minWordLength, (size_t) TRI_FULLTEXT_MAX_WORD_LENGTH, true)) { if (words != nullptr) { TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words); } return nullptr; } } } else if (shape->_type == TRI_SHAPE_LIST || shape->_type == TRI_SHAPE_HOMOGENEOUS_LIST || shape->_type == TRI_SHAPE_HOMOGENEOUS_SIZED_LIST) { std::vector> values; TextExtractorContext context{ &values, shaper }; TRI_IterateShapeDataList(shaper, shape, shapedJson._data.data, ListTextExtractor, &context); words = nullptr; for (auto const& it : values) { if (! TRI_get_words(words, it.first, it.second, (size_t) fulltextIndex->_minWordLength, (size_t) TRI_FULLTEXT_MAX_WORD_LENGTH, true)) { if (words != nullptr) { TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words); } return nullptr; } } } else { words = nullptr; } if (words == nullptr) { return nullptr; } wordlist = TRI_CreateWordlistFulltextIndex(words->_buffer, words->_length); if (wordlist == nullptr) { TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words); return nullptr; } // this really is a hack, but it works well: // make the word list vector think it's empty and free it // this does not free the word list, that we have already over the result words->_length = 0; words->_buffer = nullptr; TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words); return wordlist; } //////////////////////////////////////////////////////////////////////////////// /// @brief inserts a document into the fulltext index //////////////////////////////////////////////////////////////////////////////// static int InsertFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { TRI_fulltext_index_t* fulltextIndex; int res; fulltextIndex = (TRI_fulltext_index_t*) idx; if (idx == nullptr) { LOG_WARNING("internal error in InsertFulltextIndex"); return TRI_ERROR_INTERNAL; } res = TRI_ERROR_NO_ERROR; TRI_fulltext_wordlist_t* wordlist = GetWordlist(idx, doc); if (wordlist == nullptr) { // TODO: distinguish the cases "empty wordlist" and "out of memory" // LOG_WARNING("could not build wordlist"); return res; } if (wordlist->_numWords > 0) { // TODO: use status codes if (! TRI_InsertWordsFulltextIndex(fulltextIndex->_fulltextIndex, (TRI_fulltext_doc_t) ((uintptr_t) doc), wordlist)) { LOG_ERROR("adding document to fulltext index failed"); res = TRI_ERROR_INTERNAL; } } TRI_FreeWordlistFulltextIndex(wordlist); return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief return the memory used by the index //////////////////////////////////////////////////////////////////////////////// static size_t MemoryFulltextIndex (TRI_index_t const* idx) { TRI_fulltext_index_t const* fulltextIndex = (TRI_fulltext_index_t const*) idx; return TRI_MemoryFulltextIndex(fulltextIndex->_fulltextIndex); } //////////////////////////////////////////////////////////////////////////////// /// @brief describes a fulltext index as a json object //////////////////////////////////////////////////////////////////////////////// static TRI_json_t* JsonFulltextIndex (TRI_index_t const* idx) { TRI_json_t* json; TRI_json_t* fields; TRI_shape_path_t const* path; char const* attributeName; TRI_fulltext_index_t const* fulltextIndex = (TRI_fulltext_index_t const*) idx; if (fulltextIndex == nullptr) { return nullptr; } TRI_document_collection_t* document = idx->_collection; // convert attribute to string path = document->getShaper()->lookupAttributePathByPid(document->getShaper(), fulltextIndex->_attribute); // ONLY IN INDEX, PROTECTED by RUNTIME if (path == 0) { return nullptr; } attributeName = ((char const*) path) + sizeof(TRI_shape_path_t) + (path->_aidLength * sizeof(TRI_shape_aid_t)); json = TRI_JsonIndex(TRI_CORE_MEM_ZONE, idx); TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, json, "minLength", TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) fulltextIndex->_minWordLength)); fields = TRI_CreateArrayJson(TRI_CORE_MEM_ZONE); TRI_PushBack3ArrayJson(TRI_CORE_MEM_ZONE, fields, TRI_CreateStringCopyJson(TRI_CORE_MEM_ZONE, attributeName, strlen(attributeName))); TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, json, "fields", fields); return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief removes a document from a fulltext index //////////////////////////////////////////////////////////////////////////////// static int RemoveFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc, bool isRollback) { TRI_fulltext_index_t* fulltextIndex = (TRI_fulltext_index_t*) idx; TRI_DeleteDocumentFulltextIndex(fulltextIndex->_fulltextIndex, (TRI_fulltext_doc_t) ((uintptr_t) doc)); return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief cleanup function for the fulltext index /// /// This will incrementally clean the index by removing document/word pairs /// for deleted documents //////////////////////////////////////////////////////////////////////////////// static int CleanupFulltextIndex (TRI_index_t* idx) { LOG_TRACE("fulltext cleanup called"); TRI_fulltext_index_t* fulltextIndex = (TRI_fulltext_index_t*) idx; int res = TRI_ERROR_NO_ERROR; // check whether we should do a cleanup at all if (! TRI_CompactFulltextIndex(fulltextIndex->_fulltextIndex)) { res = TRI_ERROR_INTERNAL; } return res; } // ----------------------------------------------------------------------------- // --SECTION-- public functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief creates a fulltext index //////////////////////////////////////////////////////////////////////////////// TRI_index_t* TRI_CreateFulltextIndex (TRI_document_collection_t* document, TRI_idx_iid_t iid, const char* attributeName, const bool indexSubstrings, int minWordLength) { TRI_index_t* idx; TRI_fts_index_t* fts; TRI_shaper_t* shaper; char* copy; TRI_shape_pid_t attribute; // look up the attribute shaper = document->getShaper(); // ONLY IN INDEX, PROTECTED by RUNTIME attribute = shaper->findOrCreateAttributePathByName(shaper, attributeName); if (attribute == 0) { return nullptr; } copy = TRI_DuplicateStringZ(TRI_CORE_MEM_ZONE, attributeName); TRI_fulltext_index_t* fulltextIndex = static_cast(TRI_Allocate(TRI_CORE_MEM_ZONE, sizeof(TRI_fulltext_index_t), false)); fts = TRI_CreateFtsIndex(2048, 1, 1); if (fts == nullptr) { TRI_Free(TRI_CORE_MEM_ZONE, fulltextIndex); return nullptr; } idx = &fulltextIndex->base; TRI_InitIndex(idx, iid, TRI_IDX_TYPE_FULLTEXT_INDEX, document, true, false); idx->memory = MemoryFulltextIndex; idx->json = JsonFulltextIndex; idx->insert = InsertFulltextIndex; idx->remove = RemoveFulltextIndex; idx->cleanup = CleanupFulltextIndex; fulltextIndex->_fulltextIndex = fts; fulltextIndex->_indexSubstrings = indexSubstrings; fulltextIndex->_attribute = attribute; fulltextIndex->_minWordLength = (minWordLength > 0 ? minWordLength : 1); TRI_InitVectorString(&idx->_fields, TRI_CORE_MEM_ZONE); TRI_PushBackVectorString(&idx->_fields, copy); return idx; } //////////////////////////////////////////////////////////////////////////////// /// @brief frees the memory allocated, but does not free the pointer //////////////////////////////////////////////////////////////////////////////// void TRI_DestroyFulltextIndex (TRI_index_t* idx) { if (idx == nullptr) { return; } TRI_fulltext_index_t* fulltextIndex = (TRI_fulltext_index_t*) idx; TRI_DestroyVectorString(&idx->_fields); LOG_TRACE("destroying fulltext index"); TRI_FreeFtsIndex(fulltextIndex->_fulltextIndex); } //////////////////////////////////////////////////////////////////////////////// /// @brief frees the memory allocated and frees the pointer //////////////////////////////////////////////////////////////////////////////// void TRI_FreeFulltextIndex (TRI_index_t* idx) { if (idx == nullptr) { return; } TRI_DestroyFulltextIndex(idx); TRI_Free(TRI_CORE_MEM_ZONE, idx); } //////////////////////////////////////////////////////////////////////////////// /// @brief index comparator, used by the coordinator to detect if two index /// contents are the same //////////////////////////////////////////////////////////////////////////////// bool IndexComparator (TRI_json_t const* lhs, TRI_json_t const* rhs) { TRI_json_t* typeJson = TRI_LookupObjectJson(lhs, "type"); TRI_ASSERT(TRI_IsStringJson(typeJson)); // type must be identical if (! TRI_CheckSameValueJson(typeJson, TRI_LookupObjectJson(rhs, "type"))) { return false; } TRI_idx_type_e type = TRI_TypeIndex(typeJson->_value._string.data); // unique must be identical if present TRI_json_t* value = TRI_LookupObjectJson(lhs, "unique"); if (TRI_IsBooleanJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "unique"))) { return false; } } // sparse must be identical if present value = TRI_LookupObjectJson(lhs, "sparse"); if (TRI_IsBooleanJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "sparse"))) { return false; } } if (type == TRI_IDX_TYPE_GEO1_INDEX) { // geoJson must be identical if present value = TRI_LookupObjectJson(lhs, "geoJson"); if (TRI_IsBooleanJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "geoJson"))) { return false; } } } else if (type == TRI_IDX_TYPE_FULLTEXT_INDEX) { // minLength value = TRI_LookupObjectJson(lhs, "minLength"); if (TRI_IsNumberJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "minLength"))) { return false; } } } else if (type == TRI_IDX_TYPE_CAP_CONSTRAINT) { // size, byteSize value = TRI_LookupObjectJson(lhs, "size"); if (TRI_IsNumberJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "size"))) { return false; } } value = TRI_LookupObjectJson(lhs, "byteSize"); if (TRI_IsNumberJson(value)) { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "byteSize"))) { return false; } } } // other index types: fields must be identical if present value = TRI_LookupObjectJson(lhs, "fields"); if (TRI_IsArrayJson(value)) { if (type == TRI_IDX_TYPE_HASH_INDEX) { size_t const nv = TRI_LengthArrayJson(value); // compare fields in arbitrary order TRI_json_t const* r = TRI_LookupObjectJson(rhs, "fields"); if (! TRI_IsArrayJson(r) || nv != TRI_LengthArrayJson(r)) { return false; } size_t const nr = TRI_LengthArrayJson(r); for (size_t i = 0; i < nv; ++i) { TRI_json_t const* v = TRI_LookupArrayJson(value, i); bool found = false; for (size_t j = 0; j < nr; ++j) { if (TRI_CheckSameValueJson(v, TRI_LookupArrayJson(r, j))) { found = true; break; } } if (! found) { return false; } } } else { if (! TRI_CheckSameValueJson(value, TRI_LookupObjectJson(rhs, "fields"))) { return false; } } } return true; } // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- // Local Variables: // mode: outline-minor // outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" // End: