////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
////////////////////////////////////////////////////////////////////////////////

#include "collection.h"
|
|
|
|
#include "ApplicationFeatures/ApplicationServer.h"
|
|
#include "ApplicationFeatures/PageSizeFeature.h"
|
|
#include "Aql/QueryCache.h"
|
|
#include "Basics/Barrier.h"
|
|
#include "Basics/FileUtils.h"
|
|
#include "Basics/ReadLocker.h"
|
|
#include "Basics/StringUtils.h"
|
|
#include "Basics/ThreadPool.h"
|
|
#include "Basics/Timers.h"
|
|
#include "Basics/VelocyPackHelper.h"
|
|
#include "Basics/WriteLocker.h"
|
|
#include "Basics/files.h"
|
|
#include "Basics/memory-map.h"
|
|
#include "Basics/tri-strings.h"
|
|
#include "Cluster/ServerState.h"
|
|
#include "Cluster/ClusterMethods.h"
|
|
#include "FulltextIndex/fulltext-index.h"
|
|
#include "Indexes/EdgeIndex.h"
|
|
#include "Indexes/FulltextIndex.h"
|
|
#include "Indexes/GeoIndex2.h"
|
|
#include "Indexes/HashIndex.h"
|
|
#include "Indexes/PrimaryIndex.h"
|
|
#include "Indexes/SkiplistIndex.h"
|
|
#include "Logger/Logger.h"
|
|
#include "RestServer/DatabaseFeature.h"
|
|
#include "StorageEngine/EngineSelectorFeature.h"
|
|
#include "StorageEngine/StorageEngine.h"
|
|
#include "Utils/CollectionNameResolver.h"
|
|
#include "Utils/CollectionReadLocker.h"
|
|
#include "Utils/CollectionWriteLocker.h"
|
|
#include "Utils/SingleCollectionTransaction.h"
|
|
#include "Utils/StandaloneTransactionContext.h"
|
|
#include "VocBase/DatafileHelper.h"
|
|
#include "VocBase/IndexPoolFeature.h"
|
|
#include "VocBase/KeyGenerator.h"
|
|
#include "VocBase/ticks.h"
|
|
#include "VocBase/vocbase.h"
|
|
#include "Wal/DocumentOperation.h"
|
|
#include "Wal/LogfileManager.h"
|
|
#include "Wal/Marker.h"
|
|
#include "Wal/Slots.h"
|
|
|
|
#ifdef ARANGODB_ENABLE_ROCKSDB
|
|
#include "Indexes/RocksDBIndex.h"
|
|
|
|
#include <rocksdb/db.h>
|
|
#include <rocksdb/options.h>
|
|
#include <rocksdb/utilities/optimistic_transaction_db.h>
|
|
#include <rocksdb/utilities/transaction.h>
|
|
#endif
|
|
|
|
#include <velocypack/Collection.h>
|
|
#include <velocypack/Iterator.h>
|
|
#include <velocypack/Value.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
// TODO: remove this
|
|
#define TRI_VOC_PARAMETER_FILE "parameter.json"
|
|
|
|
using namespace arangodb;
|
|
using namespace arangodb::basics;
|
|
|
|
int TRI_AddOperationTransaction(TRI_transaction_t*,
|
|
arangodb::wal::DocumentOperation&, bool&);
|
|
|
|
/// @brief helper struct for filling indexes
class IndexFiller {
 public:
  IndexFiller(arangodb::Transaction* trx, TRI_collection_t* document,
              arangodb::Index* idx, std::function<void(int)> callback)
      : _trx(trx), _document(document), _idx(idx), _callback(callback) {}

  void operator()() {
    int res = TRI_ERROR_INTERNAL;

    try {
      res = _document->fillIndex(_trx, _idx);
    } catch (...) {
    }

    _callback(res);
  }

 private:
  arangodb::Transaction* _trx;
  TRI_collection_t* _document;
  arangodb::Index* _idx;
  std::function<void(int)> _callback;
};

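// IndexFiller is a plain callable; a minimal usage sketch (hypothetical
// dispatch, assuming a worker pool that accepts std::function<void()> tasks):
//
//   IndexFiller filler(trx, document, idx,
//                      [](int res) { /* record the per-index result */ });
//   pool->enqueue(filler);   // or simply filler() to fill synchronously
//
// The callback receives TRI_ERROR_NO_ERROR on success, or the error code
// produced by fillIndex(); exceptions are swallowed and surface as the
// initial TRI_ERROR_INTERNAL.
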
struct OpenIndexIteratorContext {
  arangodb::Transaction* trx;
  TRI_collection_t* collection;
};

/// @brief iterator for index open
static bool OpenIndexIterator(std::string const& filename, void* data) {
  // load VelocyPack description of the index
  std::shared_ptr<VPackBuilder> builder;
  try {
    builder = arangodb::basics::VelocyPackHelper::velocyPackFromFile(filename.c_str());
  } catch (...) {
    // failed to parse file
    LOG(ERR) << "failed to parse index definition from '" << filename << "'";
    return false;
  }

  VPackSlice description = builder->slice();
  // VelocyPack must be an index description
  if (!description.isObject()) {
    LOG(ERR) << "cannot read index definition from '" << filename << "'";
    return false;
  }

  auto ctx = static_cast<OpenIndexIteratorContext*>(data);
  arangodb::Transaction* trx = ctx->trx;
  TRI_collection_t* document = ctx->collection;

  int res = document->indexFromVelocyPack(trx, description, nullptr);

  if (res != TRI_ERROR_NO_ERROR) {
    // error was already printed if we get here
    return false;
  }

  return true;
}

/// @brief extracts a field list from a VelocyPack object
/// Does not copy any data; the caller has to make sure that the data
/// in the slice stays valid until the return value is destroyed.
static VPackSlice ExtractFields(VPackSlice const& slice, TRI_idx_iid_t iid) {
  VPackSlice fld = slice.get("fields");
  if (!fld.isArray()) {
    LOG(ERR) << "ignoring index " << iid << ", 'fields' must be an array";
    THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
  }

  for (auto const& sub : VPackArrayIterator(fld)) {
    if (!sub.isString()) {
      LOG(ERR) << "ignoring index " << iid
               << ", 'fields' must be an array of attribute paths";
      THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
    }
  }
  return fld;
}

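// For illustration, an index description accepted by ExtractFields looks
// like the following (example values, in the shape stored in the
// index-<iid>.json files read by OpenIndexIterator above):
//
//   { "id": "123", "type": "hash", "fields": ["name", "address.city"] }
//
// ExtractFields returns the "fields" slice, i.e. ["name", "address.city"],
// and throws TRI_ERROR_BAD_PARAMETER if "fields" is missing, not an array,
// or contains non-string members.
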
/// @brief ensures that an error code is set in all required places
static void EnsureErrorCode(int code) {
  if (code == TRI_ERROR_NO_ERROR) {
    // must have an error code
    code = TRI_ERROR_INTERNAL;
  }

  TRI_set_errno(code);
  errno = code;
}

static uint64_t GetNumericFilenamePart(char const* filename) {
  char const* pos1 = strrchr(filename, '.');

  if (pos1 == nullptr) {
    return 0;
  }

  char const* pos2 = strrchr(filename, '-');

  if (pos2 == nullptr || pos2 > pos1) {
    return 0;
  }

  return StringUtils::uint64(pos2 + 1, pos1 - pos2 - 1);
}

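// Example: for "datafile-3619659.db" the part between the last '-' and the
// last '.' is parsed, so GetNumericFilenamePart returns 3619659. Filenames
// without a numeric part in that position (e.g. "parameter.json") yield 0.
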
TRI_collection_t::TRI_collection_t(TRI_vocbase_t* vocbase,
                                   arangodb::VocbaseCollectionInfo const& parameters)
    : _vocbase(vocbase),
      _tickMax(0),
      _info(parameters),
      _state(TRI_COL_STATE_WRITE),
      _lastError(TRI_ERROR_NO_ERROR),
      _ditches(this),
      _masterPointers(),
      _uncollectedLogfileEntries(0),
      _numberDocuments(0),
      _lastCompaction(0.0),
      _cleanupIndexes(0),
      _persistentIndexes(0),
      _nextCompactionStartIndex(0),
      _lastCompactionStatus(nullptr),
      _useSecondaryIndexes(true) {
  // check if we can generate the key generator
  std::shared_ptr<arangodb::velocypack::Buffer<uint8_t> const> buffer =
      parameters.keyOptions();

  VPackSlice slice;
  if (buffer != nullptr) {
    slice = VPackSlice(buffer->data());
  }

  std::unique_ptr<KeyGenerator> keyGenerator(KeyGenerator::factory(slice));

  if (keyGenerator == nullptr) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_INVALID_KEY_GENERATOR);
  }

  _keyGenerator.reset(keyGenerator.release());

  setCompactionStatus("compaction not yet started");
  if (ServerState::instance()->isDBServer()) {
    _followers.reset(new FollowerInfo(this));
  }
}

TRI_collection_t::~TRI_collection_t() {
  try {
    this->close();
  } catch (...) {
    // ignore any errors here
  }

  _ditches.destroy();

  _info.clearKeyOptions();

  // free memory allocated for indexes
  for (auto& idx : allIndexes()) {
    delete idx;
  }

  for (auto& it : _datafiles) {
    TRI_FreeDatafile(it);
  }
  for (auto& it : _journals) {
    TRI_FreeDatafile(it);
  }
  for (auto& it : _compactors) {
    TRI_FreeDatafile(it);
  }
}

/// @brief updates the last revision id of a collection
/// note: the write-lock for the collection must be held to call this
void TRI_collection_t::setLastRevision(TRI_voc_rid_t rid, bool force) {
  if (rid > 0) {
    _info.setRevision(rid, force);
  }
}

/// @brief whether or not a collection is fully collected
bool TRI_collection_t::isFullyCollected() {
  READ_LOCKER(readLocker, _lock);

  int64_t uncollected = _uncollectedLogfileEntries.load();

  return (uncollected == 0);
}

void TRI_collection_t::setNextCompactionStartIndex(size_t index) {
  MUTEX_LOCKER(mutexLocker, _compactionStatusLock);
  _nextCompactionStartIndex = index;
}

size_t TRI_collection_t::getNextCompactionStartIndex() {
  MUTEX_LOCKER(mutexLocker, _compactionStatusLock);
  return _nextCompactionStartIndex;
}

void TRI_collection_t::setCompactionStatus(char const* reason) {
  TRI_ASSERT(reason != nullptr);
  struct tm tb;
  time_t tt = time(nullptr);
  TRI_gmtime(tt, &tb);

  MUTEX_LOCKER(mutexLocker, _compactionStatusLock);
  _lastCompactionStatus = reason;

  strftime(&_lastCompactionStamp[0], sizeof(_lastCompactionStamp),
           "%Y-%m-%dT%H:%M:%SZ", &tb);
}

void TRI_collection_t::getCompactionStatus(char const*& reason,
                                           char* dst, size_t maxSize) {
  memset(dst, 0, maxSize);
  if (maxSize > sizeof(_lastCompactionStamp)) {
    maxSize = sizeof(_lastCompactionStamp);
  }
  MUTEX_LOCKER(mutexLocker, _compactionStatusLock);
  reason = _lastCompactionStatus;
  memcpy(dst, &_lastCompactionStamp[0], maxSize);
}

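// A minimal caller sketch (assumed buffer size; the stamp is a UTC timestamp
// of the form "2016-01-02T03:04:05Z", i.e. 20 characters plus terminator):
//
//   char const* reason = nullptr;
//   char stamp[21];
//   collection->getCompactionStatus(reason, &stamp[0], sizeof(stamp));
//
// Afterwards 'reason' points to the static status string last passed to
// setCompactionStatus(), and 'stamp' holds the zero-padded timestamp.
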
/// @brief read locks a collection
int TRI_collection_t::beginRead() {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "BeginRead blocked: " << document->_info._name <<
      // std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }
  // LOCKING-DEBUG
  // std::cout << "BeginRead: " << document->_info._name << std::endl;
  READ_LOCKER(locker, _lock);

  try {
    _vocbase->_deadlockDetector.addReader(this, false);
  } catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  locker.steal();

  return TRI_ERROR_NO_ERROR;
}

/// @brief read unlocks a collection
int TRI_collection_t::endRead() {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "EndRead blocked: " << document->_info._name << std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }

  try {
    _vocbase->_deadlockDetector.unsetReader(this);
  } catch (...) {
  }

  // LOCKING-DEBUG
  // std::cout << "EndRead: " << document->_info._name << std::endl;
  _lock.unlockRead();

  return TRI_ERROR_NO_ERROR;
}

/// @brief write locks a collection
int TRI_collection_t::beginWrite() {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "BeginWrite blocked: " << document->_info._name <<
      // std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }
  // LOCKING-DEBUG
  // std::cout << "BeginWrite: " << document->_info._name << std::endl;
  WRITE_LOCKER(locker, _lock);

  // register writer
  try {
    _vocbase->_deadlockDetector.addWriter(this, false);
  } catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  locker.steal();

  return TRI_ERROR_NO_ERROR;
}

/// @brief write unlocks a collection
int TRI_collection_t::endWrite() {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "EndWrite blocked: " << document->_info._name <<
      // std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }

  // unregister writer
  try {
    _vocbase->_deadlockDetector.unsetWriter(this);
  } catch (...) {
    // must go on here to unlock the lock
  }

  // LOCKING-DEBUG
  // std::cout << "EndWrite: " << document->_info._name << std::endl;
  _lock.unlockWrite();

  return TRI_ERROR_NO_ERROR;
}

/// @brief read locks a collection, with a timeout (in µseconds)
int TRI_collection_t::beginReadTimed(uint64_t timeout,
                                     uint64_t sleepPeriod) {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "BeginReadTimed blocked: " << document->_info._name <<
      // std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }
  uint64_t waited = 0;
  if (timeout == 0) {
    // we don't allow looping forever. limit waiting to 15 minutes max.
    timeout = 15 * 60 * 1000 * 1000;
  }

  // LOCKING-DEBUG
  // std::cout << "BeginReadTimed: " << document->_info._name << std::endl;
  int iterations = 0;
  bool wasBlocked = false;

  while (true) {
    TRY_READ_LOCKER(locker, _lock);

    if (locker.isLocked()) {
      // when we are here, we've got the read lock
      _vocbase->_deadlockDetector.addReader(this, wasBlocked);

      // keep lock and exit loop
      locker.steal();
      return TRI_ERROR_NO_ERROR;
    }

    try {
      if (!wasBlocked) {
        // insert reader
        wasBlocked = true;
        if (_vocbase->_deadlockDetector.setReaderBlocked(this) ==
            TRI_ERROR_DEADLOCK) {
          // deadlock
          LOG(TRACE) << "deadlock detected while trying to acquire read-lock on collection '" << _info.name() << "'";
          return TRI_ERROR_DEADLOCK;
        }
        LOG(TRACE) << "waiting for read-lock on collection '" << _info.name() << "'";
      } else if (++iterations >= 5) {
        // periodically check for deadlocks
        TRI_ASSERT(wasBlocked);
        iterations = 0;
        if (_vocbase->_deadlockDetector.detectDeadlock(this, false) ==
            TRI_ERROR_DEADLOCK) {
          // deadlock
          _vocbase->_deadlockDetector.unsetReaderBlocked(this);
          LOG(TRACE) << "deadlock detected while trying to acquire read-lock on collection '" << _info.name() << "'";
          return TRI_ERROR_DEADLOCK;
        }
      }
    } catch (...) {
      // clean up!
      if (wasBlocked) {
        _vocbase->_deadlockDetector.unsetReaderBlocked(this);
      }
      // always exit
      return TRI_ERROR_OUT_OF_MEMORY;
    }

#ifdef _WIN32
    usleep((unsigned long)sleepPeriod);
#else
    usleep((useconds_t)sleepPeriod);
#endif

    waited += sleepPeriod;

    if (waited > timeout) {
      _vocbase->_deadlockDetector.unsetReaderBlocked(this);
      LOG(TRACE) << "timed out waiting for read-lock on collection '" << _info.name() << "'";
      return TRI_ERROR_LOCK_TIMEOUT;
    }
  }
}

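// Both beginReadTimed() above and beginWriteTimed() below acquire the lock by
// polling: they retry the try-lock every 'sleepPeriod' microseconds, register
// with the deadlock detector as blocked on the first failed attempt, re-run
// deadlock detection every fifth iteration, and give up with
// TRI_ERROR_LOCK_TIMEOUT once 'timeout' microseconds have elapsed.
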
/// @brief write locks a collection, with a timeout
int TRI_collection_t::beginWriteTimed(uint64_t timeout,
                                      uint64_t sleepPeriod) {
  if (arangodb::Transaction::_makeNolockHeaders != nullptr) {
    std::string collName(_info.name());
    auto it = arangodb::Transaction::_makeNolockHeaders->find(collName);
    if (it != arangodb::Transaction::_makeNolockHeaders->end()) {
      // do not lock by command
      // LOCKING-DEBUG
      // std::cout << "BeginWriteTimed blocked: " << document->_info._name <<
      // std::endl;
      return TRI_ERROR_NO_ERROR;
    }
  }
  uint64_t waited = 0;
  if (timeout == 0) {
    // we don't allow looping forever. limit waiting to 15 minutes max.
    timeout = 15 * 60 * 1000 * 1000;
  }

  // LOCKING-DEBUG
  // std::cout << "BeginWriteTimed: " << document->_info._name << std::endl;
  int iterations = 0;
  bool wasBlocked = false;

  while (true) {
    TRY_WRITE_LOCKER(locker, _lock);

    if (locker.isLocked()) {
      // register writer
      _vocbase->_deadlockDetector.addWriter(this, wasBlocked);
      // keep lock and exit loop
      locker.steal();
      return TRI_ERROR_NO_ERROR;
    }

    try {
      if (!wasBlocked) {
        // insert writer
        wasBlocked = true;
        if (_vocbase->_deadlockDetector.setWriterBlocked(this) ==
            TRI_ERROR_DEADLOCK) {
          // deadlock
          LOG(TRACE) << "deadlock detected while trying to acquire write-lock on collection '" << _info.name() << "'";
          return TRI_ERROR_DEADLOCK;
        }
        LOG(TRACE) << "waiting for write-lock on collection '" << _info.name() << "'";
      } else if (++iterations >= 5) {
        // periodically check for deadlocks
        TRI_ASSERT(wasBlocked);
        iterations = 0;
        if (_vocbase->_deadlockDetector.detectDeadlock(this, true) ==
            TRI_ERROR_DEADLOCK) {
          // deadlock
          _vocbase->_deadlockDetector.unsetWriterBlocked(this);
          LOG(TRACE) << "deadlock detected while trying to acquire write-lock on collection '" << _info.name() << "'";
          return TRI_ERROR_DEADLOCK;
        }
      }
    } catch (...) {
      // clean up!
      if (wasBlocked) {
        _vocbase->_deadlockDetector.unsetWriterBlocked(this);
      }
      // always exit
      return TRI_ERROR_OUT_OF_MEMORY;
    }

#ifdef _WIN32
    usleep((unsigned long)sleepPeriod);
#else
    usleep((useconds_t)sleepPeriod);
#endif

    waited += sleepPeriod;

    if (waited > timeout) {
      _vocbase->_deadlockDetector.unsetWriterBlocked(this);
      LOG(TRACE) << "timed out waiting for write-lock on collection '" << _info.name() << "'";
      return TRI_ERROR_LOCK_TIMEOUT;
    }
  }
}

/// @brief returns information about the collection
/// note: the collection lock must be held when calling this function
TRI_doc_collection_info_t* TRI_collection_t::figures() {
  // prefill with 0's to init counters
  auto info = static_cast<TRI_doc_collection_info_t*>(TRI_Allocate(
      TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_doc_collection_info_t), true));

  if (info == nullptr) {
    return nullptr;
  }

  DatafileStatisticsContainer dfi = _datafileStatistics.all();
  info->_numberAlive += static_cast<TRI_voc_ssize_t>(dfi.numberAlive);
  info->_numberDead += static_cast<TRI_voc_ssize_t>(dfi.numberDead);
  info->_numberDeletions += static_cast<TRI_voc_ssize_t>(dfi.numberDeletions);

  info->_sizeAlive += dfi.sizeAlive;
  info->_sizeDead += dfi.sizeDead;

  // add the file sizes for datafiles and journals
  TRI_collection_t* base = this;

  for (auto& df : base->_datafiles) {
    info->_datafileSize += (int64_t)df->_initSize;
    ++info->_numberDatafiles;
  }

  for (auto& df : base->_journals) {
    info->_journalfileSize += (int64_t)df->_initSize;
    ++info->_numberJournalfiles;
  }

  for (auto& df : base->_compactors) {
    info->_compactorfileSize += (int64_t)df->_initSize;
    ++info->_numberCompactorfiles;
  }

  // add index information
  info->_numberIndexes = 0;
  info->_sizeIndexes = 0;

  info->_sizeIndexes += static_cast<int64_t>(_masterPointers.memory());

  for (auto const& idx : allIndexes()) {
    info->_sizeIndexes += idx->memory();
    info->_numberIndexes++;
  }

  info->_uncollectedLogfileEntries = _uncollectedLogfileEntries;
  info->_tickMax = _tickMax;

  info->_numberDocumentDitches = _ditches.numDocumentDitches();
  info->_waitingForDitch = _ditches.head();

  // fills in compaction status
  getCompactionStatus(info->_lastCompactionStatus,
                      &info->_lastCompactionStamp[0],
                      sizeof(info->_lastCompactionStamp));

  return info;
}

/// @brief add an index to the collection
/// note: this may throw. it's the caller's responsibility to catch and clean up
void TRI_collection_t::addIndex(arangodb::Index* idx) {
  _indexes.emplace_back(idx);

  // update statistics
  if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
    ++_cleanupIndexes;
  }
  if (idx->isPersistent()) {
    ++_persistentIndexes;
  }
}

/// @brief removes an index by id, provided it can be dropped
arangodb::Index* TRI_collection_t::removeIndex(TRI_idx_iid_t iid) {
  size_t const n = _indexes.size();

  for (size_t i = 0; i < n; ++i) {
    arangodb::Index* idx = _indexes[i];

    if (!idx->canBeDropped()) {
      continue;
    }

    if (idx->id() == iid) {
      // found!
      idx->drop();

      _indexes.erase(_indexes.begin() + i);

      // update statistics
      if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
        --_cleanupIndexes;
      }
      if (idx->isPersistent()) {
        --_persistentIndexes;
      }

      return idx;
    }
  }

  // not found
  return nullptr;
}

/// @brief get all indexes of the collection
std::vector<arangodb::Index*> const& TRI_collection_t::allIndexes() const {
  return _indexes;
}

/// @brief return the primary index
arangodb::PrimaryIndex* TRI_collection_t::primaryIndex() {
  TRI_ASSERT(!_indexes.empty());
  // the primary index must be the index at position #0
  return static_cast<arangodb::PrimaryIndex*>(_indexes[0]);
}

/// @brief return the collection's edge index, if it exists
arangodb::EdgeIndex* TRI_collection_t::edgeIndex() {
  if (_indexes.size() >= 2 &&
      _indexes[1]->type() == arangodb::Index::TRI_IDX_TYPE_EDGE_INDEX) {
    // edge index must be the index at position #1
    return static_cast<arangodb::EdgeIndex*>(_indexes[1]);
  }

  return nullptr;
}

/// @brief get an index by id
arangodb::Index* TRI_collection_t::lookupIndex(TRI_idx_iid_t iid) const {
  for (auto const& it : _indexes) {
    if (it->id() == iid) {
      return it;
    }
  }
  return nullptr;
}

/// @brief checks if a collection name is allowed
/// Returns true if the name is allowed and false otherwise
bool TRI_collection_t::IsAllowedName(bool allowSystem, std::string const& name) {
  bool ok;
  char const* ptr;
  size_t length = 0;

  // check allowed characters: the name must start with a letter, or with an
  // underscore if system names are allowed
  for (ptr = name.c_str(); *ptr; ++ptr) {
    if (length == 0) {
      if (allowSystem) {
        ok = (*ptr == '_') || ('a' <= *ptr && *ptr <= 'z') ||
             ('A' <= *ptr && *ptr <= 'Z');
      } else {
        ok = ('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z');
      }
    } else {
      ok = (*ptr == '_') || (*ptr == '-') || ('0' <= *ptr && *ptr <= '9') ||
           ('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z');
    }

    if (!ok) {
      return false;
    }

    ++length;
  }

  // invalid name length
  if (length == 0 || length > TRI_COL_NAME_LENGTH) {
    return false;
  }

  return true;
}

std::string TRI_collection_t::label() const {
  return _vocbase->name() + " / " + _info.name();
}

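// Examples for IsAllowedName, derived from the checks above:
//   IsAllowedName(false, "users")   -> true
//   IsAllowedName(false, "_users")  -> false  (leading '_' requires allowSystem)
//   IsAllowedName(true,  "_users")  -> true
//   IsAllowedName(false, "1users")  -> false  (must not start with a digit)
//   IsAllowedName(false, "a-b_c9")  -> true   (digits, '-' and '_' are allowed
//                                              after the first character)
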
/// @brief updates the parameter info block
int TRI_collection_t::updateCollectionInfo(TRI_vocbase_t* vocbase,
                                           VPackSlice const& slice,
                                           bool doSync) {
  WRITE_LOCKER(writeLocker, _infoLock);

  if (!slice.isNone()) {
    try {
      _info.update(slice, false, vocbase);
    } catch (...) {
      return TRI_ERROR_INTERNAL;
    }
  }

  return _info.saveToFile(path(), doSync);
}

/// @brief seal a datafile
int TRI_collection_t::sealDatafile(TRI_datafile_t* datafile, bool isCompactor) {
  int res = TRI_SealDatafile(datafile);

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "failed to seal journal '" << datafile->getName(datafile)
             << "': " << TRI_last_error();
  } else if (!isCompactor && datafile->isPhysical(datafile)) {
    // rename the file
    std::string dname("datafile-" + std::to_string(datafile->_fid) + ".db");
    std::string filename = arangodb::basics::FileUtils::buildFilename(path(), dname);

    bool ok = TRI_RenameDatafile(datafile, filename.c_str());

    if (ok) {
      LOG(TRACE) << "closed file '" << datafile->getName(datafile) << "'";
    } else {
      LOG(ERR) << "failed to rename datafile '" << datafile->getName(datafile)
               << "' to '" << filename << "': " << TRI_last_error();
      res = TRI_ERROR_INTERNAL;
    }
  }

  return res;
}

/// @brief rotate the active journal - will do nothing if there is no journal
int TRI_collection_t::rotateActiveJournal() {
  WRITE_LOCKER(writeLocker, _filesLock);

  // note: only journals need to be handled here as the journal is the
  // only place that's ever written to. if a journal is full, it will have been
  // sealed and synced already
  if (_journals.empty()) {
    return TRI_ERROR_ARANGO_NO_JOURNAL;
  }

  TRI_datafile_t* datafile = _journals[0];
  TRI_ASSERT(datafile != nullptr);

  if (_state != TRI_COL_STATE_WRITE) {
    return TRI_ERROR_ARANGO_NO_JOURNAL;
  }

  // make sure we have enough room in the target vector before we go on
  _datafiles.reserve(_datafiles.size() + 1);

  int res = sealDatafile(datafile, false);

  TRI_ASSERT(!_journals.empty());
  _journals.erase(_journals.begin());
  TRI_ASSERT(_journals.empty());

  // shouldn't throw as we reserved enough space before
  _datafiles.emplace_back(datafile);

  return res;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief sync the active journal - will do nothing if there is no journal
/// or if the journal is volatile
////////////////////////////////////////////////////////////////////////////////

int TRI_collection_t::syncActiveJournal() {
  WRITE_LOCKER(writeLocker, _filesLock);

  // note: only journals need to be handled here as the journal is the
  // only place that's ever written to. if a journal is full, it will have been
  // sealed and synced already
  if (_journals.empty()) {
    // nothing to do
    return TRI_ERROR_NO_ERROR;
  }

  TRI_datafile_t* datafile = _journals[0];
  TRI_ASSERT(datafile != nullptr);

  int res = TRI_ERROR_NO_ERROR;

  // we only need to care about physical datafiles
  // anonymous regions do not need to be synced
  if (datafile->isPhysical(datafile)) {
    char const* synced = datafile->_synced;
    char* written = datafile->_written;

    if (synced < written) {
      bool ok = datafile->sync(datafile, synced, written);

      if (ok) {
        LOG_TOPIC(TRACE, Logger::COLLECTOR) << "msync succeeded "
                                            << (void*)synced << ", size "
                                            << (written - synced);
        datafile->_synced = written;
      } else {
        res = TRI_errno();
        if (res == TRI_ERROR_NO_ERROR) {
          // oops, error code got lost
          res = TRI_ERROR_INTERNAL;
        }

        LOG_TOPIC(ERR, Logger::COLLECTOR)
            << "msync failed with: " << TRI_last_error();
        datafile->_state = TRI_DF_STATE_WRITE_ERROR;
      }
    }
  }

  return res;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief reserve space in the current journal. if no journal exists or the
/// current journal cannot provide enough space, close the old journal and
/// create a new one
////////////////////////////////////////////////////////////////////////////////

int TRI_collection_t::reserveJournalSpace(TRI_voc_tick_t tick,
                                          TRI_voc_size_t size,
                                          char*& resultPosition,
                                          TRI_datafile_t*& resultDatafile) {
  // reset results
  resultPosition = nullptr;
  resultDatafile = nullptr;

  WRITE_LOCKER(writeLocker, _filesLock);

  // start with configured journal size
  TRI_voc_size_t targetSize = _info.maximalSize();

  // make sure that the document fits
  while (targetSize - 256 < size) {
    targetSize *= 2;
  }

  while (_state == TRI_COL_STATE_WRITE) {
    TRI_datafile_t* datafile = nullptr;

    if (_journals.empty()) {
      // create enough room in the journals vector
      _journals.reserve(_journals.size() + 1);

      datafile = createDatafile(tick, targetSize, false);

      if (datafile == nullptr) {
        int res = TRI_errno();
        // could not create a datafile, this is a serious error

        if (res == TRI_ERROR_NO_ERROR) {
          // oops, error code got lost
          res = TRI_ERROR_INTERNAL;
        }

        return res;
      }

      // shouldn't throw as we reserved enough space before
      _journals.emplace_back(datafile);
    } else {
      // select datafile
      datafile = _journals[0];
    }

    TRI_ASSERT(datafile != nullptr);

    // try to reserve space in the datafile

    TRI_df_marker_t* position = nullptr;
    int res = TRI_ReserveElementDatafile(datafile, size, &position, targetSize);

    // found a datafile with enough space left
    if (res == TRI_ERROR_NO_ERROR) {
      datafile->_written = ((char*)position) + size;
      // set result
      resultPosition = reinterpret_cast<char*>(position);
      resultDatafile = datafile;
      return TRI_ERROR_NO_ERROR;
    }

    if (res != TRI_ERROR_ARANGO_DATAFILE_FULL) {
      // some other error
      LOG_TOPIC(ERR, Logger::COLLECTOR) << "cannot select journal: '"
                                        << TRI_last_error() << "'";
      return res;
    }

    // TRI_ERROR_ARANGO_DATAFILE_FULL...
    // journal is full, close it and sync
    LOG_TOPIC(DEBUG, Logger::COLLECTOR) << "closing full journal '"
                                        << datafile->getName(datafile) << "'";

    // make sure we have enough room in the target vector before we go on
    _datafiles.reserve(_datafiles.size() + 1);

    res = sealDatafile(datafile, false);

    // move journal into datafiles vector, regardless of whether an error
    // occurred
    TRI_ASSERT(!_journals.empty());
    _journals.erase(_journals.begin());
    TRI_ASSERT(_journals.empty());
    // this shouldn't fail, as we have reserved space before already
    _datafiles.emplace_back(datafile);

    if (res != TRI_ERROR_NO_ERROR) {
      // an error occurred, we must stop here
      return res;
    }
  }  // otherwise, next iteration!

  return TRI_ERROR_ARANGO_NO_JOURNAL;
}

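// Example of the target-size computation above: with a configured journal
// size of 32 MB and a document needing 40 MB, targetSize doubles once, from
// 32 MB to 64 MB, the first value for which targetSize - 256 >= size holds,
// so the newly created journal is large enough for the document plus the
// 256-byte headroom.
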
////////////////////////////////////////////////////////////////////////////////
/// @brief create compactor file
////////////////////////////////////////////////////////////////////////////////

TRI_datafile_t* TRI_collection_t::createCompactor(TRI_voc_fid_t fid,
                                                  TRI_voc_size_t maximalSize) {
  try {
    WRITE_LOCKER(writeLocker, _filesLock);

    TRI_ASSERT(_compactors.empty());
    // reserve enough space for the later addition
    _compactors.reserve(_compactors.size() + 1);

    TRI_datafile_t* compactor =
        createDatafile(fid, static_cast<TRI_voc_size_t>(maximalSize), true);

    if (compactor != nullptr) {
      // should not throw, as we've reserved enough space before
      _compactors.emplace_back(compactor);
    }
    return compactor;
  } catch (...) {
    return nullptr;
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief close an existing compactor
////////////////////////////////////////////////////////////////////////////////

int TRI_collection_t::closeCompactor(TRI_datafile_t* datafile) {
  WRITE_LOCKER(writeLocker, _filesLock);

  if (_compactors.size() != 1) {
    return TRI_ERROR_ARANGO_NO_JOURNAL;
  }

  TRI_datafile_t* compactor = _compactors[0];

  if (datafile != compactor) {
    // wrong compactor file specified... should not happen
    return TRI_ERROR_INTERNAL;
  }

  return sealDatafile(datafile, true);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief replace a datafile with a compactor
////////////////////////////////////////////////////////////////////////////////

int TRI_collection_t::replaceDatafileWithCompactor(TRI_datafile_t* datafile,
                                                   TRI_datafile_t* compactor) {
  TRI_ASSERT(datafile != nullptr);
  TRI_ASSERT(compactor != nullptr);

  WRITE_LOCKER(writeLocker, _filesLock);

  TRI_ASSERT(!_compactors.empty());

  for (size_t i = 0; i < _datafiles.size(); ++i) {
    if (_datafiles[i]->_fid == datafile->_fid) {
      // found!
      // now put the compactor in place of the datafile
      _datafiles[i] = compactor;

      // remove the compactor file from the list of compactors
      TRI_ASSERT(_compactors[0] != nullptr);
      TRI_ASSERT(_compactors[0]->_fid == compactor->_fid);

      _compactors.erase(_compactors.begin());
      TRI_ASSERT(_compactors.empty());

      return TRI_ERROR_NO_ERROR;
    }
  }

  return TRI_ERROR_INTERNAL;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a datafile
////////////////////////////////////////////////////////////////////////////////

TRI_datafile_t* TRI_collection_t::createDatafile(TRI_voc_fid_t fid,
                                                 TRI_voc_size_t journalSize,
                                                 bool isCompactor) {
  TRI_ASSERT(fid > 0);
  TRI_collection_t* document =
      static_cast<TRI_collection_t*>(this);
  TRI_ASSERT(document != nullptr);

  // create an entry for the new datafile
  try {
    document->_datafileStatistics.create(fid);
  } catch (...) {
    EnsureErrorCode(TRI_ERROR_OUT_OF_MEMORY);
    return nullptr;
  }

  TRI_datafile_t* datafile;

  if (document->_info.isVolatile()) {
    // in-memory collection
    datafile = TRI_CreateDatafile(nullptr, fid, journalSize, true);
  } else {
    // construct a suitable filename (which may be temporary at the beginning)
    std::string jname;
    if (isCompactor) {
      jname = "compaction-";
    } else {
      jname = "temp-";
    }

    jname.append(std::to_string(fid) + ".db");
    std::string filename = arangodb::basics::FileUtils::buildFilename(path(), jname);

    TRI_IF_FAILURE("CreateJournalDocumentCollection") {
      // simulate disk full
      document->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL);

      EnsureErrorCode(TRI_ERROR_ARANGO_FILESYSTEM_FULL);

      return nullptr;
    }

    // remove an existing temporary file first
    if (TRI_ExistsFile(filename.c_str())) {
      // remove an existing file first
      TRI_UnlinkFile(filename.c_str());
    }

    datafile = TRI_CreateDatafile(filename.c_str(), fid, journalSize, true);
  }

  if (datafile == nullptr) {
    if (TRI_errno() == TRI_ERROR_OUT_OF_MEMORY_MMAP) {
      document->_lastError = TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY_MMAP);
    } else {
      document->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_NO_JOURNAL);
    }

    EnsureErrorCode(document->_lastError);

    return nullptr;
  }

  // datafile is there now
  TRI_ASSERT(datafile != nullptr);

  if (isCompactor) {
    LOG(TRACE) << "created new compactor '" << datafile->getName(datafile)
               << "'";
  } else {
    LOG(TRACE) << "created new journal '" << datafile->getName(datafile) << "'";
  }

  // create a collection header, still in the temporary file
  TRI_df_marker_t* position;
  int res = TRI_ReserveElementDatafile(
      datafile, sizeof(TRI_col_header_marker_t), &position, journalSize);

  TRI_IF_FAILURE("CreateJournalDocumentCollectionReserve1") {
    res = TRI_ERROR_DEBUG;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    document->_lastError = datafile->_lastError;
    LOG(ERR) << "cannot create collection header in file '"
             << datafile->getName(datafile) << "': " << TRI_errno_string(res);

    // close the journal and remove it
    TRI_CloseDatafile(datafile);
    TRI_UnlinkFile(datafile->getName(datafile));
    TRI_FreeDatafile(datafile);

    EnsureErrorCode(res);

    return nullptr;
  }

  TRI_col_header_marker_t cm;
  DatafileHelper::InitMarker(
      reinterpret_cast<TRI_df_marker_t*>(&cm), TRI_DF_MARKER_COL_HEADER,
      sizeof(TRI_col_header_marker_t), static_cast<TRI_voc_tick_t>(fid));
  cm._cid = document->_info.id();

  res = TRI_WriteCrcElementDatafile(datafile, position, &cm.base, false);

  TRI_IF_FAILURE("CreateJournalDocumentCollectionReserve2") {
    res = TRI_ERROR_DEBUG;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    document->_lastError = datafile->_lastError;
    LOG(ERR) << "cannot create collection header in file '"
             << datafile->getName(datafile) << "': " << TRI_last_error();

    // close the datafile and remove it
    TRI_CloseDatafile(datafile);
    TRI_UnlinkFile(datafile->getName(datafile));
    TRI_FreeDatafile(datafile);

    EnsureErrorCode(document->_lastError);

    return nullptr;
  }

  TRI_ASSERT(fid == datafile->_fid);

  // if a physical file, we can rename it from the temporary name to the correct
  // name
  if (!isCompactor && datafile->isPhysical(datafile)) {
    // and use the correct name
    std::string jname("journal-" + std::to_string(datafile->_fid) + ".db");
    std::string filename = arangodb::basics::FileUtils::buildFilename(path(), jname);

    bool ok = TRI_RenameDatafile(datafile, filename.c_str());

    if (!ok) {
      LOG(ERR) << "failed to rename journal '" << datafile->getName(datafile)
               << "' to '" << filename << "': " << TRI_last_error();

      TRI_CloseDatafile(datafile);
      TRI_UnlinkFile(datafile->getName(datafile));
      TRI_FreeDatafile(datafile);

      EnsureErrorCode(document->_lastError);

      return nullptr;
    } else {
      LOG(TRACE) << "renamed journal from '" << datafile->getName(datafile)
                 << "' to '" << filename << "'";
    }
  }

  return datafile;
}

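// Datafile naming lifecycle, as implemented above and in sealDatafile():
//   temp-<fid>.db       while the file is created and the header is written
//   journal-<fid>.db    after the successful rename; actively written to
//   datafile-<fid>.db   after the journal is sealed and rotated
// Compactor files are created as compaction-<fid>.db instead and are not
// renamed here; on startup, CheckCollection() renames any finished
// compaction-<fid>.db back to datafile-<fid>.db.
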
//////////////////////////////////////////////////////////////////////////////
/// @brief remove a compactor file from the list of compactors
//////////////////////////////////////////////////////////////////////////////

bool TRI_collection_t::removeCompactor(TRI_datafile_t* df) {
  WRITE_LOCKER(writeLocker, _filesLock);

  for (auto it = _compactors.begin(); it != _compactors.end(); ++it) {
    if ((*it) == df) {
      // and finally remove the file from the _compactors vector
      _compactors.erase(it);
      return true;
    }
  }

  // not found
  return false;
}

//////////////////////////////////////////////////////////////////////////////
/// @brief remove a datafile from the list of datafiles
//////////////////////////////////////////////////////////////////////////////

bool TRI_collection_t::removeDatafile(TRI_datafile_t* df) {
  WRITE_LOCKER(writeLocker, _filesLock);

  for (auto it = _datafiles.begin(); it != _datafiles.end(); ++it) {
    if ((*it) == df) {
      // and finally remove the file from the _datafiles vector
      _datafiles.erase(it);
      return true;
    }
  }

  // not found
  return false;
}

void TRI_collection_t::addIndexFile(std::string const& filename) {
  WRITE_LOCKER(writeLocker, _filesLock);
  _indexFiles.emplace_back(filename);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief removes an index file from the _indexFiles vector
////////////////////////////////////////////////////////////////////////////////

bool TRI_collection_t::removeIndexFileFromVector(TRI_idx_iid_t id) {
  // acquire an exclusive lock, as the _indexFiles vector is modified below
  WRITE_LOCKER(writeLocker, _filesLock);

  for (auto it = _indexFiles.begin(); it != _indexFiles.end(); ++it) {
    if (GetNumericFilenamePart((*it).c_str()) == id) {
      // found
      _indexFiles.erase(it);
      return true;
    }
  }

  return false;
}

/// @brief enumerate all indexes of the collection, but don't fill them yet
int TRI_collection_t::detectIndexes(arangodb::Transaction* trx) {
  OpenIndexIteratorContext ctx;
  ctx.trx = trx;
  ctx.collection = this;

  iterateIndexes(OpenIndexIterator, static_cast<void*>(&ctx));

  return TRI_ERROR_NO_ERROR;
}

/// @brief iterates over all index files of a collection
void TRI_collection_t::iterateIndexes(
    std::function<bool(std::string const&, void*)> const& callback,
    void* data) {
  // iterate over all index files
  for (auto const& filename : _indexFiles) {
    bool ok = callback(filename, data);

    if (!ok) {
      LOG(ERR) << "cannot load index '" << filename << "' for collection '"
               << _info.name() << "'";
    }
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief compare two datafiles, based on the numeric part contained in
/// the filename
////////////////////////////////////////////////////////////////////////////////

static bool DatafileComparator(TRI_datafile_t const* lhs,
                               TRI_datafile_t const* rhs) {
  uint64_t const numLeft =
      (lhs->_filename != nullptr ? GetNumericFilenamePart(lhs->_filename) : 0);
  uint64_t const numRight =
      (rhs->_filename != nullptr ? GetNumericFilenamePart(rhs->_filename) : 0);

  if (numLeft != numRight) {
    return numLeft < numRight;
  }
  return false;
}

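// Example: "datafile-100.db" orders before "datafile-250.db" (100 < 250).
// Files without an extractable numeric part compare as 0 and sort first;
// ties compare as "not less", which gives std::sort a valid strict weak
// ordering.
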
////////////////////////////////////////////////////////////////////////////////
/// @brief checks a collection
////////////////////////////////////////////////////////////////////////////////

static bool CheckCollection(TRI_collection_t* collection, bool ignoreErrors) {
  LOG_TOPIC(TRACE, Logger::DATAFILES) << "check collection directory '"
                                      << collection->path() << "'";

  std::vector<TRI_datafile_t*> all;
  std::vector<TRI_datafile_t*> compactors;
  std::vector<TRI_datafile_t*> datafiles;
  std::vector<TRI_datafile_t*> journals;
  std::vector<TRI_datafile_t*> sealed;
  bool stop = false;

  // check files within the directory
  std::vector<std::string> files = TRI_FilesDirectory(collection->path().c_str());

  for (auto const& file : files) {
    std::vector<std::string> parts = StringUtils::split(file, '.');

    if (parts.size() < 2 || parts.size() > 3 || parts[0].empty()) {
      LOG_TOPIC(DEBUG, Logger::DATAFILES)
          << "ignoring file '" << file
          << "' because it does not look like a datafile";
      continue;
    }

    std::string extension = parts[1];
    std::string isDead = (parts.size() > 2) ? parts[2] : "";

    std::vector<std::string> next = StringUtils::split(parts[0], "-");

    if (next.size() < 2) {
      LOG_TOPIC(DEBUG, Logger::DATAFILES)
          << "ignoring file '" << file
          << "' because it does not look like a datafile";
      continue;
    }

    std::string filename =
        FileUtils::buildFilename(collection->path(), file);
    std::string filetype = next[0];
    next.erase(next.begin());
    std::string qualifier = StringUtils::join(next, '-');

    // .............................................................................
    // file is dead
    // .............................................................................

    if (!isDead.empty() || filetype == "temp") {
      if (isDead == "dead" || filetype == "temp") {
        LOG_TOPIC(TRACE, Logger::DATAFILES)
            << "found temporary file '" << filename
            << "', which is probably a left-over. deleting it";
        FileUtils::remove(filename);
        continue;
      } else {
        LOG_TOPIC(DEBUG, Logger::DATAFILES)
            << "ignoring file '" << file
            << "' because it does not look like a datafile";
        continue;
      }
    }

    // .............................................................................
    // file is an index, just store the filename
    // .............................................................................

    if (filetype == "index" && extension == "json") {
      collection->addIndexFile(filename);
      continue;
    }

    // .............................................................................
    // file is a journal or datafile, open the datafile
    // .............................................................................

    if (extension == "db") {
      // found a compaction file. now rename it back
      if (filetype == "compaction") {
        std::string relName = "datafile-" + qualifier + "." + extension;
        std::string newName =
            FileUtils::buildFilename(collection->path(), relName);

        if (FileUtils::exists(newName)) {
          // we have a compaction-xxxx and a datafile-xxxx file. we'll keep
          // the datafile
          FileUtils::remove(filename);

          LOG_TOPIC(WARN, Logger::DATAFILES)
              << "removing unfinished compaction file '" << filename << "'";
          continue;
        } else {
          // this should fail, but shouldn't do any harm either...
          FileUtils::remove(newName);

          int res = TRI_RenameFile(filename.c_str(), newName.c_str());

          if (res != TRI_ERROR_NO_ERROR) {
            LOG_TOPIC(ERR, Logger::DATAFILES)
                << "unable to rename compaction file '" << filename << "' to '"
                << newName << "'";
            stop = true;
            break;
          }
        }

        // reuse newName
        filename = newName;
      }

      TRI_datafile_t* datafile =
          TRI_OpenDatafile(filename.c_str(), ignoreErrors);

      if (datafile == nullptr) {
        collection->_lastError = TRI_errno();
        LOG_TOPIC(ERR, Logger::DATAFILES) << "cannot open datafile '"
                                          << filename
                                          << "': " << TRI_last_error();

        stop = true;
        break;
      }

      all.emplace_back(datafile);

      // check the document header
      char const* ptr = datafile->_data;

      // skip the datafile header
      ptr +=
          DatafileHelper::AlignedSize<size_t>(sizeof(TRI_df_header_marker_t));
      TRI_col_header_marker_t const* cm =
          reinterpret_cast<TRI_col_header_marker_t const*>(ptr);

      if (cm->base.getType() != TRI_DF_MARKER_COL_HEADER) {
        LOG(ERR) << "collection header mismatch in file '" << filename
                 << "', expected TRI_DF_MARKER_COL_HEADER, found "
                 << cm->base.getType();

        stop = true;
        break;
      }

      if (cm->_cid != collection->_info.id()) {
        LOG(ERR) << "collection identifier mismatch, expected "
                 << collection->_info.id() << ", found " << cm->_cid;

        stop = true;
        break;
      }

      // file is a journal
      if (filetype == "journal") {
        if (datafile->_isSealed) {
          if (datafile->_state != TRI_DF_STATE_READ) {
            LOG_TOPIC(WARN, Logger::DATAFILES)
                << "strange, journal '" << filename
                << "' is already sealed; must be a left over; will use "
                   "it as datafile";
          }

          sealed.emplace_back(datafile);
        } else {
          journals.emplace_back(datafile);
        }
      }

      // file is a compactor
      else if (filetype == "compactor") {
        // ignore
      }

      // file is a datafile (or was a compaction file)
      else if (filetype == "datafile" || filetype == "compaction") {
        if (!datafile->_isSealed) {
          LOG_TOPIC(ERR, Logger::DATAFILES)
              << "datafile '" << filename
              << "' is not sealed, this should never happen";

          collection->_lastError =
              TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE);

          stop = true;
          break;
        } else {
          datafiles.emplace_back(datafile);
        }
      }

      else {
        LOG_TOPIC(ERR, Logger::DATAFILES) << "unknown datafile '" << file
                                          << "'";
      }
    } else {
      LOG_TOPIC(ERR, Logger::DATAFILES) << "unknown datafile '" << file << "'";
    }
  }

  // convert the sealed journals into datafiles
  if (!stop) {
    for (auto& datafile : sealed) {
      std::string dname("datafile-" + std::to_string(datafile->_fid) + ".db");
      std::string filename = arangodb::basics::FileUtils::buildFilename(collection->path(), dname);

      bool ok = TRI_RenameDatafile(datafile, filename.c_str());

      if (ok) {
        datafiles.emplace_back(datafile);
        LOG(DEBUG) << "renamed sealed journal to '" << filename << "'";
      } else {
        collection->_lastError = datafile->_lastError;
        stop = true;
        LOG(ERR) << "cannot rename sealed log-file to " << filename
                 << ", this should not happen: " << TRI_last_error();
        break;
      }
    }
  }

  // stop if necessary
  if (stop) {
    for (auto& datafile : all) {
      LOG(TRACE) << "closing datafile '" << datafile->_filename << "'";

      TRI_CloseDatafile(datafile);
      TRI_FreeDatafile(datafile);
    }

    return false;
  }

  // sort the datafiles.
  // this allows us to iterate them in the correct order
  std::sort(datafiles.begin(), datafiles.end(), DatafileComparator);
  std::sort(journals.begin(), journals.end(), DatafileComparator);
  std::sort(compactors.begin(), compactors.end(), DatafileComparator);

  // add the datafiles and journals
  collection->_datafiles = datafiles;
  collection->_journals = journals;
  collection->_compactors = compactors;

  return true;
}

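// Files recognized by CheckCollection() in a collection directory:
//   index-<iid>.json       index definition, remembered via addIndexFile()
//   journal-<fid>.db       current write journal (sealed ones become datafiles)
//   datafile-<fid>.db      sealed, read-only datafile
//   compaction-<fid>.db    unfinished compaction output, renamed or removed
//   temp-* and *.dead      left-overs, deleted on sight
// Anything else is ignored or reported as an unknown datafile.
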
/// @brief iterate over all datafiles in a vector
bool TRI_collection_t::iterateDatafilesVector(std::vector<TRI_datafile_t*> const& files,
                                              std::function<bool(TRI_df_marker_t const*, TRI_datafile_t*)> const& cb) {
  for (auto const& datafile : files) {
    if (!TRI_IterateDatafile(datafile, cb)) {
      return false;
    }

    if (datafile->isPhysical(datafile) && datafile->_isSealed) {
      TRI_MMFileAdvise(datafile->_data, datafile->_maximalSize,
                       TRI_MADVISE_RANDOM);
    }
  }

  return true;
}

/// @brief closes the datafiles passed in the vector
bool TRI_collection_t::closeDataFiles(std::vector<TRI_datafile_t*> const& files) {
  bool result = true;

  for (auto const& datafile : files) {
    TRI_ASSERT(datafile != nullptr);
    if (datafile->_state == TRI_DF_STATE_CLOSED) {
      continue;
    }
    result &= TRI_CloseDatafile(datafile);
  }

  return result;
}

VocbaseCollectionInfo::VocbaseCollectionInfo(CollectionInfo const& other)
    : _version(TRI_COL_VERSION),
      _type(other.type()),
      _revision(0),  // not known in the cluster case on the coordinator
      _cid(other.id()),  // this is on the coordinator and describes a
                         // cluster-wide collection, for safety reasons,
                         // we also set _cid
      _planId(other.id()),
      _maximalSize(other.journalSize()),
      _initialCount(-1),
      _indexBuckets(other.indexBuckets()),
      _keyOptions(nullptr),
      _isSystem(other.isSystem()),
      _deleted(other.deleted()),
      _doCompact(other.doCompact()),
      _isVolatile(other.isVolatile()),
      _waitForSync(other.waitForSync()) {
  std::string const name = other.name();
  memset(_name, 0, sizeof(_name));
  memcpy(_name, name.c_str(), name.size());

  VPackSlice keyOptionsSlice(other.keyOptions());

  if (!keyOptionsSlice.isNone()) {
    VPackBuilder builder;
    builder.add(keyOptionsSlice);
    _keyOptions = builder.steal();
  }
}

VocbaseCollectionInfo::VocbaseCollectionInfo(TRI_vocbase_t* vocbase,
                                             std::string const& name,
                                             TRI_col_type_e type,
                                             TRI_voc_size_t maximalSize,
                                             VPackSlice const& keyOptions)
    : _version(TRI_COL_VERSION),
      _type(type),
      _revision(0),
      _cid(0),
      _planId(0),
      _maximalSize(32 * 1024 * 1024),  // just to have a default
      _initialCount(-1),
      _indexBuckets(DatabaseFeature::DefaultIndexBuckets),
      _keyOptions(nullptr),
      _isSystem(false),
      _deleted(false),
      _doCompact(true),
      _isVolatile(false),
      _waitForSync(false) {
  auto database = application_features::ApplicationServer::getFeature<DatabaseFeature>("Database");
  _maximalSize = database->maximalJournalSize();
  _waitForSync = database->waitForSync();

  size_t pageSize = PageSizeFeature::getPageSize();
  _maximalSize =
      static_cast<TRI_voc_size_t>((maximalSize / pageSize) * pageSize);
  if (_maximalSize == 0 && maximalSize != 0) {
    _maximalSize = static_cast<TRI_voc_size_t>(pageSize);
  }
  memset(_name, 0, sizeof(_name));
  TRI_CopyString(_name, name.c_str(), sizeof(_name) - 1);

  if (!keyOptions.isNone()) {
    VPackBuilder builder;
    builder.add(keyOptions);
    _keyOptions = builder.steal();
  }
}

VocbaseCollectionInfo::VocbaseCollectionInfo(TRI_vocbase_t* vocbase,
                                             std::string const& name,
                                             VPackSlice const& options,
                                             bool forceIsSystem)
    : VocbaseCollectionInfo(vocbase, name, TRI_COL_TYPE_DOCUMENT, options,
                            forceIsSystem) {}

VocbaseCollectionInfo::VocbaseCollectionInfo(TRI_vocbase_t* vocbase,
|
|
std::string const& name,
|
|
TRI_col_type_e type,
|
|
VPackSlice const& options,
|
|
bool forceIsSystem)
|
|
: _version(TRI_COL_VERSION),
|
|
_type(type),
|
|
_revision(0),
|
|
_cid(0),
|
|
_planId(0),
|
|
_maximalSize(32 * 1024 * 1024), // just to have a default
|
|
_initialCount(-1),
|
|
_indexBuckets(DatabaseFeature::DefaultIndexBuckets),
|
|
_keyOptions(nullptr),
|
|
      _isSystem(false),
      _deleted(false),
      _doCompact(true),
      _isVolatile(false),
      _waitForSync(false) {
  auto database = application_features::ApplicationServer::getFeature<DatabaseFeature>("Database");
  _maximalSize = database->maximalJournalSize();
  _waitForSync = database->waitForSync();

  memset(_name, 0, sizeof(_name));
  TRI_CopyString(_name, name.c_str(), sizeof(_name) - 1);

  if (options.isObject()) {
    TRI_voc_size_t maximalSize;
    if (options.hasKey("journalSize")) {
      maximalSize =
          arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
              options, "journalSize", _maximalSize);
    } else {
      maximalSize =
          arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
              options, "maximalSize", _maximalSize);
    }

    size_t pageSize = PageSizeFeature::getPageSize();
    _maximalSize =
        static_cast<TRI_voc_size_t>((maximalSize / pageSize) * pageSize);
    if (_maximalSize == 0 && maximalSize != 0) {
      _maximalSize = static_cast<TRI_voc_size_t>(pageSize);
    }

    if (options.hasKey("count")) {
      _initialCount =
          arangodb::basics::VelocyPackHelper::getNumericValue<int64_t>(
              options, "count", -1);
    }

    _doCompact = arangodb::basics::VelocyPackHelper::getBooleanValue(
        options, "doCompact", true);
    _waitForSync = arangodb::basics::VelocyPackHelper::getBooleanValue(
        options, "waitForSync", _waitForSync);
    _isVolatile = arangodb::basics::VelocyPackHelper::getBooleanValue(
        options, "isVolatile", false);
    _indexBuckets =
        arangodb::basics::VelocyPackHelper::getNumericValue<uint32_t>(
            options, "indexBuckets", DatabaseFeature::DefaultIndexBuckets);
    _type = static_cast<TRI_col_type_e>(
        arangodb::basics::VelocyPackHelper::getNumericValue<size_t>(
            options, "type", _type));

    std::string cname =
        arangodb::basics::VelocyPackHelper::getStringValue(options, "name", "");
    if (!cname.empty()) {
      TRI_CopyString(_name, cname.c_str(), sizeof(_name) - 1);
    }

    std::string cidString =
        arangodb::basics::VelocyPackHelper::getStringValue(options, "cid", "");
    if (!cidString.empty()) {
      // note: this may throw
      _cid = std::stoull(cidString);
    }

    if (options.hasKey("isSystem")) {
      VPackSlice isSystemSlice = options.get("isSystem");
      if (isSystemSlice.isBoolean()) {
        _isSystem = isSystemSlice.getBoolean();
      }
    } else {
      _isSystem = false;
    }

    if (options.hasKey("journalSize")) {
      VPackSlice maxSizeSlice = options.get("journalSize");
      TRI_voc_size_t maximalSize =
          maxSizeSlice.getNumericValue<TRI_voc_size_t>();
      if (maximalSize < TRI_JOURNAL_MINIMAL_SIZE) {
        THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
                                       "journalSize is too small");
      }
    }

    VPackSlice const planIdSlice = options.get("planId");
    TRI_voc_cid_t planId = 0;
    if (planIdSlice.isNumber()) {
      planId = planIdSlice.getNumericValue<TRI_voc_cid_t>();
    } else if (planIdSlice.isString()) {
      std::string tmp = planIdSlice.copyString();
      planId = static_cast<TRI_voc_cid_t>(StringUtils::uint64(tmp));
    }

    if (planId > 0) {
      _planId = planId;
    }

    VPackSlice const cidSlice = options.get("id");
    if (cidSlice.isNumber()) {
      _cid = cidSlice.getNumericValue<TRI_voc_cid_t>();
    } else if (cidSlice.isString()) {
      std::string tmp = cidSlice.copyString();
      _cid = static_cast<TRI_voc_cid_t>(StringUtils::uint64(tmp));
    }

    if (options.hasKey("keyOptions")) {
      VPackSlice const slice = options.get("keyOptions");
      VPackBuilder builder;
      builder.add(slice);
      // Copy the ownership of the options over
      _keyOptions = builder.steal();
    }

    if (options.hasKey("deleted")) {
      VPackSlice const slice = options.get("deleted");
      if (slice.isBoolean()) {
        _deleted = slice.getBoolean();
      }
    }
  }

#ifndef TRI_HAVE_ANONYMOUS_MMAP
  if (_isVolatile) {
    THROW_ARANGO_EXCEPTION_MESSAGE(
        TRI_ERROR_BAD_PARAMETER,
        "volatile collections are not supported on this platform");
  }
#endif

  if (_isVolatile && _waitForSync) {
    // the combination of waitForSync and isVolatile makes no sense
    THROW_ARANGO_EXCEPTION_MESSAGE(
        TRI_ERROR_BAD_PARAMETER,
        "volatile collections do not support the waitForSync option");
  }

  if (_indexBuckets < 1 || _indexBuckets > 1024) {
    THROW_ARANGO_EXCEPTION_MESSAGE(
        TRI_ERROR_BAD_PARAMETER,
        "indexBuckets must be a power of 2 between 1 and 1024");
  }

  if (!TRI_collection_t::IsAllowedName(_isSystem || forceIsSystem, _name)) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME);
  }

  // fix _isSystem value if mis-specified by user
  _isSystem = (*_name == '_');
}

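// Illustrative note (added; not part of the original source): the journal
// size is aligned down to a whole multiple of the OS page size, and a
// non-zero request below one page is bumped back up to a single page.
// For example, assuming a 4096-byte page size:
//
//   requested journalSize 10000 -> (10000 / 4096) * 4096 = 8192
//   requested journalSize  3000 -> aligned result 0, corrected to 4096
//
// Also note that the _indexBuckets check above only enforces the 1..1024
// range; despite the error message, a non-power-of-2 value inside that
// range is not rejected here.
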
VocbaseCollectionInfo VocbaseCollectionInfo::fromFile(
    std::string const& path, TRI_vocbase_t* vocbase,
    std::string const& collectionName, bool versionWarning) {
  // find parameter file
  std::string filename =
      arangodb::basics::FileUtils::buildFilename(path, TRI_VOC_PARAMETER_FILE);

  if (!TRI_ExistsFile(filename.c_str())) {
    filename += ".tmp";  // try file with .tmp extension
    if (!TRI_ExistsFile(filename.c_str())) {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_PARAMETER_FILE);
    }
  }

  std::shared_ptr<VPackBuilder> content =
      arangodb::basics::VelocyPackHelper::velocyPackFromFile(filename);
  VPackSlice slice = content->slice();
  if (!slice.isObject()) {
    LOG(ERR) << "cannot open '" << filename
             << "', collection parameters are not readable";
    THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_PARAMETER_FILE);
  }

  if (filename.substr(filename.size() - 4, 4) == ".tmp") {
    // we got a tmp file. Now try saving the original file
    arangodb::basics::VelocyPackHelper::velocyPackToFile(filename.c_str(),
                                                         slice, true);
  }

  // fiddle "isSystem" value, which is not contained in the JSON file
  bool isSystemValue = false;
  if (slice.hasKey("name")) {
    auto name = slice.get("name").copyString();
    if (!name.empty()) {
      isSystemValue = name[0] == '_';
    }
  }

  VPackBuilder bx;
  bx.openObject();
  bx.add("isSystem", VPackValue(isSystemValue));
  bx.close();
  VPackSlice isSystem = bx.slice();
  VPackBuilder b2 = VPackCollection::merge(slice, isSystem, false);
  slice = b2.slice();

  VocbaseCollectionInfo info(vocbase, collectionName, slice, isSystemValue);

  // warn about wrong version of the collection
  if (versionWarning && info.version() < TRI_COL_VERSION) {
    if (info.name()[0] != '\0') {
      // only warn if the collection version is older than expected, and if
      // it's not a shape collection
      LOG(WARN) << "collection '" << info.name()
                << "' has an old version and needs to be upgraded.";
    }
  }
  return info;
}

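// Illustrative note (added; not part of the original source): the ".tmp"
// fallback above covers a crash between writing a new parameter file and
// renaming it into place. If only "parameter.json.tmp" exists, its contents
// are parsed and immediately written back (with forced sync) under the name
// held in `filename`, so a readable copy of the parameters survives either
// way.
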
// collection version
TRI_col_version_t VocbaseCollectionInfo::version() const { return _version; }

// collection type
TRI_col_type_e VocbaseCollectionInfo::type() const { return _type; }

// local collection identifier
TRI_voc_cid_t VocbaseCollectionInfo::id() const { return _cid; }

// cluster-wide collection identifier
TRI_voc_cid_t VocbaseCollectionInfo::planId() const { return _planId; }

// last revision id written
TRI_voc_rid_t VocbaseCollectionInfo::revision() const { return _revision; }

// maximal size of memory mapped file
TRI_voc_size_t VocbaseCollectionInfo::maximalSize() const {
  return _maximalSize;
}

// initial count, used when loading a collection
int64_t VocbaseCollectionInfo::initialCount() const { return _initialCount; }

// number of buckets used in hash tables for indexes
uint32_t VocbaseCollectionInfo::indexBuckets() const { return _indexBuckets; }

// name of the collection
std::string VocbaseCollectionInfo::name() const { return std::string(_name); }

// options for key creation
std::shared_ptr<arangodb::velocypack::Buffer<uint8_t> const>
VocbaseCollectionInfo::keyOptions() const {
  return _keyOptions;
}

// If true, collection has been deleted
bool VocbaseCollectionInfo::deleted() const { return _deleted; }

// If true, collection will be compacted
bool VocbaseCollectionInfo::doCompact() const { return _doCompact; }

// If true, collection is a system collection
bool VocbaseCollectionInfo::isSystem() const { return _isSystem; }

// If true, collection is memory-only
bool VocbaseCollectionInfo::isVolatile() const { return _isVolatile; }

// If true, writes wait for synchronization (msync) to disk
bool VocbaseCollectionInfo::waitForSync() const { return _waitForSync; }

void VocbaseCollectionInfo::setVersion(TRI_col_version_t version) {
  _version = version;
}

void VocbaseCollectionInfo::rename(std::string const& name) {
  TRI_CopyString(_name, name.c_str(), sizeof(_name) - 1);
}

void VocbaseCollectionInfo::setRevision(TRI_voc_rid_t rid, bool force) {
  if (force || rid > _revision) {
    _revision = rid;
  }
}

void VocbaseCollectionInfo::setCollectionId(TRI_voc_cid_t cid) { _cid = cid; }

void VocbaseCollectionInfo::updateCount(size_t size) { _initialCount = size; }

void VocbaseCollectionInfo::setPlanId(TRI_voc_cid_t planId) {
  _planId = planId;
}

void VocbaseCollectionInfo::setDeleted(bool deleted) { _deleted = deleted; }

void VocbaseCollectionInfo::clearKeyOptions() { _keyOptions.reset(); }

int VocbaseCollectionInfo::saveToFile(std::string const& path,
                                      bool forceSync) const {
  std::string filename =
      basics::FileUtils::buildFilename(path, TRI_VOC_PARAMETER_FILE);

  VPackBuilder builder;
  builder.openObject();
  toVelocyPack(builder);
  builder.close();

  bool ok = VelocyPackHelper::velocyPackToFile(filename.c_str(),
                                               builder.slice(), forceSync);

  if (!ok) {
    int res = TRI_errno();
    // report the error that velocyPackToFile actually set
    LOG(ERR) << "cannot save collection properties file '" << filename
             << "': " << TRI_errno_string(res);
    return res;
  }

  return TRI_ERROR_NO_ERROR;
}

void VocbaseCollectionInfo::update(VPackSlice const& slice, bool preferDefaults,
                                   TRI_vocbase_t const* vocbase) {
  // the following collection properties are intentionally not updated,
  // as updating them would be very complicated:
  // - _cid
  // - _name
  // - _type
  // - _isSystem
  // - _isVolatile
  // ... probably a few others missing here ...

  if (preferDefaults) {
    if (vocbase != nullptr) {
      auto database = application_features::ApplicationServer::getFeature<DatabaseFeature>("Database");

      _doCompact = arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, "doCompact", true);
      _waitForSync = arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, "waitForSync", database->waitForSync());
      if (slice.hasKey("journalSize")) {
        _maximalSize =
            arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
                slice, "journalSize", database->maximalJournalSize());
      } else {
        _maximalSize =
            arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
                slice, "maximalSize", database->maximalJournalSize());
      }
    } else {
      _doCompact = arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, "doCompact", true);
      _waitForSync = arangodb::basics::VelocyPackHelper::getBooleanValue(
          slice, "waitForSync", false);
      if (slice.hasKey("journalSize")) {
        _maximalSize =
            arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
                slice, "journalSize", TRI_JOURNAL_DEFAULT_MAXIMAL_SIZE);
      } else {
        _maximalSize =
            arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
                slice, "maximalSize", TRI_JOURNAL_DEFAULT_MAXIMAL_SIZE);
      }
    }
    _indexBuckets =
        arangodb::basics::VelocyPackHelper::getNumericValue<uint32_t>(
            slice, "indexBuckets", DatabaseFeature::DefaultIndexBuckets);
  } else {
    _doCompact = arangodb::basics::VelocyPackHelper::getBooleanValue(
        slice, "doCompact", _doCompact);
    _waitForSync = arangodb::basics::VelocyPackHelper::getBooleanValue(
        slice, "waitForSync", _waitForSync);
    if (slice.hasKey("journalSize")) {
      _maximalSize =
          arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
              slice, "journalSize", _maximalSize);
    } else {
      _maximalSize =
          arangodb::basics::VelocyPackHelper::getNumericValue<TRI_voc_size_t>(
              slice, "maximalSize", _maximalSize);
    }
    _indexBuckets =
        arangodb::basics::VelocyPackHelper::getNumericValue<uint32_t>(
            slice, "indexBuckets", _indexBuckets);

    _initialCount = arangodb::basics::VelocyPackHelper::getNumericValue<int64_t>(
        slice, "count", _initialCount);
  }
}

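// Illustrative note (added; not part of the original source): with
// preferDefaults == true, attributes missing from the incoming slice fall
// back to server-wide defaults (or hard-coded defaults when no vocbase is
// given); with preferDefaults == false, missing attributes keep the
// collection's current values. E.g. updating with an empty object {} and
// preferDefaults == false leaves doCompact, waitForSync, journalSize,
// indexBuckets and count untouched.
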
void VocbaseCollectionInfo::update(VocbaseCollectionInfo const& other) {
  _version = other.version();
  _type = other.type();
  _cid = other.id();
  _planId = other.planId();
  _revision = other.revision();
  _maximalSize = other.maximalSize();
  _initialCount = other.initialCount();
  _indexBuckets = other.indexBuckets();

  TRI_CopyString(_name, other.name().c_str(), sizeof(_name) - 1);

  _keyOptions = other.keyOptions();

  _deleted = other.deleted();
  _doCompact = other.doCompact();
  _isSystem = other.isSystem();
  _isVolatile = other.isVolatile();
  _waitForSync = other.waitForSync();
}

std::shared_ptr<VPackBuilder> VocbaseCollectionInfo::toVelocyPack() const {
  auto builder = std::make_shared<VPackBuilder>();
  builder->openObject();
  toVelocyPack(*builder);
  builder->close();
  return builder;
}

void VocbaseCollectionInfo::toVelocyPack(VPackBuilder& builder) const {
  TRI_ASSERT(!builder.isClosed());

  std::string cidString = std::to_string(id());
  std::string planIdString = std::to_string(planId());

  builder.add("version", VPackValue(version()));
  builder.add("type", VPackValue(type()));
  builder.add("cid", VPackValue(cidString));

  if (planId() > 0) {
    builder.add("planId", VPackValue(planIdString));
  }

  if (initialCount() >= 0) {
    builder.add("count", VPackValue(initialCount()));
  }
  builder.add("indexBuckets", VPackValue(indexBuckets()));
  builder.add("deleted", VPackValue(deleted()));
  builder.add("doCompact", VPackValue(doCompact()));
  builder.add("maximalSize", VPackValue(maximalSize()));
  builder.add("name", VPackValue(name()));
  builder.add("isVolatile", VPackValue(isVolatile()));
  builder.add("waitForSync", VPackValue(waitForSync()));
  builder.add("isSystem", VPackValue(isSystem()));

  auto opts = keyOptions();
  if (opts.get() != nullptr) {
    VPackSlice const slice(opts->data());
    builder.add("keyOptions", slice);
  }
}

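// Illustrative example (added; not part of the original source): serialized
// to JSON, the object built above looks roughly like
//
//   { "version": 5, "type": 2, "cid": "12345", "planId": "12345",
//     "count": 0, "indexBuckets": 8, "deleted": false, "doCompact": true,
//     "maximalSize": 33554432, "name": "example", "isVolatile": false,
//     "waitForSync": false, "isSystem": false }
//
// where all concrete values are hypothetical. Note that "cid" and "planId"
// are emitted as strings, and "planId"/"count" are omitted when not set.
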
/// @brief renames a collection
int TRI_collection_t::rename(std::string const& name) {
  // Save name for rollback
  std::string oldName = _info.name();
  _info.rename(name);

  int res = _info.saveToFile(path(), true);

  if (res != TRI_ERROR_NO_ERROR) {
    // Rollback
    _info.rename(oldName);
  }

  return res;
}

/// @brief iterates over a collection
bool TRI_collection_t::iterateDatafiles(
    std::function<bool(TRI_df_marker_t const*, TRI_datafile_t*)> const& cb) {
  if (!iterateDatafilesVector(_datafiles, cb) ||
      !iterateDatafilesVector(_compactors, cb) ||
      !iterateDatafilesVector(_journals, cb)) {
    return false;
  }
  return true;
}

/// @brief opens an existing collection
int TRI_collection_t::open(bool ignoreErrors) {
  double start = TRI_microtime();

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "open-collection { collection: " << _vocbase->name() << "/"
      << _info.name() << " }";

  try {
    // check for journals and datafiles
    bool ok = CheckCollection(this, ignoreErrors);

    if (!ok) {
      LOG(DEBUG) << "cannot open '" << _path << "', check failed";
      return TRI_ERROR_INTERNAL;
    }

    LOG_TOPIC(TRACE, Logger::PERFORMANCE)
        << "[timer] " << Logger::FIXED(TRI_microtime() - start)
        << " s, open-collection { collection: " << _vocbase->name() << "/"
        << _info.name() << " }";

    return TRI_ERROR_NO_ERROR;
  } catch (basics::Exception const& ex) {
    LOG(ERR) << "cannot load collection parameter file '" << _path
             << "': " << ex.what();
    return ex.code();
  } catch (std::exception const& ex) {
    LOG(ERR) << "cannot load collection parameter file '" << _path
             << "': " << ex.what();
    return TRI_ERROR_INTERNAL;
  }
}

/// @brief closes an open collection
int TRI_collection_t::close() {
  // close compactor files
  closeDataFiles(_compactors);

  // close journal files
  closeDataFiles(_journals);

  // close datafiles
  closeDataFiles(_datafiles);

  return TRI_ERROR_NO_ERROR;
}

/// @brief garbage-collect a collection's indexes
int TRI_collection_t::cleanupIndexes() {
  int res = TRI_ERROR_NO_ERROR;

  // cleaning indexes is expensive, so only do it if the flag is set for the
  // collection
  if (_cleanupIndexes > 0) {
    WRITE_LOCKER(writeLocker, _lock);

    for (auto& idx : allIndexes()) {
      if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
        res = idx->cleanup();

        if (res != TRI_ERROR_NO_ERROR) {
          break;
        }
      }
    }
  }

  return res;
}

/// @brief fill the additional (non-primary) indexes
int TRI_collection_t::fillIndexes(arangodb::Transaction* trx,
                                  TRI_vocbase_col_t* collection) {
  // distribute the work to index threads plus this thread
  auto const& indexes = allIndexes();
  size_t const n = indexes.size();

  if (n == 1) {
    return TRI_ERROR_NO_ERROR;
  }

  double start = TRI_microtime();

  // only log performance infos for indexes with more than this number of
  // entries
  static size_t const NotificationSizeThreshold = 131072;
  auto primaryIndex = this->primaryIndex();

  if (primaryIndex->size() > NotificationSizeThreshold) {
    LOG_TOPIC(TRACE, Logger::PERFORMANCE)
        << "fill-indexes-document-collection { collection: "
        << _vocbase->name() << "/" << _info.name()
        << " }, indexes: " << (n - 1);
  }

  TRI_ASSERT(n > 1);

  std::atomic<int> result(TRI_ERROR_NO_ERROR);

  {
    arangodb::basics::Barrier barrier(n - 1);

    auto indexPool = application_features::ApplicationServer::getFeature<IndexPoolFeature>("IndexPool")->getThreadPool();

    auto callback = [&barrier, &result](int res) -> void {
      // update the error code
      if (res != TRI_ERROR_NO_ERROR) {
        int expected = TRI_ERROR_NO_ERROR;
        result.compare_exchange_strong(expected, res,
                                       std::memory_order_acquire);
      }

      barrier.join();
    };

    // now actually fill the secondary indexes
    for (size_t i = 1; i < n; ++i) {
      auto idx = indexes[i];

      // index threads must come first, otherwise this thread will block
      // the loop and prevent distribution to threads
      if (indexPool != nullptr && i != (n - 1)) {
        try {
          // move task into thread pool
          IndexFiller indexTask(trx, this, idx, callback);

          static_cast<arangodb::basics::ThreadPool*>(indexPool)
              ->enqueue(indexTask);
        } catch (...) {
          // set error code
          int expected = TRI_ERROR_NO_ERROR;
          result.compare_exchange_strong(expected, TRI_ERROR_INTERNAL,
                                         std::memory_order_acquire);

          barrier.join();
        }
      } else {
        // fill index in this thread
        int res;

        try {
          res = fillIndex(trx, idx);
        } catch (...) {
          res = TRI_ERROR_INTERNAL;
        }

        if (res != TRI_ERROR_NO_ERROR) {
          int expected = TRI_ERROR_NO_ERROR;
          result.compare_exchange_strong(expected, res,
                                         std::memory_order_acquire);
        }

        barrier.join();
      }
    }

    // barrier waits here until all threads have joined
  }

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "[timer] " << Logger::FIXED(TRI_microtime() - start)
      << " s, fill-indexes-document-collection { collection: "
      << _vocbase->name() << "/" << _info.name()
      << " }, indexes: " << (n - 1);

  return result.load();
}

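// Illustrative note (added; not part of the original source): with n
// indexes in total, index 0 is the primary index and is already filled, so
// the barrier is sized for the n - 1 secondary indexes. When a thread pool
// is available, all but the last secondary index are handed to the pool
// and the last one is filled by the calling thread itself, which is why
// the loop treats i == n - 1 specially. Every participant, successful or
// not, must call barrier.join() exactly once, otherwise the enclosing
// scope would never be left.
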
/// @brief reads an element from the document collection
int TRI_collection_t::read(Transaction* trx, std::string const& key,
                           TRI_doc_mptr_t* mptr, bool lock) {
  return read(trx, StringRef(key.c_str(), key.size()), mptr, lock);
}

int TRI_collection_t::read(Transaction* trx, StringRef const& key,
                           TRI_doc_mptr_t* mptr, bool lock) {
  TRI_ASSERT(mptr != nullptr);
  mptr->setVPack(nullptr);

  TransactionBuilderLeaser builder(trx);
  builder->add(VPackValuePair(key.data(), key.size(), VPackValueType::String));
  VPackSlice slice = builder->slice();

  {
    TRI_IF_FAILURE("ReadDocumentNoLock") {
      // test what happens if no lock can be acquired
      return TRI_ERROR_DEBUG;
    }

    TRI_IF_FAILURE("ReadDocumentNoLockExcept") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    CollectionReadLocker collectionLocker(this, lock);

    TRI_doc_mptr_t* header;
    int res = lookupDocument(trx, slice, header);

    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }

    // we found a document, now copy it over
    *mptr = *header;
  }

  TRI_ASSERT(mptr->vpack() != nullptr);

  return TRI_ERROR_NO_ERROR;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief inserts a document or edge into the collection
////////////////////////////////////////////////////////////////////////////////

int TRI_collection_t::insert(Transaction* trx, VPackSlice const slice,
                             TRI_doc_mptr_t* mptr, OperationOptions& options,
                             TRI_voc_tick_t& resultMarkerTick, bool lock) {
  resultMarkerTick = 0;
  VPackSlice fromSlice;
  VPackSlice toSlice;

  bool const isEdgeCollection = (_info.type() == TRI_COL_TYPE_EDGE);

  if (isEdgeCollection) {
    // _from:
    fromSlice = slice.get(StaticStrings::FromString);
    if (!fromSlice.isString()) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
    VPackValueLength len;
    char const* docId = fromSlice.getString(len);
    size_t split;
    if (!TRI_ValidateDocumentIdKeyGenerator(docId, static_cast<size_t>(len),
                                            &split)) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
    // _to:
    toSlice = slice.get(StaticStrings::ToString);
    if (!toSlice.isString()) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
    docId = toSlice.getString(len);
    if (!TRI_ValidateDocumentIdKeyGenerator(docId, static_cast<size_t>(len),
                                            &split)) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
  }

  uint64_t hash = 0;

  TransactionBuilderLeaser builder(trx);
  VPackSlice newSlice;
  int res = TRI_ERROR_NO_ERROR;
  if (options.recoveryMarker == nullptr) {
    TIMER_START(TRANSACTION_NEW_OBJECT_FOR_INSERT);
    res = newObjectForInsert(trx, slice, fromSlice, toSlice, isEdgeCollection,
                             hash, *builder.get(), options.isRestore);
    TIMER_STOP(TRANSACTION_NEW_OBJECT_FOR_INSERT);
    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }
    newSlice = builder->slice();
  } else {
    TRI_ASSERT(slice.isObject());
    // we can get away with the fast hash function here, as key values are
    // restricted to strings
    hash = Transaction::extractKeyFromDocument(slice).hashString();
    newSlice = slice;
  }

  TRI_ASSERT(mptr != nullptr);
  mptr->setVPack(nullptr);

  // create marker
  arangodb::wal::CrudMarker insertMarker(
      TRI_DF_MARKER_VPACK_DOCUMENT,
      TRI_MarkerIdTransaction(trx->getInternals()), newSlice);

  arangodb::wal::Marker const* marker;
  if (options.recoveryMarker == nullptr) {
    marker = &insertMarker;
  } else {
    marker = options.recoveryMarker;
  }

  // now insert into indexes
  {
    TRI_IF_FAILURE("InsertDocumentNoLock") {
      // test what happens if no lock can be acquired
      return TRI_ERROR_DEBUG;
    }

    arangodb::wal::DocumentOperation operation(
        trx, marker, this, TRI_VOC_DOCUMENT_OPERATION_INSERT);

    // DocumentOperation has taken over the ownership for the marker
    TRI_ASSERT(operation.marker != nullptr);

    TRI_IF_FAILURE("InsertDocumentNoHeader") {
      // test what happens if no header can be acquired
      return TRI_ERROR_DEBUG;
    }

    TRI_IF_FAILURE("InsertDocumentNoHeaderExcept") {
      // test what happens if no header can be acquired
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    arangodb::CollectionWriteLocker collectionLocker(this, lock);

    // create a new header
    TRI_doc_mptr_t* header = operation.header = _masterPointers.request();

    if (header == nullptr) {
      // out of memory. no harm done here. just return the error
      return TRI_ERROR_OUT_OF_MEMORY;
    }

    // update the header we got
    void* mem = operation.marker->vpack();
    TRI_ASSERT(mem != nullptr);
    header->setHash(hash);
    header->setVPack(mem);  // PROTECTED by trx in trxCollection

    TRI_ASSERT(VPackSlice(header->vpack()).isObject());

    // insert into indexes
    res = insertDocument(trx, header, operation, mptr, options.waitForSync);

    if (res != TRI_ERROR_NO_ERROR) {
      operation.revert();
    } else {
      TRI_ASSERT(mptr->vpack() != nullptr);

      // store the tick that was used for writing the document
      resultMarkerTick = operation.tick;
    }
  }

  return res;
}

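// Illustrative note (added; not part of the original source): the
// recoveryMarker split above means a normal insert builds a fresh document
// (generating _key/_rev and hashing the key) and writes its own WAL marker,
// while WAL recovery replays an existing marker verbatim: the slice is
// taken as-is and only the key hash is recomputed. For an edge collection,
// _from/_to must look like "vertices/12345", i.e. a collection name, a
// slash, and a key that the key generator accepts (the example handle is
// hypothetical).
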
/// @brief updates a document or edge in a collection
int TRI_collection_t::update(Transaction* trx, VPackSlice const newSlice,
                             TRI_doc_mptr_t* mptr, OperationOptions& options,
                             TRI_voc_tick_t& resultMarkerTick, bool lock,
                             VPackSlice& prevRev, TRI_doc_mptr_t& previous) {
  resultMarkerTick = 0;

  if (!newSlice.isObject()) {
    return TRI_ERROR_ARANGO_DOCUMENT_TYPE_INVALID;
  }

  // initialize the result
  TRI_ASSERT(mptr != nullptr);
  mptr->setVPack(nullptr);
  prevRev = VPackSlice();

  TRI_voc_rid_t revisionId = 0;
  if (options.isRestore) {
    VPackSlice oldRev = TRI_ExtractRevisionIdAsSlice(newSlice);
    if (!oldRev.isString()) {
      return TRI_ERROR_ARANGO_DOCUMENT_REV_BAD;
    }
    bool isOld;
    revisionId = TRI_StringToRid(oldRev.copyString(), isOld);
    if (isOld) {
      // Do not tolerate old revision IDs
      revisionId = TRI_HybridLogicalClock();
    }
  } else {
    revisionId = TRI_HybridLogicalClock();
  }

  VPackSlice key = newSlice.get(StaticStrings::KeyString);
  if (key.isNone()) {
    return TRI_ERROR_ARANGO_DOCUMENT_HANDLE_BAD;
  }

  bool const isEdgeCollection = (_info.type() == TRI_COL_TYPE_EDGE);

  int res;
  {
    TRI_IF_FAILURE("UpdateDocumentNoLock") { return TRI_ERROR_DEBUG; }

    arangodb::CollectionWriteLocker collectionLocker(this, lock);

    // get the header pointer of the previous revision
    TRI_doc_mptr_t* oldHeader;
    res = lookupDocument(trx, key, oldHeader);
    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }

    TRI_IF_FAILURE("UpdateDocumentNoMarker") {
      // test what happens when no marker can be created
      return TRI_ERROR_DEBUG;
    }

    TRI_IF_FAILURE("UpdateDocumentNoMarkerExcept") {
      // test what happens when no marker can be created
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    prevRev = oldHeader->revisionIdAsSlice();
    previous = *oldHeader;

    // Check old revision:
    if (!options.ignoreRevs) {
      VPackSlice expectedRevSlice = newSlice.get(StaticStrings::RevString);
      int res = checkRevision(trx, expectedRevSlice, prevRev);
      if (res != TRI_ERROR_NO_ERROR) {
        return res;
      }
    }

    if (newSlice.length() <= 1) {
      // no need to do anything
      *mptr = *oldHeader;
      return TRI_ERROR_NO_ERROR;
    }

    // merge old and new values
    TransactionBuilderLeaser builder(trx);
    if (options.recoveryMarker == nullptr) {
      mergeObjectsForUpdate(
          trx, VPackSlice(oldHeader->vpack()), newSlice, isEdgeCollection,
          TRI_RidToString(revisionId), options.mergeObjects, options.keepNull,
          *builder.get());

      if (ServerState::isDBServer(trx->serverRole())) {
        // Need to check that no sharding keys have changed:
        if (arangodb::shardKeysChanged(
                _vocbase->name(),
                trx->resolver()->getCollectionNameCluster(_info.planId()),
                VPackSlice(oldHeader->vpack()), builder->slice(), false)) {
          return TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES;
        }
      }
    }

    // create marker
    arangodb::wal::CrudMarker updateMarker(
        TRI_DF_MARKER_VPACK_DOCUMENT,
        TRI_MarkerIdTransaction(trx->getInternals()), builder->slice());

    arangodb::wal::Marker const* marker;
    if (options.recoveryMarker == nullptr) {
      marker = &updateMarker;
    } else {
      marker = options.recoveryMarker;
    }

    arangodb::wal::DocumentOperation operation(
        trx, marker, this, TRI_VOC_DOCUMENT_OPERATION_UPDATE);

    // DocumentOperation has taken over the ownership for the marker
    TRI_ASSERT(operation.marker != nullptr);

    operation.header = oldHeader;
    operation.init();

    res = updateDocument(trx, revisionId, oldHeader, operation, mptr,
                         options.waitForSync);

    if (res != TRI_ERROR_NO_ERROR) {
      operation.revert();
    } else if (options.waitForSync) {
      // store the tick that was used for writing the new document
      resultMarkerTick = operation.tick;
    }
  }

  if (res == TRI_ERROR_NO_ERROR) {
    TRI_ASSERT(mptr->vpack() != nullptr);
  }

  return res;
}

/// @brief replaces a document or edge in a collection
int TRI_collection_t::replace(Transaction* trx, VPackSlice const newSlice,
                              TRI_doc_mptr_t* mptr, OperationOptions& options,
                              TRI_voc_tick_t& resultMarkerTick, bool lock,
                              VPackSlice& prevRev, TRI_doc_mptr_t& previous) {
  resultMarkerTick = 0;

  if (!newSlice.isObject()) {
    return TRI_ERROR_ARANGO_DOCUMENT_TYPE_INVALID;
  }

  prevRev = VPackSlice();
  VPackSlice fromSlice;
  VPackSlice toSlice;

  bool const isEdgeCollection = (_info.type() == TRI_COL_TYPE_EDGE);

  if (isEdgeCollection) {
    fromSlice = newSlice.get(StaticStrings::FromString);
    if (!fromSlice.isString()) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
    toSlice = newSlice.get(StaticStrings::ToString);
    if (!toSlice.isString()) {
      return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
    }
  }

  // initialize the result
  TRI_ASSERT(mptr != nullptr);
  mptr->setVPack(nullptr);

  TRI_voc_rid_t revisionId = 0;
  if (options.isRestore) {
    VPackSlice oldRev = TRI_ExtractRevisionIdAsSlice(newSlice);
    if (!oldRev.isString()) {
      return TRI_ERROR_ARANGO_DOCUMENT_REV_BAD;
    }
    bool isOld;
    revisionId = TRI_StringToRid(oldRev.copyString(), isOld);
    if (isOld) {
      // Do not tolerate old revision ticks:
      revisionId = TRI_HybridLogicalClock();
    }
  } else {
    revisionId = TRI_HybridLogicalClock();
  }

  int res;
  {
    TRI_IF_FAILURE("ReplaceDocumentNoLock") { return TRI_ERROR_DEBUG; }

    arangodb::CollectionWriteLocker collectionLocker(this, lock);

    // get the header pointer of the previous revision
    TRI_doc_mptr_t* oldHeader;
    VPackSlice key = newSlice.get(StaticStrings::KeyString);
    if (key.isNone()) {
      return TRI_ERROR_ARANGO_DOCUMENT_HANDLE_BAD;
    }
    res = lookupDocument(trx, key, oldHeader);
    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }

    TRI_IF_FAILURE("ReplaceDocumentNoMarker") {
      // test what happens when no marker can be created
      return TRI_ERROR_DEBUG;
    }

    TRI_IF_FAILURE("ReplaceDocumentNoMarkerExcept") {
      // test what happens when no marker can be created
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    prevRev = oldHeader->revisionIdAsSlice();
    previous = *oldHeader;

    // Check old revision:
    if (!options.ignoreRevs) {
      VPackSlice expectedRevSlice = newSlice.get(StaticStrings::RevString);
      int res = checkRevision(trx, expectedRevSlice, prevRev);
      if (res != TRI_ERROR_NO_ERROR) {
        return res;
      }
    }

    // merge old and new values
    TransactionBuilderLeaser builder(trx);
    newObjectForReplace(trx, VPackSlice(oldHeader->vpack()), newSlice,
                        fromSlice, toSlice, isEdgeCollection,
                        TRI_RidToString(revisionId), *builder.get());

    if (ServerState::isDBServer(trx->serverRole())) {
      // Need to check that no sharding keys have changed:
      if (arangodb::shardKeysChanged(
              _vocbase->name(),
              trx->resolver()->getCollectionNameCluster(_info.planId()),
              VPackSlice(oldHeader->vpack()), builder->slice(), false)) {
        return TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES;
      }
    }

    // create marker
    arangodb::wal::CrudMarker replaceMarker(
        TRI_DF_MARKER_VPACK_DOCUMENT,
        TRI_MarkerIdTransaction(trx->getInternals()), builder->slice());

    arangodb::wal::Marker const* marker;
    if (options.recoveryMarker == nullptr) {
      marker = &replaceMarker;
    } else {
      marker = options.recoveryMarker;
    }

    arangodb::wal::DocumentOperation operation(
        trx, marker, this, TRI_VOC_DOCUMENT_OPERATION_REPLACE);

    // DocumentOperation has taken over the ownership for the marker
    TRI_ASSERT(operation.marker != nullptr);

    operation.header = oldHeader;
    operation.init();

    res = updateDocument(trx, revisionId, oldHeader, operation, mptr,
                         options.waitForSync);

    if (res != TRI_ERROR_NO_ERROR) {
      operation.revert();
    } else if (options.waitForSync) {
      // store the tick that was used for writing the document
      resultMarkerTick = operation.tick;
    }
  }

  if (res == TRI_ERROR_NO_ERROR) {
    TRI_ASSERT(mptr->vpack() != nullptr);
  }

  return res;
}

/// @brief removes a document or edge
int TRI_collection_t::remove(arangodb::Transaction* trx, VPackSlice const slice,
                             OperationOptions& options,
                             TRI_voc_tick_t& resultMarkerTick, bool lock,
                             VPackSlice& prevRev, TRI_doc_mptr_t& previous) {
  resultMarkerTick = 0;

  // create remove marker
  TRI_voc_rid_t revisionId = 0;
  if (options.isRestore) {
    VPackSlice oldRev = TRI_ExtractRevisionIdAsSlice(slice);
    if (!oldRev.isString()) {
      revisionId = TRI_HybridLogicalClock();
    } else {
      bool isOld;
      revisionId = TRI_StringToRid(oldRev.copyString(), isOld);
      if (isOld) {
        // Do not tolerate old revisions
        revisionId = TRI_HybridLogicalClock();
      }
    }
  } else {
    revisionId = TRI_HybridLogicalClock();
  }

  TransactionBuilderLeaser builder(trx);
  newObjectForRemove(trx, slice, TRI_RidToString(revisionId), *builder.get());

  prevRev = VPackSlice();

  TRI_IF_FAILURE("RemoveDocumentNoMarker") {
    // test what happens when no marker can be created
    return TRI_ERROR_DEBUG;
  }

  TRI_IF_FAILURE("RemoveDocumentNoMarkerExcept") {
    // test what happens if no marker can be created
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }

  // create marker
  arangodb::wal::CrudMarker removeMarker(
      TRI_DF_MARKER_VPACK_REMOVE,
      TRI_MarkerIdTransaction(trx->getInternals()), builder->slice());

  arangodb::wal::Marker const* marker;
  if (options.recoveryMarker == nullptr) {
    marker = &removeMarker;
  } else {
    marker = options.recoveryMarker;
  }

  int res;
  {
    TRI_IF_FAILURE("RemoveDocumentNoLock") {
      // test what happens if no lock can be acquired
      return TRI_ERROR_DEBUG;
    }

    arangodb::wal::DocumentOperation operation(
        trx, marker, this, TRI_VOC_DOCUMENT_OPERATION_REMOVE);

    // DocumentOperation has taken over the ownership for the marker
    TRI_ASSERT(operation.marker != nullptr);

    VPackSlice key;
    if (slice.isString()) {
      key = slice;
    } else {
      key = slice.get(StaticStrings::KeyString);
    }
    TRI_ASSERT(!key.isNone());

    arangodb::CollectionWriteLocker collectionLocker(this, lock);

    // get the header pointer of the previous revision
    TRI_doc_mptr_t* oldHeader = nullptr;
    res = lookupDocument(trx, key, oldHeader);
    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }

    TRI_ASSERT(oldHeader != nullptr);
    prevRev = oldHeader->revisionIdAsSlice();
    previous = *oldHeader;

    // Check old revision:
    if (!options.ignoreRevs && slice.isObject()) {
      VPackSlice expectedRevSlice = slice.get(StaticStrings::RevString);
      int res = checkRevision(trx, expectedRevSlice, prevRev);
      if (res != TRI_ERROR_NO_ERROR) {
        return res;
      }
    }

    // we found a document to remove
    TRI_ASSERT(oldHeader != nullptr);
    operation.header = oldHeader;
    operation.init();

    // delete from indexes
    res = deleteSecondaryIndexes(trx, oldHeader, false);

    if (res != TRI_ERROR_NO_ERROR) {
      insertSecondaryIndexes(trx, oldHeader, true);
      return res;
    }

    res = deletePrimaryIndex(trx, oldHeader);

    if (res != TRI_ERROR_NO_ERROR) {
      insertSecondaryIndexes(trx, oldHeader, true);
      return res;
    }

    operation.indexed();
    _numberDocuments--;

    TRI_IF_FAILURE("RemoveDocumentNoOperation") { return TRI_ERROR_DEBUG; }

    TRI_IF_FAILURE("RemoveDocumentNoOperationExcept") {
      THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
    }

    res = TRI_AddOperationTransaction(trx->getInternals(), operation,
                                      options.waitForSync);

    if (res != TRI_ERROR_NO_ERROR) {
      operation.revert();
    } else {
      // store the tick that was used for removing the document
      resultMarkerTick = operation.tick;
    }
  }

  return res;
}

/// @brief looks up a document by key, low level worker
/// the caller must make sure the read lock on the collection is held
/// the key must be a string slice, no revision check is performed
int TRI_collection_t::lookupDocument(arangodb::Transaction* trx,
                                     VPackSlice const key,
                                     TRI_doc_mptr_t*& header) {
  if (!key.isString()) {
    return TRI_ERROR_ARANGO_DOCUMENT_KEY_BAD;
  }

  header = primaryIndex()->lookupKey(trx, key);

  if (header == nullptr) {
    return TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND;
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief checks the revision of a document
int TRI_collection_t::checkRevision(Transaction* trx, VPackSlice const expected,
                                    VPackSlice const found) {
  if (!expected.isNone() && found != expected) {
    return TRI_ERROR_ARANGO_CONFLICT;
  }
  return TRI_ERROR_NO_ERROR;
}

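// Illustrative example (added; not part of the original source): callers
// pass the _rev slice taken from the incoming request as "expected" and the
// stored revision as "found". If the request carries no _rev at all,
// expected.isNone() holds and the check is skipped; if it carries, say,
// "_rev": "123" while the stored document has "_rev": "456", the slices
// differ and TRI_ERROR_ARANGO_CONFLICT is returned (the values are
// hypothetical).
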
/// @brief updates an existing document, low level worker
/// the caller must make sure the write lock on the collection is held
int TRI_collection_t::updateDocument(
    arangodb::Transaction* trx, TRI_voc_rid_t revisionId,
    TRI_doc_mptr_t* oldHeader, arangodb::wal::DocumentOperation& operation,
    TRI_doc_mptr_t* mptr, bool& waitForSync) {
  // save the old data, remember
  TRI_doc_mptr_t oldData = *oldHeader;

  // remove old document from secondary indexes
  // (it will stay in the primary index as the key won't change)
  int res = deleteSecondaryIndexes(trx, oldHeader, false);

  if (res != TRI_ERROR_NO_ERROR) {
    // re-enter the document in case of failure, ignore errors during rollback
    insertSecondaryIndexes(trx, oldHeader, true);
    return res;
  }

  // update header
  TRI_doc_mptr_t* newHeader = oldHeader;

  // update the header. this will modify oldHeader, too !!!
  void* mem = operation.marker->vpack();
  TRI_ASSERT(mem != nullptr);
  newHeader->setVPack(mem);

  // insert new document into secondary indexes
  res = insertSecondaryIndexes(trx, newHeader, false);

  if (res != TRI_ERROR_NO_ERROR) {
    // rollback
    deleteSecondaryIndexes(trx, newHeader, true);

    // copy back old header data
    oldHeader->copy(oldData);

    insertSecondaryIndexes(trx, oldHeader, true);

    return res;
  }

  operation.indexed();

  TRI_IF_FAILURE("UpdateDocumentNoOperation") { return TRI_ERROR_DEBUG; }

  TRI_IF_FAILURE("UpdateDocumentNoOperationExcept") {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }

  res = TRI_AddOperationTransaction(trx->getInternals(), operation, waitForSync);

  if (res == TRI_ERROR_NO_ERROR) {
    // write new header into result
    *mptr = *newHeader;
  }

  return res;
}

/// @brief insert a document, low level worker
/// the caller must make sure the write lock on the collection is held
int TRI_collection_t::insertDocument(
    arangodb::Transaction* trx, TRI_doc_mptr_t* header,
    arangodb::wal::DocumentOperation& operation, TRI_doc_mptr_t* mptr,
    bool& waitForSync) {
  TRI_ASSERT(header != nullptr);
  TRI_ASSERT(mptr != nullptr);

  // .............................................................................
  // insert into indexes
  // .............................................................................

  // insert into primary index first
  int res = insertPrimaryIndex(trx, header);

  if (res != TRI_ERROR_NO_ERROR) {
    // insert has failed
    return res;
  }

  // insert into secondary indexes
  res = insertSecondaryIndexes(trx, header, false);

  if (res != TRI_ERROR_NO_ERROR) {
    deleteSecondaryIndexes(trx, header, true);
    deletePrimaryIndex(trx, header);
    return res;
  }

  _numberDocuments++;

  operation.indexed();

  TRI_IF_FAILURE("InsertDocumentNoOperation") { return TRI_ERROR_DEBUG; }

  TRI_IF_FAILURE("InsertDocumentNoOperationExcept") {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
  }

  res = TRI_AddOperationTransaction(trx->getInternals(), operation, waitForSync);

  if (res == TRI_ERROR_NO_ERROR) {
    *mptr = *header;
  }

  return res;
}

/// @brief creates a new entry in the primary index
int TRI_collection_t::insertPrimaryIndex(arangodb::Transaction* trx,
                                         TRI_doc_mptr_t* header) {
  TRI_IF_FAILURE("InsertPrimaryIndex") { return TRI_ERROR_DEBUG; }

  TRI_doc_mptr_t* found;

  TRI_ASSERT(header != nullptr);
  TRI_ASSERT(header->vpack() != nullptr);

  // insert into primary index
  int res = primaryIndex()->insertKey(trx, header, (void const**)&found);

  if (res != TRI_ERROR_NO_ERROR) {
    return res;
  }

  if (found == nullptr) {
    // success
    return TRI_ERROR_NO_ERROR;
  }

  return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED;
}

/// @brief creates a new entry in the secondary indexes
int TRI_collection_t::insertSecondaryIndexes(arangodb::Transaction* trx,
                                             TRI_doc_mptr_t const* header,
                                             bool isRollback) {
  TRI_IF_FAILURE("InsertSecondaryIndexes") { return TRI_ERROR_DEBUG; }

  bool const useSecondary = useSecondaryIndexes();
  if (!useSecondary && _persistentIndexes == 0) {
    return TRI_ERROR_NO_ERROR;
  }

  int result = TRI_ERROR_NO_ERROR;

  auto const& indexes = allIndexes();
  size_t const n = indexes.size();

  for (size_t i = 1; i < n; ++i) {
    auto idx = indexes[i];

    if (!useSecondary && !idx->isPersistent()) {
      continue;
    }

    int res = idx->insert(trx, header, isRollback);

    // in case of no-memory, return immediately
    if (res == TRI_ERROR_OUT_OF_MEMORY) {
      return res;
    }
    if (res != TRI_ERROR_NO_ERROR) {
      if (res == TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED ||
          result == TRI_ERROR_NO_ERROR) {
        // "prefer" unique constraint violated
        result = res;
      }
    }
  }

  return result;
}

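// Illustrative note (added; not part of the original source): the loop
// above keeps going after a failure so that every index sees the document,
// but the error finally reported is biased: out-of-memory aborts
// immediately, a unique-constraint violation overwrites any earlier error,
// and otherwise only the first error encountered is kept.
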
/// @brief deletes an entry from the primary index
int TRI_collection_t::deletePrimaryIndex(arangodb::Transaction* trx,
                                         TRI_doc_mptr_t const* header) {
  TRI_IF_FAILURE("DeletePrimaryIndex") { return TRI_ERROR_DEBUG; }

  auto found = primaryIndex()->removeKey(
      trx, Transaction::extractKeyFromDocument(VPackSlice(header->vpack())));

  if (found == nullptr) {
    return TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND;
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief deletes an entry from the secondary indexes
int TRI_collection_t::deleteSecondaryIndexes(arangodb::Transaction* trx,
                                             TRI_doc_mptr_t const* header,
                                             bool isRollback) {
  bool const useSecondary = useSecondaryIndexes();
  if (!useSecondary && _persistentIndexes == 0) {
    return TRI_ERROR_NO_ERROR;
  }

  TRI_IF_FAILURE("DeleteSecondaryIndexes") { return TRI_ERROR_DEBUG; }

  int result = TRI_ERROR_NO_ERROR;

  auto const& indexes = allIndexes();
  size_t const n = indexes.size();

  for (size_t i = 1; i < n; ++i) {
    auto idx = indexes[i];

    if (!useSecondary && !idx->isPersistent()) {
      continue;
    }

    int res = idx->remove(trx, header, isRollback);

    if (res != TRI_ERROR_NO_ERROR) {
      // an error occurred
      result = res;
    }
  }

  return result;
}

/// @brief new object for insert, computes the hash of the key
int TRI_collection_t::newObjectForInsert(
    Transaction* trx, VPackSlice const& value, VPackSlice const& fromSlice,
    VPackSlice const& toSlice, bool isEdgeCollection, uint64_t& hash,
    VPackBuilder& builder, bool isRestore) {
  TRI_voc_tick_t newRev = 0;
  builder.openObject();

  // add system attributes first, in this order:
  // _key, _id, _from, _to, _rev

  // _key
  VPackSlice s = value.get(StaticStrings::KeyString);
  if (s.isNone()) {
    TRI_ASSERT(!isRestore);  // need key in case of restore
    newRev = TRI_HybridLogicalClock();
    std::string keyString = _keyGenerator->generate(TRI_NewTickServer());
    if (keyString.empty()) {
      return TRI_ERROR_ARANGO_OUT_OF_KEYS;
    }
    uint8_t* where =
        builder.add(StaticStrings::KeyString, VPackValue(keyString));
    s = VPackSlice(where);  // point to newly built value, the string
  } else if (!s.isString()) {
    return TRI_ERROR_ARANGO_DOCUMENT_KEY_BAD;
  } else {
    std::string keyString = s.copyString();
    int res = _keyGenerator->validate(keyString, isRestore);
    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }
    builder.add(StaticStrings::KeyString, s);
  }

  // _id
  uint8_t* p = builder.add(StaticStrings::IdString,
                           VPackValuePair(9ULL, VPackValueType::Custom));
  *p++ = 0xf3;  // custom type for _id
  if (ServerState::isDBServer(trx->serverRole()) && _info.name()[0] != '_') {
    // db server in cluster, note: the local collections _statistics,
    // _statisticsRaw and _statistics15 (which are the only system
    // collections) must not be treated as shards but as local collections,
    // we recognise this by looking at the first letter of the collection
    // name in _info
    DatafileHelper::StoreNumber<uint64_t>(p, _info.planId(), sizeof(uint64_t));
  } else {
    // local server
    DatafileHelper::StoreNumber<uint64_t>(p, _info.id(), sizeof(uint64_t));
  }
  // we can get away with the fast hash function here, as key values are
  // restricted to strings
  hash = s.hashString();

  // _from and _to
  if (isEdgeCollection) {
    TRI_ASSERT(!fromSlice.isNone());
    TRI_ASSERT(!toSlice.isNone());
    builder.add(StaticStrings::FromString, fromSlice);
    builder.add(StaticStrings::ToString, toSlice);
  }

  // _rev
  std::string newRevSt;
  if (isRestore) {
    VPackSlice oldRev = TRI_ExtractRevisionIdAsSlice(value);
    if (!oldRev.isString()) {
      return TRI_ERROR_ARANGO_DOCUMENT_REV_BAD;
    }
    bool isOld;
    TRI_voc_rid_t oldRevision = TRI_StringToRid(oldRev.copyString(), isOld);
    if (isOld) {
      oldRevision = TRI_HybridLogicalClock();
    }
    newRevSt = TRI_RidToString(oldRevision);
  } else {
    if (newRev == 0) {
      newRev = TRI_HybridLogicalClock();
    }
    newRevSt = TRI_RidToString(newRev);
  }
  builder.add(StaticStrings::RevString, VPackValue(newRevSt));

  // add other attributes after the system attributes
  TRI_SanitizeObjectWithEdges(value, builder);

  builder.close();
  return TRI_ERROR_NO_ERROR;
}

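// Illustrative example (added; not part of the original source): for an
// edge inserted into an edge collection, the builder above produces an
// object whose system attributes come first, in a fixed order, roughly
//
//   { "_key": "4711", "_id": <9-byte custom value>, "_from": "v/1",
//     "_to": "v/2", "_rev": "...", ...user attributes... }
//
// (the concrete values are hypothetical). _id is not stored as a string:
// it is a 9-byte custom VelocyPack value, one 0xf3 type byte followed by
// the 8-byte collection id (or shard plan id on a DB server), resolved
// back to the usual "collection/key" string form only when documents are
// serialized for clients.
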
/// @brief new object for replace, oldValue must have _key and _id correctly
/// set
void TRI_collection_t::newObjectForReplace(
    Transaction* trx, VPackSlice const& oldValue, VPackSlice const& newValue,
    VPackSlice const& fromSlice, VPackSlice const& toSlice,
    bool isEdgeCollection, std::string const& rev, VPackBuilder& builder) {
  builder.openObject();

  // add system attributes first, in this order:
  // _key, _id, _from, _to, _rev

  // _key
  VPackSlice s = oldValue.get(StaticStrings::KeyString);
  TRI_ASSERT(!s.isNone());
  builder.add(StaticStrings::KeyString, s);

  // _id
  s = oldValue.get(StaticStrings::IdString);
  TRI_ASSERT(!s.isNone());
  builder.add(StaticStrings::IdString, s);

  // _from and _to here
  if (isEdgeCollection) {
    TRI_ASSERT(!fromSlice.isNone());
    TRI_ASSERT(!toSlice.isNone());
    builder.add(StaticStrings::FromString, fromSlice);
    builder.add(StaticStrings::ToString, toSlice);
  }

  // _rev
  builder.add(StaticStrings::RevString, VPackValue(rev));

  // add other attributes after the system attributes
  TRI_SanitizeObjectWithEdges(newValue, builder);

  builder.close();
}

/// @brief merge two objects for update, oldValue must have correctly set
/// _key and _id attributes
void TRI_collection_t::mergeObjectsForUpdate(
    arangodb::Transaction* trx, VPackSlice const& oldValue,
    VPackSlice const& newValue, bool isEdgeCollection, std::string const& rev,
    bool mergeObjects, bool keepNull, VPackBuilder& b) {
  b.openObject();

  VPackSlice keySlice = oldValue.get(StaticStrings::KeyString);
  VPackSlice idSlice = oldValue.get(StaticStrings::IdString);
  TRI_ASSERT(!keySlice.isNone());
  TRI_ASSERT(!idSlice.isNone());

  // Find the attributes in the newValue object:
  VPackSlice fromSlice;
  VPackSlice toSlice;

  std::unordered_map<std::string, VPackSlice> newValues;
  {
    VPackObjectIterator it(newValue, false);
    while (it.valid()) {
      std::string key = it.key().copyString();
      if (!key.empty() && key[0] == '_' &&
          (key == StaticStrings::KeyString || key == StaticStrings::IdString ||
           key == StaticStrings::RevString ||
           key == StaticStrings::FromString ||
           key == StaticStrings::ToString)) {
        // note _from and _to and ignore _id, _key and _rev
        if (key == StaticStrings::FromString) {
          fromSlice = it.value();
        } else if (key == StaticStrings::ToString) {
          toSlice = it.value();
        }  // else do nothing
      } else {
        // regular attribute
        newValues.emplace(std::move(key), it.value());
      }

      it.next();
    }
  }

  if (isEdgeCollection) {
    if (fromSlice.isNone()) {
      fromSlice = oldValue.get(StaticStrings::FromString);
    }
    if (toSlice.isNone()) {
      toSlice = oldValue.get(StaticStrings::ToString);
    }
  }

  // add system attributes first, in this order:
  // _key, _id, _from, _to, _rev

  // _key
  b.add(StaticStrings::KeyString, keySlice);

  // _id
  b.add(StaticStrings::IdString, idSlice);

  // _from, _to
  if (isEdgeCollection) {
    TRI_ASSERT(!fromSlice.isNone());
    TRI_ASSERT(!toSlice.isNone());
    b.add(StaticStrings::FromString, fromSlice);
    b.add(StaticStrings::ToString, toSlice);
  }

  // _rev
  b.add(StaticStrings::RevString, VPackValue(rev));

  // add other attributes after the system attributes
  {
    VPackObjectIterator it(oldValue, false);
    while (it.valid()) {
      std::string key = it.key().copyString();
      // exclude system attributes in old value now
      if (!key.empty() && key[0] == '_' &&
          (key == StaticStrings::KeyString || key == StaticStrings::IdString ||
           key == StaticStrings::RevString ||
           key == StaticStrings::FromString ||
           key == StaticStrings::ToString)) {
        it.next();
        continue;
      }

      auto found = newValues.find(key);

      if (found == newValues.end()) {
        // use old value
        b.add(key, it.value());
      } else if (mergeObjects && it.value().isObject() &&
                 (*found).second.isObject()) {
        // merge both values
        auto& value = (*found).second;
        if (keepNull || (!value.isNone() && !value.isNull())) {
          VPackBuilder sub =
              VPackCollection::merge(it.value(), value, true, !keepNull);
          b.add(key, sub.slice());
        }
        // clear the value in the map so it's not added again
        (*found).second = VPackSlice();
      } else {
        // use new value
        auto& value = (*found).second;
        if (keepNull || (!value.isNone() && !value.isNull())) {
          b.add(key, value);
        }
        // clear the value in the map so it's not added again
        (*found).second = VPackSlice();
      }
      it.next();
    }
  }

  // add remaining values that were only in new object
  for (auto& it : newValues) {
    auto& s = it.second;
    if (s.isNone()) {
      continue;
    }
    if (!keepNull && s.isNull()) {
      continue;
    }
    b.add(std::move(it.first), s);
  }

  b.close();
}

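// Illustrative example (added; not part of the original source): given the
// stored document { "a": { "x": 1 }, "b": 2 } and the patch
// { "a": { "y": 2 }, "b": null, "c": 3 }, the function yields
//
//   mergeObjects = true,  keepNull = true :
//     { "a": { "x": 1, "y": 2 }, "b": null, "c": 3 }
//   mergeObjects = true,  keepNull = false:
//     { "a": { "x": 1, "y": 2 }, "c": 3 }
//   mergeObjects = false, keepNull = true :
//     { "a": { "y": 2 }, "b": null, "c": 3 }
//
// with the system attributes handled separately as shown above.
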
/// @brief new object for remove, must have _key set
void TRI_collection_t::newObjectForRemove(Transaction* trx,
                                          VPackSlice const& oldValue,
                                          std::string const& rev,
                                          VPackBuilder& builder) {
  // create an object consisting of _key and _rev (in this order)
  builder.openObject();
  if (oldValue.isString()) {
    builder.add(StaticStrings::KeyString, oldValue);
  } else {
    VPackSlice s = oldValue.get(StaticStrings::KeyString);
    TRI_ASSERT(s.isString());
    builder.add(StaticStrings::KeyString, s);
  }
  builder.add(StaticStrings::RevString, VPackValue(rev));
  builder.close();
}

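// Illustrative example (added; not part of the original source): whether
// the caller passes a bare key "foo" or a full document, the remove marker
// body built above is just { "_key": "foo", "_rev": "<new revision>" },
// i.e. the minimal information needed to replay the removal from the WAL.
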
/// @brief rolls back a document operation
int TRI_collection_t::rollbackOperation(arangodb::Transaction* trx,
                                        TRI_voc_document_operation_e type,
                                        TRI_doc_mptr_t* header,
                                        TRI_doc_mptr_t const* oldData) {
  if (type == TRI_VOC_DOCUMENT_OPERATION_INSERT) {
    // ignore any errors we're getting from this
    deletePrimaryIndex(trx, header);
    deleteSecondaryIndexes(trx, header, true);

    TRI_ASSERT(_numberDocuments > 0);
    _numberDocuments--;

    return TRI_ERROR_NO_ERROR;
  } else if (type == TRI_VOC_DOCUMENT_OPERATION_UPDATE ||
             type == TRI_VOC_DOCUMENT_OPERATION_REPLACE) {
    // copy the existing header's state
    TRI_doc_mptr_t copy = *header;

    // remove the current values from the indexes
    deleteSecondaryIndexes(trx, header, true);
    // revert to the old state
    header->copy(*oldData);
    // re-insert old state
    int res = insertSecondaryIndexes(trx, header, true);
    // revert again to the new state, because other parts of the new state
    // will be reverted at some other place
    header->copy(copy);

    return res;
  } else if (type == TRI_VOC_DOCUMENT_OPERATION_REMOVE) {
    int res = insertPrimaryIndex(trx, header);

    if (res == TRI_ERROR_NO_ERROR) {
      res = insertSecondaryIndexes(trx, header, true);
      _numberDocuments++;
    } else {
      LOG(ERR) << "error rolling back remove operation";
    }
    return res;
  }

  return TRI_ERROR_INTERNAL;
}

/// @brief fill an index in batches
int TRI_collection_t::fillIndexBatch(arangodb::Transaction* trx,
                                     arangodb::Index* idx) {
  auto indexPool = application_features::ApplicationServer::getFeature<IndexPoolFeature>("IndexPool")->getThreadPool();

  double start = TRI_microtime();

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "fill-index-batch { collection: " << _vocbase->name() << "/"
      << _info.name() << " }, " << idx->context()
      << ", threads: " << indexPool->numThreads()
      << ", buckets: " << _info.indexBuckets();

  // give the index a size hint
  auto primaryIndex = this->primaryIndex();

  auto nrUsed = primaryIndex->size();

  idx->sizeHint(trx, nrUsed);

  // process documents a million at a time
  size_t blockSize = 1024 * 1024;

  if (nrUsed < blockSize) {
    blockSize = nrUsed;
  }
  if (blockSize == 0) {
    blockSize = 1;
  }

  int res = TRI_ERROR_NO_ERROR;

  std::vector<TRI_doc_mptr_t const*> documents;
  documents.reserve(blockSize);

  if (nrUsed > 0) {
    arangodb::basics::BucketPosition position;
    uint64_t total = 0;
    while (true) {
      TRI_doc_mptr_t const* mptr =
          primaryIndex->lookupSequential(trx, position, total);

      if (mptr == nullptr) {
        break;
      }

      documents.emplace_back(mptr);

      if (documents.size() == blockSize) {
        res = idx->batchInsert(trx, &documents, indexPool->numThreads());
        documents.clear();

        // some error occurred
        if (res != TRI_ERROR_NO_ERROR) {
          break;
        }
      }
    }
  }

  // process the remainder of the documents
  if (res == TRI_ERROR_NO_ERROR && !documents.empty()) {
    res = idx->batchInsert(trx, &documents, indexPool->numThreads());
  }

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "[timer] " << Logger::FIXED(TRI_microtime() - start)
      << " s, fill-index-batch { collection: " << _vocbase->name() << "/"
      << _info.name() << " }, " << idx->context()
      << ", threads: " << indexPool->numThreads()
      << ", buckets: " << _info.indexBuckets();

  return res;
}

/// @brief fill an index sequentially
int TRI_collection_t::fillIndexSequential(arangodb::Transaction* trx,
                                          arangodb::Index* idx) {
  double start = TRI_microtime();

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "fill-index-sequential { collection: " << _vocbase->name() << "/"
      << _info.name() << " }, " << idx->context()
      << ", buckets: " << _info.indexBuckets();

  // give the index a size hint
  auto primaryIndex = this->primaryIndex();
  size_t nrUsed = primaryIndex->size();

  idx->sizeHint(trx, nrUsed);

  if (nrUsed > 0) {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
    static int const LoopSize = 10000;
    int counter = 0;
    int loops = 0;
#endif

    arangodb::basics::BucketPosition position;
    uint64_t total = 0;

    while (true) {
      TRI_doc_mptr_t const* mptr =
          primaryIndex->lookupSequential(trx, position, total);

      if (mptr == nullptr) {
        break;
      }

      int res = idx->insert(trx, mptr, false);

      if (res != TRI_ERROR_NO_ERROR) {
        return res;
      }
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
      if (++counter == LoopSize) {
        counter = 0;
        ++loops;
        LOG(TRACE) << "indexed " << (LoopSize * loops)
                   << " documents of collection " << _info.id();
      }
#endif
    }
  }

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "[timer] " << Logger::FIXED(TRI_microtime() - start)
      << " s, fill-index-sequential { collection: " << _vocbase->name() << "/"
      << _info.name() << " }, " << idx->context()
      << ", buckets: " << _info.indexBuckets();

  return TRI_ERROR_NO_ERROR;
}

/// @brief initializes an index with all existing documents
int TRI_collection_t::fillIndex(arangodb::Transaction* trx,
                                arangodb::Index* idx,
                                bool skipPersistent) {
  if (!useSecondaryIndexes()) {
    return TRI_ERROR_NO_ERROR;
  }

  if (idx->isPersistent() && skipPersistent) {
    return TRI_ERROR_NO_ERROR;
  }

  try {
    size_t nrUsed = primaryIndex()->size();
    auto indexPool = application_features::ApplicationServer::getFeature<IndexPoolFeature>("IndexPool")->getThreadPool();

    int res;

    if (indexPool != nullptr && idx->hasBatchInsert() && nrUsed > 256 * 1024 &&
        _info.indexBuckets() > 1) {
      // use batch insert if there is an index pool,
      // the collection has more than one index bucket
      // and it contains a significant amount of documents
      res = fillIndexBatch(trx, idx);
    } else {
      res = fillIndexSequential(trx, idx);
    }

    return res;
  } catch (arangodb::basics::Exception const& ex) {
    return ex.code();
  } catch (std::bad_alloc&) {
    return TRI_ERROR_OUT_OF_MEMORY;
  } catch (...) {
    return TRI_ERROR_INTERNAL;
  }
}

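// Example (for illustration): with the constants above, filling an index of
// a collection holding 1,000,000 documents and more than one index bucket
// goes through fillIndexBatch(), while a collection below the 256 * 1024
// document threshold, or with a single bucket, or without an index thread
// pool, falls back to fillIndexSequential().
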
/// @brief saves an index
int TRI_collection_t::saveIndex(arangodb::Index* idx, bool writeMarker) {
  // convert into JSON
  std::shared_ptr<VPackBuilder> builder;
  try {
    builder = idx->toVelocyPack(false);
  } catch (...) {
    LOG(ERR) << "cannot save index definition.";
    return TRI_set_errno(TRI_ERROR_INTERNAL);
  }
  if (builder == nullptr) {
    LOG(ERR) << "cannot save index definition.";
    return TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
  }

  // construct filename
  std::string name("index-" + std::to_string(idx->id()) + ".json");
  std::string filename = arangodb::basics::FileUtils::buildFilename(path(), name);

  VPackSlice const idxSlice = builder->slice();
  // and save
  bool doSync = application_features::ApplicationServer::getFeature<DatabaseFeature>("Database")->forceSyncProperties();
  bool ok = arangodb::basics::VelocyPackHelper::velocyPackToFile(
      filename.c_str(), idxSlice, doSync);

  if (!ok) {
    LOG(ERR) << "cannot save index definition: " << TRI_last_error();

    return TRI_errno();
  }

  if (!writeMarker) {
    return TRI_ERROR_NO_ERROR;
  }

  int res = TRI_ERROR_NO_ERROR;

  try {
    arangodb::wal::CollectionMarker marker(TRI_DF_MARKER_VPACK_CREATE_INDEX, _vocbase->id(), _info.id(), idxSlice);

    arangodb::wal::SlotInfoCopy slotInfo =
        arangodb::wal::LogfileManager::instance()->allocateAndWrite(marker, false);

    if (slotInfo.errorCode != TRI_ERROR_NO_ERROR) {
      THROW_ARANGO_EXCEPTION(slotInfo.errorCode);
    }

    return TRI_ERROR_NO_ERROR;
  } catch (arangodb::basics::Exception const& ex) {
    res = ex.code();
  } catch (...) {
    res = TRI_ERROR_INTERNAL;
  }

  // TODO: what to do here?
  return res;
}

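// Example (for illustration): an index with id 12345 is persisted as
// "index-12345.json" inside the collection's directory, and
// removeIndexFile() below deletes exactly that file again.
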
/// @brief returns a description of all indexes
/// the caller must have read-locked the underlying collection!
std::vector<std::shared_ptr<VPackBuilder>> TRI_collection_t::indexesToVelocyPack(bool withFigures) {
  auto const& indexes = allIndexes();

  std::vector<std::shared_ptr<VPackBuilder>> result;
  result.reserve(indexes.size());

  for (auto const& idx : indexes) {
    auto builder = idx->toVelocyPack(withFigures);

    // shouldn't fail because of reserve
    result.emplace_back(builder);
  }

  return result;
}

/// @brief removes an index file
bool TRI_collection_t::removeIndexFile(TRI_idx_iid_t id) {
  // construct filename
  std::string name("index-" + std::to_string(id) + ".json");
  std::string filename = arangodb::basics::FileUtils::buildFilename(path(), name);

  int res = TRI_UnlinkFile(filename.c_str());

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "cannot remove index definition: " << TRI_last_error();
    return false;
  }

  return true;
}

/// @brief drops an index, including index file removal and replication
bool TRI_collection_t::dropIndex(TRI_idx_iid_t iid, bool writeMarker) {
  if (iid == 0) {
    // invalid index id or primary index
    return true;
  }

  arangodb::Index* found = nullptr;
  {
    arangodb::aql::QueryCache::instance()->invalidate(
        _vocbase, _info.name());
    found = removeIndex(iid);
  }

  if (found != nullptr) {
    bool result = removeIndexFile(found->id());

    delete found;
    found = nullptr;

    if (writeMarker) {
      int res = TRI_ERROR_NO_ERROR;

      try {
        VPackBuilder markerBuilder;
        markerBuilder.openObject();
        markerBuilder.add("id", VPackValue(iid));
        markerBuilder.close();

        arangodb::wal::CollectionMarker marker(TRI_DF_MARKER_VPACK_DROP_INDEX, _vocbase->id(), _info.id(), markerBuilder.slice());

        arangodb::wal::SlotInfoCopy slotInfo =
            arangodb::wal::LogfileManager::instance()->allocateAndWrite(marker, false);

        if (slotInfo.errorCode != TRI_ERROR_NO_ERROR) {
          THROW_ARANGO_EXCEPTION(slotInfo.errorCode);
        }

        return true;
      } catch (arangodb::basics::Exception const& ex) {
        res = ex.code();
      } catch (...) {
        res = TRI_ERROR_INTERNAL;
      }

      LOG(WARN) << "could not save index drop marker in log: " << TRI_errno_string(res);
    }

    // TODO: what to do here?
    return result;
  }

  return false;
}

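// Example (for illustration): dropping index 12345 writes a WAL marker of
// type TRI_DF_MARKER_VPACK_DROP_INDEX whose VelocyPack payload is simply
// { "id": 12345 }, so the drop can be replayed during recovery and picked
// up by replication.
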
/// @brief finds a path based, unique or non-unique index
static arangodb::Index* LookupPathIndexDocumentCollection(
    TRI_collection_t* collection,
    std::vector<std::vector<arangodb::basics::AttributeName>> const& paths,
    arangodb::Index::IndexType type, int sparsity, bool unique,
    bool allowAnyAttributeOrder) {
  for (auto const& idx : collection->allIndexes()) {
    if (idx->type() != type) {
      continue;
    }

    // .........................................................................
    // Now perform checks which are specific to the type of index
    // .........................................................................

    switch (idx->type()) {
      case arangodb::Index::TRI_IDX_TYPE_HASH_INDEX: {
        auto hashIndex = static_cast<arangodb::HashIndex*>(idx);

        if (unique != hashIndex->unique() ||
            (sparsity != -1 && sparsity != (hashIndex->sparse() ? 1 : 0))) {
          continue;
        }
        break;
      }

      case arangodb::Index::TRI_IDX_TYPE_SKIPLIST_INDEX: {
        auto skiplistIndex = static_cast<arangodb::SkiplistIndex*>(idx);

        if (unique != skiplistIndex->unique() ||
            (sparsity != -1 && sparsity != (skiplistIndex->sparse() ? 1 : 0))) {
          continue;
        }
        break;
      }

#ifdef ARANGODB_ENABLE_ROCKSDB
      case arangodb::Index::TRI_IDX_TYPE_ROCKSDB_INDEX: {
        auto rocksDBIndex = static_cast<arangodb::RocksDBIndex*>(idx);

        if (unique != rocksDBIndex->unique() ||
            (sparsity != -1 && sparsity != (rocksDBIndex->sparse() ? 1 : 0))) {
          continue;
        }
        break;
      }
#endif

      default: { continue; }
    }

    // .........................................................................
    // check that the number of fields in the index matches the number of
    // attribute paths
    // .........................................................................

    auto const& idxFields = idx->fields();
    size_t const n = idxFields.size();

    if (n != paths.size()) {
      continue;
    }

    // .........................................................................
    // go through all the attributes and see if they match
    // .........................................................................

    bool found = true;

    if (allowAnyAttributeOrder) {
      // any permutation of attributes is allowed
      for (size_t i = 0; i < n; ++i) {
        found = false;
        size_t fieldSize = idxFields[i].size();

        for (size_t j = 0; j < n; ++j) {
          if (fieldSize == paths[j].size()) {
            bool allEqual = true;
            for (size_t k = 0; k < fieldSize; ++k) {
              // compare the i-th index field with the j-th attribute path
              if (idxFields[i][k] != paths[j][k]) {
                allEqual = false;
                break;
              }
            }
            if (allEqual) {
              found = true;
              break;
            }
          }
        }

        if (!found) {
          break;
        }
      }
    } else {
      // attributes need to be present in a given order
      for (size_t i = 0; i < n; ++i) {
        size_t fieldSize = idxFields[i].size();
        if (fieldSize == paths[i].size()) {
          for (size_t k = 0; k < fieldSize; ++k) {
            if (idxFields[i][k] != paths[i][k]) {
              found = false;
              break;
            }
          }
          if (!found) {
            break;
          }
        } else {
          found = false;
          break;
        }
      }
    }

    // stop if we found a match
    if (found) {
      return idx;
    }
  }

  return nullptr;
}

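// Note (for illustration): the sparsity parameter above is a three-way
// filter, directly visible in the switch cases: -1 matches both sparse and
// non-sparse indexes, 0 matches only non-sparse ones, and 1 matches only
// sparse ones. A lookup with sparsity == -1 on a collection with a sparse
// hash index on ["a"] will therefore return that index, while a lookup with
// sparsity == 0 will not.
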
/// @brief restores a path based index (template)
static int PathBasedIndexFromVelocyPack(
    arangodb::Transaction* trx, TRI_collection_t* document,
    VPackSlice const& definition, TRI_idx_iid_t iid,
    arangodb::Index* (*creator)(arangodb::Transaction*,
                                TRI_collection_t*,
                                std::vector<std::string> const&, TRI_idx_iid_t,
                                bool, bool, bool&),
    arangodb::Index** dst) {
  if (dst != nullptr) {
    *dst = nullptr;
  }

  // extract fields
  VPackSlice fld;
  try {
    fld = ExtractFields(definition, iid);
  } catch (arangodb::basics::Exception const& e) {
    return TRI_set_errno(e.code());
  }
  VPackValueLength fieldCount = fld.length();

  // extract the list of fields
  if (fieldCount < 1) {
    LOG(ERR) << "ignoring index " << iid
             << ", need at least one attribute path";

    return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
  }

  // determine if the index is unique or non-unique
  VPackSlice bv = definition.get("unique");

  if (!bv.isBoolean()) {
    LOG(ERR) << "ignoring index " << iid
             << ", could not determine if unique or non-unique";
    return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
  }

  bool unique = bv.getBoolean();

  // determine sparsity
  bool sparse = false;

  bv = definition.get("sparse");

  if (bv.isBoolean()) {
    sparse = bv.getBoolean();
  } else {
    // no sparsity information given for index
    // now use pre-2.5 defaults: unique hash indexes were sparse, all other
    // indexes were non-sparse
    bool isHashIndex = false;
    VPackSlice typeSlice = definition.get("type");
    if (typeSlice.isString()) {
      isHashIndex = typeSlice.copyString() == "hash";
    }

    if (isHashIndex && unique) {
      sparse = true;
    }
  }

  // Initialize the vector in which we store the fields on which the hashing
  // will be based.
  std::vector<std::string> attributes;
  attributes.reserve(static_cast<size_t>(fieldCount));

  // find fields
  for (auto const& fieldStr : VPackArrayIterator(fld)) {
    attributes.emplace_back(fieldStr.copyString());
  }

  // create the index
  bool created;
  auto idx = creator(trx, document, attributes, iid, sparse, unique, created);

  if (dst != nullptr) {
    *dst = idx;
  }

  if (idx == nullptr) {
    LOG(ERR) << "cannot create index " << iid << " in collection '" << document->_info.name() << "'";
    return TRI_errno();
  }

  return TRI_ERROR_NO_ERROR;
}

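// Example (for illustration): a definition slice this function can restore,
// shown as JSON:
//   { "id": "171", "type": "hash", "fields": ["a", "b.c"],
//     "unique": true, "sparse": false }
// If "sparse" is omitted, the pre-2.5 defaults above apply: a unique hash
// index becomes sparse, everything else non-sparse.
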
/// @brief converts attribute names to lists of names
static int NamesByAttributeNames(
    std::vector<std::string> const& attributes,
    std::vector<std::vector<arangodb::basics::AttributeName>>& names,
    bool isHashIndex) {
  names.reserve(attributes.size());

  // copy attributes, because we may need to sort them
  std::vector<std::string> copy = attributes;

  if (isHashIndex) {
    // for a hash index, an index on ["a", "b"] is the same as an index on ["b", "a"].
    // by sorting the index attributes we can make sure that these index
    // variants are normalized and will be treated the same
    std::sort(copy.begin(), copy.end());
  }

  for (auto const& name : copy) {
    std::vector<arangodb::basics::AttributeName> attrNameList;
    TRI_ParseAttributeString(name, attrNameList);
    TRI_ASSERT(!attrNameList.empty());
    std::vector<std::string> joinedNames;
    TRI_AttributeNamesJoinNested(attrNameList, joinedNames, true);
    names.emplace_back(attrNameList);
  }

  return TRI_ERROR_NO_ERROR;
}

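// Example (for illustration): for a hash index, the attribute lists
// ["b", "a"] and ["a", "b"] are both normalized to ["a", "b"] by the sort
// above, so index lookup and creation agree on one canonical form. An
// attribute string such as "b.c" is parsed into the nested AttributeName
// path b -> c.
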
/// @brief adds a geo index to a collection
static arangodb::Index* CreateGeoIndexDocumentCollection(
    arangodb::Transaction* trx, TRI_collection_t* document,
    std::string const& location, std::string const& latitude,
    std::string const& longitude, bool geoJson, TRI_idx_iid_t iid,
    bool& created) {

  arangodb::Index* idx = nullptr;
  created = false;
  std::unique_ptr<arangodb::GeoIndex2> geoIndex;
  if (!location.empty()) {
    // Use the version with one value
    std::vector<std::string> loc =
        arangodb::basics::StringUtils::split(location, ".");

    // check if we already know the index
    idx = document->lookupGeoIndex1(loc, geoJson);

    if (idx != nullptr) {
      LOG(TRACE) << "geo-index already created for location '" << location << "'";
      return idx;
    }

    if (iid == 0) {
      iid = arangodb::Index::generateId();
    }

    geoIndex.reset(new arangodb::GeoIndex2(
        iid, document,
        std::vector<std::vector<arangodb::basics::AttributeName>>{
            {{location, false}}},
        loc, geoJson));

    LOG(TRACE) << "created geo-index for location '" << location << "'";
  } else if (!longitude.empty() && !latitude.empty()) {
    // Use the version with two values
    std::vector<std::string> lat =
        arangodb::basics::StringUtils::split(latitude, ".");

    std::vector<std::string> lon =
        arangodb::basics::StringUtils::split(longitude, ".");

    // check if we already know the index
    idx = document->lookupGeoIndex2(lat, lon);

    if (idx != nullptr) {
      LOG(TRACE) << "geo-index already created for latitude '" << latitude
                 << "' and longitude '" << longitude << "'";
      return idx;
    }

    if (iid == 0) {
      iid = arangodb::Index::generateId();
    }

    geoIndex.reset(new arangodb::GeoIndex2(
        iid, document,
        std::vector<std::vector<arangodb::basics::AttributeName>>{
            {{latitude, false}}, {{longitude, false}}},
        std::vector<std::vector<std::string>>{lat, lon}));

    LOG(TRACE) << "created geo-index for latitude '" << latitude
               << "' and longitude '" << longitude << "'";
  } else {
    TRI_set_errno(TRI_ERROR_INTERNAL);
    LOG(TRACE) << "expecting either 'location' or 'latitude' and 'longitude'";
    return nullptr;
  }

  idx = static_cast<arangodb::GeoIndex2*>(geoIndex.get());

  if (idx == nullptr) {
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
    return nullptr;
  }

  // initializes the index with all existing documents
  int res = document->fillIndex(trx, idx);

  if (res != TRI_ERROR_NO_ERROR) {
    TRI_set_errno(res);

    return nullptr;
  }

  // and store index
  try {
    document->addIndex(idx);
    geoIndex.release();
  } catch (...) {
    // res is TRI_ERROR_NO_ERROR at this point; report the actual problem
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);

    return nullptr;
  }

  created = true;

  return idx;
}

/// @brief restores an index
static int GeoIndexFromVelocyPack(arangodb::Transaction* trx,
                                  TRI_collection_t* document,
                                  VPackSlice const& definition,
                                  TRI_idx_iid_t iid, arangodb::Index** dst) {
  if (dst != nullptr) {
    *dst = nullptr;
  }

  VPackSlice const type = definition.get("type");

  if (!type.isString()) {
    return TRI_ERROR_INTERNAL;
  }

  std::string typeStr = type.copyString();

  // extract fields
  VPackSlice fld;
  try {
    fld = ExtractFields(definition, iid);
  } catch (arangodb::basics::Exception const& e) {
    return TRI_set_errno(e.code());
  }
  VPackValueLength fieldCount = fld.length();

  arangodb::Index* idx = nullptr;

  // list style
  if (typeStr == "geo1") {
    // extract geo json
    bool geoJson = arangodb::basics::VelocyPackHelper::getBooleanValue(
        definition, "geoJson", false);

    // need just one field
    if (fieldCount == 1) {
      VPackSlice loc = fld.at(0);
      bool created;

      idx = CreateGeoIndexDocumentCollection(trx, document, loc.copyString(),
                                             std::string(), std::string(),
                                             geoJson, iid, created);

      if (dst != nullptr) {
        *dst = idx;
      }

      return idx == nullptr ? TRI_errno() : TRI_ERROR_NO_ERROR;
    } else {
      LOG(ERR) << "ignoring " << typeStr << "-index " << iid
               << ", 'fields' must be a list with exactly 1 entry";

      return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
    }
  }

  // attribute style
  else if (typeStr == "geo2") {
    if (fieldCount == 2) {
      VPackSlice lat = fld.at(0);
      VPackSlice lon = fld.at(1);

      bool created;

      idx = CreateGeoIndexDocumentCollection(trx, document, std::string(),
                                             lat.copyString(), lon.copyString(),
                                             false, iid, created);

      if (dst != nullptr) {
        *dst = idx;
      }

      return idx == nullptr ? TRI_errno() : TRI_ERROR_NO_ERROR;
    } else {
      LOG(ERR) << "ignoring " << typeStr << "-index " << iid
               << ", 'fields' must be a list with exactly 2 entries";

      return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
    }
  } else {
    TRI_ASSERT(false);
  }

  return TRI_ERROR_NO_ERROR;  // satisfy the compiler
}

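// Examples (for illustration) of geo index definitions this function
// restores, shown as JSON:
//   { "type": "geo1", "fields": ["location"], "geoJson": true }
//   { "type": "geo2", "fields": ["latitude", "longitude"] }
// "geo1" indexes a single attribute holding a coordinate pair, "geo2" a
// separate latitude and longitude attribute pair.
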
/// @brief finds a geo index, list style
arangodb::Index* TRI_collection_t::lookupGeoIndex1(
    std::vector<std::string> const& location, bool geoJson) {

  for (auto const& idx : allIndexes()) {
    if (idx->type() == arangodb::Index::TRI_IDX_TYPE_GEO1_INDEX) {
      auto geoIndex = static_cast<arangodb::GeoIndex2*>(idx);

      if (geoIndex->isSame(location, geoJson)) {
        return idx;
      }
    }
  }

  return nullptr;
}

/// @brief finds a geo index, attribute style
arangodb::Index* TRI_collection_t::lookupGeoIndex2(std::vector<std::string> const& latitude,
                                                   std::vector<std::string> const& longitude) {
  for (auto const& idx : allIndexes()) {
    if (idx->type() == arangodb::Index::TRI_IDX_TYPE_GEO2_INDEX) {
      auto geoIndex = static_cast<arangodb::GeoIndex2*>(idx);

      if (geoIndex->isSame(latitude, longitude)) {
        return idx;
      }
    }
  }

  return nullptr;
}

/// @brief ensures that a geo index exists, list style
arangodb::Index* TRI_collection_t::ensureGeoIndex1(
    arangodb::Transaction* trx, TRI_idx_iid_t iid,
    std::string const& location, bool geoJson, bool& created) {

  auto idx =
      CreateGeoIndexDocumentCollection(trx, this, location, std::string(),
                                       std::string(), geoJson, iid, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

/// @brief ensures that a geo index exists, attribute style
arangodb::Index* TRI_collection_t::ensureGeoIndex2(
    arangodb::Transaction* trx, TRI_idx_iid_t iid, std::string const& latitude,
    std::string const& longitude, bool& created) {

  auto idx = CreateGeoIndexDocumentCollection(
      trx, this, std::string(), latitude, longitude, false, iid, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

/// @brief adds a hash index to the collection
static arangodb::Index* CreateHashIndexDocumentCollection(
    arangodb::Transaction* trx, TRI_collection_t* document,
    std::vector<std::string> const& attributes, TRI_idx_iid_t iid, bool sparse,
    bool unique, bool& created) {
  created = false;
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, true);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  // ...........................................................................
  // Attempt to find an existing index which matches the attributes above.
  // If a suitable index is found, return that one. Otherwise we need to
  // create a new one.
  // ...........................................................................

  int sparsity = sparse ? 1 : 0;
  auto idx = LookupPathIndexDocumentCollection(
      document, fields, arangodb::Index::TRI_IDX_TYPE_HASH_INDEX, sparsity,
      unique, false);

  if (idx != nullptr) {
    LOG(TRACE) << "hash-index already created";

    return idx;
  }

  if (iid == 0) {
    iid = arangodb::Index::generateId();
  }

  // create the hash index. we'll provide it with the current number of
  // documents in the collection so the index can do a sensible memory
  // preallocation
  auto hashIndex = std::make_unique<arangodb::HashIndex>(iid, document, fields,
                                                         unique, sparse);
  idx = static_cast<arangodb::Index*>(hashIndex.get());

  // initializes the index with all existing documents
  res = document->fillIndex(trx, idx);

  if (res != TRI_ERROR_NO_ERROR) {
    TRI_set_errno(res);

    return nullptr;
  }

  // store index and return
  try {
    document->addIndex(idx);
    hashIndex.release();
  } catch (...) {
    // res is TRI_ERROR_NO_ERROR at this point; report the actual problem
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);

    return nullptr;
  }

  created = true;

  return idx;
}

/// @brief restores an index
static int HashIndexFromVelocyPack(arangodb::Transaction* trx,
                                   TRI_collection_t* document,
                                   VPackSlice const& definition,
                                   TRI_idx_iid_t iid, arangodb::Index** dst) {
  return PathBasedIndexFromVelocyPack(trx, document, definition, iid,
                                      CreateHashIndexDocumentCollection, dst);
}

/// @brief finds a hash index (unique or non-unique)
arangodb::Index* TRI_collection_t::lookupHashIndex(
    std::vector<std::string> const& attributes, int sparsity, bool unique) {
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, true);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  return LookupPathIndexDocumentCollection(
      this, fields, arangodb::Index::TRI_IDX_TYPE_HASH_INDEX, sparsity,
      unique, true);
}

/// @brief ensures that a hash index exists
arangodb::Index* TRI_collection_t::ensureHashIndex(
    arangodb::Transaction* trx, TRI_idx_iid_t iid,
    std::vector<std::string> const& attributes, bool sparse,
    bool unique, bool& created) {

  auto idx = CreateHashIndexDocumentCollection(trx, this, attributes, iid,
                                               sparse, unique, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

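// Usage sketch (for illustration, assuming a write transaction `trx` that
// already holds this collection):
//   bool created;
//   arangodb::Index* idx =
//       collection->ensureHashIndex(&trx, 0, {"a", "b.c"},
//                                   /*sparse*/ false, /*unique*/ true,
//                                   created);
// Passing iid == 0 lets the collection generate a fresh index id; `created`
// reports whether a new index was built or an equivalent one was reused.
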
/// @brief adds a skiplist index to the collection
static arangodb::Index* CreateSkiplistIndexDocumentCollection(
    arangodb::Transaction* trx, TRI_collection_t* document,
    std::vector<std::string> const& attributes, TRI_idx_iid_t iid, bool sparse,
    bool unique, bool& created) {
  created = false;
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, false);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  // ...........................................................................
  // Attempt to find an existing index which matches the attributes above.
  // If a suitable index is found, return that one. Otherwise we need to
  // create a new one.
  // ...........................................................................

  int sparsity = sparse ? 1 : 0;
  auto idx = LookupPathIndexDocumentCollection(
      document, fields, arangodb::Index::TRI_IDX_TYPE_SKIPLIST_INDEX, sparsity,
      unique, false);

  if (idx != nullptr) {
    LOG(TRACE) << "skiplist-index already created";

    return idx;
  }

  if (iid == 0) {
    iid = arangodb::Index::generateId();
  }

  // Create the skiplist index
  auto skiplistIndex = std::make_unique<arangodb::SkiplistIndex>(
      iid, document, fields, unique, sparse);
  idx = static_cast<arangodb::Index*>(skiplistIndex.get());

  // initializes the index with all existing documents
  res = document->fillIndex(trx, idx);

  if (res != TRI_ERROR_NO_ERROR) {
    TRI_set_errno(res);

    return nullptr;
  }

  // store index and return
  try {
    document->addIndex(idx);
    skiplistIndex.release();
  } catch (...) {
    // res is TRI_ERROR_NO_ERROR at this point; report the actual problem
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);

    return nullptr;
  }

  created = true;

  return idx;
}

/// @brief restores an index
static int SkiplistIndexFromVelocyPack(arangodb::Transaction* trx,
                                       TRI_collection_t* document,
                                       VPackSlice const& definition,
                                       TRI_idx_iid_t iid,
                                       arangodb::Index** dst) {
  return PathBasedIndexFromVelocyPack(trx, document, definition, iid,
                                      CreateSkiplistIndexDocumentCollection,
                                      dst);
}

/// @brief finds a skiplist index (unique or non-unique)
arangodb::Index* TRI_collection_t::lookupSkiplistIndex(
    std::vector<std::string> const& attributes, int sparsity, bool unique) {
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, false);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  return LookupPathIndexDocumentCollection(
      this, fields, arangodb::Index::TRI_IDX_TYPE_SKIPLIST_INDEX, sparsity,
      unique, true);
}

/// @brief ensures that a skiplist index exists
arangodb::Index* TRI_collection_t::ensureSkiplistIndex(
    arangodb::Transaction* trx, TRI_idx_iid_t iid,
    std::vector<std::string> const& attributes, bool sparse,
    bool unique, bool& created) {

  auto idx = CreateSkiplistIndexDocumentCollection(
      trx, this, attributes, iid, sparse, unique, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

/// @brief finds an existing fulltext index, based on the indexed attribute
/// and the minimum word length
static arangodb::Index* LookupFulltextIndexDocumentCollection(
    TRI_collection_t* document, std::string const& attribute,
    int minWordLength) {
  for (auto const& idx : document->allIndexes()) {
    if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
      auto fulltextIndex = static_cast<arangodb::FulltextIndex*>(idx);

      if (fulltextIndex->isSame(attribute, minWordLength)) {
        return idx;
      }
    }
  }

  return nullptr;
}

/// @brief adds a rocksdb index to the collection
static arangodb::Index* CreateRocksDBIndexDocumentCollection(
    arangodb::Transaction* trx, TRI_collection_t* document,
    std::vector<std::string> const& attributes, TRI_idx_iid_t iid, bool sparse,
    bool unique, bool& created) {

#ifdef ARANGODB_ENABLE_ROCKSDB
  created = false;
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, false);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  // ...........................................................................
  // Attempt to find an existing index which matches the attributes above.
  // If a suitable index is found, return that one. Otherwise we need to
  // create a new one.
  // ...........................................................................

  int sparsity = sparse ? 1 : 0;
  auto idx = LookupPathIndexDocumentCollection(
      document, fields, arangodb::Index::TRI_IDX_TYPE_ROCKSDB_INDEX, sparsity,
      unique, false);

  if (idx != nullptr) {
    LOG(TRACE) << "rocksdb-index already created";

    return idx;
  }

  if (iid == 0) {
    iid = arangodb::Index::generateId();
  }

  // Create the index
  auto rocksDBIndex = std::make_unique<arangodb::RocksDBIndex>(
      iid, document, fields, unique, sparse);
  idx = static_cast<arangodb::Index*>(rocksDBIndex.get());

  // initializes the index with all existing documents
  res = document->fillIndex(trx, idx, false);

  if (res != TRI_ERROR_NO_ERROR) {
    TRI_set_errno(res);

    return nullptr;
  }

  auto rocksTransaction = trx->rocksTransaction();
  TRI_ASSERT(rocksTransaction != nullptr);
  auto status = rocksTransaction->Commit();
  if (!status.ok()) {
    // TODO: decide how to recover from a failed commit here
    LOG(WARN) << "could not commit rocksdb transaction when filling index";
  }
  auto t = trx->getInternals();
  delete t->_rocksTransaction;
  t->_rocksTransaction = nullptr;

  // store index and return
  try {
    document->addIndex(idx);
    rocksDBIndex.release();
  } catch (...) {
    // res is TRI_ERROR_NO_ERROR at this point; report the actual problem
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);

    return nullptr;
  }

  created = true;

  return idx;

#else
  TRI_set_errno(TRI_ERROR_NOT_IMPLEMENTED);
  created = false;
  return nullptr;
#endif
}

/// @brief restores an index
#ifdef ARANGODB_ENABLE_ROCKSDB
static int RocksDBIndexFromVelocyPack(arangodb::Transaction* trx,
                                      TRI_collection_t* document,
                                      VPackSlice const& definition,
                                      TRI_idx_iid_t iid,
                                      arangodb::Index** dst) {
  return PathBasedIndexFromVelocyPack(trx, document, definition, iid,
                                      CreateRocksDBIndexDocumentCollection,
                                      dst);
}
#endif

/// @brief finds a rocksdb index (unique or non-unique)
arangodb::Index* TRI_collection_t::lookupRocksDBIndex(
    std::vector<std::string> const& attributes, int sparsity, bool unique) {
  std::vector<std::vector<arangodb::basics::AttributeName>> fields;

  int res = NamesByAttributeNames(attributes, fields, false);

  if (res != TRI_ERROR_NO_ERROR) {
    return nullptr;
  }

  return LookupPathIndexDocumentCollection(
      this, fields, arangodb::Index::TRI_IDX_TYPE_ROCKSDB_INDEX, sparsity,
      unique, true);
}

/// @brief ensures that a RocksDB index exists
arangodb::Index* TRI_collection_t::ensureRocksDBIndex(
    arangodb::Transaction* trx, TRI_idx_iid_t iid,
    std::vector<std::string> const& attributes, bool sparse,
    bool unique, bool& created) {

  auto idx = CreateRocksDBIndexDocumentCollection(
      trx, this, attributes, iid, sparse, unique, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

/// @brief adds a fulltext index to the collection
static arangodb::Index* CreateFulltextIndexDocumentCollection(
    arangodb::Transaction* trx, TRI_collection_t* document,
    std::string const& attribute, int minWordLength, TRI_idx_iid_t iid,
    bool& created) {
  created = false;

  // ...........................................................................
  // Attempt to find an existing index with the same attribute.
  // If a suitable index is found, return that one. Otherwise we need to
  // create a new one.
  // ...........................................................................

  auto idx =
      LookupFulltextIndexDocumentCollection(document, attribute, minWordLength);

  if (idx != nullptr) {
    LOG(TRACE) << "fulltext-index already created";

    return idx;
  }

  if (iid == 0) {
    iid = arangodb::Index::generateId();
  }

  // Create the fulltext index
  auto fulltextIndex = std::make_unique<arangodb::FulltextIndex>(
      iid, document, attribute, minWordLength);
  idx = static_cast<arangodb::Index*>(fulltextIndex.get());

  // initializes the index with all existing documents
  int res = document->fillIndex(trx, idx);

  if (res != TRI_ERROR_NO_ERROR) {
    TRI_set_errno(res);

    return nullptr;
  }

  // store index and return
  try {
    document->addIndex(idx);
    fulltextIndex.release();
  } catch (...) {
    // res is TRI_ERROR_NO_ERROR at this point; report the actual problem
    TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);

    return nullptr;
  }

  created = true;

  return idx;
}

/// @brief restores an index
static int FulltextIndexFromVelocyPack(arangodb::Transaction* trx,
                                       TRI_collection_t* document,
                                       VPackSlice const& definition,
                                       TRI_idx_iid_t iid,
                                       arangodb::Index** dst) {
  if (dst != nullptr) {
    *dst = nullptr;
  }

  // extract fields
  VPackSlice fld;
  try {
    fld = ExtractFields(definition, iid);
  } catch (arangodb::basics::Exception const& e) {
    return TRI_set_errno(e.code());
  }
  VPackValueLength fieldCount = fld.length();

  // extract the list of fields
  if (fieldCount != 1) {
    LOG(ERR) << "ignoring index " << iid
             << ", has an invalid number of attributes";

    return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
  }

  VPackSlice value = fld.at(0);

  if (!value.isString()) {
    return TRI_set_errno(TRI_ERROR_BAD_PARAMETER);
  }

  std::string const attribute = value.copyString();

  int minWordLengthValue =
      arangodb::basics::VelocyPackHelper::getNumericValue<int>(
          definition, "minLength", TRI_FULLTEXT_MIN_WORD_LENGTH_DEFAULT);

  // create the index
  auto idx = LookupFulltextIndexDocumentCollection(document, attribute,
                                                   minWordLengthValue);

  if (idx == nullptr) {
    bool created;
    idx = CreateFulltextIndexDocumentCollection(
        trx, document, attribute, minWordLengthValue, iid, created);
  }

  if (dst != nullptr) {
    *dst = idx;
  }

  if (idx == nullptr) {
    LOG(ERR) << "cannot create fulltext index " << iid;
    return TRI_errno();
  }

  return TRI_ERROR_NO_ERROR;
}

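// Example (for illustration) of a fulltext index definition this function
// restores, shown as JSON:
//   { "type": "fulltext", "fields": ["text"], "minLength": 3 }
// If "minLength" is missing, TRI_FULLTEXT_MIN_WORD_LENGTH_DEFAULT is used.
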
/// @brief finds a fulltext index (unique or non-unique)
arangodb::Index* TRI_collection_t::lookupFulltextIndex(std::string const& attribute,
                                                       int minWordLength) {
  return LookupFulltextIndexDocumentCollection(this, attribute,
                                               minWordLength);
}

/// @brief ensures that a fulltext index exists
arangodb::Index* TRI_collection_t::ensureFulltextIndex(
    arangodb::Transaction* trx, TRI_idx_iid_t iid,
    std::string const& attribute, int minWordLength,
    bool& created) {
  auto idx = CreateFulltextIndexDocumentCollection(trx, this, attribute,
                                                   minWordLength, iid, created);

  if (idx != nullptr) {
    if (created) {
      arangodb::aql::QueryCache::instance()->invalidate(
          _vocbase, _info.name());
      int res = saveIndex(idx, true);

      if (res != TRI_ERROR_NO_ERROR) {
        idx = nullptr;
      }
    }
  }

  return idx;
}

/// @brief create an index, based on a VelocyPack description
int TRI_collection_t::indexFromVelocyPack(arangodb::Transaction* trx,
                                          VPackSlice const& slice,
                                          arangodb::Index** idx) {

  if (idx != nullptr) {
    *idx = nullptr;
  }

  if (!slice.isObject()) {
    return TRI_ERROR_INTERNAL;
  }

  // extract the type
  VPackSlice type = slice.get("type");

  if (!type.isString()) {
    return TRI_ERROR_INTERNAL;
  }
  std::string typeStr = type.copyString();

  // extract the index identifier
  VPackSlice iis = slice.get("id");

  TRI_idx_iid_t iid;
  if (iis.isNumber()) {
    iid = iis.getNumericValue<TRI_idx_iid_t>();
  } else if (iis.isString()) {
    std::string tmp = iis.copyString();
    iid = static_cast<TRI_idx_iid_t>(StringUtils::uint64(tmp));
  } else {
    LOG(ERR) << "ignoring index, index identifier could not be located";

    return TRI_ERROR_INTERNAL;
  }

  TRI_UpdateTickServer(iid);

  if (typeStr == "geo1" || typeStr == "geo2") {
    return GeoIndexFromVelocyPack(trx, this, slice, iid, idx);
  }

  if (typeStr == "hash") {
    return HashIndexFromVelocyPack(trx, this, slice, iid, idx);
  }

  if (typeStr == "skiplist") {
    return SkiplistIndexFromVelocyPack(trx, this, slice, iid, idx);
  }

  // ...........................................................................
  // ROCKSDB INDEX
  // ...........................................................................
  if (typeStr == "persistent" || typeStr == "rocksdb") {
#ifdef ARANGODB_ENABLE_ROCKSDB
    return RocksDBIndexFromVelocyPack(trx, this, slice, iid, idx);
#else
    LOG(ERR) << "index type not supported in this build";
    return TRI_ERROR_NOT_IMPLEMENTED;
#endif
  }

  if (typeStr == "fulltext") {
    return FulltextIndexFromVelocyPack(trx, this, slice, iid, idx);
  }

  if (typeStr == "edge") {
    // we should never get here, as users cannot create their own edge indexes
    LOG(ERR) << "logic error. there should never be a JSON file describing an "
                "edges index";
    return TRI_ERROR_INTERNAL;
  }

  // default:
  LOG(WARN) << "index type '" << typeStr
            << "' is not supported in this version of ArangoDB and is ignored";

  return TRI_ERROR_NOT_IMPLEMENTED;
}

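// Dispatch summary (for illustration): the "type" attribute selects the
// restore function above: "geo1"/"geo2", "hash", "skiplist",
// "persistent"/"rocksdb" (only in RocksDB-enabled builds), and "fulltext".
// The "id" attribute may be numeric or a numeric string, i.e. both
// { "id": 171, ... } and { "id": "171", ... } are accepted.
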
/// @brief state during opening of a collection
struct open_iterator_state_t {
  TRI_collection_t* _document;
  TRI_voc_tid_t _tid;
  TRI_voc_fid_t _fid;
  std::unordered_map<TRI_voc_fid_t, DatafileStatisticsContainer*> _stats;
  DatafileStatisticsContainer* _dfi;
  TRI_vocbase_t* _vocbase;
  arangodb::Transaction* _trx;
  uint64_t _deletions;
  uint64_t _documents;
  int64_t _initialCount;

  open_iterator_state_t(TRI_collection_t* document,
                        TRI_vocbase_t* vocbase)
      : _document(document),
        _tid(0),
        _fid(0),
        _stats(),
        _dfi(nullptr),
        _vocbase(vocbase),
        _trx(nullptr),
        _deletions(0),
        _documents(0),
        _initialCount(-1) {}

  ~open_iterator_state_t() {
    for (auto& it : _stats) {
      delete it.second;
    }
  }
};

/// @brief find a statistics container for a given file id
static DatafileStatisticsContainer* FindDatafileStats(
    open_iterator_state_t* state, TRI_voc_fid_t fid) {
  auto it = state->_stats.find(fid);

  if (it != state->_stats.end()) {
    return (*it).second;
  }

  auto stats = std::make_unique<DatafileStatisticsContainer>();

  state->_stats.emplace(fid, stats.get());
  auto p = stats.release();

  return p;
}

/// @brief process a document (or edge) marker when opening a collection
static int OpenIteratorHandleDocumentMarker(TRI_df_marker_t const* marker,
                                            TRI_datafile_t* datafile,
                                            open_iterator_state_t* state) {
  auto const fid = datafile->_fid;
  TRI_collection_t* document = state->_document;
  arangodb::Transaction* trx = state->_trx;

  VPackSlice const slice(reinterpret_cast<char const*>(marker) + DatafileHelper::VPackOffset(TRI_DF_MARKER_VPACK_DOCUMENT));
  VPackSlice keySlice;
  TRI_voc_rid_t revisionId;

  Transaction::extractKeyAndRevFromDocument(slice, keySlice, revisionId);

  document->setLastRevision(revisionId, false);
  VPackValueLength length;
  char const* p = keySlice.getString(length);
  document->_keyGenerator->track(p, length);

  ++state->_documents;

  if (state->_fid != fid) {
    // update the state
    state->_fid = fid; // when we're here, we're looking at a datafile
    state->_dfi = FindDatafileStats(state, fid);
  }

  auto primaryIndex = document->primaryIndex();

  // no primary index lock required here because we are the only ones reading
  // from the index ATM
  auto found = primaryIndex->lookupKey(trx, keySlice);

  // it is a new entry
  if (found == nullptr) {
    TRI_doc_mptr_t* header = document->_masterPointers.request();

    if (header == nullptr) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }

    header->setFid(fid, false);
    header->setHash(primaryIndex->calculateHash(trx, keySlice));
    header->setVPackFromMarker(marker);

    // insert into primary index
    void const* result = nullptr;
    int res = primaryIndex->insertKey(trx, header, &result);

    if (res != TRI_ERROR_NO_ERROR) {
      document->_masterPointers.release(header);
      LOG(ERR) << "inserting document into primary index failed with error: " << TRI_errno_string(res);

      return res;
    }

    ++document->_numberDocuments;

    // update the datafile info
    state->_dfi->numberAlive++;
    state->_dfi->sizeAlive += DatafileHelper::AlignedMarkerSize<int64_t>(marker);
  }

  // it is an update of an existing document: the marker seen later in the
  // datafiles wins
  else {
    // save the old data
    TRI_doc_mptr_t oldData = *found;

    // update the header info
    found->setFid(fid, false); // when we're here, we're looking at a datafile
    found->setVPackFromMarker(marker);

    // update the datafile info
    DatafileStatisticsContainer* dfi;
    if (oldData.getFid() == state->_fid) {
      dfi = state->_dfi;
    } else {
      dfi = FindDatafileStats(state, oldData.getFid());
    }

    if (oldData.vpack() != nullptr) {
      int64_t size = static_cast<int64_t>(oldData.markerSize());

      dfi->numberAlive--;
      dfi->sizeAlive -= DatafileHelper::AlignedSize<int64_t>(size);
      dfi->numberDead++;
      dfi->sizeDead += DatafileHelper::AlignedSize<int64_t>(size);
    }

    state->_dfi->numberAlive++;
    state->_dfi->sizeAlive += DatafileHelper::AlignedMarkerSize<int64_t>(marker);
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief process a deletion marker when opening a collection
static int OpenIteratorHandleDeletionMarker(TRI_df_marker_t const* marker,
                                            TRI_datafile_t* datafile,
                                            open_iterator_state_t* state) {
  TRI_collection_t* document = state->_document;
  arangodb::Transaction* trx = state->_trx;

  VPackSlice const slice(reinterpret_cast<char const*>(marker) + DatafileHelper::VPackOffset(TRI_DF_MARKER_VPACK_REMOVE));

  VPackSlice keySlice;
  TRI_voc_rid_t revisionId;

  Transaction::extractKeyAndRevFromDocument(slice, keySlice, revisionId);

  document->setLastRevision(revisionId, false);
  VPackValueLength length;
  char const* p = keySlice.getString(length);
  document->_keyGenerator->track(p, length);

  ++state->_deletions;

  if (state->_fid != datafile->_fid) {
    // update the state
    state->_fid = datafile->_fid;
    state->_dfi = FindDatafileStats(state, datafile->_fid);
  }

  // no primary index lock required here because we are the only ones reading
  // from the index ATM
  auto primaryIndex = document->primaryIndex();
  TRI_doc_mptr_t* found = primaryIndex->lookupKey(trx, keySlice);

  // it is a new entry, so we missed the create
  if (found == nullptr) {
    // update the datafile info
    state->_dfi->numberDeletions++;
  }

  // it is a real delete
  else {
    // update the datafile info
    DatafileStatisticsContainer* dfi;

    if (found->getFid() == state->_fid) {
      dfi = state->_dfi;
    } else {
      dfi = FindDatafileStats(state, found->getFid());
    }

    TRI_ASSERT(found->vpack() != nullptr);

    int64_t size = DatafileHelper::AlignedSize<int64_t>(found->markerSize());

    dfi->numberAlive--;
    dfi->sizeAlive -= DatafileHelper::AlignedSize<int64_t>(size);
    dfi->numberDead++;
    dfi->sizeDead += DatafileHelper::AlignedSize<int64_t>(size);
    state->_dfi->numberDeletions++;

    document->deletePrimaryIndex(trx, found);
    --document->_numberDocuments;

    // free the header
    document->_masterPointers.release(found);
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief iterator for open
static bool OpenIterator(TRI_df_marker_t const* marker, void* data,
                         TRI_datafile_t* datafile) {
  TRI_collection_t* document =
      static_cast<open_iterator_state_t*>(data)->_document;
  TRI_voc_tick_t const tick = marker->getTick();
  TRI_df_marker_type_t const type = marker->getType();

  int res;

  if (type == TRI_DF_MARKER_VPACK_DOCUMENT) {
    res = OpenIteratorHandleDocumentMarker(marker, datafile,
                                           static_cast<open_iterator_state_t*>(data));

    if (datafile->_dataMin == 0) {
      datafile->_dataMin = tick;
    }

    if (tick > datafile->_dataMax) {
      datafile->_dataMax = tick;
    }
  } else if (type == TRI_DF_MARKER_VPACK_REMOVE) {
    res = OpenIteratorHandleDeletionMarker(marker, datafile,
                                           static_cast<open_iterator_state_t*>(data));
  } else {
    if (type == TRI_DF_MARKER_HEADER) {
      // ensure there is a datafile info entry for each datafile of the
      // collection
      FindDatafileStats(static_cast<open_iterator_state_t*>(data), datafile->_fid);
    }

    LOG(TRACE) << "skipping marker type " << TRI_NameMarkerDatafile(marker);
    res = TRI_ERROR_NO_ERROR;
  }

  if (datafile->_tickMin == 0) {
    datafile->_tickMin = tick;
  }

  if (tick > datafile->_tickMax) {
    datafile->_tickMax = tick;
  }

  if (tick > document->_tickMax) {
    if (type != TRI_DF_MARKER_HEADER &&
        type != TRI_DF_MARKER_FOOTER &&
        type != TRI_DF_MARKER_COL_HEADER &&
        type != TRI_DF_MARKER_PROLOGUE) {
      document->_tickMax = tick;
    }
  }

  return (res == TRI_ERROR_NO_ERROR);
}

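// Replay note (for illustration): datafiles are iterated in order, so for a
// key that appears multiple times the last document marker wins, and a later
// TRI_DF_MARKER_VPACK_REMOVE takes the entry out of the primary index again.
// The alive/dead statistics gathered per datafile along the way are handed
// to the collection's datafile statistics afterwards.
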
/// @brief creates the initial indexes for the collection
int TRI_collection_t::createInitialIndexes() {
  // create primary index
  std::unique_ptr<arangodb::Index> primaryIndex(
      new arangodb::PrimaryIndex(this));

  try {
    addIndex(primaryIndex.get());
    primaryIndex.release();
  } catch (...) {
    return TRI_ERROR_OUT_OF_MEMORY;
  }

  // create edges index
  if (_info.type() == TRI_COL_TYPE_EDGE) {
    TRI_idx_iid_t iid = _info.id();
    if (_info.planId() > 0) {
      iid = _info.planId();
    }

    try {
      std::unique_ptr<arangodb::Index> edgeIndex(
          new arangodb::EdgeIndex(iid, this));

      addIndex(edgeIndex.get());
      edgeIndex.release();
    } catch (...) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief iterate all markers of the collection
static int IterateMarkersCollection(arangodb::Transaction* trx,
                                    TRI_collection_t* collection) {
  TRI_collection_t* document = collection;

  // initialize state for iteration
  open_iterator_state_t openState(document, collection->_vocbase);
  // the marker handlers read the transaction from the state object
  openState._trx = trx;

  if (collection->_info.initialCount() != -1) {
    auto primaryIndex = document->primaryIndex();

    int res = primaryIndex->resize(
        trx, static_cast<size_t>(collection->_info.initialCount() * 1.1));

    if (res != TRI_ERROR_NO_ERROR) {
      return res;
    }

    openState._initialCount = collection->_info.initialCount();
  }

  // read all documents and fill primary index
  auto cb = [&openState](TRI_df_marker_t const* marker, TRI_datafile_t* datafile) -> bool {
    return OpenIterator(marker, &openState, datafile);
  };

  collection->iterateDatafiles(cb);

  LOG(TRACE) << "found " << openState._documents << " document markers, " << openState._deletions << " deletion markers for collection '" << collection->_info.name() << "'";

  // update the real statistics for the collection
  try {
    for (auto& it : openState._stats) {
      document->_datafileStatistics.create(it.first, *(it.second));
    }
  } catch (basics::Exception const& ex) {
    return ex.code();
  } catch (...) {
    return TRI_ERROR_INTERNAL;
  }

  return TRI_ERROR_NO_ERROR;
}

/// @brief creates a new collection
TRI_collection_t* TRI_collection_t::create(
    TRI_vocbase_t* vocbase, VocbaseCollectionInfo& parameters,
    TRI_voc_cid_t cid) {
  if (cid > 0) {
    TRI_UpdateTickServer(cid);
  } else {
    cid = TRI_NewTickServer();
  }

  parameters.setCollectionId(cid);

  auto collection = std::make_unique<TRI_collection_t>(vocbase, parameters);

  // create document collection
  int res = collection->createInitialIndexes();

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "cannot initialize collection";
    return nullptr;
  }

  StorageEngine* engine = EngineSelectorFeature::ENGINE;
  std::string const path = engine->createCollection(vocbase, cid, parameters);
  collection->setPath(path);

  return collection.release();
}

/// @brief opens an existing collection
TRI_collection_t* TRI_collection_t::open(TRI_vocbase_t* vocbase,
                                         TRI_vocbase_col_t* col,
                                         bool ignoreErrors) {

  std::string const path = col->path();

  if (!TRI_IsDirectory(path.c_str())) {
    LOG(ERR) << "cannot open '" << path << "', not a directory or not found";
    THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_DATADIR_INVALID);
  }

  // read parameters, no need to lock as we are opening the collection
  VocbaseCollectionInfo parameters =
      VocbaseCollectionInfo::fromFile(path, vocbase, "", true); // name will be set later on

  // first open the document collection
  auto collection = std::make_unique<TRI_collection_t>(vocbase, parameters);

  double start = TRI_microtime();
  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "open-document-collection { collection: " << vocbase->name() << "/"
      << col->name() << " }";

  collection->_path = col->path();
  int res = collection->open(ignoreErrors);

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "cannot open document collection from path '" << col->path() << "'";

    return nullptr;
  }

  // create document collection
  res = collection->createInitialIndexes();

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "cannot initialize document collection";
    return nullptr;
  }

  arangodb::SingleCollectionTransaction trx(
      arangodb::StandaloneTransactionContext::Create(vocbase),
      collection->_info.id(), TRI_TRANSACTION_WRITE);

  // build the primary index
  res = TRI_ERROR_INTERNAL;

  try {
    double start = TRI_microtime();

    LOG_TOPIC(TRACE, Logger::PERFORMANCE)
        << "iterate-markers { collection: " << vocbase->name() << "/"
        << collection->_info.name() << " }";

    // iterate over all markers of the collection
    res = IterateMarkersCollection(&trx, collection.get());

    LOG_TOPIC(TRACE, Logger::PERFORMANCE)
        << "[timer] " << Logger::FIXED(TRI_microtime() - start)
        << " s, iterate-markers { collection: " << vocbase->name() << "/"
        << collection->_info.name() << " }";
  } catch (arangodb::basics::Exception const& ex) {
    res = ex.code();
  } catch (std::bad_alloc const&) {
    res = TRI_ERROR_OUT_OF_MEMORY;
  } catch (...) {
    res = TRI_ERROR_INTERNAL;
  }

  if (res != TRI_ERROR_NO_ERROR) {
    LOG(ERR) << "cannot iterate data of document collection";
    TRI_set_errno(res);

    return nullptr;
  }

  // build the indexes meta-data, but do not fill the indexes yet
  {
    auto old = collection->useSecondaryIndexes();

    // turn filling of secondary indexes off. we're now only interested in
    // getting the indexes' definition. we'll fill them below ourselves.
    collection->useSecondaryIndexes(false);

    try {
      collection->detectIndexes(&trx);
      collection->useSecondaryIndexes(old);
    } catch (...) {
      collection->useSecondaryIndexes(old);
      LOG(ERR) << "cannot initialize collection indexes";
      return nullptr;
    }
  }

  if (!arangodb::wal::LogfileManager::instance()->isInRecovery()) {
    // build the index structures, and fill the indexes
    collection->fillIndexes(&trx, col);
  }

  LOG_TOPIC(TRACE, Logger::PERFORMANCE)
      << "[timer] " << Logger::FIXED(TRI_microtime() - start)
      << " s, open-document-collection { collection: " << vocbase->name() << "/"
      << collection->_info.name() << " }";

  return collection.release();
}

/// @brief closes an open collection
int TRI_collection_t::unload(bool updateStats) {
  auto primaryIndex = this->primaryIndex();
  auto idxSize = primaryIndex->size();

  if (!_info.deleted() &&
      _info.initialCount() != static_cast<int64_t>(idxSize)) {
    _info.updateCount(idxSize);

    bool doSync = application_features::ApplicationServer::getFeature<DatabaseFeature>("Database")->forceSyncProperties();
    // Ignore the error?
    _info.saveToFile(path(), doSync);
  }

  // closes all open compactors, journals, datafiles
  close();
  return TRI_ERROR_NO_ERROR;
}