From 11850995019571f4d1cafb080623aa137669404a Mon Sep 17 00:00:00 2001 From: Frank Celler Date: Fri, 24 Mar 2017 14:53:18 +0100 Subject: [PATCH 1/2] raise limit fd as much as possible, warn if too small --- CHANGELOG | 2 + arangod/RestServer/FileDescriptorsFeature.cpp | 126 ++++++++++-------- arangod/RestServer/FileDescriptorsFeature.h | 3 + lib/Logger/LogTopic.cpp | 2 +- scripts/unittest | 2 +- 5 files changed, 75 insertions(+), 60 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c0f6c34930..9b8dfb4621 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -163,6 +163,8 @@ v3.1.16 (2017-XX-XX) * fixed issue #2392 +* try to raise file descriptors to at least 8192, warn otherwise + * ui - aql editor improvements + updated ace editor version (memory leak) * fixed lost HTTP requests diff --git a/arangod/RestServer/FileDescriptorsFeature.cpp b/arangod/RestServer/FileDescriptorsFeature.cpp index dd1f2fe8b4..2e4e23c2cc 100644 --- a/arangod/RestServer/FileDescriptorsFeature.cpp +++ b/arangod/RestServer/FileDescriptorsFeature.cpp @@ -33,9 +33,11 @@ using namespace arangodb::application_features; using namespace arangodb::basics; using namespace arangodb::options; +uint64_t const FileDescriptorsFeature::RECOMMENDED = 8192; + FileDescriptorsFeature::FileDescriptorsFeature( application_features::ApplicationServer* server) - : ApplicationFeature(server, "FileDescriptors"), _descriptorsMinimum(1024) { + : ApplicationFeature(server, "FileDescriptors"), _descriptorsMinimum(0) { setOptional(false); requiresElevatedPrivileges(false); startsAfter("Logger"); @@ -76,76 +78,84 @@ void FileDescriptorsFeature::start() { << StringifyLimitValue(rlim.rlim_max) << ", soft limit is " << StringifyLimitValue(rlim.rlim_cur); } + + if (rlim.rlim_cur < RECOMMENDED) { + LOG_TOPIC(WARN, arangodb::Logger::SYSCALL) + << "file-descriptors limit is too low, currently " + << StringifyLimitValue(rlim.rlim_cur) << ", raise to at least " + << RECOMMENDED; + } #endif } void FileDescriptorsFeature::adjustFileDescriptors() { #ifdef TRI_HAVE_GETRLIMIT - if (0 < _descriptorsMinimum) { - struct rlimit rlim; - int res = getrlimit(RLIMIT_NOFILE, &rlim); + struct rlimit rlim; + int res = getrlimit(RLIMIT_NOFILE, &rlim); - if (res != 0) { - LOG_TOPIC(FATAL, arangodb::Logger::FIXME) - << "cannot get the file descriptor limit: " << strerror(errno); + if (res != 0) { + LOG_TOPIC(FATAL, arangodb::Logger::SYSCALL) + << "cannot get the file descriptor limit: " << strerror(errno); + FATAL_ERROR_EXIT(); + } + + LOG_TOPIC(DEBUG, arangodb::Logger::SYSCALL) + << "file-descriptors (nofiles) hard limit is " + << StringifyLimitValue(rlim.rlim_max) << ", soft limit is " + << StringifyLimitValue(rlim.rlim_cur); + + uint64_t recommended = RECOMMENDED; + uint64_t minimum = _descriptorsMinimum; + + if (recommended < minimum) { + recommended = minimum; + } + + if (rlim.rlim_max < recommended) { + LOG_TOPIC(DEBUG, arangodb::Logger::SYSCALL) + << "hard limit " << rlim.rlim_max << " is too small, trying to raise"; + + rlim.rlim_max = recommended; + rlim.rlim_cur = recommended; + + res = setrlimit(RLIMIT_NOFILE, &rlim); + + if (0 < minimum && minimum < recommended && res < 0) { + rlim.rlim_max = minimum; + rlim.rlim_cur = minimum; + + res = setrlimit(RLIMIT_NOFILE, &rlim); + } + + if (0 < minimum && res < 0) { + LOG_TOPIC(FATAL, arangodb::Logger::SYSCALL) + << "cannot raise the file descriptor limit to " << minimum << ": " + << strerror(errno); FATAL_ERROR_EXIT(); } + } else if (rlim.rlim_cur < recommended) { + LOG_TOPIC(DEBUG, arangodb::Logger::SYSCALL) + << "soft limit " << rlim.rlim_cur << " is too small, trying to raise"; - LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) - << "file-descriptors (nofiles) hard limit is " - << StringifyLimitValue(rlim.rlim_max) << ", soft limit is " - << StringifyLimitValue(rlim.rlim_cur); - - bool changed = false; - - if (rlim.rlim_max < _descriptorsMinimum) { - LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) - << "hard limit " << rlim.rlim_max << " is too small, trying to raise"; - - rlim.rlim_max = _descriptorsMinimum; - rlim.rlim_cur = _descriptorsMinimum; - - res = setrlimit(RLIMIT_NOFILE, &rlim); - - if (res < 0) { - LOG_TOPIC(FATAL, arangodb::Logger::FIXME) - << "cannot raise the file descriptor limit to " - << _descriptorsMinimum << ": " << strerror(errno); - FATAL_ERROR_EXIT(); - } - - changed = true; - } else if (rlim.rlim_cur < _descriptorsMinimum) { - LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) - << "soft limit " << rlim.rlim_cur << " is too small, trying to raise"; - - rlim.rlim_cur = _descriptorsMinimum; - - res = setrlimit(RLIMIT_NOFILE, &rlim); - - if (res < 0) { - LOG_TOPIC(FATAL, arangodb::Logger::FIXME) - << "cannot raise the file descriptor limit to " - << _descriptorsMinimum << ": " << strerror(errno); - FATAL_ERROR_EXIT(); - } - - changed = true; + if (recommended < rlim.rlim_max) { + recommended = rlim.rlim_max; } - if (changed) { - res = getrlimit(RLIMIT_NOFILE, &rlim); + rlim.rlim_cur = recommended; - if (res != 0) { - LOG_TOPIC(FATAL, arangodb::Logger::SYSCALL) - << "cannot get the file descriptor limit: " << strerror(errno); - FATAL_ERROR_EXIT(); - } + res = setrlimit(RLIMIT_NOFILE, &rlim); - LOG_TOPIC(INFO, arangodb::Logger::SYSCALL) - << "file-descriptors (nofiles) new hard limit is " - << StringifyLimitValue(rlim.rlim_max) << ", new soft limit is " - << StringifyLimitValue(rlim.rlim_cur); + if (0 < minimum && minimum < recommended && res < 0) { + rlim.rlim_cur = minimum; + + res = setrlimit(RLIMIT_NOFILE, &rlim); + } + + if (0 < minimum && res < 0) { + LOG_TOPIC(FATAL, arangodb::Logger::SYSCALL) + << "cannot raise the file descriptor limit to " << minimum << ": " + << strerror(errno); + FATAL_ERROR_EXIT(); } } #endif diff --git a/arangod/RestServer/FileDescriptorsFeature.h b/arangod/RestServer/FileDescriptorsFeature.h index 9f8e8dd3ee..a25864f21c 100644 --- a/arangod/RestServer/FileDescriptorsFeature.h +++ b/arangod/RestServer/FileDescriptorsFeature.h @@ -28,6 +28,9 @@ namespace arangodb { class FileDescriptorsFeature : public application_features::ApplicationFeature { +public: + static uint64_t const RECOMMENDED; + public: explicit FileDescriptorsFeature(application_features::ApplicationServer*); diff --git a/lib/Logger/LogTopic.cpp b/lib/Logger/LogTopic.cpp index 6acce3cc61..782d5ccb20 100644 --- a/lib/Logger/LogTopic.cpp +++ b/lib/Logger/LogTopic.cpp @@ -61,7 +61,7 @@ LogTopic Logger::REQUESTS("requests", LogLevel::FATAL); // suppress LogTopic Logger::SSL("ssl", LogLevel::WARN); LogTopic Logger::STARTUP("startup", LogLevel::INFO); LogTopic Logger::SUPERVISION("supervision", LogLevel::INFO); -LogTopic Logger::SYSCALL("syscall", LogLevel::WARN); +LogTopic Logger::SYSCALL("syscall", LogLevel::INFO); LogTopic Logger::THREADS("threads", LogLevel::WARN); LogTopic Logger::TRANSACTIONS("trx", LogLevel::WARN); LogTopic Logger::V8("v8", LogLevel::WARN); diff --git a/scripts/unittest b/scripts/unittest index 6d688dedd1..ece7f4bf55 100755 --- a/scripts/unittest +++ b/scripts/unittest @@ -10,7 +10,7 @@ else PS='/' fi; -ulimit -n 2048 +ulimit -n 8192 export PORT=`expr 1024 + $RANDOM` From 2c54a6ac1e8b952cf71c251c6471e3794608b728 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Fri, 24 Mar 2017 17:23:32 +0100 Subject: [PATCH 2/2] added some basic transaction handling --- arangod/MMFiles/MMFilesCollection.cpp | 8 +- arangod/MMFiles/MMFilesCollection.h | 3 +- arangod/RocksDBEngine/RocksDBCollection.cpp | 150 ++++++++++++++++-- arangod/RocksDBEngine/RocksDBCollection.h | 10 +- arangod/RocksDBEngine/RocksDBEdgeIndex.cpp | 4 +- arangod/RocksDBEngine/RocksDBEngine.cpp | 19 ++- arangod/RocksDBEngine/RocksDBIndexFactory.cpp | 5 + arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp | 22 ++- arangod/RocksDBEngine/RocksDBPrimaryIndex.h | 9 ++ .../RocksDBTransactionCollection.cpp | 40 +++-- .../RocksDBTransactionCollection.h | 3 +- .../RocksDBEngine/RocksDBTransactionState.cpp | 31 +++- arangod/StorageEngine/TransactionState.cpp | 5 + arangod/Transaction/Methods.cpp | 2 + arangod/VocBase/LogicalCollection.cpp | 7 - arangod/VocBase/LogicalCollection.h | 4 - arangod/VocBase/PhysicalCollection.h | 2 - 17 files changed, 256 insertions(+), 68 deletions(-) diff --git a/arangod/MMFiles/MMFilesCollection.cpp b/arangod/MMFiles/MMFilesCollection.cpp index a95b1b316a..96d0b14db6 100644 --- a/arangod/MMFiles/MMFilesCollection.cpp +++ b/arangod/MMFiles/MMFilesCollection.cpp @@ -23,7 +23,6 @@ #include "MMFilesCollection.h" #include "Aql/PlanCache.h" -#include "Aql/QueryCache.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/FileUtils.h" #include "Basics/PerformanceLogScope.h" @@ -1370,6 +1369,9 @@ uint64_t MMFilesCollection::numberDocuments() const { } void MMFilesCollection::sizeHint(transaction::Methods* trx, int64_t hint) { + if (hint <= 0) { + return; + } primaryIndex()->resize(trx, static_cast(hint * 1.1)); } @@ -1762,7 +1764,7 @@ int MMFilesCollection::iterateMarkersOnLoad(transaction::Methods* trx) { if (_initialCount != -1) { _revisionsCache.sizeHint(_initialCount); - _logicalCollection->sizeHint(trx, _initialCount); + sizeHint(trx, _initialCount); openState._initialCount = _initialCount; } @@ -1994,8 +1996,6 @@ std::shared_ptr MMFilesCollection::createIndex(transaction::Methods* trx, return idx; } - - /// @brief Persist an index information to file int MMFilesCollection::saveIndex(transaction::Methods* trx, std::shared_ptr idx) { TRI_ASSERT(!ServerState::instance()->isCoordinator()); diff --git a/arangod/MMFiles/MMFilesCollection.h b/arangod/MMFiles/MMFilesCollection.h index fea0a65207..3e7e67c1ec 100644 --- a/arangod/MMFiles/MMFilesCollection.h +++ b/arangod/MMFiles/MMFilesCollection.h @@ -195,8 +195,6 @@ class MMFilesCollection final : public PhysicalCollection { uint64_t numberDocuments() const override; - void sizeHint(transaction::Methods* trx, int64_t hint) override; - /// @brief report extra memory used by indexes etc. size_t memory() const override; @@ -379,6 +377,7 @@ class MMFilesCollection final : public PhysicalCollection { void removeRevision(TRI_voc_rid_t revisionId, bool updateStats); private: + void sizeHint(transaction::Methods* trx, int64_t hint); /// @brief creates the initial indexes for the collection void createInitialIndexes(); diff --git a/arangod/RocksDBEngine/RocksDBCollection.cpp b/arangod/RocksDBEngine/RocksDBCollection.cpp index 54b150b9db..0649358661 100644 --- a/arangod/RocksDBEngine/RocksDBCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBCollection.cpp @@ -21,11 +21,14 @@ /// @author Jan-Christoph Uhde //////////////////////////////////////////////////////////////////////////////// +#include "RocksDBCollection.h" #include "Basics/Result.h" +#include "Aql/PlanCache.h" #include "Basics/VelocyPackHelper.h" #include "Indexes/Index.h" #include "Indexes/IndexIterator.h" -#include "RocksDBCollection.h" +#include "RestServer/DatabaseFeature.h" +#include "RocksDBEngine/RocksDBPrimaryIndex.h" #include "StorageEngine/EngineSelectorFeature.h" #include "StorageEngine/StorageEngine.h" #include "VocBase/LogicalCollection.h" @@ -104,21 +107,17 @@ void RocksDBCollection::getPropertiesVPackCoordinator(velocypack::Builder& resul /// @brief closes an open collection int RocksDBCollection::close() { THROW_ARANGO_NOT_YET_IMPLEMENTED(); - return 0; + return TRI_ERROR_NO_ERROR; } uint64_t RocksDBCollection::numberDocuments() const { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + // TODO return 0; } -void RocksDBCollection::sizeHint(transaction::Methods* trx, int64_t hint) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); -} - /// @brief report extra memory used by indexes etc. size_t RocksDBCollection::memory() const { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + // TODO return 0; } @@ -182,17 +181,85 @@ void RocksDBCollection::prepareIndexes( /// @brief Find index by definition std::shared_ptr RocksDBCollection::lookupIndex( - velocypack::Slice const&) const { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + velocypack::Slice const& info) const { + TRI_ASSERT(info.isObject()); + + // extract type + VPackSlice value = info.get("type"); + + if (!value.isString()) { + // Compatibility with old v8-vocindex. + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + std::string tmp = value.copyString(); + arangodb::Index::IndexType const type = arangodb::Index::type(tmp.c_str()); + + for (auto const& idx : _indexes) { + if (idx->type() == type) { + // Only check relevant indices + if (idx->matchesDefinition(info)) { + // We found an index for this definition. + return idx; + } + } + } return nullptr; } std::shared_ptr RocksDBCollection::createIndex( transaction::Methods* trx, arangodb::velocypack::Slice const& info, bool& created) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); - return nullptr; + // TODO Get LOCK for the vocbase + auto idx = lookupIndex(info); + if (idx != nullptr) { + created = false; + // We already have this index. + return idx; + } + + StorageEngine* engine = EngineSelectorFeature::ENGINE; + IndexFactory const* idxFactory = engine->indexFactory(); + TRI_ASSERT(idxFactory != nullptr); + + // We are sure that we do not have an index of this type. + // We also hold the lock. + // Create it + + idx = + idxFactory->prepareIndexFromSlice(info, true, _logicalCollection, false); + TRI_ASSERT(idx != nullptr); + if (ServerState::instance()->isCoordinator()) { + // In the coordinator case we do not fill the index + // We only inform the others. + addIndexCoordinator(idx); + created = true; + return idx; + } + + int res = saveIndex(trx, idx); + + if (res != TRI_ERROR_NO_ERROR) { + THROW_ARANGO_EXCEPTION(res); + } + + arangodb::aql::PlanCache::instance()->invalidate(_logicalCollection->vocbase()); + // Until here no harm is done if sth fails. The shared ptr will clean up. if + // left before + + addIndex(idx); + { + bool const doSync = + application_features::ApplicationServer::getFeature( + "Database") + ->forceSyncProperties(); + VPackBuilder builder = _logicalCollection->toVelocyPackIgnore({"path", "statusString"}, true); + _logicalCollection->updateProperties(builder.slice(), doSync); + } + created = true; + return idx; } + /// @brief Restores an index from VelocyPack. int RocksDBCollection::restoreIndex(transaction::Methods*, velocypack::Slice const&, @@ -200,21 +267,23 @@ int RocksDBCollection::restoreIndex(transaction::Methods*, THROW_ARANGO_NOT_YET_IMPLEMENTED(); return 0; } + /// @brief Drop an index with the given iid. bool RocksDBCollection::dropIndex(TRI_idx_iid_t iid) { THROW_ARANGO_NOT_YET_IMPLEMENTED(); return false; } + std::unique_ptr RocksDBCollection::getAllIterator( transaction::Methods* trx, ManagedDocumentResult* mdr, bool reverse) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); - return nullptr; + return std::unique_ptr(primaryIndex()->allIterator(trx, mdr, reverse)); } + std::unique_ptr RocksDBCollection::getAnyIterator( transaction::Methods* trx, ManagedDocumentResult* mdr) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); - return nullptr; + return std::unique_ptr(primaryIndex()->anyIterator(trx, mdr)); } + void RocksDBCollection::invokeOnAllElements( std::function callback) { THROW_ARANGO_NOT_YET_IMPLEMENTED(); @@ -351,3 +420,52 @@ void RocksDBCollection::addIndexCoordinator( _indexes.emplace_back(idx); } + +int RocksDBCollection::saveIndex(transaction::Methods* trx, std::shared_ptr idx) { + TRI_ASSERT(!ServerState::instance()->isCoordinator()); + // we cannot persist PrimaryIndex + TRI_ASSERT(idx->type() != Index::IndexType::TRI_IDX_TYPE_PRIMARY_INDEX); + std::vector> indexListLocal; + indexListLocal.emplace_back(idx); + +/* TODO + int res = fillIndexes(trx, indexListLocal, false); + + if (res != TRI_ERROR_NO_ERROR) { + return res; + } +*/ + std::shared_ptr builder = idx->toVelocyPack(false); + auto vocbase = _logicalCollection->vocbase(); + auto collectionId = _logicalCollection->cid(); + VPackSlice data = builder->slice(); + + StorageEngine* engine = EngineSelectorFeature::ENGINE; + engine->createIndex(vocbase, collectionId, idx->id(), data); + + return TRI_ERROR_NO_ERROR; +} + +// @brief return the primary index +// WARNING: Make sure that this LogicalCollection Instance +// is somehow protected. If it goes out of all scopes +// or it's indexes are freed the pointer returned will get invalidated. +arangodb::RocksDBPrimaryIndex* RocksDBCollection::primaryIndex() const { + // The primary index always has iid 0 + auto primary = _logicalCollection->lookupIndex(0); + TRI_ASSERT(primary != nullptr); + +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + if (primary->type() != Index::IndexType::TRI_IDX_TYPE_PRIMARY_INDEX) { + LOG_TOPIC(ERR, arangodb::Logger::FIXME) + << "got invalid indexes for collection '" << _logicalCollection->name() + << "'"; + for (auto const& it : _indexes) { + LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "- " << it.get(); + } + } +#endif + TRI_ASSERT(primary->type() == Index::IndexType::TRI_IDX_TYPE_PRIMARY_INDEX); + // the primary index must be the index at position #0 + return static_cast(primary.get()); +} diff --git a/arangod/RocksDBEngine/RocksDBCollection.h b/arangod/RocksDBEngine/RocksDBCollection.h index 3a06dc2b0f..791db70587 100644 --- a/arangod/RocksDBEngine/RocksDBCollection.h +++ b/arangod/RocksDBEngine/RocksDBCollection.h @@ -31,12 +31,12 @@ #include "VocBase/ManagedDocumentResult.h" #include "VocBase/PhysicalCollection.h" -struct RocksDBDatafile; namespace arangodb { class LogicalCollection; class ManagedDocumentResult; class Result; +class RocksDBPrimaryIndex; class RocksDBCollection final : public PhysicalCollection { friend class RocksDBEngine; @@ -74,7 +74,7 @@ class RocksDBCollection final : public PhysicalCollection { PhysicalCollection*) override; TRI_voc_rid_t revision() const override; - + int64_t initialCount() const override; void updateCount(int64_t) override; @@ -86,8 +86,6 @@ class RocksDBCollection final : public PhysicalCollection { uint64_t numberDocuments() const override; - void sizeHint(transaction::Methods* trx, int64_t hint) override; - /// @brief report extra memory used by indexes etc. size_t memory() const override; void open(bool ignoreErrors) override; @@ -171,6 +169,8 @@ class RocksDBCollection final : public PhysicalCollection { void deferDropCollection( std::function callback) override; + + uint64_t objectId() const { return _objectId; } private: /// @brief return engine-specific figures @@ -179,6 +179,8 @@ class RocksDBCollection final : public PhysicalCollection { void createInitialIndexes(); void addIndex(std::shared_ptr idx); void addIndexCoordinator(std::shared_ptr idx); + int saveIndex(transaction::Methods* trx, std::shared_ptr idx); + arangodb::RocksDBPrimaryIndex* primaryIndex() const; private: uint64_t _objectId; // rocksdb-specific object id for collection diff --git a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp index f5f9f00e13..6257c3804e 100644 --- a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp @@ -91,13 +91,13 @@ RocksDBEdgeIndex::~RocksDBEdgeIndex() {} /// @brief return a selectivity estimate for the index double RocksDBEdgeIndex::selectivityEstimate(arangodb::StringRef const* attribute) const { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + // TODO return 0.0; } /// @brief return the memory usage for the index size_t RocksDBEdgeIndex::memory() const { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + // TODO return 0; } diff --git a/arangod/RocksDBEngine/RocksDBEngine.cpp b/arangod/RocksDBEngine/RocksDBEngine.cpp index bf15221d8c..d18e86fa81 100644 --- a/arangod/RocksDBEngine/RocksDBEngine.cpp +++ b/arangod/RocksDBEngine/RocksDBEngine.cpp @@ -145,8 +145,8 @@ TransactionState* RocksDBEngine::createTransactionState(TRI_vocbase_t* vocbase) TransactionCollection* RocksDBEngine::createTransactionCollection( TransactionState* state, TRI_voc_cid_t cid, AccessMode::Type accessType, - int nestingLevel) { - return new RocksDBTransactionCollection(state, cid, accessType, nestingLevel); + int /*nestingLevel*/) { + return new RocksDBTransactionCollection(state, cid, accessType); } void RocksDBEngine::addParametersForNewCollection(VPackBuilder& builder, VPackSlice info) { @@ -444,9 +444,9 @@ void RocksDBEngine::destroyCollection(TRI_vocbase_t* vocbase, } void RocksDBEngine::changeCollection(TRI_vocbase_t* vocbase, TRI_voc_cid_t id, - arangodb::LogicalCollection const*, + arangodb::LogicalCollection const* parameters, bool doSync) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + createCollection(vocbase, id, parameters); } arangodb::Result RocksDBEngine::renameCollection( @@ -457,9 +457,16 @@ arangodb::Result RocksDBEngine::renameCollection( } void RocksDBEngine::createIndex(TRI_vocbase_t* vocbase, - TRI_voc_cid_t collectionId, TRI_idx_iid_t id, + TRI_voc_cid_t collectionId, TRI_idx_iid_t indexId, arangodb::velocypack::Slice const& data) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + RocksDBEntry entry = RocksDBEntry::Index(vocbase->id(), collectionId, indexId, data); + rocksdb::WriteOptions options; // TODO: check which options would make sense + + rocksdb::Status res = _db->Put(options, entry.key(), entry.value()); + if (!res.ok()) { + // TODO: need translation for RocksDB errors + THROW_ARANGO_EXCEPTION(TRI_ERROR_INTERNAL); + } } void RocksDBEngine::dropIndex(TRI_vocbase_t* vocbase, diff --git a/arangod/RocksDBEngine/RocksDBIndexFactory.cpp b/arangod/RocksDBEngine/RocksDBIndexFactory.cpp index 64d9ab4f5a..64d47364a9 100644 --- a/arangod/RocksDBEngine/RocksDBIndexFactory.cpp +++ b/arangod/RocksDBEngine/RocksDBIndexFactory.cpp @@ -342,6 +342,11 @@ std::shared_ptr RocksDBIndexFactory::prepareIndexFromSlice( newIdx.reset(new arangodb::RocksDBEdgeIndex(iid, col)); break; } + case arangodb::Index::TRI_IDX_TYPE_HASH_INDEX: { + // TODO: fix this wrong index type. only used temporarily because we don't have other indexes + newIdx.reset(new arangodb::RocksDBEdgeIndex(iid, col)); + break; + } case arangodb::Index::TRI_IDX_TYPE_UNKNOWN: default: { diff --git a/arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp b/arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp index 5eb3dfec2b..3ee83d61e8 100644 --- a/arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBPrimaryIndex.cpp @@ -81,12 +81,12 @@ RocksDBAllIndexIterator::RocksDBAllIndexIterator(LogicalCollection* collection, : IndexIterator(collection, trx, mmdr, index), _reverse(reverse), _total(0) {} bool RocksDBAllIndexIterator::next(TokenCallback const& cb, size_t limit) { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); - return true; + // TODO + return false; } void RocksDBAllIndexIterator::reset() { - THROW_ARANGO_NOT_YET_IMPLEMENTED(); + // TODO } RocksDBAnyIndexIterator::RocksDBAnyIndexIterator(LogicalCollection* collection, transaction::Methods* trx, @@ -187,3 +187,19 @@ arangodb::aql::AstNode* RocksDBPrimaryIndex::specializeCondition( SimpleAttributeEqualityMatcher matcher(IndexAttributes); return matcher.specializeOne(this, node, reference); } + +/// @brief request an iterator over all elements in the index in +/// a sequential order. +IndexIterator* RocksDBPrimaryIndex::allIterator(transaction::Methods* trx, + ManagedDocumentResult* mmdr, + bool reverse) const { + return new RocksDBAllIndexIterator(_collection, trx, mmdr, this, reverse); +} + +/// @brief request an iterator over all elements in the index in +/// a random order. It is guaranteed that each element is found +/// exactly once unless the collection is modified. +IndexIterator* RocksDBPrimaryIndex::anyIterator(transaction::Methods* trx, + ManagedDocumentResult* mmdr) const { + return new RocksDBAnyIndexIterator(_collection, trx, mmdr, this); +} diff --git a/arangod/RocksDBEngine/RocksDBPrimaryIndex.h b/arangod/RocksDBEngine/RocksDBPrimaryIndex.h index 707be45bb0..3df8021c65 100644 --- a/arangod/RocksDBEngine/RocksDBPrimaryIndex.h +++ b/arangod/RocksDBEngine/RocksDBPrimaryIndex.h @@ -151,6 +151,15 @@ class RocksDBPrimaryIndex final : public Index { arangodb::aql::AstNode* specializeCondition( arangodb::aql::AstNode*, arangodb::aql::Variable const*) const override; + + /// @brief request an iterator over all elements in the index in + /// a sequential order. + IndexIterator* allIterator(transaction::Methods*, ManagedDocumentResult*, bool reverse) const; + + /// @brief request an iterator over all elements in the index in + /// a random order. It is guaranteed that each element is found + /// exactly once unless the collection is modified. + IndexIterator* anyIterator(transaction::Methods*, ManagedDocumentResult*) const; }; } diff --git a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp index 27cd0e1c6a..d2bd1e7039 100644 --- a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp @@ -36,15 +36,17 @@ using namespace arangodb; RocksDBTransactionCollection::RocksDBTransactionCollection(TransactionState* trx, TRI_voc_cid_t cid, - AccessMode::Type accessType, - int nestingLevel) + AccessMode::Type accessType) : TransactionCollection(trx, cid), - _firstTime(true), _waitForSync(false), _accessType(accessType), - _numOperations(0) {} + _numOperations(0) { + LOG_TOPIC(ERR, Logger::FIXME) << "ctor rocksdb transaction collection: " << cid; +} -RocksDBTransactionCollection::~RocksDBTransactionCollection() {} +RocksDBTransactionCollection::~RocksDBTransactionCollection() { + LOG_TOPIC(ERR, Logger::FIXME) << "dtor rocksdb transaction collection: " << _cid; +} /// @brief request a main-level lock for a collection int RocksDBTransactionCollection::lock() { return TRI_ERROR_NO_ERROR; } @@ -124,8 +126,17 @@ int RocksDBTransactionCollection::updateUsage(AccessMode::Type accessType, int n return TRI_ERROR_NO_ERROR; } -int RocksDBTransactionCollection::use(int /*nestingLevel*/) { - if (_firstTime) { +int RocksDBTransactionCollection::use(int nestingLevel) { + if (_collection == nullptr) { + TRI_vocbase_col_status_e status; + LOG_TRX(_transaction, nestingLevel) << "using collection " << _cid; + _collection = _transaction->vocbase()->useCollection(_cid, status); + LOG_TOPIC(ERR, Logger::FIXME) << "using collection " << _cid << ": " << _collection; + + if (_collection == nullptr) { + return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; + } + if (AccessMode::isWriteOrExclusive(_accessType) && TRI_GetOperationModeServer() == TRI_VOCBASE_MODE_NO_CREATE && !LogicalCollection::IsSystemName(_collection->name())) { @@ -134,8 +145,6 @@ int RocksDBTransactionCollection::use(int /*nestingLevel*/) { // store the waitForSync property _waitForSync = _collection->waitForSync(); - - _firstTime = false; } return TRI_ERROR_NO_ERROR; @@ -143,5 +152,14 @@ int RocksDBTransactionCollection::use(int /*nestingLevel*/) { void RocksDBTransactionCollection::unuse(int /*nestingLevel*/) {} -// nothing to do here -void RocksDBTransactionCollection::release() {} +void RocksDBTransactionCollection::release() { + // the top level transaction releases all collections + if (_collection != nullptr) { + LOG_TOPIC(ERR, Logger::FIXME) << "releasing collection " << _cid << ": " << _collection; + // unuse collection, remove usage-lock + LOG_TRX(_transaction, 0) << "unusing collection " << _cid; + + _transaction->vocbase()->releaseCollection(_collection); + _collection = nullptr; + } +} diff --git a/arangod/RocksDBEngine/RocksDBTransactionCollection.h b/arangod/RocksDBEngine/RocksDBTransactionCollection.h index 68be8cf4a0..4c32ed71c4 100644 --- a/arangod/RocksDBEngine/RocksDBTransactionCollection.h +++ b/arangod/RocksDBEngine/RocksDBTransactionCollection.h @@ -40,7 +40,7 @@ class TransactionState; class RocksDBTransactionCollection final : public TransactionCollection { public: - RocksDBTransactionCollection(TransactionState* trx, TRI_voc_cid_t cid, AccessMode::Type accessType, int nestingLevel); + RocksDBTransactionCollection(TransactionState* trx, TRI_voc_cid_t cid, AccessMode::Type accessType); ~RocksDBTransactionCollection(); /// @brief request a main-level lock for a collection @@ -70,7 +70,6 @@ class RocksDBTransactionCollection final : public TransactionCollection { void release() override; private: - bool _firstTime; bool _waitForSync; // whether or not the collection has waitForSync AccessMode::Type _accessType; // access type (read|write) diff --git a/arangod/RocksDBEngine/RocksDBTransactionState.cpp b/arangod/RocksDBEngine/RocksDBTransactionState.cpp index 986c1d7023..3951330f47 100644 --- a/arangod/RocksDBEngine/RocksDBTransactionState.cpp +++ b/arangod/RocksDBEngine/RocksDBTransactionState.cpp @@ -51,10 +51,14 @@ struct RocksDBTransactionData final : public TransactionData { RocksDBTransactionState::RocksDBTransactionState(TRI_vocbase_t* vocbase) : TransactionState(vocbase), _beginWritten(false), - _hasOperations(false) {} + _hasOperations(false) { + LOG_TOPIC(ERR, Logger::FIXME) << "ctor rocksdb transaction state: " << this; +} /// @brief free a transaction container -RocksDBTransactionState::~RocksDBTransactionState() {} +RocksDBTransactionState::~RocksDBTransactionState() { + LOG_TOPIC(ERR, Logger::FIXME) << "dtor rocksdb transaction state: " << this; +} /// @brief start a transaction int RocksDBTransactionState::beginTransaction(transaction::Hints hints) { @@ -75,13 +79,30 @@ int RocksDBTransactionState::beginTransaction(transaction::Hints hints) { StorageEngine* engine = EngineSelectorFeature::ENGINE; rocksdb::TransactionDB* db = static_cast(engine)->db(); _rocksTransaction.reset(db->BeginTransaction(rocksdb::WriteOptions(), rocksdb::TransactionOptions())); - - updateStatus(transaction::Status::RUNNING); } else { TRI_ASSERT(_status == transaction::Status::RUNNING); } + + int res = useCollections(_nestingLevel); + + LOG_TOPIC(ERR, Logger::FIXME) << "USE COLLECTIONS RETURNED: " << res << ", NESTING: " << _nestingLevel; + + if (res == TRI_ERROR_NO_ERROR) { + // all valid + if (_nestingLevel == 0) { + updateStatus(transaction::Status::RUNNING); + } + } else { + // something is wrong + if (_nestingLevel == 0) { + updateStatus(transaction::Status::ABORTED); + } + + // free what we have got so far + unuseCollections(_nestingLevel); + } - return TRI_ERROR_NO_ERROR; + return res; } /// @brief commit a transaction diff --git a/arangod/StorageEngine/TransactionState.cpp b/arangod/StorageEngine/TransactionState.cpp index 670ffd1ab0..7a56859362 100644 --- a/arangod/StorageEngine/TransactionState.cpp +++ b/arangod/StorageEngine/TransactionState.cpp @@ -93,6 +93,7 @@ TransactionCollection* TransactionState::collection(TRI_voc_cid_t cid, AccessMod int TransactionState::addCollection(TRI_voc_cid_t cid, AccessMode::Type accessType, int nestingLevel, bool force) { + LOG_TOPIC(ERR, Logger::FIXME) << "add collection: " << cid << ", " << this; LOG_TRX(this, nestingLevel) << "adding collection " << cid; // LOG_TOPIC(TRACE, arangodb::Logger::FIXME) << "cid: " << cid @@ -138,7 +139,9 @@ int TransactionState::addCollection(TRI_voc_cid_t cid, TRI_ASSERT(trxCollection == nullptr); StorageEngine* engine = EngineSelectorFeature::ENGINE; + LOG_TOPIC(ERR, Logger::FIXME) << "creating trx collection: " << cid << ", " << this; trxCollection = engine->createTransactionCollection(this, cid, accessType, nestingLevel); + LOG_TOPIC(ERR, Logger::FIXME) << "created trx collection: " << cid << ", " << this << "; " << trxCollection; TRI_ASSERT(trxCollection != nullptr); @@ -163,6 +166,8 @@ int TransactionState::ensureCollections(int nestingLevel) { int TransactionState::useCollections(int nestingLevel) { int res = TRI_ERROR_NO_ERROR; + LOG_TOPIC(ERR, Logger::FIXME) << "use collections " << this; + // process collections in forward order for (auto& trxCollection : _collections) { res = trxCollection->use(nestingLevel); diff --git a/arangod/Transaction/Methods.cpp b/arangod/Transaction/Methods.cpp index 894db32993..7c75c30c22 100644 --- a/arangod/Transaction/Methods.cpp +++ b/arangod/Transaction/Methods.cpp @@ -2577,6 +2577,8 @@ arangodb::LogicalCollection* transaction::Methods::documentCollection( TRI_ASSERT(_state != nullptr); TRI_ASSERT(trxCollection != nullptr); TRI_ASSERT(_state->status() == transaction::Status::RUNNING); + + LOG_TOPIC(ERR, Logger::FIXME) << "accessing collection " << trxCollection->id() << ": " << trxCollection; TRI_ASSERT(trxCollection->collection() != nullptr); return trxCollection->collection(); diff --git a/arangod/VocBase/LogicalCollection.cpp b/arangod/VocBase/LogicalCollection.cpp index ffc0790222..a9e8228fff 100644 --- a/arangod/VocBase/LogicalCollection.cpp +++ b/arangod/VocBase/LogicalCollection.cpp @@ -1107,13 +1107,6 @@ int LogicalCollection::remove(transaction::Methods* trx, lock, revisionId, prevRev); } -void LogicalCollection::sizeHint(transaction::Methods* trx, int64_t hint) { - if (hint <= 0) { - return; - } - getPhysical()->sizeHint(trx, hint); -} - bool LogicalCollection::readDocument(transaction::Methods* trx, DocumentIdentifierToken const& token, ManagedDocumentResult& result) { return getPhysical()->readDocument(trx, token, result); } diff --git a/arangod/VocBase/LogicalCollection.h b/arangod/VocBase/LogicalCollection.h index 63685567e8..f5ca4635ee 100644 --- a/arangod/VocBase/LogicalCollection.h +++ b/arangod/VocBase/LogicalCollection.h @@ -227,10 +227,6 @@ class LogicalCollection { /// @brief closes an open collection int close(); - /// datafile management - - void sizeHint(transaction::Methods* trx, int64_t hint); - // SECTION: Indexes /// @brief Create a new Index based on VelocyPack description diff --git a/arangod/VocBase/PhysicalCollection.h b/arangod/VocBase/PhysicalCollection.h index acb5b02b79..afddeba897 100644 --- a/arangod/VocBase/PhysicalCollection.h +++ b/arangod/VocBase/PhysicalCollection.h @@ -81,8 +81,6 @@ class PhysicalCollection { // @brief Return the number of documents in this collection virtual uint64_t numberDocuments() const = 0; - virtual void sizeHint(transaction::Methods* trx, int64_t hint) = 0; - /// @brief report extra memory used by indexes etc. virtual size_t memory() const = 0;