From 9b42e9cee64e441be9b82b10c3606b52543291ac Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 15 Dec 2016 14:57:35 +0100 Subject: [PATCH 1/5] some improvements for memory management added more memory diagnostics for memory usage: - collection.figures() now returns a "readCache" attribute which contains the number of and memory usage of entries in the document revisions cache for the collection, and a "revisions" attribute with the number and memory usage of entries in the storage engine's revision lookup table for the collection - the default value for --database.revision-cache-target-size was changed from 75% of RAM to 30% of RAM --- .../JSA_get_api_collection_figures.md | 43 ++++++------------- Documentation/DocuBlocks/collectionFigures.md | 33 ++++++-------- arangod/GeneralServer/HttpCommTask.cpp | 1 + arangod/GeneralServer/HttpCommTask.h | 2 +- arangod/RestServer/RevisionCacheFeature.cpp | 10 ++--- arangod/Scheduler/AcceptorTcp.h | 2 +- arangod/Scheduler/AcceptorUnixDomain.h | 2 +- arangod/Scheduler/JobQueue.cpp | 2 +- arangod/Scheduler/JobQueue.h | 2 +- arangod/Scheduler/SocketTcp.h | 4 +- arangod/Scheduler/SocketUnixDomain.h | 2 +- arangod/StorageEngine/MMFilesCollection.cpp | 10 ++++- .../StorageEngine/MMFilesRevisionsCache.cpp | 17 +++++++- arangod/StorageEngine/MMFilesRevisionsCache.h | 3 ++ arangod/VocBase/CollectionRevisionsCache.cpp | 17 +++++++- arangod/VocBase/CollectionRevisionsCache.h | 3 ++ arangod/VocBase/LogicalCollection.cpp | 17 ++++++-- arangod/VocBase/ReadCache.cpp | 8 +++- arangod/VocBase/ReadCache.h | 6 +++ arangod/Wal/LogfileManager.cpp | 9 ++-- arangod/Wal/LogfileManager.h | 2 +- lib/ApplicationFeatures/ApplicationServer.cpp | 2 + lib/Basics/AssocUnique.h | 8 ++++ lib/Basics/process-utils.cpp | 10 +++++ lib/Basics/process-utils.h | 2 + lib/Logger/LogTopic.cpp | 1 + lib/Logger/Logger.h | 1 + 27 files changed, 143 insertions(+), 76 deletions(-) diff --git a/Documentation/DocuBlocks/Rest/Collections/JSA_get_api_collection_figures.md 
b/Documentation/DocuBlocks/Rest/Collections/JSA_get_api_collection_figures.md index 66594d94d9..b22f858b32 100644 --- a/Documentation/DocuBlocks/Rest/Collections/JSA_get_api_collection_figures.md +++ b/Documentation/DocuBlocks/Rest/Collections/JSA_get_api_collection_figures.md @@ -99,40 +99,25 @@ The number of compactor files. @RESTSTRUCT{fileSize,collection_figures_compactors,integer,required,int64} The total filesize of all compactor files (in bytes). -@RESTSTRUCT{shapefiles,collection_figures,object,required,collection_figures_shapefiles} -**deprecated** +@RESTSTRUCT{readCache,collection_figures,object,required,collection_figures_readcache} -@RESTSTRUCT{count,collection_figures_shapefiles,integer,required,int64} -The number of shape files. This value is deprecated and kept for compatibility reasons only. -The value will always be 0 since ArangoDB 2.0 and higher. +@RESTSTRUCT{count,collection_figures_readcache,integer,required,int64} +The number of revisions of this collection stored in the document revisions cache. -@RESTSTRUCT{fileSize,collection_figures_shapefiles,integer,required,int64} -The total filesize of the shape files. This value is deprecated and kept -for compatibility reasons only. The value will always be 0 in ArangoDB 2.0 and higher. +@RESTSTRUCT{size,collection_figures_readcache,integer,required,int64} +The memory used for storing the revisions of this collection in the document +revisions cache (in bytes). This figure does not include the document data but +only mappings from document revision ids to cache entry locations. -@RESTSTRUCT{shapes,collection_figures,object,required,collection_figures_shapes} -@RESTSTRUCT{count,collection_figures_shapes,integer,required,int64} -The total number of shapes used in the collection. This includes shapes -that are not in use anymore. Shapes that are contained -in the write-ahead log only are not reported in this figure. 
+@RESTSTRUCT{revisions,collection_figures,object,required,collection_figures_revisions} -@RESTSTRUCT{size,collection_figures_shapes,integer,required,int64} -The total size of all shapes (in bytes). This includes -shapes that are not in use anymore. Shapes that are contained in the -write-ahead log only are not reported in this figure. +@RESTSTRUCT{count,collection_figures_revisions,integer,required,int64} +The number of revisions of this collection managed by the storage engine. -@RESTSTRUCT{attributes,collection_figures,object,required,collection_figures_attributes} -@RESTSTRUCT{count,collection_figures_attributes,integer,required,int64} -The total number of attributes used in the -collection. Note: the value includes data of attributes that are not in use -anymore. Attributes that are contained in the write-ahead log only are -not reported in this figure. - -@RESTSTRUCT{size,collection_figures_attributes,integer,required,int64} -The total size of the attribute data (in bytes). -Note: the value includes data of attributes that are not in use anymore. -Attributes that are contained in the write-ahead log only are not -reported in this figure. +@RESTSTRUCT{size,collection_figures_revisions,integer,required,int64} +The memory used for storing the revisions of this collection in the storage +engine (in bytes). This figure does not include the document data but only mappings +from document revision ids to storage engine datafile positions. @RESTSTRUCT{indexes,collection_figures,object,required,collection_figures_indexes} @RESTSTRUCT{count,collection_figures_indexes,integer,required,int64} diff --git a/Documentation/DocuBlocks/collectionFigures.md b/Documentation/DocuBlocks/collectionFigures.md index 77b54aacb4..0d261efcc9 100644 --- a/Documentation/DocuBlocks/collectionFigures.md +++ b/Documentation/DocuBlocks/collectionFigures.md @@ -28,28 +28,19 @@ memory. * *compactors.count*: The number of compactor files. 
* *compactors.fileSize*: The total filesize of the compactor files (in bytes). -* *shapefiles.count*: The number of shape files. This value is - deprecated and kept for compatibility reasons only. The value will always - be 0 since ArangoDB 2.0 and higher. -* *shapefiles.fileSize*: The total filesize of the shape files. This - value is deprecated and kept for compatibility reasons only. The value will - always be 0 in ArangoDB 2.0 and higher. -* *shapes.count*: The total number of shapes used in the collection. - This includes shapes that are not in use anymore. Shapes that are contained - in the write-ahead log only are not reported in this figure. -* *shapes.size*: The total size of all shapes (in bytes). This includes - shapes that are not in use anymore. Shapes that are contained in the - write-ahead log only are not reported in this figure. -* *attributes.count*: The total number of attributes used in the - collection. Note: the value includes data of attributes that are not in use - anymore. Attributes that are contained in the write-ahead log only are - not reported in this figure. -* *attributes.size*: The total size of the attribute data (in bytes). - Note: the value includes data of attributes that are not in use anymore. - Attributes that are contained in the write-ahead log only are not - reported in this figure. +* *readCache.count*: The number of revisions of this collection stored + in the document revisions cache. +* *readCache.size*: The memory used for storing the revisions + of this collection in the document revisions cache (in bytes). This + figure does not include the document data but only mappings from + document revision ids to cache entry locations. +* *revisions.count*: The number of revisions of this collection managed + by the storage engine. +* *revisions.size*: The memory used for storing the revisions + of this collection in the storage engine (in bytes). 
This figure + does not include the document data but only mappings from + document revision ids to storage engine datafile positions. * *indexes.count*: The total number of indexes defined for the - collection, including the pre-defined indexes (e.g. primary index). * *indexes.size*: The total memory allocated for indexes in bytes. * *maxTick*: The tick of the last marker that was stored in a journal of the collection. This might be 0 if the collection does not yet have diff --git a/arangod/GeneralServer/HttpCommTask.cpp b/arangod/GeneralServer/HttpCommTask.cpp index 7b76883b45..65bc77fbef 100644 --- a/arangod/GeneralServer/HttpCommTask.cpp +++ b/arangod/GeneralServer/HttpCommTask.cpp @@ -66,6 +66,7 @@ HttpCommTask::HttpCommTask(EventLoop loop, GeneralServer* server, _protocol = "http"; connectionStatisticsAgentSetHttp(); + auto agent = std::make_unique(true); agent->acquire(); MUTEX_LOCKER(lock, _agentsMutex); diff --git a/arangod/GeneralServer/HttpCommTask.h b/arangod/GeneralServer/HttpCommTask.h index 8ebf0e43fb..083a56164d 100644 --- a/arangod/GeneralServer/HttpCommTask.h +++ b/arangod/GeneralServer/HttpCommTask.h @@ -9,7 +9,7 @@ namespace arangodb { class HttpRequest; namespace rest { -class HttpCommTask : public GeneralCommTask { +class HttpCommTask final : public GeneralCommTask { public: static size_t const MaximalHeaderSize; static size_t const MaximalBodySize; diff --git a/arangod/RestServer/RevisionCacheFeature.cpp b/arangod/RestServer/RevisionCacheFeature.cpp index 865f0decc3..e94224294f 100644 --- a/arangod/RestServer/RevisionCacheFeature.cpp +++ b/arangod/RestServer/RevisionCacheFeature.cpp @@ -50,19 +50,19 @@ RevisionCacheFeature::RevisionCacheFeature(ApplicationServer* server) requiresElevatedPrivileges(false); startsAfter("WorkMonitor"); - - if (TRI_PhysicalMemory != 0) { + if (TRI_PhysicalMemory > 1024 * 1024 * 1024) { // reset target size to a fraction of the available memory - _targetSize = static_cast(TRI_PhysicalMemory * 0.75); + _targetSize = 
TRI_PhysicalMemory - (1024 * 1024 * 1024); + _targetSize *= 0.4; // 40 % } } void RevisionCacheFeature::collectOptions(std::shared_ptr options) { options->addSection("database", "Configure the database"); - options->addOption("--database.revision-cache-chunk-size", "chunk size for the document revision cache", + options->addOption("--database.revision-cache-chunk-size", "chunk size (in bytes) for the document revisions cache", new UInt32Parameter(&_chunkSize)); - options->addOption("--database.revision-cache-target-size", "total target size for the document revision cache", + options->addOption("--database.revision-cache-target-size", "total target size (in bytes) for the document revisions cache", new UInt64Parameter(&_targetSize)); } diff --git a/arangod/Scheduler/AcceptorTcp.h b/arangod/Scheduler/AcceptorTcp.h index 136dd3a060..97697c182e 100644 --- a/arangod/Scheduler/AcceptorTcp.h +++ b/arangod/Scheduler/AcceptorTcp.h @@ -26,7 +26,7 @@ #include "Scheduler/Acceptor.h" namespace arangodb { -class AcceptorTcp: public Acceptor { +class AcceptorTcp final : public Acceptor { public: AcceptorTcp(boost::asio::io_service& ioService, Endpoint* endpoint) : Acceptor(ioService, endpoint), diff --git a/arangod/Scheduler/AcceptorUnixDomain.h b/arangod/Scheduler/AcceptorUnixDomain.h index 80069bfb36..3e047ae4b4 100644 --- a/arangod/Scheduler/AcceptorUnixDomain.h +++ b/arangod/Scheduler/AcceptorUnixDomain.h @@ -26,7 +26,7 @@ #include "Scheduler/Acceptor.h" namespace arangodb { -class AcceptorUnixDomain: public Acceptor { +class AcceptorUnixDomain final : public Acceptor { public: AcceptorUnixDomain(boost::asio::io_service& ioService, Endpoint* endpoint) : Acceptor(ioService, endpoint), diff --git a/arangod/Scheduler/JobQueue.cpp b/arangod/Scheduler/JobQueue.cpp index 369092627f..68c88971ef 100644 --- a/arangod/Scheduler/JobQueue.cpp +++ b/arangod/Scheduler/JobQueue.cpp @@ -31,7 +31,7 @@ using namespace arangodb; namespace { -class JobQueueThread : public Thread { +class 
JobQueueThread final : public Thread { public: JobQueueThread(JobQueue* server, boost::asio::io_service* ioService) : Thread("JobQueueThread"), _jobQueue(server), _ioService(ioService) {} diff --git a/arangod/Scheduler/JobQueue.h b/arangod/Scheduler/JobQueue.h index 5f3cd06abf..2d7734bb09 100644 --- a/arangod/Scheduler/JobQueue.h +++ b/arangod/Scheduler/JobQueue.h @@ -49,7 +49,7 @@ class JobQueue { void start(); void beginShutdown(); - int64_t queueSize(size_t i) { return _queuesSize[i]; } + int64_t queueSize(size_t i) const { return _queuesSize[i]; } bool queue(size_t i, std::unique_ptr job) { if (i >= SYSTEM_QUEUE_SIZE) { diff --git a/arangod/Scheduler/SocketTcp.h b/arangod/Scheduler/SocketTcp.h index 4c7bc4a6d3..ae07db7df8 100644 --- a/arangod/Scheduler/SocketTcp.h +++ b/arangod/Scheduler/SocketTcp.h @@ -28,7 +28,7 @@ #include namespace arangodb { -class SocketTcp: public Socket { +class SocketTcp final : public Socket { public: SocketTcp(boost::asio::io_service& ioService, boost::asio::ssl::context&& context, bool encrypted) @@ -61,7 +61,7 @@ class SocketTcp: public Socket { // mop: these functions actually only access the underlying socket. The _sslSocket is // actually just an additional layer around the socket. 
These low level functions - // aceess the _socket only and it is ok that they are not implemented for _sslSocket in + // access the _socket only and it is ok that they are not implemented for _sslSocket in // the children void shutdownReceive() override; void shutdownReceive(boost::system::error_code& ec) override; diff --git a/arangod/Scheduler/SocketUnixDomain.h b/arangod/Scheduler/SocketUnixDomain.h index 0fa26eaf33..aeb8f0f526 100644 --- a/arangod/Scheduler/SocketUnixDomain.h +++ b/arangod/Scheduler/SocketUnixDomain.h @@ -30,7 +30,7 @@ using namespace arangodb::basics; namespace arangodb { -class SocketUnixDomain: public Socket { +class SocketUnixDomain final : public Socket { public: SocketUnixDomain(boost::asio::io_service& ioService, boost::asio::ssl::context&& context) : Socket(ioService, std::move(context), false), diff --git a/arangod/StorageEngine/MMFilesCollection.cpp b/arangod/StorageEngine/MMFilesCollection.cpp index 1ad1ae23f2..61a5ca0c5b 100644 --- a/arangod/StorageEngine/MMFilesCollection.cpp +++ b/arangod/StorageEngine/MMFilesCollection.cpp @@ -28,6 +28,7 @@ #include "Basics/StaticStrings.h" #include "Basics/VelocyPackHelper.h" #include "Basics/WriteLocker.h" +#include "Basics/process-utils.h" #include "Indexes/PrimaryIndex.h" #include "Logger/Logger.h" #include "RestServer/DatabaseFeature.h" @@ -312,7 +313,6 @@ int64_t MMFilesCollection::initialCount() const { void MMFilesCollection::updateCount(int64_t count) { _initialCount = count; - _revisionsCache.sizeHint(count); } /// @brief closes an open collection @@ -898,6 +898,11 @@ void MMFilesCollection::figures(std::shared_ptr& builder->add("count", VPackValue(_compactors.size())); builder->add("fileSize", VPackValue(sizeCompactors)); builder->close(); // compactors + + builder->add("revisions", VPackValue(VPackValueType::Object)); + builder->add("count", VPackValue(_revisionsCache.size())); + builder->add("size", VPackValue(_revisionsCache.memoryUsage())); + builder->close(); // revisions } /// 
@brief iterate over a vector of datafiles and pick those with a specific @@ -1075,6 +1080,7 @@ int MMFilesCollection::iterateMarkersOnLoad(arangodb::Transaction* trx) { OpenIteratorState openState(_logicalCollection, trx); if (_initialCount != -1) { + _revisionsCache.sizeHint(_initialCount); _logicalCollection->sizeHint(trx, _initialCount); openState._initialCount = _initialCount; } @@ -1085,7 +1091,7 @@ int MMFilesCollection::iterateMarkersOnLoad(arangodb::Transaction* trx) { }; iterateDatafiles(cb); - + LOG(TRACE) << "found " << openState._documents << " document markers, " << openState._deletions << " deletion markers for collection '" << _logicalCollection->name() << "'"; diff --git a/arangod/StorageEngine/MMFilesRevisionsCache.cpp b/arangod/StorageEngine/MMFilesRevisionsCache.cpp index 2dbd56ffc3..fda3a98818 100644 --- a/arangod/StorageEngine/MMFilesRevisionsCache.cpp +++ b/arangod/StorageEngine/MMFilesRevisionsCache.cpp @@ -69,10 +69,25 @@ MMFilesDocumentPosition MMFilesRevisionsCache::lookup(TRI_voc_rid_t revisionId) void MMFilesRevisionsCache::sizeHint(int64_t hint) { WRITE_LOCKER(locker, _lock); if (hint > 256) { - _positions.resize(nullptr, static_cast(hint * 1.1)); + _positions.resize(nullptr, static_cast(hint)); } } +size_t MMFilesRevisionsCache::size() { + READ_LOCKER(locker, _lock); + return _positions.size(); +} + +size_t MMFilesRevisionsCache::capacity() { + READ_LOCKER(locker, _lock); + return _positions.capacity(); +} + +size_t MMFilesRevisionsCache::memoryUsage() { + READ_LOCKER(locker, _lock); + return _positions.memoryUsage(); +} + void MMFilesRevisionsCache::clear() { WRITE_LOCKER(locker, _lock); _positions.truncate([](MMFilesDocumentPosition&) { return true; }); diff --git a/arangod/StorageEngine/MMFilesRevisionsCache.h b/arangod/StorageEngine/MMFilesRevisionsCache.h index 02c1037c5d..6852e2ceab 100644 --- a/arangod/StorageEngine/MMFilesRevisionsCache.h +++ b/arangod/StorageEngine/MMFilesRevisionsCache.h @@ -41,6 +41,9 @@ class 
MMFilesRevisionsCache { public: void sizeHint(int64_t hint); + size_t size(); + size_t capacity(); + size_t memoryUsage(); void clear(); MMFilesDocumentPosition lookup(TRI_voc_rid_t revisionId) const; void insert(TRI_voc_rid_t revisionId, uint8_t const* dataptr, TRI_voc_fid_t fid, bool isInWal, bool shouldLock); diff --git a/arangod/VocBase/CollectionRevisionsCache.cpp b/arangod/VocBase/CollectionRevisionsCache.cpp index d547294c69..a1e7ce18a8 100644 --- a/arangod/VocBase/CollectionRevisionsCache.cpp +++ b/arangod/VocBase/CollectionRevisionsCache.cpp @@ -92,9 +92,24 @@ void CollectionRevisionsCache::clear() { _readCache.clear(); } +size_t CollectionRevisionsCache::size() { + READ_LOCKER(locker, _lock); + return _revisions.size(); +} + +size_t CollectionRevisionsCache::memoryUsage() { + READ_LOCKER(locker, _lock); + return _revisions.memoryUsage(); +} + +size_t CollectionRevisionsCache::chunksMemoryUsage() { + READ_LOCKER(locker, _lock); + return _readCache.chunksMemoryUsage(); +} + void CollectionRevisionsCache::sizeHint(int64_t hint) { if (hint > 256) { - _revisions.resize(nullptr, static_cast(hint * 1.1)); + _revisions.resize(nullptr, static_cast(hint)); } } diff --git a/arangod/VocBase/CollectionRevisionsCache.h b/arangod/VocBase/CollectionRevisionsCache.h index 5396874866..af9a834aa7 100644 --- a/arangod/VocBase/CollectionRevisionsCache.h +++ b/arangod/VocBase/CollectionRevisionsCache.h @@ -53,6 +53,9 @@ class CollectionRevisionsCache { void clear(); void sizeHint(int64_t hint); + size_t size(); + size_t memoryUsage(); + size_t chunksMemoryUsage(); bool allowInvalidation() const { return _allowInvalidation.load(); diff --git a/arangod/VocBase/LogicalCollection.cpp b/arangod/VocBase/LogicalCollection.cpp index b5e7bb1a63..f6116141b2 100644 --- a/arangod/VocBase/LogicalCollection.cpp +++ b/arangod/VocBase/LogicalCollection.cpp @@ -23,6 +23,7 @@ #include "LogicalCollection.h" +#include "Aql/QueryCache.h" #include "Basics/Barrier.h" #include "Basics/ReadLocker.h" 
#include "Basics/StringUtils.h" @@ -31,7 +32,7 @@ #include "Basics/ThreadPool.h" #include "Basics/VelocyPackHelper.h" #include "Basics/WriteLocker.h" -#include "Aql/QueryCache.h" +#include "Basics/process-utils.h" #include "Cluster/ClusterInfo.h" #include "Cluster/ClusterMethods.h" #include "Cluster/FollowerInfo.h" @@ -1223,7 +1224,7 @@ std::shared_ptr LogicalCollection::figures() { } } else { builder->openObject(); - + // add index information size_t sizeIndexes = getPhysical()->memory(); size_t numIndexes = 0; @@ -1231,7 +1232,7 @@ std::shared_ptr LogicalCollection::figures() { sizeIndexes += static_cast(idx->memory()); ++numIndexes; } - + builder->add("indexes", VPackValue(VPackValueType::Object)); builder->add("count", VPackValue(numIndexes)); builder->add("size", VPackValue(sizeIndexes)); @@ -1255,6 +1256,9 @@ std::shared_ptr LogicalCollection::figures() { } if (lastCompactionStatus != nullptr) { + if (lastCompactionStamp == 0.0) { + lastCompactionStamp = TRI_microtime(); + } struct tm tb; time_t tt = static_cast(lastCompactionStamp); TRI_gmtime(tt, &tb); @@ -1265,6 +1269,13 @@ std::shared_ptr LogicalCollection::figures() { builder->add("message", VPackValue(lastCompactionStatus)); builder->add("time", VPackValue(&lastCompactionStampString[0])); builder->close(); // compactionStatus + + if (_revisionsCache) { + builder->add("readCache", VPackValue(VPackValueType::Object)); + builder->add("count", VPackValue(_revisionsCache->size())); + builder->add("size", VPackValue(_revisionsCache->memoryUsage())); + builder->close(); // readCache + } // add engine-specific figures getPhysical()->figures(builder); diff --git a/arangod/VocBase/ReadCache.cpp b/arangod/VocBase/ReadCache.cpp index a850d8a3a5..0348f47c4a 100644 --- a/arangod/VocBase/ReadCache.cpp +++ b/arangod/VocBase/ReadCache.cpp @@ -39,7 +39,9 @@ uint8_t* ReadCachePosition::vpack() noexcept { ReadCache::ReadCache(RevisionCacheChunkAllocator* allocator, CollectionRevisionsCache* collectionCache) : 
_allocator(allocator), _collectionCache(collectionCache), - _writeChunk(nullptr) {} + _writeChunk(nullptr) { + TRI_ASSERT(_allocator != nullptr); +} ReadCache::~ReadCache() { try { @@ -49,6 +51,10 @@ ReadCache::~ReadCache() { } } +size_t ReadCache::chunksMemoryUsage() { + return _allocator->totalAllocated(); +} + // clear all chunks currently in use. this is a fast-path deletion without checks void ReadCache::clear() { closeWriteChunk(); diff --git a/arangod/VocBase/ReadCache.h b/arangod/VocBase/ReadCache.h index 4b7fa9a1c5..c930b65c53 100644 --- a/arangod/VocBase/ReadCache.h +++ b/arangod/VocBase/ReadCache.h @@ -112,6 +112,8 @@ union RevisionCacheValue { RevisionCacheValue& operator=(RevisionCacheValue const& other) = delete; RevisionCacheValue& operator=(RevisionCacheValue&& other) = delete; }; + +static_assert(sizeof(RevisionCacheValue) == 16, "invalid size for RevisionCacheValue"); struct RevisionCacheEntry { TRI_voc_rid_t revisionId; @@ -180,11 +182,15 @@ struct RevisionCacheEntry { }; +static_assert(sizeof(RevisionCacheEntry) == 24, "invalid size for RevisionCacheEntry"); + class ReadCache { public: ReadCache(RevisionCacheChunkAllocator* allocator, CollectionRevisionsCache* collectionCache); ~ReadCache(); + size_t chunksMemoryUsage(); + // clear all chunks currently in use. 
this is a fast-path deletion without checks void clear(); diff --git a/arangod/Wal/LogfileManager.cpp b/arangod/Wal/LogfileManager.cpp index db7c18de2f..f7cb6f9767 100644 --- a/arangod/Wal/LogfileManager.cpp +++ b/arangod/Wal/LogfileManager.cpp @@ -86,7 +86,6 @@ static inline uint32_t MaxSlots() { return 1024 * 1024 * 16; } // create the logfile manager LogfileManager::LogfileManager(ApplicationServer* server) : ApplicationFeature(server, "LogfileManager"), - _recoverState(nullptr), _allowWrites(false), // start in read-only mode _hasFoundLastTick(false), _inRecovery(true), @@ -130,7 +129,6 @@ LogfileManager::~LogfileManager() { _barriers.clear(); - delete _recoverState; delete _slots; for (auto& it : _logfiles) { @@ -297,7 +295,7 @@ void LogfileManager::start() { // initialize some objects _slots = new Slots(this, _numberOfSlots, 0); - _recoverState = new RecoverState(_ignoreRecoveryErrors); + _recoverState.reset(new RecoverState(_ignoreRecoveryErrors)); TRI_ASSERT(!_allowWrites); @@ -405,6 +403,9 @@ bool LogfileManager::open() { // remove usage locks for databases and collections _recoverState->releaseResources(); + // not needed anymore + _recoverState.reset(); + // write the current state into the shutdown file writeShutdownInfo(false); @@ -2107,7 +2108,7 @@ int LogfileManager::inspectLogfiles() { // update the tick statistics if (!TRI_IterateDatafile(df, &RecoverState::InitialScanMarker, - static_cast(_recoverState))) { + static_cast(_recoverState.get()))) { std::string const logfileName = logfile->filename(); LOG(WARN) << "WAL inspection failed when scanning logfile '" << logfileName << "'"; diff --git a/arangod/Wal/LogfileManager.h b/arangod/Wal/LogfileManager.h index d94938b26d..5bb68673eb 100644 --- a/arangod/Wal/LogfileManager.h +++ b/arangod/Wal/LogfileManager.h @@ -457,7 +457,7 @@ class LogfileManager final : public application_features::ApplicationFeature { std::string _databasePath; // state during recovery - RecoverState* _recoverState; + 
std::unique_ptr _recoverState; bool _allowOversizeEntries = true; bool _useMLock = false; diff --git a/lib/ApplicationFeatures/ApplicationServer.cpp b/lib/ApplicationFeatures/ApplicationServer.cpp index aee12b9b3d..19c2a639a5 100644 --- a/lib/ApplicationFeatures/ApplicationServer.cpp +++ b/lib/ApplicationFeatures/ApplicationServer.cpp @@ -25,6 +25,7 @@ #include "ApplicationFeatures/ApplicationFeature.h" #include "ApplicationFeatures/PrivilegeFeature.h" #include "Basics/StringUtils.h" +#include "Basics/process-utils.h" #include "Logger/Logger.h" #include "ProgramOptions/ArgumentParser.h" @@ -532,6 +533,7 @@ void ApplicationServer::start() { feature->start(); feature->state(FeatureState::STARTED); reportFeatureProgress(_state, feature->name()); + } catch (std::exception const& ex) { LOG(ERR) << "caught exception during start of feature '" << feature->name() << "': " << ex.what() << ". shutting down"; diff --git a/lib/Basics/AssocUnique.h b/lib/Basics/AssocUnique.h index 88d79c3931..0ecff77369 100644 --- a/lib/Basics/AssocUnique.h +++ b/lib/Basics/AssocUnique.h @@ -349,6 +349,14 @@ class AssocUnique { } return sum; } + + size_t capacity() const { + size_t sum = 0; + for (auto& b : _buckets) { + sum += static_cast(b._nrAlloc); + } + return sum; + } ////////////////////////////////////////////////////////////////////////////// /// @brief resizes the hash table diff --git a/lib/Basics/process-utils.cpp b/lib/Basics/process-utils.cpp index c6665af1a4..8962d97251 100644 --- a/lib/Basics/process-utils.cpp +++ b/lib/Basics/process-utils.cpp @@ -472,6 +472,16 @@ static void StartExternalProcess(TRI_external_t* external, bool usePipes) { external->_status = TRI_EXT_RUNNING; } #endif + +void TRI_LogProcessInfoSelf(char const* message) { + TRI_process_info_t info = TRI_ProcessInfoSelf(); + + if (message == nullptr) { + message = ""; + } + + LOG_TOPIC(TRACE, Logger::MEMORY) << message << "virtualSize: " << info._virtualSize << ", residentSize: " << info._residentSize << ", 
numberThreads: " << info._numberThreads; +} //////////////////////////////////////////////////////////////////////////////// /// @brief converts usec and sec into seconds diff --git a/lib/Basics/process-utils.h b/lib/Basics/process-utils.h index 9e49111ceb..9b5c9f3bac 100644 --- a/lib/Basics/process-utils.h +++ b/lib/Basics/process-utils.h @@ -125,6 +125,8 @@ struct TRI_external_status_t { std::string _errorMessage; }; +void TRI_LogProcessInfoSelf(char const* message = nullptr); + //////////////////////////////////////////////////////////////////////////////// /// @brief converts usec and sec into seconds //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Logger/LogTopic.cpp b/lib/Logger/LogTopic.cpp index f0a553b056..91a294354f 100644 --- a/lib/Logger/LogTopic.cpp +++ b/lib/Logger/LogTopic.cpp @@ -49,6 +49,7 @@ LogTopic Logger::CONFIG("config"); LogTopic Logger::DATAFILES("datafiles", LogLevel::INFO); LogTopic Logger::GRAPHS("graphs", LogLevel::INFO); LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO); +LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress LogTopic Logger::MMAP("mmap"); LogTopic Logger::PERFORMANCE("performance", LogLevel::FATAL); // suppress LogTopic Logger::QUERIES("queries", LogLevel::INFO); diff --git a/lib/Logger/Logger.h b/lib/Logger/Logger.h index 2fd7776ff6..3529e9b9f4 100644 --- a/lib/Logger/Logger.h +++ b/lib/Logger/Logger.h @@ -137,6 +137,7 @@ class Logger { static LogTopic DATAFILES; static LogTopic GRAPHS; static LogTopic HEARTBEAT; + static LogTopic MEMORY; static LogTopic MMAP; static LogTopic PERFORMANCE; static LogTopic QUERIES; From 3e6064f772121da1d868b0a36f4a431a19869a7d Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 15 Dec 2016 15:45:33 +0100 Subject: [PATCH 2/5] preliminary fix for issue #2218 --- arangod/Scheduler/Socket.h | 50 ++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/arangod/Scheduler/Socket.h 
b/arangod/Scheduler/Socket.h index 778d3024d8..59818aa5c1 100644 --- a/arangod/Scheduler/Socket.h +++ b/arangod/Scheduler/Socket.h @@ -44,10 +44,50 @@ template bool doSslHandshake(T& socket) { boost::system::error_code ec; - do { - ec.assign(boost::system::errc::success, boost::system::generic_category()); - socket.handshake(boost::asio::ssl::stream_base::handshake_type::server, ec); - } while (ec.value() == boost::asio::error::would_block); + uint64_t tries = 0; + double start = 0.0; + + while (true) { + ec.assign(boost::system::errc::success, + boost::system::generic_category()); + socket.handshake( + boost::asio::ssl::stream_base::handshake_type::server, ec); + + if (ec.value() != boost::asio::error::would_block) { + break; + } + + // got error EWOULDBLOCK and need to try again + ++tries; + + // following is a helpless fix for connections hanging in the handshake + // phase forever. we've seen this happening when the underlying peer + // connection was closed during the handshake. + // with the helpless fix, handshakes will be aborted if they take longer + // than x seconds. a proper fix is to make the handshake run asynchronously + // and somehow signal it that the connection got closed. apart from that + // running it asynchronously will not block the scheduler thread as it + // does now. 
anyway, even the helpless fix allows self-healing of busy + // scheduler threads after a network failure + if (tries == 1) { + // capture start time of handshake + start = TRI_microtime(); + } else if (tries % 50 == 0) { + // check if we have spent more than x seconds handshaking and then abort + TRI_ASSERT(start != 0.0); + + if (TRI_microtime() - start >= 3) { + ec.assign(boost::asio::error::connection_reset, + boost::system::generic_category()); + LOG_TOPIC(DEBUG, Logger::COMMUNICATION) << "forcefully shutting down connection after wait time"; + break; + } else { + usleep(10000); + } + } + + // next iteration + } if (ec) { LOG_TOPIC(ERR, Logger::COMMUNICATION) @@ -88,7 +128,7 @@ class Socket { : _ioService(ioService), _context(std::move(context)), _encrypted(encrypted) {} - Socket(Socket&& that) = default; + Socket(Socket&& that) = delete; virtual ~Socket() {} virtual void close() = 0; From 3939d5e2d887bdc79b6910952572b04489362004 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 15 Dec 2016 16:18:07 +0100 Subject: [PATCH 3/5] don't log irrelevant message in generated Foxx apps --- js/server/modules/@arangodb/foxx/templates/setup.js.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/server/modules/@arangodb/foxx/templates/setup.js.tmpl b/js/server/modules/@arangodb/foxx/templates/setup.js.tmpl index a105ae9887..100e01946f 100644 --- a/js/server/modules/@arangodb/foxx/templates/setup.js.tmpl +++ b/js/server/modules/@arangodb/foxx/templates/setup.js.tmpl @@ -8,7 +8,7 @@ for (const localName of documentCollections) { if (!db._collection(qualifiedName)) { db._createDocumentCollection(qualifiedName); } else if (module.context.isProduction) { - console.warn(`collection <%= '$' %>{qualifiedName} already exists. Leaving it untouched.`) + console.debug(`collection <%= '$' %>{qualifiedName} already exists. 
Leaving it untouched.`) } } @@ -17,6 +17,6 @@ for (const localName of edgeCollections) { if (!db._collection(qualifiedName)) { db._createEdgeCollection(qualifiedName); } else if (module.context.isProduction) { - console.warn(`collection <%= '$' %>{qualifiedName} already exists. Leaving it untouched.`) + console.debug(`collection <%= '$' %>{qualifiedName} already exists. Leaving it untouched.`) } } From 1e65d28e5b16c2621587e4ce3ebb48bb881b019a Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 15 Dec 2016 16:26:27 +0100 Subject: [PATCH 4/5] align memory for ARM --- arangod/FulltextIndex/fulltext-index.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arangod/FulltextIndex/fulltext-index.cpp b/arangod/FulltextIndex/fulltext-index.cpp index b25f13d0f7..59dcd9192a 100644 --- a/arangod/FulltextIndex/fulltext-index.cpp +++ b/arangod/FulltextIndex/fulltext-index.cpp @@ -36,7 +36,13 @@ /// @brief use padding for pointers in binary data //////////////////////////////////////////////////////////////////////////////// +#ifdef __arm__ +// must properly align memory on ARM architecture to prevent +// unaligned memory accesses +#define FULLTEXT_PADDING 1 +#else #undef FULLTEXT_PADDING +#endif //////////////////////////////////////////////////////////////////////////////// /// @brief maximum length of an indexed word in bytes @@ -210,6 +216,7 @@ void DumpNode(const node_t* const node, uint32_t level) { static inline size_t Padding(uint32_t numEntries) { #ifdef FULLTEXT_PADDING + size_t const PAD = 8; size_t offset = sizeof(uint8_t) + // numAllocated sizeof(uint8_t) + // numUsed (sizeof(node_char_t) * numEntries); // followerKeys From 80ea8ae569228421c3f20e01fb98826bfdb76be4 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 15 Dec 2016 17:00:11 +0100 Subject: [PATCH 5/5] updated CHANGELOG --- CHANGELOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 4908c453a5..e35e9ec6b9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -52,10 
+52,11 @@ edge attribute `label`. v3.1.5 (XXXX-XX-XX) ------------------- +* fixed issue #2218 + * fixed issue #2217 * Foxx router.get/post/etc handler argument can no longer accidentally omitted -* fixed issue #2217 v3.1.4 (2016-12-08)