diff --git a/Documentation/Books/Makefile b/Documentation/Books/Makefile index 309b086db7..d3e2f48106 100644 --- a/Documentation/Books/Makefile +++ b/Documentation/Books/Makefile @@ -91,7 +91,7 @@ clean-book-intermediate: # - all docublocks are used somewhere in the documentation # check-docublocks: - grep -R '@startDocuBlock' . |\ + grep -R '@startDocuBlock' --include "*.h" --include "*.cpp" --include "*.js" --include "*.mdpp" . |\ grep -v '@startDocuBlockInline' |\ grep -v ppbook |\ grep -v allComments.txt |\ @@ -101,7 +101,7 @@ check-docublocks: grep -v '.*~:.*' |\ grep -v '.*#.*:.*' \ > /tmp/rawindoc.txt - grep -R '@startDocuBlockInline' . |\ + grep -R '@startDocuBlockInline' --include "*.h" --include "*.cpp" --include "*.js" --include "*.mdpp" . |\ grep -v ppbook |\ grep -v allComments.txt |\ grep -v Makefile |\ @@ -111,9 +111,9 @@ check-docublocks: grep -v '.*#.*:.*' \ >> /tmp/rawindoc.txt cat /tmp/rawindoc.txt | sed -e "s;.*ck ;;" -e "s;.*ne ;;" |sort -u > /tmp/indoc.txt - grep -R '^/// @startDocuBlock' ../../lib ../../arangod ../../arangosh ../../js |grep -v aardvark > /tmp/rawinprog.txt + grep -R '^/// @startDocuBlock' ../../lib ../../arangod ../../arangosh ../../js --include "*.h" --include "*.cpp" --include "*.js" --include "*.mdpp" |grep -v aardvark > /tmp/rawinprog.txt # searching the Inline docublocks needs some more blacklisting: - grep -R '@startDocuBlockInline' . |\ + grep -R '@startDocuBlockInline' --include "*.h" --include "*.cpp" --include "*.js" --include "*.mdpp" . |\ grep -v ppbook |\ grep -v allComments.txt |\ grep -v Makefile |\ diff --git a/Documentation/Books/Users/Installing/ARM.mdpp b/Documentation/Books/Users/Installing/ARM.mdpp new file mode 100644 index 0000000000..13e8e07e95 --- /dev/null +++ b/Documentation/Books/Users/Installing/ARM.mdpp @@ -0,0 +1,28 @@ +!CHAPTER ARM + +The ArangoDB packages for ARM require the kernel to allow unaligned memory access. 
+How the kernel handles unaligned memory access is configurable at runtime by +checking and adjusting the contents of `/proc/cpu/alignment`. + +In order to operate on ARM, ArangoDB requires bit 1 to be set. This will +make the kernel trap and adjust unaligned memory accesses. If this bit is not +set, the kernel may send a SIGBUS signal to ArangoDB and terminate it. + +To set bit 1 in `/proc/cpu/alignment` use the following command as a privileged +user (e.g. root): + + echo "2" > /proc/cpu/alignment + +Note that this setting affects all user processes and not just ArangoDB. Setting +the alignment with the above command will also not make the setting permanent, +so it will be lost after a restart of the system. In order to make the setting +permanent, the command should be executed during system startup or before starting arangod. + +The ArangoDB start/stop scripts do not adjust the alignment setting, but rely on +the environment to have the correct alignment setting already. The reason for this +is that the alignment settings also affect all other user processes (which ArangoDB +is not aware of) and thus may have side-effects outside of ArangoDB. It is therefore +more reasonable to have the system administrator carry out the changes. + +If the alignment settings are not correct, ArangoDB will log a warning at startup. + diff --git a/Documentation/Books/Users/Installing/Compiling.mdpp b/Documentation/Books/Users/Installing/Compiling.mdpp index 97cc3d5f7e..6d73206fd4 100644 --- a/Documentation/Books/Users/Installing/Compiling.mdpp +++ b/Documentation/Books/Users/Installing/Compiling.mdpp @@ -336,7 +336,16 @@ From time to time there will be bigger structural changes in ArangoDB, which may render the old Makefiles invalid. 
Should this be the case and `make` complains about missing files etc., the following commands should fix it: + rm -rf lib/*/.deps arangod/*/.deps arangosh/*/.deps Makefile + make setup + ./configure + make + +In order to reset everything and also recompile all 3rd party libraries, issue +the following commands: + make superclean + git checkout -- . make setup ./configure make @@ -348,3 +357,6 @@ If you forgot your previous configure options, you can look them up with head config.log before issuing `make superclean` (as make `superclean` also removes the file `config.log`). + +Sometimes you can get away with the less intrusive commands: + diff --git a/Documentation/Books/Users/Installing/Linux.mdpp b/Documentation/Books/Users/Installing/Linux.mdpp index 491dcdcd79..aa3517acbc 100644 --- a/Documentation/Books/Users/Installing/Linux.mdpp +++ b/Documentation/Books/Users/Installing/Linux.mdpp @@ -23,7 +23,6 @@ page to use your favorite package manager for the major distributions. After setting up the ArangoDB repository you can easily install ArangoDB using yum, aptitude, urpmi or zypper. 
- !SUBSECTION Gentoo Please use the [portage](https://github.com/mgiken/portage-overlay/tree/master/dev-db/ArangoDB) @@ -51,7 +50,7 @@ A Chef recipe is available from jbianquetti at: !SECTION Using ansible -An [Ansible](http://ansible.com) role is available trough [Ansible-Galaxy](https://galaxy.ansible.com) +An [Ansible](http://ansible.com) role is available through [Ansible-Galaxy](https://galaxy.ansible.com) * Role on Ansible-Galaxy: https://galaxy.ansible.com/list#/roles/2344 * Source on Github: https://github.com/stackmagic/ansible-arangodb diff --git a/Documentation/Books/Users/SUMMARY.md b/Documentation/Books/Users/SUMMARY.md index 6fea620d5c..2607119e76 100644 --- a/Documentation/Books/Users/SUMMARY.md +++ b/Documentation/Books/Users/SUMMARY.md @@ -3,6 +3,7 @@ * [Linux](Installing/Linux.md) * [Mac OS X](Installing/MacOSX.md) * [Windows](Installing/Windows.md) + * [ARM](Installing/ARM.md) * [Compiling](Installing/Compiling.md) * [Upgrading](Installing/Upgrading.md) * [Incompatible changes in 2.7](Upgrading/UpgradingChanges27.md) diff --git a/arangod/Indexes/EdgeIndex.cpp b/arangod/Indexes/EdgeIndex.cpp index 404b5ff844..05473aa773 100644 --- a/arangod/Indexes/EdgeIndex.cpp +++ b/arangod/Indexes/EdgeIndex.cpp @@ -446,8 +446,8 @@ int EdgeIndex::remove (TRI_doc_mptr_t const* doc, int EdgeIndex::batchInsert (std::vector const* documents, size_t numThreads) { - _edgesFrom->batchInsert(reinterpret_cast const*>(documents), numThreads); - _edgesTo->batchInsert(reinterpret_cast const*>(documents), numThreads); + _edgesFrom->batchInsert(reinterpret_cast const*>(documents), numThreads); + _edgesTo->batchInsert(reinterpret_cast const*>(documents), numThreads); return TRI_ERROR_NO_ERROR; } diff --git a/arangod/Indexes/EdgeIndex.h b/arangod/Indexes/EdgeIndex.h index e0b4950bd7..2a2b273626 100644 --- a/arangod/Indexes/EdgeIndex.h +++ b/arangod/Indexes/EdgeIndex.h @@ -100,7 +100,7 @@ namespace triagens { int remove (struct TRI_doc_mptr_t const*, bool) override final; - int 
batchInsert (std::vector const*, + int batchInsert (std::vector const*, size_t) override final; //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Indexes/HashIndex.cpp b/arangod/Indexes/HashIndex.cpp index 5a51cb1ff7..426c172681 100644 --- a/arangod/Indexes/HashIndex.cpp +++ b/arangod/Indexes/HashIndex.cpp @@ -104,38 +104,11 @@ static bool IsEqualKeyElement (TRI_index_search_value_t const* left, } static bool IsEqualKeyElementHash (TRI_index_search_value_t const* left, - uint64_t const hash, // Has been computed but is not used here - TRI_index_element_t const* right) { + uint64_t const hash, // Has been computed but is not used here + TRI_index_element_t const* right) { return IsEqualKeyElement(left, right); } -//////////////////////////////////////////////////////////////////////////////// -/// @brief fills the index search from hash index element -//////////////////////////////////////////////////////////////////////////////// - -static int FillIndexSearchValueByHashIndexElement (HashIndex const* hashIndex, - TRI_index_search_value_t* key, - TRI_index_element_t const* element) { - key->_values = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, hashIndex->keyEntrySize(), false)); - - if (key->_values == nullptr) { - return TRI_ERROR_OUT_OF_MEMORY; - } - - char const* ptr = element->document()->getShapedJsonPtr(); // ONLY IN INDEX - size_t const n = hashIndex->paths().size(); - - for (size_t i = 0; i < n; ++i) { - auto sid = element->subObjects()[i]._sid; - key->_values[i]._sid = sid; - - TRI_InspectShapedSub(&element->subObjects()[i], ptr, key->_values[i]); - } - key->_length = n; - - return TRI_ERROR_NO_ERROR; -} - // ----------------------------------------------------------------------------- // --SECTION-- class HashIndex::UniqueArray // ----------------------------------------------------------------------------- @@ -145,12 +118,15 @@ static int FillIndexSearchValueByHashIndexElement (HashIndex const* 
hashIndex, //////////////////////////////////////////////////////////////////////////////// HashIndex::UniqueArray::UniqueArray (TRI_HashArray_t* hashArray, - HashElementFunc* hashElement) + HashElementFunc* hashElement, + IsEqualElementElementByKey* isEqualElElByKey) : _hashArray(hashArray), - _hashElement(hashElement) { + _hashElement(hashElement), + _isEqualElElByKey(isEqualElElByKey) { TRI_ASSERT(_hashArray != nullptr); TRI_ASSERT(_hashElement != nullptr); + TRI_ASSERT(_isEqualElElByKey != nullptr); } //////////////////////////////////////////////////////////////////////////////// @@ -230,14 +206,16 @@ HashIndex::HashIndex (TRI_idx_iid_t iid, std::unique_ptr func(new HashElementFunc(numPaths())); if (unique) { + std::unique_ptr compare(new IsEqualElementElementByKey(_paths.size())); std::unique_ptr array(new TRI_HashArray_t(HashKey, *(func.get()), IsEqualKeyElementHash, IsEqualElementElement, + *(compare.get()), indexBuckets, [] () -> std::string { return "unique hash-array"; })); - _uniqueArray = new HashIndex::UniqueArray(array.get(), func.get()); + _uniqueArray = new HashIndex::UniqueArray(array.get(), func.get(), compare.get()); array.release(); } else { @@ -360,6 +338,15 @@ int HashIndex::remove (TRI_doc_mptr_t const* doc, return removeMulti(doc, isRollback); } + +int HashIndex::batchInsert (std::vector const* documents, + size_t numThreads) { + if (_unique) { + return batchInsertUnique(documents, numThreads); + } + return batchInsertMulti(documents, numThreads); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief provides a size hint for the hash index //////////////////////////////////////////////////////////////////////////////// @@ -508,22 +495,7 @@ int HashIndex::insertUnique (TRI_doc_mptr_t const* doc, TRI_IF_FAILURE("InsertHashIndex") { return TRI_ERROR_DEBUG; } - - TRI_index_search_value_t key; - int res = FillIndexSearchValueByHashIndexElement(this, &key, element); - - if (res != TRI_ERROR_NO_ERROR) { - 
// out of memory - return res; - } - - res = _uniqueArray->_hashArray->insert(&key, element, isRollback); - - if (key._values != nullptr) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, key._values); - } - - return res; + return _uniqueArray->_hashArray->insert(element, isRollback); }; size_t count = elements.size(); @@ -543,6 +515,37 @@ int HashIndex::insertUnique (TRI_doc_mptr_t const* doc, return res; } +int HashIndex::batchInsertUnique (std::vector const* documents, + size_t numThreads) { + auto allocate = [this] () -> TRI_index_element_t* { + return TRI_index_element_t::allocate(keyEntrySize(), false); + }; + + std::vector elements; + elements.reserve(documents->size()); + for (auto& doc : *documents) { + int res = fillElement(allocate, elements, doc); + if (res != TRI_ERROR_NO_ERROR) { + for (auto& it : elements) { + // free all elements to prevent leak + FreeElement(it); + } + return res; + } + } + int res = _uniqueArray->_hashArray->batchInsert(&elements, numThreads); + + if (res != TRI_ERROR_NO_ERROR) { + // TODO check leaks + for (auto& it : elements) { + // free all elements to prevent leak + FreeElement(it); + } + } + + return res; +} + int HashIndex::insertMulti (TRI_doc_mptr_t const* doc, bool isRollback) { @@ -590,6 +593,29 @@ int HashIndex::insertMulti (TRI_doc_mptr_t const* doc, return res; } +int HashIndex::batchInsertMulti (std::vector const* documents, + size_t numThreads) { + + auto allocate = [this] () -> TRI_index_element_t* { + return TRI_index_element_t::allocate(keyEntrySize(), false); + }; + + std::vector elements; + + for (auto& doc : *documents) { + int res = fillElement(allocate, elements, doc); + if (res != TRI_ERROR_NO_ERROR) { + // Filling the elements failed for some reason. 
Assume loading as failed + for (auto& el : elements) { + // Free all elements that are not yet in the index + FreeElement(el); + } + return res; + } + } + return _multiArray->_hashArray->batchInsert(&elements, numThreads); +} + int HashIndex::removeUniqueElement (TRI_index_element_t* element, bool isRollback) { TRI_IF_FAILURE("RemoveHashIndex") { return TRI_ERROR_DEBUG; @@ -668,6 +694,7 @@ int HashIndex::removeMulti (TRI_doc_mptr_t const* doc, bool isRollback) { return res; } + // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- diff --git a/arangod/Indexes/HashIndex.h b/arangod/Indexes/HashIndex.h index 3edd91cd70..73344aded5 100644 --- a/arangod/Indexes/HashIndex.h +++ b/arangod/Indexes/HashIndex.h @@ -90,7 +90,14 @@ namespace triagens { int remove (struct TRI_doc_mptr_t const*, bool) override final; + int batchInsert (std::vector const*, + size_t) override final; + int sizeHint (size_t) override final; + + bool hasBatchInsert () const override final { + return true; + } std::vector>> const& paths () const { return _paths; @@ -120,8 +127,12 @@ namespace triagens { int insertUnique (struct TRI_doc_mptr_t const*, bool); + int batchInsertUnique (std::vector const*, size_t); + int insertMulti (struct TRI_doc_mptr_t const*, bool); + int batchInsertMulti (std::vector const*, size_t); + int removeUniqueElement(TRI_index_element_t*, bool); int removeUnique (struct TRI_doc_mptr_t const*, bool); @@ -193,6 +204,10 @@ namespace triagens { TRI_ASSERT_EXPENSIVE(left->document() != nullptr); TRI_ASSERT_EXPENSIVE(right->document() != nullptr); + if (left->document() == right->document()) { + return true; + } + for (size_t j = 0; j < _numFields; ++j) { TRI_shaped_sub_t* leftSub = &left->subObjects()[j]; TRI_shaped_sub_t* rightSub = &right->subObjects()[j]; @@ -241,11 +256,13 @@ namespace triagens { struct UniqueArray { UniqueArray () = delete; - 
UniqueArray (TRI_HashArray_t*, HashElementFunc*); + UniqueArray (TRI_HashArray_t*, HashElementFunc*, IsEqualElementElementByKey*); + ~UniqueArray (); - TRI_HashArray_t* _hashArray; // the hash array itself, unique values - HashElementFunc* _hashElement; // hash function for elements + TRI_HashArray_t* _hashArray; // the hash array itself, unique values + HashElementFunc* _hashElement; // hash function for elements + IsEqualElementElementByKey* _isEqualElElByKey; // comparison func }; //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Indexes/Index.cpp b/arangod/Indexes/Index.cpp index fcee6ef01f..a116d42f19 100644 --- a/arangod/Indexes/Index.cpp +++ b/arangod/Indexes/Index.cpp @@ -384,7 +384,7 @@ double Index::selectivityEstimate () const { /// @brief default implementation for selectivityEstimate //////////////////////////////////////////////////////////////////////////////// -int Index::batchInsert (std::vector const*, size_t) { +int Index::batchInsert (std::vector const*, size_t) { THROW_ARANGO_EXCEPTION(TRI_ERROR_NOT_IMPLEMENTED); } diff --git a/arangod/Indexes/Index.h b/arangod/Indexes/Index.h index acd2ebf923..0066e03c6c 100644 --- a/arangod/Indexes/Index.h +++ b/arangod/Indexes/Index.h @@ -277,7 +277,7 @@ namespace triagens { virtual int insert (struct TRI_doc_mptr_t const*, bool) = 0; virtual int remove (struct TRI_doc_mptr_t const*, bool) = 0; virtual int postInsert (struct TRI_transaction_collection_s*, struct TRI_doc_mptr_t const*); - virtual int batchInsert (std::vector const*, size_t); + virtual int batchInsert (std::vector const*, size_t); // a garbage collection function for the index virtual int cleanup (); diff --git a/arangod/Indexes/PrimaryIndex.cpp b/arangod/Indexes/PrimaryIndex.cpp index 6283341e74..f0273a55ca 100644 --- a/arangod/Indexes/PrimaryIndex.cpp +++ b/arangod/Indexes/PrimaryIndex.cpp @@ -56,7 +56,6 @@ static bool IsEqualKeyElement (char const* key, uint64_t const hash, TRI_doc_mptr_t 
const* element) { - // Performance? return (hash == element->_hash && strcmp(key, TRI_EXTRACT_MARKER_KEY(element)) == 0); } @@ -92,6 +91,7 @@ PrimaryIndex::PrimaryIndex (TRI_document_collection_t* collection) HashElement, IsEqualKeyElement, IsEqualElementElement, + IsEqualElementElement, indexBuckets, [] () -> std::string { return "primary"; } ); @@ -217,7 +217,7 @@ TRI_doc_mptr_t* PrimaryIndex::lookupSequentialReverse (uint64_t& position) { int PrimaryIndex::insertKey (TRI_doc_mptr_t* header, void const** found) { *found = nullptr; - int res = _primaryIndex->insert(TRI_EXTRACT_MARKER_KEY(header), header, false); + int res = _primaryIndex->insert(header, false); if (res == TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED) { *found = _primaryIndex->find(header); } @@ -231,7 +231,7 @@ int PrimaryIndex::insertKey (TRI_doc_mptr_t* header, //////////////////////////////////////////////////////////////////////////////// void PrimaryIndex::insertKey (TRI_doc_mptr_t* header) { - _primaryIndex->insert(TRI_EXTRACT_MARKER_KEY(header), header, false); + _primaryIndex->insert(header, false); } //////////////////////////////////////////////////////////////////////////////// @@ -242,7 +242,7 @@ void PrimaryIndex::insertKey (TRI_doc_mptr_t* header) { void PrimaryIndex::insertKey (TRI_doc_mptr_t* header, uint64_t slot) { - _primaryIndex->insert(TRI_EXTRACT_MARKER_KEY(header), header, false); + _primaryIndex->insert(header, false); // TODO slot is hint where to insert the element. 
It is not yet used // // if (slot != UINT64_MAX) { diff --git a/arangod/Indexes/SkiplistIndex.cpp b/arangod/Indexes/SkiplistIndex.cpp index 7703498376..68b04de5c3 100644 --- a/arangod/Indexes/SkiplistIndex.cpp +++ b/arangod/Indexes/SkiplistIndex.cpp @@ -39,10 +39,6 @@ using namespace triagens::arango; // --SECTION-- private functions // ----------------------------------------------------------------------------- - - - - //////////////////////////////////////////////////////////////////////////////// /// @brief frees an element in the skiplist //////////////////////////////////////////////////////////////////////////////// @@ -52,7 +48,6 @@ static void FreeElm (void* e) { TRI_index_element_t::free(element); } - // ............................................................................. // recall for all of the following comparison functions: // @@ -205,7 +200,7 @@ static int FillLookupOperator (TRI_index_operator_t* slOperator, // --SECTION-- public methods // ----------------------------------------------------------------------------- -size_t SkiplistIterator::size () { +size_t SkiplistIterator::size () const { return _intervals.size(); } @@ -228,7 +223,7 @@ void SkiplistIterator::initCursor () { } } -bool SkiplistIterator::hasNext () { +bool SkiplistIterator::hasNext () const { if (_reverse) { return hasPrevIteration(); } @@ -415,7 +410,6 @@ void SkiplistIterator::findHelper ( return; } - case TRI_EQ_INDEX_OPERATOR: { temp = _index->_skiplistIndex->leftKeyLookup(&values); TRI_ASSERT(nullptr != temp); @@ -505,7 +499,7 @@ void SkiplistIterator::findHelper ( /// interval or before it - without advancing the iterator. //////////////////////////////////////////////////////////////////////////////// -bool SkiplistIterator::hasPrevIteration () { +bool SkiplistIterator::hasPrevIteration () const { // ........................................................................... 
// if we have more intervals than the one we are currently working // on then of course we have a previous doc, because intervals are nonempty. @@ -529,7 +523,7 @@ bool SkiplistIterator::hasPrevIteration () { /// interval - without advancing the iterator. //////////////////////////////////////////////////////////////////////////////// -bool SkiplistIterator::hasNextIteration () { +bool SkiplistIterator::hasNextIteration () const { if (_cursor == nullptr) { return false; } @@ -591,7 +585,6 @@ TRI_index_element_t* SkiplistIterator::prevIteration () { //////////////////////////////////////////////////////////////////////////////// TRI_index_element_t* SkiplistIterator::nextIteration () { - if (_cursor == nullptr) { // In this case the iterator is exhausted or does not even have intervals. return nullptr; @@ -643,12 +636,8 @@ SkiplistIndex::SkiplistIndex (TRI_idx_iid_t iid, CmpElmElm(this), CmpKeyElm(this), _skiplistIndex(nullptr) { - _skiplistIndex = new TRI_Skiplist(CmpElmElm, CmpKeyElm, - FreeElm, unique, _useExpansion); - if (_skiplistIndex == nullptr) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); - } + _skiplistIndex = new TRI_Skiplist(CmpElmElm, CmpKeyElm, FreeElm, unique, _useExpansion); } //////////////////////////////////////////////////////////////////////////////// @@ -656,9 +645,7 @@ SkiplistIndex::SkiplistIndex (TRI_idx_iid_t iid, //////////////////////////////////////////////////////////////////////////////// SkiplistIndex::~SkiplistIndex () { - if (_skiplistIndex != nullptr) { - delete _skiplistIndex; - } + delete _skiplistIndex; } // ----------------------------------------------------------------------------- @@ -823,7 +810,7 @@ size_t SkiplistIndex::elementSize () const { //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::KeyElementComparator::operator() (TRI_skiplist_index_key_t const* leftKey, - TRI_index_element_t const* rightElement) { + TRI_index_element_t const* rightElement) const { 
TRI_ASSERT(nullptr != leftKey); TRI_ASSERT(nullptr != rightElement); @@ -850,8 +837,8 @@ int SkiplistIndex::KeyElementComparator::operator() (TRI_skiplist_index_key_t co //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::ElementElementComparator::operator() (TRI_index_element_t const* leftElement, - TRI_index_element_t const* rightElement, - triagens::basics::SkipListCmpType cmptype) { + TRI_index_element_t const* rightElement, + triagens::basics::SkipListCmpType cmptype) const { TRI_ASSERT(nullptr != leftElement); TRI_ASSERT(nullptr != rightElement); diff --git a/arangod/Indexes/SkiplistIndex.h b/arangod/Indexes/SkiplistIndex.h index c88a3f60a0..dac99ed144 100644 --- a/arangod/Indexes/SkiplistIndex.h +++ b/arangod/Indexes/SkiplistIndex.h @@ -81,7 +81,8 @@ namespace triagens { SkiplistIteratorInterval () : _leftEndPoint(nullptr), - _rightEndPoint(nullptr) { } + _rightEndPoint(nullptr) { + } }; // ----------------------------------------------------------------------------- @@ -90,7 +91,7 @@ namespace triagens { private: - SkiplistIndex* const _index; + SkiplistIndex const* _index; size_t _currentInterval; // starts with 0, current interval used bool _reverse; Node* _cursor; @@ -102,16 +103,16 @@ namespace triagens { public: - SkiplistIterator ( - SkiplistIndex* const idx, - bool reverse - ) : _index(idx) , + SkiplistIterator (SkiplistIndex const* idx, + bool reverse) + : _index(idx) , _currentInterval(0), _reverse(reverse), _cursor(nullptr) { } - ~SkiplistIterator () {} + ~SkiplistIterator () { + } // always holds the last node returned, initially equal to // the _leftEndPoint of the first interval (or the @@ -131,9 +132,9 @@ namespace triagens { public: - size_t size (); + size_t size () const; - bool hasNext (); + bool hasNext () const; TRI_index_element_t* next (); @@ -150,10 +151,10 @@ namespace triagens { private: - bool hasPrevIteration (); + bool hasPrevIteration () const; TRI_index_element_t* prevIteration 
(); - bool hasNextIteration (); + bool hasNextIteration () const; TRI_index_element_t* nextIteration (); bool findHelperIntervalIntersectionValid ( @@ -162,9 +163,7 @@ namespace triagens { SkiplistIteratorInterval& interval ); - bool findHelperIntervalValid ( - SkiplistIteratorInterval const& interval - ); + bool findHelperIntervalValid (SkiplistIteratorInterval const& interval); }; // ----------------------------------------------------------------------------- @@ -175,7 +174,7 @@ namespace triagens { struct KeyElementComparator { int operator() (TRI_skiplist_index_key_t const* leftKey, - TRI_index_element_t const* rightElement); + TRI_index_element_t const* rightElement) const; KeyElementComparator (SkiplistIndex* idx) { _idx = idx; @@ -189,7 +188,7 @@ namespace triagens { struct ElementElementComparator { int operator() (TRI_index_element_t const* leftElement, TRI_index_element_t const* rightElement, - triagens::basics::SkipListCmpType cmptype); + triagens::basics::SkipListCmpType cmptype) const; ElementElementComparator (SkiplistIndex* idx) { _idx = idx; @@ -262,7 +261,6 @@ namespace triagens { private: size_t elementSize () const; - int _CmpElmElm (TRI_index_element_t const* leftElement, TRI_index_element_t const* rightElement, diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/modules/org/arangodb/tutorial.js b/js/apps/system/_admin/aardvark/APP/frontend/js/modules/org/arangodb/tutorial.js index b3f09fbe99..662f72d235 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/modules/org/arangodb/tutorial.js +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/modules/org/arangodb/tutorial.js @@ -77,7 +77,7 @@ var lessons = [ }, { title: "Creating Documents", - text: "Now we have a collection, but it is stil empty. So let's create some documents!\n\n" + + text: "Now we have a collection, but it is still empty. 
So let's create some documents!\n\n" + " db.places.save({ _key : \"foo\", city : \"foo-city\" });\n" + " for (i = 0; i <= 10; i++) { db.places.save({ _key: \"example\" + i, zipcode: i }) };" }, diff --git a/js/client/modules/org/arangodb/tutorial.js b/js/client/modules/org/arangodb/tutorial.js index 4e506001e9..59d60d2036 100644 --- a/js/client/modules/org/arangodb/tutorial.js +++ b/js/client/modules/org/arangodb/tutorial.js @@ -76,7 +76,7 @@ var lessons = [ }, { title: "Creating Documents", - text: "Now we have a collection, but it is stil empty. So let's create some documents!\n\n" + + text: "Now we have a collection, but it is still empty. So let's create some documents!\n\n" + " db.places.save({ _key : \"foo\", city : \"foo-city\" });\n" + " for (i = 0; i <= 10; i++) { db.places.save({ _key: \"example\" + i, zipcode: i }) };" }, diff --git a/lib/Basics/AssocMulti.h b/lib/Basics/AssocMulti.h index 2910c796b9..dd52e2644e 100644 --- a/lib/Basics/AssocMulti.h +++ b/lib/Basics/AssocMulti.h @@ -25,7 +25,7 @@ /// @author Dr. 
Frank Celler /// @author Martin Schoenert /// @author Max Neunhoeffer -/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany +/// @author Copyright 2014-2015, ArangoDB GmbH, Cologne, Germany /// @author Copyright 2006-2014, triAGENS GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// @@ -387,14 +387,14 @@ namespace triagens { /// @brief adds multiple elements to the array //////////////////////////////////////////////////////////////////////////////// - int batchInsert (std::vector const* data, + int batchInsert (std::vector const* data, size_t numThreads) { #ifdef TRI_CHECK_MULTI_POINTER_HASH check(true, true); #endif std::atomic res(TRI_ERROR_NO_ERROR); - std::vector const& elements = *(data); + std::vector const& elements = *(data); if (elements.size() < numThreads) { numThreads = elements.size(); @@ -428,7 +428,7 @@ namespace triagens { it = partitions.emplace(bucketId, DocumentsPerBucket()).first; } - (*it).second.emplace_back(std::make_pair(const_cast(elements[i]), hashByKey)); + (*it).second.emplace_back(std::make_pair(elements[i], hashByKey)); } // transfer ownership to the central map diff --git a/lib/Basics/AssocUnique.h b/lib/Basics/AssocUnique.h index 926169fd9c..4ed950d34a 100644 --- a/lib/Basics/AssocUnique.h +++ b/lib/Basics/AssocUnique.h @@ -24,7 +24,8 @@ /// /// @author Dr. 
Frank Celler /// @author Martin Schoenert -/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany +/// @author Michael Hackstein +/// @author Copyright 2014-2015, ArangoDB GmbH, Cologne, Germany /// @author Copyright 2006-2013, triAGENS GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// @@ -35,6 +36,7 @@ #include "Basics/gcd.h" #include "Basics/JsonHelper.h" #include "Basics/logging.h" +#include "Basics/MutexLocker.h" #include "Basics/random.h" namespace triagens { @@ -78,6 +80,7 @@ namespace triagens { HashElementFuncType const _hashElement; IsEqualKeyElementFuncType const _isEqualKeyElement; IsEqualElementElementFuncType const _isEqualElementElement; + IsEqualElementElementFuncType const _isEqualElementElementByKey; std::function _contextCallback; @@ -95,12 +98,14 @@ namespace triagens { HashElementFuncType hashElement, IsEqualKeyElementFuncType isEqualKeyElement, IsEqualElementElementFuncType isEqualElementElement, + IsEqualElementElementFuncType isEqualElementElementByKey, size_t numberBuckets = 1, std::function contextCallback = [] () -> std::string { return ""; }) : _hashKey(hashKey), _hashElement(hashElement), _isEqualKeyElement(isEqualKeyElement), _isEqualElementElement(isEqualElementElement), + _isEqualElementElementByKey(isEqualElementElementByKey), _contextCallback(contextCallback) { // Make the number of buckets a power of two: @@ -242,10 +247,10 @@ namespace triagens { /// @brief check a resize of the hash array //////////////////////////////////////////////////////////////////////////////// - bool checkResize (Bucket& b) { - if (2 * b._nrAlloc < 3 * b._nrUsed) { + bool checkResize (Bucket& b, uint64_t expected) { + if (2 * b._nrAlloc < 3 * (b._nrUsed + expected)) { /* grow when used + expected would exceed 2/3 of the allocated slots */ try { - resizeInternal(b, 2 * b._nrAlloc + 1, false); + resizeInternal(b, 2 * (b._nrAlloc + expected) + 1, false); } catch (...) 
{ return false; @@ -272,6 +277,40 @@ namespace triagens { return nullptr; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief Insert a document into the given bucket +/// This does not resize and expects to have enough space +//////////////////////////////////////////////////////////////////////////////// + + int doInsert (Element* element, + Bucket& b, + uint64_t hash) { + + uint64_t const n = b._nrAlloc; + uint64_t i = hash % n; + uint64_t k = i; + + for (; i < n && b._table[i] != nullptr && + ! _isEqualElementElementByKey(element, b._table[i]); ++i); + if (i == n) { + for (i = 0; i < k && b._table[i] != nullptr && + ! _isEqualElementElementByKey(element, b._table[i]); ++i); + } + + Element* arrayElement = b._table[i]; + + if (arrayElement != nullptr) { + return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED; + } + + b._table[i] = element; + TRI_ASSERT(b._table[i] != nullptr); + b._nrUsed++; + + return TRI_ERROR_NO_ERROR; + } + + // ----------------------------------------------------------------------------- // --SECTION-- public functions // ----------------------------------------------------------------------------- @@ -363,10 +402,10 @@ namespace triagens { uint64_t k = i; for (; i < n && b._table[i] != nullptr && - ! _isEqualElementElement(element, b._table[i]); ++i); + ! _isEqualElementElementByKey(element, b._table[i]); ++i); if (i == n) { for (i = 0; i < k && b._table[i] != nullptr && - ! _isEqualElementElement(element, b._table[i]); ++i); + ! _isEqualElementElementByKey(element, b._table[i]); ++i); } // ........................................................................... 
@@ -406,43 +445,174 @@ namespace triagens { } //////////////////////////////////////////////////////////////////////////////// -/// @brief adds an key/element to the array +/// @brief adds an element to the array //////////////////////////////////////////////////////////////////////////////// - int insert (Key const* key, - Element* element, + int insert (Element* element, bool isRollback) { - uint64_t hash = _hashKey(key); - uint64_t i = hash; - Bucket& b = _buckets[i & _bucketsMask]; + uint64_t hash = _hashElement(element); + Bucket& b = _buckets[hash & _bucketsMask]; - if (! checkResize(b)) { + if (! checkResize(b, 0)) { return TRI_ERROR_OUT_OF_MEMORY; } - uint64_t const n = b._nrAlloc; - i = i % n; - uint64_t k = i; + return doInsert(element, b, hash); + } - for (; i < n && b._table[i] != nullptr && - ! _isEqualKeyElement(key, hash, b._table[i]); ++i); - if (i == n) { - for (i = 0; i < k && b._table[i] != nullptr && - ! _isEqualKeyElement(key, hash, b._table[i]); ++i); +//////////////////////////////////////////////////////////////////////////////// +/// @brief adds multiple elements to the array +//////////////////////////////////////////////////////////////////////////////// + + int batchInsert (std::vector const* data, + size_t numThreads) { + + std::atomic res(TRI_ERROR_NO_ERROR); + std::vector const& elements = *(data); + + if (elements.size() < numThreads) { + numThreads = elements.size(); + } + if (numThreads > _buckets.size()) { + numThreads = _buckets.size(); } - Element* arrayElement = b._table[i]; + size_t const chunkSize = elements.size() / numThreads; - if (arrayElement != nullptr) { - return TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED; + typedef std::vector> DocumentsPerBucket; + triagens::basics::Mutex bucketMapLocker; + + std::unordered_map> allBuckets; + + // partition the work into some buckets + { + auto partitioner = [&] (size_t lower, size_t upper) -> void { + try { + std::unordered_map partitions; + + for (size_t i = lower; i < upper; ++i) 
{ + uint64_t hash = _hashElement(elements[i]); + auto bucketId = hash & _bucketsMask; + + auto it = partitions.find(bucketId); + + if (it == partitions.end()) { + it = partitions.emplace(bucketId, DocumentsPerBucket()).first; + } + + (*it).second.emplace_back(std::make_pair(elements[i], hash)); + } + + // transfer ownership to the central map + MUTEX_LOCKER(bucketMapLocker); + + for (auto& it : partitions) { + auto it2 = allBuckets.find(it.first); + + if (it2 == allBuckets.end()) { + it2 = allBuckets.emplace(it.first, std::vector()).first; + } + + (*it2).second.emplace_back(std::move(it.second)); + } + } + catch (...) { + res = TRI_ERROR_INTERNAL; + } + }; + + std::vector threads; + threads.reserve(numThreads); + + try { + for (size_t i = 0; i < numThreads; ++i) { + size_t lower = i * chunkSize; + size_t upper = (i + 1) * chunkSize; + + if (i + 1 == numThreads) { + // last chunk. account for potential rounding errors + upper = elements.size(); + } + else if (upper > elements.size()) { + upper = elements.size(); + } + + threads.emplace_back(std::thread(partitioner, lower, upper)); + } + } + catch (...) { + res = TRI_ERROR_INTERNAL; + } + + for (size_t i = 0; i < threads.size(); ++i) { + // must join threads, otherwise the program will crash + threads[i].join(); + } } - b._table[i] = static_cast(element); - TRI_ASSERT(b._table[i] != nullptr); - b._nrUsed++; + if (res.load() != TRI_ERROR_NO_ERROR) { + return res.load(); + } - return TRI_ERROR_NO_ERROR; + // now the data is partitioned... + + // now insert the bucket data in parallel + { + auto inserter = [&] (size_t chunk) -> void { + try { + for (auto const& it : allBuckets) { + uint64_t bucketId = it.first; + + if (bucketId % numThreads != chunk) { + // we're not responsible for this bucket! + continue; + } + + // we're responsible for this bucket! + Bucket& b = _buckets[bucketId]; + uint64_t expected = 0; + + for (auto const& it2 : it.second) { + expected += it2.size(); + } + + if (! 
checkResize(b, expected)) { + res = TRI_ERROR_OUT_OF_MEMORY; + return; + } + + for (auto const& it2 : it.second) { + for (auto const& it3 : it2) { + doInsert(it3.first, b, it3.second); + } + } + } + } + catch (...) { + res = TRI_ERROR_INTERNAL; + } + }; + + std::vector threads; + threads.reserve(numThreads); + + try { + for (size_t i = 0; i < numThreads; ++i) { + threads.emplace_back(std::thread(inserter, i)); + } + } + catch (...) { + res = TRI_ERROR_INTERNAL; + } + + for (size_t i = 0; i < threads.size(); ++i) { + // must join threads, otherwise the program will crash + threads[i].join(); + } + } + + return res.load(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Basics/RandomGenerator.cpp b/lib/Basics/RandomGenerator.cpp index 2f75507838..60ede21681 100644 --- a/lib/Basics/RandomGenerator.cpp +++ b/lib/Basics/RandomGenerator.cpp @@ -489,7 +489,7 @@ namespace triagens { // current implementation (see version at the top of the file) - UniformIntegerImpl * uniformInteger = new UniformIntegerMersenne; + UniformIntegerImpl* uniformInteger = new UniformIntegerMersenne; // ----------------------------------------------------------------------------- // uniform integer generator @@ -644,6 +644,10 @@ namespace triagens { if (RandomHelper::combinedDevice != nullptr) { delete RandomHelper::combinedDevice; RandomHelper::combinedDevice = nullptr; + } + + if (uniformInteger == nullptr) { + delete uniformInteger; } } @@ -681,6 +685,7 @@ namespace triagens { } } } + // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- diff --git a/lib/Basics/SkipList.h b/lib/Basics/SkipList.h index 67930d347a..9dec63ec8d 100644 --- a/lib/Basics/SkipList.h +++ b/lib/Basics/SkipList.h @@ -245,7 +245,7 @@ namespace triagens { /// @brief return the successor node or nullptr if last node 
//////////////////////////////////////////////////////////////////////////////// - Node* nextNode (Node* node) { + Node* nextNode (Node* node) const { return node->_next[0]; } diff --git a/lib/Basics/memory.cpp b/lib/Basics/memory.cpp index 409ed803b4..c0dc57c863 100644 --- a/lib/Basics/memory.cpp +++ b/lib/Basics/memory.cpp @@ -330,15 +330,10 @@ void* TRI_AllocateZ (TRI_memory_zone_t* zone, uint64_t n, bool set, char const* #else void* TRI_Allocate (TRI_memory_zone_t* zone, uint64_t n, bool set) { #endif - char* m; - #ifdef TRI_ENABLE_MAINTAINER_MODE CheckSize(n, file, line); - - m = static_cast(MALLOC_WRAPPER(zone, (size_t) n + sizeof(uintptr_t))); -#else - m = static_cast(MALLOC_WRAPPER(zone, (size_t) n)); #endif + char* m = static_cast(MALLOC_WRAPPER(zone, (size_t) n)); if (m == nullptr) { if (zone->_failable) { @@ -371,24 +366,14 @@ void* TRI_Allocate (TRI_memory_zone_t* zone, uint64_t n, bool set) { #endif } -#ifdef TRI_ENABLE_MAINTAINER_MODE - else if (set) { - memset(m, 0, (size_t) n + sizeof(uintptr_t)); - } - else { - // prefill with 0xA5 (magic value, same as Valgrind will use) - memset(m, 0xA5, (size_t) n + sizeof(uintptr_t)); - } -#else - else if (set) { + if (set) { memset(m, 0, (size_t) n); } -#endif - #ifdef TRI_ENABLE_MAINTAINER_MODE - * (uintptr_t*) m = zone->_zid; - // zone->_zid is a uint32_t but we'll advance sizeof(uintptr_t) bytes for good alignment everywhere - m += sizeof(uintptr_t); + else { + // prefill with 0xA5 (magic value, same as Valgrind will use) + memset(m, 0xA5, (size_t) n); + } #endif return m; @@ -403,7 +388,6 @@ void* TRI_ReallocateZ (TRI_memory_zone_t* zone, void* m, uint64_t n, char const* #else void* TRI_Reallocate (TRI_memory_zone_t* zone, void* m, uint64_t n) { #endif - char* p; if (m == nullptr) { #ifdef TRI_ENABLE_MAINTAINER_MODE @@ -413,27 +397,14 @@ void* TRI_Reallocate (TRI_memory_zone_t* zone, void* m, uint64_t n) { #endif } - p = (char*) m; + char* p = (char*) m; #ifdef TRI_ENABLE_MAINTAINER_MODE - p -= 
sizeof(uintptr_t); - CheckSize(n, file, line); - - if (* (uintptr_t*) p != zone->_zid) { - fprintf(stderr, - "memory zone mismatch in TRI_Reallocate" ZONE_DEBUG_LOCATION ", old zone %d, new zone %d" - ZONE_DEBUG_PARAMS, - (int) * (uintptr_t*) p, - (int) zone->_zid); - TRI_ASSERT(false); - } - - p = static_cast(REALLOC_WRAPPER(zone, p, (size_t) n + sizeof(uintptr_t))); -#else - p = static_cast(REALLOC_WRAPPER(zone, p, (size_t) n)); #endif + p = static_cast(REALLOC_WRAPPER(zone, p, (size_t) n)); + if (p == nullptr) { if (zone->_failable) { TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); @@ -465,11 +436,6 @@ void* TRI_Reallocate (TRI_memory_zone_t* zone, void* m, uint64_t n) { #endif } -#ifdef TRI_ENABLE_MAINTAINER_MODE - // zone->_zid is a uint32_t but we'll advance sizeof(uintptr_t) bytes for good alignment everywhere - p += sizeof(uintptr_t); -#endif - return p; } @@ -483,11 +449,9 @@ void TRI_FreeZ (TRI_memory_zone_t* zone, void* m, char const* file, int line) { void TRI_Free (TRI_memory_zone_t* zone, void* m) { #endif + char* p = (char*) m; + #ifdef TRI_ENABLE_MAINTAINER_MODE - char* p; - - p = (char*) m; - if (p == nullptr) { fprintf(stderr, "freeing nil ptr " ZONE_DEBUG_LOCATION @@ -495,23 +459,9 @@ void TRI_Free (TRI_memory_zone_t* zone, void* m) { // crash intentionally TRI_ASSERT(false); } - - // zone->_zid is a uint32_t but we'll decrease by sizeof(uintptr_t) bytes for good alignment everywhere - p -= sizeof(uintptr_t); - - if (* (uintptr_t*) p != zone->_zid) { - fprintf(stderr, - "memory zone mismatch in TRI_Free" ZONE_DEBUG_LOCATION ", old zone %d, new %d\n" - ZONE_DEBUG_PARAMS, - (int) * (uintptr_t*) p, - (int) zone->_zid); - TRI_ASSERT(false); - } +#endif free(p); -#else - free(m); -#endif } ////////////////////////////////////////////////////////////////////////////////