// By Emil Ernerfeldt 2014-2016 // LICENSE: // This software is dual-licensed to the public domain and under the following // license: you are granted a perpetual, irrevocable license to copy, modify, // publish, and distribute this file as you see fit. #ifndef ARANGODB_HASH_SET_H #define ARANGODB_HASH_SET_H 1 #include #include #include #include #include namespace emilib { /// like std::equal_to but no need to `#include ` template struct HashSetEqualTo { constexpr bool operator()(const T& lhs, const T& rhs) const { return lhs == rhs; } }; /// A cache-friendly hash set with open addressing, linear probing and power-of-two capacity template , typename EqT = HashSetEqualTo> class HashSet { private: using MyType = HashSet; public: using size_type = size_t; using value_type = KeyT; using reference = KeyT&; using const_reference = const KeyT&; class iterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = size_t; using distance_type = size_t; using value_type = KeyT; using pointer = value_type*; using reference = value_type&; iterator() { } iterator(MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket) {} iterator& operator++() { this->goto_next_element(); return *this; } iterator operator++(int) { size_t old_index = _bucket; this->goto_next_element(); return iterator(_set, old_index); } reference operator*() const { return _set->_keys[_bucket]; } pointer operator->() const { return _set->_keys + _bucket; } bool operator==(const iterator& rhs) const { return this->_bucket == rhs._bucket; } bool operator!=(const iterator& rhs) const { return this->_bucket != rhs._bucket; } private: void goto_next_element() { do { _bucket++; } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED); } public: MyType* _set; size_t _bucket; }; class const_iterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = size_t; using distance_type = size_t; using value_type = const KeyT; using pointer = value_type*; using reference = value_type&; const_iterator() { } const_iterator(iterator proto) : _set(proto._set), _bucket(proto._bucket) {} const_iterator(const MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket) {} const_iterator& operator++() { this->goto_next_element(); return *this; } const_iterator operator++(int) { size_t old_index = _bucket; this->goto_next_element(); return const_iterator(_set, old_index); } reference operator*() const { return _set->_keys[_bucket]; } pointer operator->() const { return _set->_keys + _bucket; } bool operator==(const const_iterator& rhs) const { return this->_bucket == rhs._bucket; } bool operator!=(const const_iterator& rhs) const { return this->_bucket != rhs._bucket; } private: void goto_next_element() { do { _bucket++; } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED); } public: const MyType* _set; size_t _bucket; }; HashSet() = default; HashSet(const HashSet& other) { reserve(other.size()); insert(other.cbegin(), other.cend()); } HashSet(HashSet&& other) noexcept { doMove(std::move(other)); } HashSet(std::initializer_list const& values) : HashSet() { for (auto const& v : values) { insert(v); } } HashSet& operator=(const HashSet& other) { if (this != &other) { clear(); reserve(other.size()); insert(other.cbegin(), other.cend()); } return *this; } HashSet& operator=(HashSet&& other) noexcept { if (this != &other) { for (size_t bucket=0; bucket<_num_buckets; ++bucket) { if (_states[bucket] == State::FILLED) { _keys[bucket].~KeyT(); } } if (_buffer != &_local_buffer[0]) { delete[] _buffer; } doMove(std::move(other)); } return *this; } ~HashSet() { for (size_t bucket=0; bucket<_num_buckets; ++bucket) { if (_states[bucket] == State::FILLED) { _keys[bucket].~KeyT(); } } if (_buffer != &_local_buffer[0]) { delete[] _buffer; } } void swap(HashSet& other) { std::swap(_hasher, other._hasher); std::swap(_eq, other._eq); std::swap(_buffer, other._buffer); std::swap(_states, other._states); std::swap(_keys, other._keys); std::swap(_num_buckets, other._num_buckets); std::swap(_num_filled, other._num_filled); std::swap(_max_probe_length, other._max_probe_length); std::swap(_mask, other._mask); } // ------------------------------------------------------------- iterator begin() { size_t bucket = 0; while (bucket<_num_buckets && _states[bucket] != State::FILLED) { ++bucket; } return iterator(this, bucket); } const_iterator cbegin() const { size_t bucket = 0; while (bucket<_num_buckets && _states[bucket] != State::FILLED) { ++bucket; } return const_iterator(this, bucket); } const_iterator begin() const { return cbegin(); } iterator end() { return iterator(this, _num_buckets); } const_iterator cend() const { return const_iterator(this, _num_buckets); } const_iterator end() const { return cend(); } size_t size() const { return _num_filled; } bool empty() const { return _num_filled == 0; } // Returns the number of buckets. size_t bucket_count() const { return _num_buckets; } /// Returns average number of elements per bucket. float load_factor() const { return static_cast(_num_filled) / static_cast(_num_buckets); } // ------------------------------------------------------------ iterator find(const KeyT& key) { auto bucket = this->find_filled_bucket(key); if (bucket == (size_t)-1) { return this->end(); } return iterator(this, bucket); } const_iterator find(const KeyT& key) const { auto bucket = this->find_filled_bucket(key); if (bucket == (size_t)-1) { return this->end(); } return const_iterator(this, bucket); } bool contains(const KeyT& k) const { return find_filled_bucket(k) != (size_t)-1; } size_t count(const KeyT& k) const { return find_filled_bucket(k) != (size_t)-1 ? 1 : 0; } // ----------------------------------------------------- /// Insert an element, unless it already exists. /// Returns a pair consisting of an iterator to the inserted element /// (or to the element that prevented the insertion) /// and a bool denoting whether the insertion took place. std::pair insert(const KeyT& key) { check_expand_need(); auto bucket = find_or_allocate(key); if (_states[bucket] == State::FILLED) { return { iterator(this, bucket), false }; } else { _states[bucket] = State::FILLED; new(_keys + bucket) KeyT(key); _num_filled++; return { iterator(this, bucket), true }; } } /// Insert an element, unless it already exists. /// Returns a pair consisting of an iterator to the inserted element /// (or to the element that prevented the insertion) /// and a bool denoting whether the insertion took place. std::pair insert(KeyT&& key) { check_expand_need(); auto bucket = find_or_allocate(key); if (_states[bucket] == State::FILLED) { return { iterator(this, bucket), false }; } else { _states[bucket] = State::FILLED; new(_keys + bucket) KeyT(std::move(key)); _num_filled++; return { iterator(this, bucket), true }; } } template std::pair emplace(Args&&... args) { return insert(KeyT(std::forward(args)...)); } void insert(const_iterator begin, const_iterator end) { // TODO: reserve space exactly once. for (; begin != end; ++begin) { insert(*begin); } } // ------------------------------------------------------- /// Erase an element from the hash set. /// return false if element was not found. bool erase(const KeyT& key) { auto bucket = find_filled_bucket(key); if (bucket != (size_t)-1) { _states[bucket] = State::ACTIVE; _keys[bucket].~KeyT(); _num_filled -= 1; return true; } else { return false; } } /// Erase an element using an iterator. /// Returns an iterator to the next element (or end()). iterator erase(iterator it) { _states[it._bucket] = State::ACTIVE; _keys[it._bucket].~KeyT(); _num_filled -= 1; return ++it; } /// Remove all elements, keeping full capacity. void clear() { for (size_t bucket=0; bucket<_num_buckets; ++bucket) { if (_states[bucket] == State::FILLED) { _states[bucket] = State::INACTIVE; _keys[bucket].~KeyT(); } } _num_filled = 0; _max_probe_length = -1; } /// Make room for this many elements void reserve(size_t num_elems) { size_t required_buckets = num_elems + num_elems/2 + 1; if (required_buckets <= _num_buckets) { return; } size_t num_buckets = 8; while (num_buckets < required_buckets) { num_buckets *= 2; } size_t states_size = ((num_buckets * sizeof(State) + 8 - 1) / 8) * 8; size_t keys_size = num_buckets * sizeof(KeyT); // intentionally uninitialized here, initialization will be done below char* new_buffer; if (_buffer == nullptr && states_size + keys_size <= sizeof(_local_buffer)) { new_buffer = &_local_buffer[0]; } else { new_buffer = new char[states_size + keys_size]; } auto new_states = (State*)new_buffer; auto new_keys = (KeyT*)(new_buffer + states_size); // auto old_num_filled = _num_filled; auto old_num_buckets = _num_buckets; auto old_buffer = _buffer; auto old_states = _states; auto old_keys = _keys; _num_filled = 0; _num_buckets = num_buckets; _mask = _num_buckets - 1; _buffer = new_buffer; _states = new_states; _keys = new_keys; std::fill_n(_states, num_buckets, State::INACTIVE); _max_probe_length = -1; for (size_t src_bucket=0; src_bucket _max_probe_length) { _max_probe_length = offset; } return bucket; } } } private: enum class State : uint8_t { INACTIVE, // Never been touched ACTIVE, // Is inside a search-chain, but is empty FILLED // Is set with key/value }; HashT _hasher; EqT _eq; char* _buffer = nullptr; State* _states = nullptr; KeyT* _keys = nullptr; size_t _num_buckets = 0; size_t _num_filled = 0; int _max_probe_length = -1; // Our longest bucket-brigade is this long. ONLY when we have zero elements is this ever negative (-1). size_t _mask = 0; // _num_buckets minus one char _local_buffer[80]; // first few bytes are always allocated here }; } // namespace emilib // map HashSet into arangodb namespace namespace arangodb { template using HashSetEqualTo = emilib::HashSetEqualTo; template, typename EqT = HashSetEqualTo> using HashSet = emilib::HashSet; } #endif