1
0
Fork 0

optimizations for key bounds

This commit is contained in:
jsteemann 2017-05-16 12:53:44 +02:00
parent a4a88d03be
commit a75271e1f9
5 changed files with 217 additions and 135 deletions

View File

@ -190,7 +190,8 @@ void RocksDBIndex::truncate(transaction::Methods* trx) {
RocksDBKeyBounds indexBounds = getBounds();
rocksdb::ReadOptions options = state->readOptions();
options.iterate_upper_bound = &(indexBounds.end());
rocksdb::Slice upperBound = indexBounds.end();
options.iterate_upper_bound = &upperBound;
std::unique_ptr<rocksdb::Iterator> iter(rtrx->GetIterator(options));
iter->Seek(indexBounds.start());

View File

@ -22,7 +22,7 @@
/// @author Daniel H. Larkin
////////////////////////////////////////////////////////////////////////////////
#include "RocksDBEngine/RocksDBKeyBounds.h"
#include "RocksDBKeyBounds.h"
#include "Basics/Exceptions.h"
#include "RocksDBEngine/RocksDBCommon.h"
#include "RocksDBEngine/RocksDBTypes.h"
@ -80,21 +80,21 @@ RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId) {
// Builds the key bounds for the geo index with id `indexId`.
//
// Both bounds consist of the GeoIndexValue type byte, the persisted index id
// and one trailing 64-bit value. For the lower bound that value is `norm`
// (0xFF when `isSlot` is true, 0 otherwise); for the upper bound it is `norm`
// with bits 32..63 additionally set.
//
// NOTE(review): this span looks like a rendered diff hunk without +/- markers
// (`length` is declared twice). Presumably the statements that touch
// b._startBuffer / b._endBuffer / b._start / b._end are the removed version and
// the statements that go through `internals` are the added one -- confirm
// against the repository.
RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId, bool isSlot) {
RocksDBKeyBounds b;
size_t length = sizeof(char) + sizeof(uint64_t) * 2;
b._startBuffer.reserve(length);
b._startBuffer.push_back(static_cast<char>(RocksDBEntryType::GeoIndexValue));
uint64ToPersistent(b._startBuffer, indexId);
b._endBuffer.clear();
b._endBuffer.append(b._startBuffer); // append common prefix
size_t length = 2 * (sizeof(char) + sizeof(uint64_t) + sizeof(uint64_t));
auto& internals = b.internals();
internals.reserve(length);
internals.push_back(static_cast<char>(RocksDBEntryType::GeoIndexValue));
uint64ToPersistent(internals.buffer(), indexId);
uint64_t norm = isSlot ? 0xFFU : 0; // encode slot|pot in lowest bit
uint64ToPersistent(b._startBuffer, norm); // lower endian
norm = norm | (0xFFFFFFFFULL << 32);
uint64ToPersistent(b._endBuffer, norm);
uint64ToPersistent(internals.buffer(), norm); // lower endian
// everything appended after separate() belongs to the upper bound
internals.separate();
internals.push_back(static_cast<char>(RocksDBEntryType::GeoIndexValue));
uint64ToPersistent(internals.buffer(), indexId);
norm = norm | (0xFFFFFFFFULL << 32);
uint64ToPersistent(internals.buffer(), norm);
b._start = rocksdb::Slice(b._startBuffer);
b._end = rocksdb::Slice(b._endBuffer);
return b;
}
@ -126,23 +126,25 @@ RocksDBKeyBounds RocksDBKeyBounds::IndexEstimateValues() {
// Builds the key bounds for all fulltext index entries of index `indexId`
// whose word starts with `word`.
//
// Lower bound: FulltextIndexValue type byte + persisted index id + the bytes
// of `word`. Upper bound: the same bytes followed by a single 0xFF byte.
//
// NOTE(review): this span looks like a rendered diff hunk without +/- markers
// (`length` is declared twice, two different result variables are returned).
// Presumably the statements using `bounds` are the removed version and the
// statements using `b` / `internals` are the added one -- confirm against the
// repository.
RocksDBKeyBounds RocksDBKeyBounds::FulltextIndexPrefix(
uint64_t indexId, arangodb::StringRef const& word) {
// I did not want to pass a bool to the constructor for this
RocksDBKeyBounds bounds;
size_t length = sizeof(char) + sizeof(uint64_t) + word.size();
bounds._startBuffer.reserve(length);
bounds._startBuffer.push_back(
static_cast<char>(RocksDBEntryType::FulltextIndexValue));
uint64ToPersistent(bounds._startBuffer, indexId);
bounds._startBuffer.append(word.data(), word.length());
RocksDBKeyBounds b;
size_t length = 2 * (sizeof(char) + sizeof(uint64_t) + word.size()) + 1;
bounds._endBuffer.clear();
bounds._endBuffer.append(bounds._startBuffer);
bounds._endBuffer.push_back(
auto& internals = b.internals();
internals.reserve(length);
internals.push_back(
static_cast<char>(RocksDBEntryType::FulltextIndexValue));
uint64ToPersistent(internals.buffer(), indexId);
internals.buffer().append(word.data(), word.length());
// everything appended after separate() belongs to the upper bound
internals.separate();
internals.push_back(
static_cast<char>(RocksDBEntryType::FulltextIndexValue));
uint64ToPersistent(internals.buffer(), indexId);
internals.buffer().append(word.data(), word.length());
internals.push_back(
0xFFU); // invalid UTF-8 character, higher than with memcmp
bounds._start = rocksdb::Slice(bounds._startBuffer);
bounds._end = rocksdb::Slice(bounds._endBuffer);
return bounds;
return b;
}
RocksDBKeyBounds RocksDBKeyBounds::FulltextIndexComplete(
@ -152,43 +154,50 @@ RocksDBKeyBounds RocksDBKeyBounds::FulltextIndexComplete(
// ============================ Member Methods ==============================
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBKeyBounds const& other)
: _type(other._type),
_internals(other._internals) {}
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBKeyBounds&& other)
: _type(other._type),
_internals(std::move(other._internals)) {}
// Copy assignment: takes over the entry type and the bounds of `other`;
// self-assignment is a no-op. Returns *this.
//
// NOTE(review): this span looks like a rendered diff hunk without +/- markers.
// Presumably the four statements on _startBuffer / _endBuffer / _start / _end
// are the removed version and the `_internals` assignment is the added one --
// confirm against the repository.
RocksDBKeyBounds& RocksDBKeyBounds::operator=(RocksDBKeyBounds const& other) {
if (this != &other) {
_type = other._type;
_startBuffer = other._startBuffer;
_endBuffer = other._endBuffer;
_start = rocksdb::Slice(_startBuffer);
_end = rocksdb::Slice(_endBuffer);
_internals = other._internals;
}
return *this;
}
// Copy constructor working on two separate buffers: copies the type and both
// buffers, then builds _end and _start as slices over this object's own
// copies of the buffers (not over the buffers of `other`).
//
// NOTE(review): presumably this is the removed pre-commit version shown by the
// diff rendering; a copy constructor based on _internals is also defined for
// the same signature -- confirm against the repository.
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBKeyBounds const& other)
: _type(other._type),
_startBuffer(other._startBuffer),
_endBuffer(other._endBuffer),
_end(rocksdb::Slice(_endBuffer)),
_start(rocksdb::Slice(_startBuffer)) {}
rocksdb::Slice const& RocksDBKeyBounds::start() const {
TRI_ASSERT(_start.size() > 0);
return _start;
RocksDBKeyBounds& RocksDBKeyBounds::operator=(RocksDBKeyBounds&& other) {
if (this != &other) {
_type = other._type;
_internals = std::move(other._internals);
}
rocksdb::Slice const& RocksDBKeyBounds::end() const {
TRI_ASSERT(_end.size() > 0);
return _end;
return *this;
}
// Returns the lower bound as a slice, by value, as produced by the internal
// bounds buffer.
rocksdb::Slice RocksDBKeyBounds::start() const {
  BoundsBuffer const& both = _internals;
  return both.start();
}
// Returns the upper bound as a slice, by value, as produced by the internal
// bounds buffer.
rocksdb::Slice RocksDBKeyBounds::end() const {
  BoundsBuffer const& both = _internals;
  return both.end();
}
uint64_t RocksDBKeyBounds::objectId() const {
RocksDBEntryType type = static_cast<RocksDBEntryType>(_startBuffer[0]);
RocksDBEntryType type = static_cast<RocksDBEntryType>(_internals._buffer[0]);
switch (type) {
case RocksDBEntryType::Document:
case RocksDBEntryType::PrimaryIndexValue:
case RocksDBEntryType::EdgeIndexValue:
case RocksDBEntryType::IndexValue:
case RocksDBEntryType::UniqueIndexValue: {
TRI_ASSERT(_startBuffer.size() >= (sizeof(char) + sizeof(uint64_t)));
return uint64FromPersistent(_startBuffer.data() + sizeof(char));
TRI_ASSERT(_internals.buffer().size() >= (sizeof(char) + sizeof(uint64_t)));
return uint64FromPersistent(_internals.buffer().data() + sizeof(char));
}
default:
@ -199,42 +208,37 @@ uint64_t RocksDBKeyBounds::objectId() const {
// constructor for an empty bound. do not use for anything but to
// default-construct a key bound!
RocksDBKeyBounds::RocksDBKeyBounds()
: _type(RocksDBEntryType::Database), _startBuffer(), _endBuffer() {}
: _type(RocksDBEntryType::Database) {}
// Constructs bounds that depend on the entry type only.
//
// - Database: lower bound is the single type byte, upper bound is the single
//   byte (type + 1).
// - CounterValue / IndexEstimateValue: lower bound is type byte + persisted 0,
//   upper bound is type byte + persisted UINT64_MAX.
// - any other type: throws TRI_ERROR_BAD_PARAMETER.
//
// NOTE(review): this span looks like a rendered diff hunk without +/- markers
// (two member-initializer lists follow the signature). Presumably the
// statements on _startBuffer / _endBuffer / _start / _end are the removed
// version and the statements on _internals are the added one -- confirm
// against the repository.
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type)
: _type(type), _startBuffer(), _endBuffer() {
: _type(type) {
switch (_type) {
case RocksDBEntryType::Database: {
size_t length = sizeof(char);
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
_endBuffer.append(_startBuffer);
_endBuffer[0]++; // TODO: better solution?
_internals.reserve(2 * sizeof(char));
_internals.push_back(static_cast<char>(_type));
// everything appended after separate() belongs to the upper bound
_internals.separate();
_internals.push_back(static_cast<char>(_type) + 1);
break;
}
case RocksDBEntryType::CounterValue:
case RocksDBEntryType::IndexEstimateValue: {
size_t length = sizeof(char) + sizeof(uint64_t);
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, 0);
_internals.reserve(2 * (sizeof(char) + sizeof(uint64_t)));
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), 0);
_endBuffer.reserve(length);
_endBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_endBuffer, UINT64_MAX);
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), UINT64_MAX);
break;
}
default:
THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
}
_start = rocksdb::Slice(_startBuffer);
_end = rocksdb::Slice(_endBuffer);
}
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first)
: _type(type), _startBuffer(), _endBuffer() {
: _type(type) {
switch (_type) {
case RocksDBEntryType::IndexValue:
case RocksDBEntryType::UniqueIndexValue: {
@ -245,17 +249,17 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first)
VPackSlice min("\x02\x03\x1e"); // [minSlice]
VPackSlice max("\x02\x03\x1f"); // [maxSlice]
size_t length = sizeof(char) + sizeof(uint64_t) + min.byteSize();
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, first);
// append common prefix
_endBuffer.clear();
_endBuffer.append(_startBuffer);
size_t length = 2 * (sizeof(char) + sizeof(uint64_t)) + min.byteSize() + max.byteSize();
_internals.reserve(length);
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append((char*)(min.begin()), min.byteSize());
// construct min max
_startBuffer.append((char*)(min.begin()), min.byteSize());
_endBuffer.append((char*)(max.begin()), max.byteSize());
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append((char*)(max.begin()), max.byteSize());
break;
}
@ -268,17 +272,17 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first)
//
// Documents are stored as follows:
// Key: 3 + 8-byte object ID of collection + 8-byte document revision ID
size_t length = sizeof(char) + sizeof(uint64_t) * 2;
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, first);
// append common prefix
_endBuffer.clear();
_endBuffer.append(_startBuffer);
size_t length = 2 * (sizeof(char) + sizeof(uint64_t) + sizeof(uint64_t));
_internals.reserve(length);
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
uint64ToPersistent(_internals.buffer(), 0);
// construct min max
uint64ToPersistent(_startBuffer, 0);
uint64ToPersistent(_endBuffer, UINT64_MAX);
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
uint64ToPersistent(_internals.buffer(), UINT64_MAX);
break;
}
@ -286,78 +290,78 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first)
case RocksDBEntryType::EdgeIndexValue:
case RocksDBEntryType::FulltextIndexValue: {
size_t length = sizeof(char) + sizeof(uint64_t);
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, first);
_internals.reserve(length);
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_endBuffer.clear();
_endBuffer.append(_startBuffer);
_endBuffer.push_back(0xFFU);
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.push_back(0xFFU);
break;
}
default:
THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
}
_start = rocksdb::Slice(_startBuffer);
_end = rocksdb::Slice(_endBuffer);
}
// Constructs bounds from an entry type, a 64-bit id and a string.
//
// - FulltextIndexValue / EdgeIndexValue: lower bound is type byte + persisted
//   `first` + the bytes of `second` + _stringSeparator; upper bound is the
//   same bytes followed by a single 0xFF byte.
// - any other type: throws TRI_ERROR_BAD_PARAMETER.
//
// NOTE(review): this span looks like a rendered diff hunk without +/- markers
// (two member-initializer lists, `length` declared twice). Presumably the
// statements on _startBuffer / _endBuffer / _start / _end are the removed
// version and the statements on _internals are the added one -- confirm
// against the repository.
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
arangodb::StringRef const& second)
: _type(type), _startBuffer(), _endBuffer() {
: _type(type) {
switch (_type) {
case RocksDBEntryType::FulltextIndexValue:
case RocksDBEntryType::EdgeIndexValue: {
size_t length =
sizeof(char) + sizeof(uint64_t) + second.size() + sizeof(char);
_startBuffer.reserve(length);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, first);
_startBuffer.append(second.data(), second.length());
_startBuffer.push_back(_stringSeparator);
size_t length = 2 * (sizeof(char) + sizeof(uint64_t) + second.size() + sizeof(char)) + 1;
_internals.reserve(length);
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append(second.data(), second.length());
_internals.push_back(_stringSeparator);
_endBuffer.clear();
_endBuffer.append(_startBuffer);
_endBuffer.push_back(0xFFU);
// everything appended after separate() belongs to the upper bound
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append(second.data(), second.length());
_internals.push_back(_stringSeparator);
_internals.push_back(0xFFU);
break;
}
default:
THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
}
_start = rocksdb::Slice(_startBuffer);
_end = rocksdb::Slice(_endBuffer);
}
RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
VPackSlice const& second,
VPackSlice const& third)
: _type(type), _startBuffer(), _endBuffer() {
: _type(type) {
switch (_type) {
case RocksDBEntryType::IndexValue:
case RocksDBEntryType::UniqueIndexValue: {
size_t startLength = sizeof(char) + sizeof(uint64_t) +
static_cast<size_t>(second.byteSize()) +
sizeof(char);
_startBuffer.reserve(startLength);
_startBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_startBuffer, first);
_startBuffer.append(reinterpret_cast<char const*>(second.begin()),
static_cast<size_t>(second.byteSize()));
_startBuffer.push_back(_stringSeparator);
TRI_ASSERT(_startBuffer.length() == startLength);
size_t endLength = sizeof(char) + sizeof(uint64_t) +
static_cast<size_t>(third.byteSize()) + sizeof(char);
_endBuffer.reserve(endLength);
_endBuffer.push_back(static_cast<char>(_type));
uint64ToPersistent(_endBuffer, first);
_endBuffer.append(reinterpret_cast<char const*>(third.begin()),
_internals.reserve(startLength + endLength);
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append(reinterpret_cast<char const*>(second.begin()),
static_cast<size_t>(second.byteSize()));
_internals.push_back(_stringSeparator);
_internals.separate();
_internals.push_back(static_cast<char>(_type));
uint64ToPersistent(_internals.buffer(), first);
_internals.buffer().append(reinterpret_cast<char const*>(third.begin()),
static_cast<size_t>(third.byteSize()));
_endBuffer.push_back(_stringSeparator + 1); // compare greater than
_internals.push_back(_stringSeparator + 1); // compare greater than
// actual key
break;
}
@ -365,6 +369,4 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first,
default:
THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
}
_start = rocksdb::Slice(_startBuffer);
_end = rocksdb::Slice(_endBuffer);
}

View File

@ -25,12 +25,12 @@
#ifndef ARANGO_ROCKSDB_ROCKSDB_KEY_BOUNDS_H
#define ARANGO_ROCKSDB_ROCKSDB_KEY_BOUNDS_H 1
#include <rocksdb/slice.h>
#include "Basics/Common.h"
#include "Basics/StringRef.h"
#include "RocksDBEngine/RocksDBTypes.h"
#include "VocBase/vocbase.h"
#include <rocksdb/slice.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>
@ -141,8 +141,10 @@ class RocksDBKeyBounds {
arangodb::StringRef const&);
public:
RocksDBKeyBounds& operator=(RocksDBKeyBounds const& other);
RocksDBKeyBounds(RocksDBKeyBounds const& other);
RocksDBKeyBounds(RocksDBKeyBounds&& other);
RocksDBKeyBounds& operator=(RocksDBKeyBounds const& other);
RocksDBKeyBounds& operator=(RocksDBKeyBounds&& other);
//////////////////////////////////////////////////////////////////////////////
/// @brief Returns the left bound slice.
@ -150,7 +152,7 @@ class RocksDBKeyBounds {
/// Forward iterators may use it->Seek(bound.start()) and reverse iterators
/// may check that the current key is greater than this value.
//////////////////////////////////////////////////////////////////////////////
rocksdb::Slice const& start() const;
rocksdb::Slice start() const;
//////////////////////////////////////////////////////////////////////////////
/// @brief Returns the right bound slice.
@ -158,7 +160,7 @@ class RocksDBKeyBounds {
/// Reverse iterators may use it->SeekForPrev(bound.end()) and forward
/// iterators may check that the current key is less than this value.
//////////////////////////////////////////////////////////////////////////////
rocksdb::Slice const& end() const;
rocksdb::Slice end() const;
//////////////////////////////////////////////////////////////////////////////
/// @brief Returns the object ID for these bounds
@ -178,12 +180,85 @@ class RocksDBKeyBounds {
VPackSlice const& second, VPackSlice const& third);
private:
// Private helper class that holds both bounds back to back in one single
// std::string, so that building a pair of bounds needs only one allocation.
//
// Layout of _buffer: [bytes of start bound][bytes of end bound].
// _separatorPosition is the offset at which the end bound begins (which is
// also the length of the start bound). It stays 0 until separate() is called.
//
// Expected call sequence: reserve(), append the start bound, separate(),
// append the end bound, then read the bounds via start() / end().
class BoundsBuffer {
  friend class RocksDBKeyBounds;

 public:
  BoundsBuffer() : _separatorPosition(0) {}

  // member-wise copies are exactly what is needed here
  BoundsBuffer(BoundsBuffer const& other) = default;
  BoundsBuffer& operator=(BoundsBuffer const& other) = default;

  // Moving must not throw, so that containers can move elements instead of
  // copying them. The moved-from object is reset to the same state as a
  // default-constructed one (empty buffer, no separator).
  BoundsBuffer(BoundsBuffer&& other) noexcept
      : _buffer(std::move(other._buffer)),
        _separatorPosition(other._separatorPosition) {
    other._buffer.clear();
    other._separatorPosition = 0;
  }

  BoundsBuffer& operator=(BoundsBuffer&& other) noexcept {
    if (this != &other) {
      _buffer = std::move(other._buffer);
      _separatorPosition = other._separatorPosition;
      other._buffer.clear();
      other._separatorPosition = 0;
    }
    return *this;
  }

  // reserve space for both bounds. must be called on an empty buffer, before
  // separate() was called
  void reserve(size_t length) {
    TRI_ASSERT(_separatorPosition == 0);
    TRI_ASSERT(_buffer.empty());
    _buffer.reserve(length);
  }

  // mark the end of the start bound. everything appended afterwards belongs
  // to the end bound. must be called exactly once, on a non-empty buffer
  void separate() {
    TRI_ASSERT(_separatorPosition == 0);
    TRI_ASSERT(!_buffer.empty());
    _separatorPosition = _buffer.size();
  }

  // append a single character to the bound that is currently being built
  void push_back(char c) { _buffer.push_back(c); }

  // return the internal buffer for modification or reading
  std::string& buffer() { return _buffer; }
  std::string const& buffer() const { return _buffer; }

  // return a slice for the start bound. the slice points into _buffer and
  // requires that separate() has been called
  rocksdb::Slice start() const {
    TRI_ASSERT(_separatorPosition != 0);
    return rocksdb::Slice(_buffer.data(), _separatorPosition);
  }

  // return a slice for the end bound. the slice points into _buffer and
  // requires that separate() has been called
  rocksdb::Slice end() const {
    TRI_ASSERT(_separatorPosition != 0);
    return rocksdb::Slice(_buffer.data() + _separatorPosition,
                          _buffer.size() - _separatorPosition);
  }

 private:
  std::string _buffer;        // start bound followed by end bound
  size_t _separatorPosition;  // offset of the end bound, 0 = not yet separated
};
// accessors for the combined bounds buffer (mutable and read-only variant)
BoundsBuffer& internals() { return _internals; }
BoundsBuffer const& internals() const { return _internals; }
static const char _stringSeparator;
RocksDBEntryType _type;
std::string _startBuffer;
std::string _endBuffer;
rocksdb::Slice _end;
rocksdb::Slice _start;
BoundsBuffer _internals;
};
} // namespace arangodb

View File

@ -96,7 +96,10 @@ RocksDBVPackIndexIterator::RocksDBVPackIndexIterator(
TRI_ASSERT(state != nullptr);
rocksdb::ReadOptions options = state->readOptions();
if (!reverse) {
options.iterate_upper_bound = &(_bounds.end());
// we need to have a pointer to a slice for the upper bound
// so we need to assign the slice to an instance variable here
_upperBound = _bounds.end();
options.iterate_upper_bound = &_upperBound;
}
_iterator.reset(rtrx->GetIterator(options));

View File

@ -96,6 +96,7 @@ class RocksDBVPackIndexIterator final : public IndexIterator {
std::unique_ptr<rocksdb::Iterator> _iterator;
bool const _reverse;
RocksDBKeyBounds _bounds;
rocksdb::Slice _upperBound; // used for iterate_upper_bound
};
class RocksDBVPackIndex : public RocksDBIndex {