1
0
Fork 0
arangodb/arangod/RocksDBEngine/RocksDBEngine.cpp

694 lines
23 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Jan Christoph Uhde
////////////////////////////////////////////////////////////////////////////////
#include "RocksDBEngine.h"
#include "Basics/Exceptions.h"
#include "Basics/FileUtils.h"
#include "Basics/Result.h"
#include "Basics/StaticStrings.h"
#include "Basics/VelocyPackHelper.h"
#include "Logger/Logger.h"
#include "ProgramOptions/ProgramOptions.h"
#include "ProgramOptions/Section.h"
#include "RestServer/DatabasePathFeature.h"
#include "RestServer/ViewTypesFeature.h"
#include "RocksDBEngine/RocksDBCollection.h"
#include "RocksDBEngine/RocksDBEntry.h"
#include "RocksDBEngine/RocksDBTypes.h"
#include "RocksDBEngine/RocksDBView.h"
#include "VocBase/ticks.h"

#include <rocksdb/db.h>
#include <rocksdb/convenience.h>
#include <rocksdb/env.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/iterator.h>
#include <rocksdb/options.h>
#include <rocksdb/slice_transform.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>

#include <velocypack/Iterator.h>
#include <velocypack/velocypack-aliases.h>

#include <memory>
using namespace arangodb;
using namespace arangodb::application_features;
using namespace arangodb::options;
namespace arangodb {
/// @brief name under which the engine is registered
std::string const RocksDBEngine::EngineName{"rocksdb"};

/// @brief name of the application feature implementing the engine
std::string const RocksDBEngine::FeatureName{"RocksDBEngine"};
/// @brief create the storage engine. No index factory is passed to the base
/// class (nullptr), and the RocksDB handle stays unset until start() opens it.
RocksDBEngine::RocksDBEngine(application_features::ApplicationServer* server)
    : StorageEngine(server, EngineName, FeatureName,
                    nullptr /*new MMFilesIndexFactory()*/),
      _db(nullptr) {
  // inherits order from StorageEngine
}
/// @brief destroy the engine and release the RocksDB handle.
/// unprepare() resets _db to nullptr after deleting it, so this delete is a
/// no-op if unprepare() already ran.
RocksDBEngine::~RocksDBEngine() {
  delete _db;
}
// inherited from ApplicationFeature
// ---------------------------------
/// @brief add the storage engine's specific options to the global list of
/// options. Currently the engine registers no options.
void RocksDBEngine::collectOptions(std::shared_ptr<options::ProgramOptions>) {
  // intentionally empty
}
/// @brief validate the storage engine's specific options.
/// Currently there is nothing to validate.
void RocksDBEngine::validateOptions(std::shared_ptr<options::ProgramOptions>) {
  // intentionally empty
}
/// @brief preparation phase for the storage engine. Can be used for internal
/// setup; the storage engine must not start any threads here or write any
/// files. Currently a no-op.
void RocksDBEngine::prepare() {
  // intentionally empty
}
void RocksDBEngine::start() {
//it is already decided that rocksdb is used
if (!isEnabled()) {
return;
}
// set the database sub-directory for RocksDB
auto database = ApplicationServer::getFeature<DatabasePathFeature>("DatabasePath");
_path = database->subdirectoryName("engine-rocksdb");
LOG_TOPIC(TRACE, arangodb::Logger::STARTUP) << "initializing rocksdb, path: " << _path;
rocksdb::TransactionDBOptions transactionOptions;
_options.create_if_missing = true;
_options.max_open_files = -1;
rocksdb::Status status = rocksdb::TransactionDB::Open(_options, transactionOptions, _path, &_db);
if (!status.ok()) {
LOG_TOPIC(FATAL, arangodb::Logger::STARTUP) << "unable to initialize RocksDB engine: " << status.ToString();
FATAL_ERROR_EXIT();
}
TRI_ASSERT(_db != nullptr);
if (!systemDatabaseExists()) {
addSystemDatabase();
}
}
/// @brief stop the engine. Currently a no-op; the RocksDB handle is released
/// in unprepare().
void RocksDBEngine::stop() {
  // intentionally empty
}
/// @brief release the RocksDB handle and reset it, so that the destructor
/// does not delete it a second time.
void RocksDBEngine::unprepare() {
  // deleting a null pointer is a no-op, so no explicit check is needed
  delete _db;
  _db = nullptr;
}
/// @brief create engine-specific transaction context data.
/// Not implemented yet: always throws std::runtime_error.
transaction::ContextData* RocksDBEngine::createTransactionContextData() {
  throw std::runtime_error("not implemented35");
  return nullptr;  // unreachable
}
/// @brief create an engine-specific transaction state.
/// Not implemented yet: always throws std::runtime_error.
TransactionState* RocksDBEngine::createTransactionState(TRI_vocbase_t*) {
  throw std::runtime_error("not implemented36");
  return nullptr;  // unreachable
}
/// @brief create an engine-specific transaction collection.
/// Not implemented yet: always throws std::runtime_error.
TransactionCollection* RocksDBEngine::createTransactionCollection(
    TransactionState* state, TRI_voc_cid_t cid, AccessMode::Type accessType,
    int nestingLevel) {
  throw std::runtime_error("not implemented37");
  return nullptr;  // unreachable
}
/// @brief create the storage-engine specific part of a collection.
/// Not implemented yet: always throws std::runtime_error.
PhysicalCollection* RocksDBEngine::createPhysicalCollection(
    LogicalCollection*, VPackSlice const&) {
  throw std::runtime_error("not implemented38");
  return nullptr;  // unreachable
}
/// @brief create the storage-engine specific part of a view.
/// Not implemented yet: always throws std::runtime_error.
PhysicalView* RocksDBEngine::createPhysicalView(LogicalView*,
                                                VPackSlice const&) {
  throw std::runtime_error("not implemented39");
  return nullptr;  // unreachable
}
// inventory functionality
// -----------------------
/// @brief append the stored database declarations to `result` as a
/// VelocyPack array.
///
/// Every entry whose key starts with the database prefix is inspected:
/// - a non-string "id" or "name" attribute raises
///   TRI_ERROR_ARANGO_ILLEGAL_PARAMETER_FILE,
/// - entries flagged as "deleted" are passed to dropDatabase() and skipped,
/// - all other entries are added to `result` unchanged.
void RocksDBEngine::getDatabases(arangodb::velocypack::Builder& result) {
  LOG_TOPIC(TRACE, Logger::STARTUP) << "getting existing databases";

  rocksdb::ReadOptions readOptions;
  readOptions.total_order_seek = true;  // TODO: why?

  // NewIterator() returns an iterator the caller has to delete. Keep it in a
  // unique_ptr so it is released on every exit path, including the
  // exceptions thrown below (binding it to a plain reference leaked it).
  std::unique_ptr<rocksdb::Iterator> iter(_db->NewIterator(readOptions));

  result.openArray();
  auto rSlice = rocksDBSlice(RocksDBEntryType::Database);
  for (iter->Seek(rSlice); iter->Valid() && iter->key().starts_with(rSlice);
       iter->Next()) {
    auto slice = VPackSlice(iter->value().data());

    //// check format
    // id
    VPackSlice idSlice = slice.get("id");
    if (!idSlice.isString()) {
      LOG_TOPIC(ERR, arangodb::Logger::STARTUP)
          << "found invalid database declaration with non-string id: " << slice.toJson();
      THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_PARAMETER_FILE);
    }

    // deleted
    if (arangodb::basics::VelocyPackHelper::getBooleanValue(slice, "deleted",
                                                            false)) {
      TRI_voc_tick_t id = static_cast<TRI_voc_tick_t>(
          basics::StringUtils::uint64(idSlice.copyString()));

      // database is deleted, skip it!
      LOG_TOPIC(DEBUG, arangodb::Logger::STARTUP)
          << "found dropped database " << id;
      dropDatabase(id);
      continue;
    }

    // name
    VPackSlice nameSlice = slice.get("name");
    if (!nameSlice.isString()) {
      LOG_TOPIC(ERR, arangodb::Logger::STARTUP)
          << "found invalid database declaration with non-string name: " << slice.toJson();
      THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_PARAMETER_FILE);
    }

    result.add(slice);
  }
  result.close();
}
/// @brief fill `result` with information about a single collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::getCollectionInfo(TRI_vocbase_t* vocbase,
                                      TRI_voc_cid_t cid,
                                      arangodb::velocypack::Builder& result,
                                      bool includeIndexes,
                                      TRI_voc_tick_t maxTick) {
  throw std::runtime_error("not implemented1");
}
/// @brief append the stored collection declarations of `vocbase` to `result`
/// as a VelocyPack array.
///
/// Entries belonging to other databases and entries flagged as "deleted" are
/// skipped. `wasCleanShutdown` and `isUpgrade` are currently unused.
///
/// @return always TRI_ERROR_NO_ERROR
int RocksDBEngine::getCollectionsAndIndexes(
    TRI_vocbase_t* vocbase, arangodb::velocypack::Builder& result,
    bool wasCleanShutdown, bool isUpgrade) {
  rocksdb::ReadOptions readOptions;
  readOptions.total_order_seek = true;  // TODO: why?

  // NewIterator() returns an iterator the caller has to delete. Keep it in a
  // unique_ptr so it is released on every exit path (binding it to a plain
  // reference leaked it).
  std::unique_ptr<rocksdb::Iterator> iter(_db->NewIterator(readOptions));

  result.openArray();
  auto rSlice = rocksDBSlice(RocksDBEntryType::Collection);
  for (iter->Seek(rSlice); iter->Valid() && iter->key().starts_with(rSlice);
       iter->Next()) {
    // the prefix scan covers the collections of all databases
    if (!RocksDBEntry::isSameDatabase(RocksDBEntryType::Collection, vocbase->id(), iter->key())) {
      continue;
    }

    auto slice = VPackSlice(iter->value().data());

    LOG_TOPIC(TRACE, Logger::FIXME) << "got collection slice: " << slice.toJson();

    if (arangodb::basics::VelocyPackHelper::readBooleanValue(slice, "deleted", false)) {
      continue;
    }
    result.add(slice);
  }
  result.close();

  return TRI_ERROR_NO_ERROR;
}
/// @brief append the stored view declarations of `vocbase` to `result` as a
/// VelocyPack array.
///
/// Entries belonging to other databases and entries flagged as "deleted" are
/// skipped.
///
/// @return always TRI_ERROR_NO_ERROR
int RocksDBEngine::getViews(TRI_vocbase_t* vocbase,
                            arangodb::velocypack::Builder& result) {
  rocksdb::ReadOptions readOptions;
  readOptions.total_order_seek = true;  // TODO: why?

  // NewIterator() returns an iterator the caller has to delete. Keep it in a
  // unique_ptr so it is released on every exit path (binding it to a plain
  // reference leaked it).
  std::unique_ptr<rocksdb::Iterator> iter(_db->NewIterator(readOptions));

  result.openArray();
  auto rSlice = rocksDBSlice(RocksDBEntryType::View);
  for (iter->Seek(rSlice); iter->Valid() && iter->key().starts_with(rSlice);
       iter->Next()) {
    // the prefix scan covers the views of all databases
    if (!RocksDBEntry::isSameDatabase(RocksDBEntryType::View, vocbase->id(), iter->key())) {
      continue;
    }

    auto slice = VPackSlice(iter->value().data());

    LOG_TOPIC(TRACE, Logger::FIXME) << "got view slice: " << slice.toJson();

    if (arangodb::basics::VelocyPackHelper::readBooleanValue(slice, "deleted", false)) {
      continue;
    }
    result.add(slice);
  }
  result.close();

  return TRI_ERROR_NO_ERROR;
}
/// @brief return the path of a database. This engine reports no
/// per-database path, so the result is always the empty string.
std::string RocksDBEngine::databasePath(TRI_vocbase_t const* vocbase) const {
  // no path to be returned here!
  return std::string();
}
/// @brief return the path of a collection.
/// Not implemented yet: always throws std::runtime_error.
std::string RocksDBEngine::collectionPath(TRI_vocbase_t const* vocbase,
                                          TRI_voc_cid_t id) const {
  throw std::runtime_error("not implemented3");
  return "not implemented";  // unreachable
}
/// @brief wait until the given tick has been synced.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::waitForSync(TRI_voc_tick_t tick) {
  throw std::runtime_error("not implemented4");
}
/// @brief load the replication applier configuration of a database.
/// Placeholder: sets `status` to TRI_ERROR_FILE_NOT_FOUND and returns an
/// empty pointer.
std::shared_ptr<arangodb::velocypack::Builder>
RocksDBEngine::getReplicationApplierConfiguration(TRI_vocbase_t* vocbase,
                                                  int& status) {
  // TODO!
  status = TRI_ERROR_FILE_NOT_FOUND;
  return nullptr;
}
/// @brief remove the replication applier configuration of a database.
/// Placeholder: does nothing and reports TRI_ERROR_NO_ERROR.
int RocksDBEngine::removeReplicationApplierConfiguration(
    TRI_vocbase_t* vocbase) {
  // TODO!
  return TRI_ERROR_NO_ERROR;
}
/// @brief save the replication applier configuration of a database.
/// Placeholder: does nothing and reports TRI_ERROR_NO_ERROR.
int RocksDBEngine::saveReplicationApplierConfiguration(
    TRI_vocbase_t* vocbase, arangodb::velocypack::Slice slice, bool doSync) {
  // TODO!
  return TRI_ERROR_NO_ERROR;
}
// database, collection and index management
// -----------------------------------------
/// @brief open the database described by `args`.
///
/// Reads the string attributes "id" (converted to a tick value) and "name"
/// from `args`, sets `status` to TRI_ERROR_NO_ERROR and delegates to
/// openExistingDatabase(), always passing `true` for wasCleanShutdown.
TRI_vocbase_t* RocksDBEngine::openDatabase(
    arangodb::velocypack::Slice const& args, bool isUpgrade, int& status) {
  std::string const idString = args.get("id").copyString();
  auto const id =
      static_cast<TRI_voc_tick_t>(basics::StringUtils::uint64(idString));

  std::string const name = args.get("name").copyString();

  status = TRI_ERROR_NO_ERROR;

  return openExistingDatabase(id, name, true, isUpgrade);
}
/// @brief create the in-memory object for a new database.
///
/// Sets `status` to TRI_ERROR_NO_ERROR, then builds a normal-type vocbase
/// with the given id and the "name" attribute of `args`. The returned raw
/// pointer is no longer owned by this function.
RocksDBEngine::Database* RocksDBEngine::createDatabase(
    TRI_voc_tick_t id, arangodb::velocypack::Slice const& args, int& status) {
  status = TRI_ERROR_NO_ERROR;

  std::string const name = args.get("name").copyString();
  return std::make_unique<TRI_vocbase_t>(TRI_VOCBASE_TYPE_NORMAL, id, name)
      .release();
}
/// @brief persist the declaration of database `id` in RocksDB.
///
/// @return TRI_ERROR_NO_ERROR if the write succeeded, TRI_ERROR_INTERNAL
/// otherwise
int RocksDBEngine::writeCreateDatabaseMarker(TRI_voc_tick_t id,
                                             VPackSlice const& slice) {
  RocksDBEntry entry = RocksDBEntry::Database(id, slice);

  rocksdb::WriteOptions writeOptions;  // TODO: check which options would make sense
  rocksdb::Status putStatus =
      _db->Put(writeOptions, entry.key(), entry.value());

  // TODO: need translation for RocksDB errors
  return putStatus.ok() ? TRI_ERROR_NO_ERROR : TRI_ERROR_INTERNAL;
}
/// @brief prepare a database for being dropped.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::prepareDropDatabase(TRI_vocbase_t* vocbase,
                                        bool useWriteMarker, int& status) {
  throw std::runtime_error("not implemented5");
}
/// @brief drop the given database object.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::dropDatabase(Database* database, int& status) {
  throw std::runtime_error("not implemented6");
}
/// @brief wait until a database has been deleted. Returns immediately and
/// sets `status` to TRI_ERROR_NO_ERROR.
void RocksDBEngine::waitUntilDeletion(TRI_voc_tick_t /* id */,
                                      bool /* force */, int& status) {
  // can delete databases instantly
  status = TRI_ERROR_NO_ERROR;
}
/// @brief report whether the write-ahead log is in recovery.
/// Not implemented yet: always throws std::runtime_error.
bool RocksDBEngine::inRecovery() {
  throw std::runtime_error("not implemented7");
  return true;  // unreachable
}
/// @brief notification that recovery has finished for `vocbase`.
/// This engine has nothing to do at this point.
void RocksDBEngine::recoveryDone(TRI_vocbase_t* vocbase) {
  // nothing to do here
}
/// @brief create a collection in the engine.
/// Not implemented yet: always throws std::runtime_error.
std::string RocksDBEngine::createCollection(
    TRI_vocbase_t* vocbase, TRI_voc_cid_t id,
    arangodb::LogicalCollection const*) {
  throw std::runtime_error("not implemented9");
  return "not implemented";  // unreachable
}
/// @brief persist a collection's declaration.
/// Not implemented yet: always throws std::runtime_error.
arangodb::Result RocksDBEngine::persistCollection(
    TRI_vocbase_t* vocbase, arangodb::LogicalCollection const*) {
  throw std::runtime_error("not implemented10");
  return arangodb::Result{};  // unreachable
}
/// @brief drop a collection.
/// Not implemented yet: always throws std::runtime_error.
arangodb::Result RocksDBEngine::dropCollection(
    TRI_vocbase_t* vocbase, arangodb::LogicalCollection*) {
  throw std::runtime_error("not implemented11");
  return arangodb::Result{};  // unreachable
}
/// @brief destroy a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::destroyCollection(TRI_vocbase_t* vocbase,
                                      arangodb::LogicalCollection*) {
  throw std::runtime_error("not implemented12");
}
/// @brief change the properties of a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::changeCollection(TRI_vocbase_t* vocbase,
                                     TRI_voc_cid_t id,
                                     arangodb::LogicalCollection const*,
                                     bool doSync) {
  throw std::runtime_error("not implemented13");
}
/// @brief rename a collection.
/// Not implemented yet: always throws std::runtime_error.
arangodb::Result RocksDBEngine::renameCollection(
    TRI_vocbase_t* vocbase, arangodb::LogicalCollection const*,
    std::string const& oldName) {
  throw std::runtime_error("not implemented14");
  return arangodb::Result{};  // unreachable
}
/// @brief create an index for a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::createIndex(TRI_vocbase_t* vocbase,
                                TRI_voc_cid_t collectionId,
                                TRI_idx_iid_t id,
                                arangodb::velocypack::Slice const& data) {
  throw std::runtime_error("not implemented15");
}
/// @brief drop an index of a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::dropIndex(TRI_vocbase_t* vocbase,
                              TRI_voc_cid_t collectionId,
                              TRI_idx_iid_t id) {
  throw std::runtime_error("not implemented16");
}
/// @brief write a marker for a dropped index.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::dropIndexWalMarker(
    TRI_vocbase_t* vocbase, TRI_voc_cid_t collectionId,
    arangodb::velocypack::Slice const& data, bool writeMarker, int&) {
  throw std::runtime_error("not implemented17");
}
/// @brief unload a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::unloadCollection(
    TRI_vocbase_t* vocbase, arangodb::LogicalCollection* collection) {
  throw std::runtime_error("not implemented18");
}
/// @brief create a view in the engine.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::createView(TRI_vocbase_t* vocbase, TRI_voc_cid_t id,
                               arangodb::LogicalView const*) {
  throw std::runtime_error("not implemented19");
}
/// @brief persist a view's declaration.
/// Not implemented yet: always throws std::runtime_error.
arangodb::Result RocksDBEngine::persistView(TRI_vocbase_t* vocbase,
                                            arangodb::LogicalView const*) {
  throw std::runtime_error("not implemented20");
  return arangodb::Result{};  // unreachable
}
/// @brief drop a view.
/// Not implemented yet: always throws std::runtime_error.
arangodb::Result RocksDBEngine::dropView(TRI_vocbase_t* vocbase,
                                         arangodb::LogicalView*) {
  throw std::runtime_error("not implemented21");
  return arangodb::Result{};  // unreachable
}
/// @brief destroy a view.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::destroyView(TRI_vocbase_t* vocbase,
                                arangodb::LogicalView*) {
  throw std::runtime_error("not implemented22");
}
/// @brief change the properties of a view.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::changeView(TRI_vocbase_t* vocbase, TRI_voc_cid_t id,
                               arangodb::LogicalView const*, bool doSync) {
  throw std::runtime_error("not implemented23");
}
/// @brief build the directory name for a view.
/// Not implemented yet: always throws std::runtime_error.
std::string RocksDBEngine::createViewDirectoryName(
    std::string const& basePath, TRI_voc_cid_t id) {
  throw std::runtime_error("not implemented24");
  return "not implemented";  // unreachable
}
/// @brief cleanup notification for a database.
/// This engine has nothing to do at this point.
void RocksDBEngine::signalCleanup(TRI_vocbase_t*) {
  // nothing to do here
}
// document operations
// -------------------
/// @brief invoke `cb` for the documents of a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::iterateDocuments(
    TRI_voc_tick_t databaseId, TRI_voc_cid_t collectionId,
    std::function<void(arangodb::velocypack::Slice const&)> const& cb) {
  throw std::runtime_error("not implemented25");
}
/// @brief add a document revision to a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::addDocumentRevision(
    TRI_voc_tick_t databaseId, TRI_voc_cid_t collectionId,
    arangodb::velocypack::Slice const& document) {
  throw std::runtime_error("not implemented26");
}
/// @brief remove a document revision from a collection.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::removeDocumentRevision(
    TRI_voc_tick_t databaseId, TRI_voc_cid_t collectionId,
    arangodb::velocypack::Slice const& document) {
  throw std::runtime_error("not implemented27");
}
/// @brief remove data of expired compaction blockers.
/// Not implemented yet: always throws std::runtime_error.
bool RocksDBEngine::cleanupCompactionBlockers(TRI_vocbase_t* vocbase) {
  throw std::runtime_error("not implemented28");
  return true;  // unreachable
}
/// @brief insert a compaction blocker.
/// Not implemented yet: always throws std::runtime_error.
int RocksDBEngine::insertCompactionBlocker(TRI_vocbase_t* vocbase, double ttl,
                                           TRI_voc_tick_t& id) {
  throw std::runtime_error("not implemented29");
  // unreachable; the function returns an error code, so the fallback is an
  // explicit error instead of the bool literal `true`
  return TRI_ERROR_INTERNAL;
}
/// @brief touch an existing compaction blocker.
/// Not implemented yet: always throws std::runtime_error.
int RocksDBEngine::extendCompactionBlocker(TRI_vocbase_t* vocbase,
                                           TRI_voc_tick_t id, double ttl) {
  throw std::runtime_error("not implemented30");
  // unreachable; the function returns an error code, so the fallback is an
  // explicit error instead of the bool literal `true`
  return TRI_ERROR_INTERNAL;
}
/// @brief remove an existing compaction blocker.
/// Not implemented yet: always throws std::runtime_error.
int RocksDBEngine::removeCompactionBlocker(TRI_vocbase_t* vocbase,
                                           TRI_voc_tick_t id) {
  throw std::runtime_error("not implemented31");
  // unreachable; the function returns an error code, so the fallback is an
  // explicit error instead of the bool literal `true`
  return TRI_ERROR_INTERNAL;
}
/// @brief run a callback function while it is guaranteed that there is no
/// compaction ongoing.
/// Not implemented yet: always throws std::runtime_error.
void RocksDBEngine::preventCompaction(
    TRI_vocbase_t* vocbase,
    std::function<void(TRI_vocbase_t*)> const& callback) {
  throw std::runtime_error("not implemented32");
}
/// @brief run a callback function if there is no compaction ongoing.
/// Not implemented yet: always throws std::runtime_error.
bool RocksDBEngine::tryPreventCompaction(
    TRI_vocbase_t* vocbase,
    std::function<void(TRI_vocbase_t*)> const& callback,
    bool checkForActiveBlockers) {
  throw std::runtime_error("not implemented33");
  return true;  // unreachable
}
/// @brief shut down a database. Nothing is done here; the call always
/// reports TRI_ERROR_NO_ERROR.
int RocksDBEngine::shutdownDatabase(TRI_vocbase_t* vocbase) {
  return TRI_ERROR_NO_ERROR;
}
/// @brief open a collection.
/// Not implemented yet: always throws std::runtime_error.
int RocksDBEngine::openCollection(TRI_vocbase_t* vocbase,
                                  LogicalCollection* collection,
                                  bool ignoreErrors) {
  throw std::runtime_error("not implemented34");
  return 0;  // unreachable
}
/// @brief Add engine-specific AQL functions. Currently none are registered.
void RocksDBEngine::addAqlFunctions() {
  // there are no specific AQL functions here
  // TODO: potentially add NEAR, WITHIN?
}
/// @brief Add engine-specific optimizer rules. Currently none are registered.
void RocksDBEngine::addOptimizerRules() {
  // there are no specific optimizer rules here
  // TODO: add geo index optimization once there is the geo index
}
/// @brief Add engine-specific V8 functions. Currently none are registered.
void RocksDBEngine::addV8Functions() {
  // there are no specific V8 functions here
  // TODO: add WAL management functions here once they exist in the engine
}
/// @brief Add engine-specific REST handlers. Currently none are registered.
void RocksDBEngine::addRestHandlers(rest::RestHandlerFactory*) {
  // TODO: add /_api/export and /_admin/wal later
}
/// @brief drop the database with the given tick id (parameter `str`).
/// Currently this only logs a warning and returns a default-constructed
/// EngineResult; no stored data is removed here.
EngineResult RocksDBEngine::dropDatabase(TRI_voc_tick_t str) {
  LOG_TOPIC(WARN, Logger::STARTUP) << "rocksdb - dropping database: " << str;

  return EngineResult{};
}
bool RocksDBEngine::systemDatabaseExists() {
velocypack::Builder builder;
getDatabases(builder);
for (auto const& item : velocypack::ArrayIterator(builder.slice())) {
if (item.get("name").copyString() == StaticStrings::SystemDatabase) {
return true;
}
}
return false;
}
void RocksDBEngine::addSystemDatabase() {
// create system database entry
TRI_voc_tick_t id = TRI_NewTickServer();
VPackBuilder builder;
builder.openObject();
builder.add("id", VPackValue(std::to_string(id)));
builder.add("name", VPackValue(StaticStrings::SystemDatabase));
builder.add("deleted", VPackValue(false));
builder.close();
int res = writeCreateDatabaseMarker(id, builder.slice());
if (res != TRI_ERROR_NO_ERROR) {
LOG_TOPIC(FATAL, arangodb::Logger::STARTUP) << "unable to write database marker: " << TRI_errno_string(res);
FATAL_ERROR_EXIT();
}
}
/// @brief open an existing database. internal function
///
/// Creates the vocbase object, then registers all stored views (opening
/// each view's implementation) and all stored collections with it. Any
/// exception is logged and rethrown; in that case the vocbase object is
/// destroyed again. On success ownership of the vocbase is handed to the
/// caller.
TRI_vocbase_t* RocksDBEngine::openExistingDatabase(TRI_voc_tick_t id,
                                                   std::string const& name,
                                                   bool wasCleanShutdown,
                                                   bool isUpgrade) {
  auto vocbase =
      std::make_unique<TRI_vocbase_t>(TRI_VOCBASE_TYPE_NORMAL, id, name);

  try {
    // step 1: load the stored views
    {
      VPackBuilder viewsBuilder;
      int const res = getViews(vocbase.get(), viewsBuilder);
      if (res != TRI_ERROR_NO_ERROR) {
        THROW_ARANGO_EXCEPTION(res);
      }

      VPackSlice views = viewsBuilder.slice();
      TRI_ASSERT(views.isArray());

      ViewTypesFeature* viewTypesFeature =
          application_features::ApplicationServer::getFeature<ViewTypesFeature>(
              "ViewTypes");

      for (auto const& it : VPackArrayIterator(views)) {
        // we found a view that is still active
        std::string type = it.get("type").copyString();
        // will throw if type is invalid
        ViewCreator& creator = viewTypesFeature->creator(type);

        TRI_ASSERT(!it.get("id").isNone());

        std::shared_ptr<LogicalView> view =
            std::make_shared<arangodb::LogicalView>(vocbase.get(), it);
        StorageEngine::registerView(vocbase.get(), view);

        auto physical = static_cast<RocksDBView*>(view->getPhysical());
        TRI_ASSERT(physical != nullptr);

        view->spawnImplementation(creator, it, false);
        view->getImplementation()->open();
      }
    }

    // step 2: load the stored collections
    {
      VPackBuilder collectionsBuilder;
      int const res = getCollectionsAndIndexes(
          vocbase.get(), collectionsBuilder, wasCleanShutdown, isUpgrade);
      if (res != TRI_ERROR_NO_ERROR) {
        THROW_ARANGO_EXCEPTION(res);
      }

      VPackSlice collections = collectionsBuilder.slice();
      TRI_ASSERT(collections.isArray());

      for (auto const& it : VPackArrayIterator(collections)) {
        // we found a collection that is still active
        TRI_ASSERT(!it.get("id").isNone() || !it.get("cid").isNone());

        auto uniqCol =
            std::make_unique<arangodb::LogicalCollection>(vocbase.get(), it);
        arangodb::LogicalCollection* collection = uniqCol.get();
        TRI_ASSERT(collection != nullptr);

        StorageEngine::registerCollection(vocbase.get(), collection);
        // The vocbase has taken over control
        uniqCol.release();

        auto physical =
            static_cast<RocksDBCollection*>(collection->getPhysical());
        TRI_ASSERT(physical != nullptr);

        LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) << "added document collection '"
                                                  << collection->name() << "'";
      }
    }

    return vocbase.release();
  } catch (std::exception const& ex) {
    LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "error while opening database: "
                                            << ex.what();
    throw;
  } catch (...) {
    LOG_TOPIC(ERR, arangodb::Logger::FIXME)
        << "error while opening database: unknown exception";
    throw;
  }
}
} // namespace