//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Daniel H. Larkin /// @author Jan Steemann /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// #include "RocksDBEngine/RocksDBCommon.h" #include "RocksDBEngine/RocksDBComparator.h" #include "RocksDBEngine/RocksDBEngine.h" #include "RocksDBEngine/RocksDBKey.h" #include "RocksDBEngine/RocksDBKeyBounds.h" #include "RocksDBEngine/RocksDBTransactionState.h" #include "StorageEngine/EngineSelectorFeature.h" #include "Transaction/Methods.h" #include #include #include #include "Logger/Logger.h" namespace arangodb { namespace rocksutils { arangodb::Result convertStatus(rocksdb::Status const& status, StatusHint hint) { switch (status.code()) { case rocksdb::Status::Code::kOk: return {TRI_ERROR_NO_ERROR}; case rocksdb::Status::Code::kNotFound: switch (hint) { case StatusHint::collection: return {TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, status.ToString()}; case StatusHint::database: return {TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, status.ToString()}; case StatusHint::document: return {TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND, status.ToString()}; case StatusHint::index: return {TRI_ERROR_ARANGO_INDEX_NOT_FOUND, status.ToString()}; case StatusHint::view: return {TRI_ERROR_ARANGO_VIEW_NOT_FOUND, status.ToString()}; default: return {TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND, status.ToString()}; } case rocksdb::Status::Code::kCorruption: return {TRI_ERROR_ARANGO_CORRUPTED_DATAFILE, status.ToString()}; case rocksdb::Status::Code::kNotSupported: return {TRI_ERROR_NOT_IMPLEMENTED, status.ToString()}; case rocksdb::Status::Code::kInvalidArgument: return {TRI_ERROR_BAD_PARAMETER, status.ToString()}; case rocksdb::Status::Code::kIOError: if (status.subcode() == rocksdb::Status::SubCode::kNoSpace) { return {TRI_ERROR_ARANGO_FILESYSTEM_FULL, status.ToString()}; } return {TRI_ERROR_ARANGO_IO_ERROR, status.ToString()}; case rocksdb::Status::Code::kMergeInProgress: return {TRI_ERROR_ARANGO_MERGE_IN_PROGRESS, status.ToString()}; case rocksdb::Status::Code::kIncomplete: return {TRI_ERROR_INTERNAL, "'incomplete' error in storage engine"}; case rocksdb::Status::Code::kShutdownInProgress: return {TRI_ERROR_SHUTTING_DOWN, status.ToString()}; case rocksdb::Status::Code::kTimedOut: if (status.subcode() == rocksdb::Status::SubCode::kMutexTimeout || status.subcode() == rocksdb::Status::SubCode::kLockTimeout) { // TODO: maybe add a separator error code/message here return {TRI_ERROR_LOCK_TIMEOUT, status.ToString()}; } return {TRI_ERROR_LOCK_TIMEOUT, status.ToString()}; case rocksdb::Status::Code::kAborted: return {TRI_ERROR_TRANSACTION_ABORTED, status.ToString()}; case rocksdb::Status::Code::kBusy: if (status.subcode() == rocksdb::Status::SubCode::kDeadlock) { return {TRI_ERROR_DEADLOCK}; } return {TRI_ERROR_ARANGO_CONFLICT}; case rocksdb::Status::Code::kExpired: return {TRI_ERROR_INTERNAL, "key expired; TTL was set in error"}; case rocksdb::Status::Code::kTryAgain: return {TRI_ERROR_ARANGO_TRY_AGAIN, status.ToString()}; default: return {TRI_ERROR_INTERNAL, "unknown RocksDB status code"}; } } uint64_t uint64FromPersistent(char const* p) { uint64_t value = 0; uint64_t x = 0; uint8_t const* ptr = reinterpret_cast(p); uint8_t const* end = ptr + sizeof(uint64_t); do { value += static_cast(*ptr++) << x; x += 8; } while (ptr < end); return value; } void uint64ToPersistent(char* p, uint64_t value) { char* end = p + sizeof(uint64_t); do { *p++ = static_cast(value & 0xffU); value >>= 8; } while (p < end); } void uint64ToPersistent(std::string& p, uint64_t value) { size_t len = 0; do { p.push_back(static_cast(value & 0xffU)); value >>= 8; } while (++len < sizeof(uint64_t)); } RocksDBTransactionState* toRocksTransactionState(transaction::Methods* trx) { TRI_ASSERT(trx != nullptr); TransactionState* state = trx->state(); TRI_ASSERT(state != nullptr); return static_cast(state); } rocksdb::TransactionDB* globalRocksDB() { StorageEngine* engine = EngineSelectorFeature::ENGINE; TRI_ASSERT(engine != nullptr); RocksDBEngine* rocks = static_cast(engine); TRI_ASSERT(rocks->db() != nullptr); return rocks->db(); } RocksDBEngine* globalRocksEngine() { StorageEngine* engine = EngineSelectorFeature::ENGINE; TRI_ASSERT(engine != nullptr); return static_cast(engine); } arangodb::Result globalRocksDBPut(rocksdb::Slice const& key, rocksdb::Slice const& val, rocksdb::WriteOptions const& options) { auto status = globalRocksDB()->Put(options, key, val); return convertStatus(status); }; arangodb::Result globalRocksDBRemove(rocksdb::Slice const& key, rocksdb::WriteOptions const& options) { auto status = globalRocksDB()->Delete(options, key); return convertStatus(status); }; std::size_t countKeyRange(rocksdb::DB* db, rocksdb::ReadOptions const& opts, RocksDBKeyBounds const& bounds) { const rocksdb::Comparator* cmp = db->GetOptions().comparator; std::unique_ptr it(db->NewIterator(opts)); std::size_t count = 0; rocksdb::Slice lower(bounds.start()); rocksdb::Slice upper(bounds.end()); it->Seek(lower); while (it->Valid() && cmp->Compare(it->key(), upper) < 0) { ++count; it->Next(); } return count; } /// @brief helper method to remove large ranges of data /// Should mainly be used to implement the drop() call Result removeLargeRange(rocksdb::TransactionDB* db, RocksDBKeyBounds const& bounds) { try { // delete files in range lower..upper rocksdb::Slice lower(bounds.start()); rocksdb::Slice upper(bounds.end()); { rocksdb::Status status = rocksdb::DeleteFilesInRange( db->GetBaseDB(), db->GetBaseDB()->DefaultColumnFamily(), &lower, &upper); if (!status.ok()) { // if file deletion failed, we will still iterate over the remaining // keys, so we // don't need to abort and raise an error here LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "RocksDB file deletion failed"; } } // go on and delete the remaining keys (delete files in range does not // necessarily // find them all, just complete files) const rocksdb::Comparator* cmp = db->GetOptions().comparator; rocksdb::WriteBatch batch; std::unique_ptr it( db->NewIterator(rocksdb::ReadOptions())); it->Seek(lower); while (it->Valid() && cmp->Compare(it->key(), upper) < 0) { batch.Delete(it->key()); it->Next(); } // now apply deletion batch rocksdb::Status status = db->Write(rocksdb::WriteOptions(), &batch); if (!status.ok()) { LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "RocksDB key deletion failed: " << status.ToString(); return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; } catch (arangodb::basics::Exception const& ex) { LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "caught exception during RocksDB key prefix deletion: " << ex.what(); return ex.code(); } catch (std::exception const& ex) { LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "caught exception during RocksDB key prefix deletion: " << ex.what(); return TRI_ERROR_INTERNAL; } catch (...) { LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "caught unknown exception during RocksDB key prefix deletion"; return TRI_ERROR_INTERNAL; } } std::vector> collectionKVPairs( TRI_voc_tick_t databaseId) { std::vector> rv; RocksDBKeyBounds bounds = RocksDBKeyBounds::DatabaseCollections(databaseId); iterateBounds(bounds, [&rv](rocksdb::Iterator* it) { rv.emplace_back(RocksDBKey(it->key()), RocksDBValue(RocksDBEntryType::Collection, it->value())); }); return rv; } std::vector> viewKVPairs( TRI_voc_tick_t databaseId) { std::vector> rv; RocksDBKeyBounds bounds = RocksDBKeyBounds::DatabaseViews(databaseId); iterateBounds(bounds, [&rv](rocksdb::Iterator* it) { rv.emplace_back(RocksDBKey(it->key()), RocksDBValue(RocksDBEntryType::View, it->value())); }); return rv; } } // namespace rocksutils } // namespace arangodb