diff --git a/CHANGELOG b/CHANGELOG index 44c81b37c6..88063af697 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,19 @@ v2.6.0 (XXXX-XX-XX) ------------------- +* changed default value of option `--database.ignore-datafile-errors` from `true` to `false` + + If the new default value of `false` is used, then arangod will refuse loading collections that contain + datafiles with CRC mismatches or other errors. A collection with datafile errors will then become + unavailable. This prevents follow-up errors from happening. + + The only way to access such a collection is to use the datafile debugger (arango-dfdb) and try to repair + or truncate the datafile with it. + + If `--database.ignore-datafile-errors` is set to `true`, then collections will become available + even if parts of their data cannot be loaded. This helps availability, but may cause (partial) data + loss and follow-up errors. + * added server startup option `--server.session-timeout` for controlling the timeout of user sessions in the web interface @@ -552,7 +565,7 @@ v2.5.1 (2015-03-19) If set to `true`, CRC mismatch errors in collection datafiles will lead to the datafile being partially loaded. All data up to until the mismatch will - be loaded. This will enable users to continue with a collection datafiles + be loaded. This will enable users to continue with collection datafiles that are corrupted, but will result in only a partial load of the data. The WAL recovery will still abort when encountering a collection with a corrupted datafile, at least if `--wal.ignore-recovery-errors` is not set to diff --git a/Documentation/Books/Users/ConfigureArango/Wal.mdpp b/Documentation/Books/Users/ConfigureArango/Wal.mdpp index 6534dc6bd7..5183dd97cb 100644 --- a/Documentation/Books/Users/ConfigureArango/Wal.mdpp +++ b/Documentation/Books/Users/ConfigureArango/Wal.mdpp @@ -53,7 +53,7 @@ a replication backlog. 
@startDocuBlock WalLogfileIgnoreRecoveryErrors -!SUBSECTION Ignore logfile errors +!SUBSECTION Ignore (non-WAL) datafile errors @startDocuBlock databaseIgnoreDatafileErrors diff --git a/Documentation/Books/Users/DatafileDebugger/README.mdpp b/Documentation/Books/Users/DatafileDebugger/README.mdpp index cf98066743..d6532a9720 100644 --- a/Documentation/Books/Users/DatafileDebugger/README.mdpp +++ b/Documentation/Books/Users/DatafileDebugger/README.mdpp @@ -16,7 +16,7 @@ the program in order to check the consistency of the datafiles and journals. This brings up - ___ _ __ _ _ ___ ___ ___ + ___ _ __ _ _ ___ ___ ___ / \__ _| |_ __ _ / _(_) | ___ / \/ __\ / _ \ / /\ / _` | __/ _` | |_| | |/ _ \ / /\ /__\// / /_\/ / /_// (_| | || (_| | _| | | __/ / /_// \/ \/ /_\\ @@ -34,8 +34,8 @@ in order to check the consistency of the datafiles and journals. This brings up Collection to check: -You can now select, which collection you want to check. After you selected one -or all collections, a consistency check is performed. +You can now select which database and collection you want to check. After you selected +one or all of the collections, a consistency check will be performed. Checking collection #1: _users @@ -60,10 +60,10 @@ or all collections, a consistency check is performed. # of entries: 3 status: OK -If there is a problem with one of the datafile, then the database debugger tries -to fixed that problem. +If there is a problem with one of the datafiles, then the database debugger will print it +and prompt for whether to attempt to fix it. - WARNING: The journal was not closed properly, the last entries is corrupted. + WARNING: The journal was not closed properly, the last entries are corrupted. This might happen ArangoDB was killed and the last entries were not fully written to disk. @@ -72,6 +72,6 @@ to fixed that problem. If you answer **Y**, the corrupted entry will be removed. If you see a corruption in a datafile (and not a journal), then something is -terrible wrong. 
These files are immutable and never changed by ArangoDB. A -corruption in such a file is an indication of a hard-disk failure. +terribly wrong. These files are immutable and never changed by ArangoDB. A +corruption in such a file is an indication of a hard-disk failure. diff --git a/arangod/RestServer/ArangoServer.cpp b/arangod/RestServer/ArangoServer.cpp index 429915e804..d60cc4e573 100644 --- a/arangod/RestServer/ArangoServer.cpp +++ b/arangod/RestServer/ArangoServer.cpp @@ -319,7 +319,7 @@ ArangoServer::ArangoServer (int argc, char** argv) _defaultMaximalSize(TRI_JOURNAL_DEFAULT_MAXIMAL_SIZE), _defaultWaitForSync(false), _forceSyncProperties(true), - _ignoreDatafileErrors(true), + _ignoreDatafileErrors(false), _disableReplicationApplier(false), _disableQueryTracking(false), _foxxQueuesSystemOnly(true), diff --git a/arangod/RestServer/ArangoServer.h b/arangod/RestServer/ArangoServer.h index cf791be7b3..1d1eb2770a 100644 --- a/arangod/RestServer/ArangoServer.h +++ b/arangod/RestServer/ArangoServer.h @@ -454,25 +454,32 @@ namespace triagens { /// @startDocuBlock databaseIgnoreDatafileErrors /// `--database.ignore-datafile-errors boolean` /// -/// If set to `false`, CRC mismatch errors in collection datafiles will lead -/// to a collection not being loaded at all. If a collection needs to be loaded -/// during WAL recovery, the WAL recovery will also abort (if not forced with -/// `--wal.ignore-recovery-errors true`). Setting this flag to `false` protects -/// users from unintentionally using a collection with corrupted datafiles, from -/// which only a subset of the original data can be recovered. +/// If set to `false`, CRC mismatch and other errors in collection datafiles +/// will lead to a collection not being loaded at all. The collection in this +/// case becomes unavailable. If such a collection needs to be loaded during WAL +/// recovery, the WAL recovery will also abort (if not forced with option +/// `--wal.ignore-recovery-errors true`). 
/// -/// If set to `true`, CRC mismatch errors in collection datafiles will lead to -/// the datafile being partially loaded. All data up to until the mismatch will -/// be loaded. This will enable users to continue with a collection datafiles -/// that are corrupted, but will result in only a partial load of the data. -/// The WAL recovery will still abort when encountering a collection with a -/// corrupted datafile, at least if `--wal.ignore-recovery-errors` is not set to -/// `true`. +/// Setting this flag to `false` protects users from unintentionally using a +/// collection with corrupted datafiles, from which only a subset of the +/// original data can be recovered. Working with such a collection could lead +/// to data loss and follow-up errors. +/// In order to access such a collection, it is required to inspect and repair +/// the collection datafile with the datafile debugger (arango-dfdb). /// -/// The default value is *true*, so for collections with corrupted datafiles -/// there might be partial data loads once the WAL recovery has finished. If -/// the WAL recovery will need to load a collection with a corrupted datafile, -/// it will still stop when using the default values. +/// If set to `true`, CRC mismatch and other errors during the loading of a +/// collection will lead to the datafile being partially loaded, up to the +/// position of the first error. All data up to the invalid position +/// will be loaded. This will enable users to continue with collection datafiles +/// even if they are corrupted, but this will result in only a partial load +/// of the original data and potential follow-up errors. The WAL recovery +/// will still abort when encountering a collection with a corrupted datafile, +/// at least if `--wal.ignore-recovery-errors` is not set to `true`. +/// +/// The default value is *false*, so collections with corrupted datafiles will +/// not be loaded at all, preventing partial loads and follow-up errors. 
However, +/// if such collection is required at server startup, during WAL recovery, the +/// server will abort the recovery and refuse to start. /// @endDocuBlock //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/V8Server/v8-collection.cpp b/arangod/V8Server/v8-collection.cpp index 9cbd05549e..7fa31507dd 100644 --- a/arangod/V8Server/v8-collection.cpp +++ b/arangod/V8Server/v8-collection.cpp @@ -3352,6 +3352,47 @@ static void JS_TruncateDatafileVocbaseCol (const v8::FunctionCallbackInfo& args) { + v8::Isolate* isolate = args.GetIsolate(); + v8::HandleScope scope(isolate); + + TRI_vocbase_col_t* collection = TRI_UnwrapClass(args.Holder(), WRP_VOCBASE_COL_TYPE); + + if (collection == nullptr) { + TRI_V8_THROW_EXCEPTION_INTERNAL("cannot extract collection"); + } + + TRI_THROW_SHARDING_COLLECTION_NOT_YET_IMPLEMENTED(collection); + + if (args.Length() != 1) { + TRI_V8_THROW_EXCEPTION_USAGE("tryRepairDatafile()"); + } + + string path = TRI_ObjectToString(args[0]); + + TRI_READ_LOCK_STATUS_VOCBASE_COL(collection); + + if (collection->_status != TRI_VOC_COL_STATUS_UNLOADED && + collection->_status != TRI_VOC_COL_STATUS_CORRUPTED) { + TRI_READ_UNLOCK_STATUS_VOCBASE_COL(collection); + TRI_V8_THROW_EXCEPTION(TRI_ERROR_ARANGO_COLLECTION_NOT_UNLOADED); + } + + bool result = TRI_TryRepairDatafile(path.c_str()); + + TRI_READ_UNLOCK_STATUS_VOCBASE_COL(collection); + + if (result) { + TRI_V8_RETURN_TRUE(); + } + + TRI_V8_RETURN_FALSE(); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief returns the type of a collection /// @startDocuBlock collectionType @@ -4358,7 +4399,7 @@ void TRI_InitV8collection (v8::Handle context, #endif TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("count"), JS_CountVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("datafiles"), JS_DatafilesVocbaseCol); - TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("datafileScan"), 
JS_DatafileScanVocbaseCol); + TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("datafileScan"), JS_DatafileScanVocbaseCol, true); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("document"), JS_DocumentVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("drop"), JS_DropVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("exists"), JS_ExistsVocbaseCol); @@ -4376,7 +4417,8 @@ void TRI_InitV8collection (v8::Handle context, TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("save"), JS_InsertVocbaseCol); // note: save is now an alias for insert TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("status"), JS_StatusVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("TRUNCATE"), JS_TruncateVocbaseCol, true); - TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("truncateDatafile"), JS_TruncateDatafileVocbaseCol); + TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("truncateDatafile"), JS_TruncateDatafileVocbaseCol, true); + TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("tryRepairDatafile"), JS_TryRepairDatafileVocbaseCol, true); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("type"), JS_TypeVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("unload"), JS_UnloadVocbaseCol); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("update"), JS_UpdateVocbaseCol); diff --git a/arangod/V8Server/v8-wrapshapedjson.cpp b/arangod/V8Server/v8-wrapshapedjson.cpp index d101ed93d0..38ba400efb 100644 --- a/arangod/V8Server/v8-wrapshapedjson.cpp +++ b/arangod/V8Server/v8-wrapshapedjson.cpp @@ -224,6 +224,7 @@ static v8::Handle SetBasicDocumentAttributesShaped (v8::Isolate* iso TRI_GET_GLOBAL_STRING(_ToKey); result->ForceSet(_ToKey, TRI_V8_PAIR_STRING(buffer, (int) (len + keyLength + 1))); } + return scope.Escape(result); } @@ -291,6 +292,7 @@ v8::Handle TRI_WrapShapedJson (v8::Isolate* isolate, TRI_shape_t const* shape = shaper->lookupShapeId(shaper, json._sid); if (shape == nullptr) { + 
LOG_WARNING("cannot find shape #%u", (unsigned int) json._sid); return scope.Escape(v8::Object::New(isolate)); } @@ -370,27 +372,28 @@ static void KeysOfShapedJson (const v8::PropertyCallbackInfo& args) { TRI_shape_sid_t sid; TRI_EXTRACT_SHAPE_IDENTIFIER_MARKER(sid, marker); + TRI_shape_aid_t const* aids; + TRI_shape_size_t n; TRI_shape_t const* shape = shaper->lookupShapeId(shaper, sid); if (shape == nullptr || shape->_type != TRI_SHAPE_ARRAY) { - TRI_V8_RETURN(v8::Array::New(isolate)); + n = 0; + aids = nullptr; + LOG_WARNING("cannot find shape #%u", (unsigned int) sid); } + else { + // shape is an array + TRI_array_shape_t const* s = (TRI_array_shape_t const*) shape; - TRI_array_shape_t const* s; - TRI_shape_aid_t const* aids; - char const* qtr; + // number of entries + n = s->_fixedEntries + s->_variableEntries; - // shape is an array - s = (TRI_array_shape_t const*) shape; - - // number of entries - TRI_shape_size_t const n = s->_fixedEntries + s->_variableEntries; - - // calculate position of attribute ids - qtr = (char const*) shape; - qtr += sizeof(TRI_array_shape_t); - qtr += n * sizeof(TRI_shape_sid_t); - aids = (TRI_shape_aid_t const*) qtr; + // calculate position of attribute ids + char const* qtr = (char const*) shape; + qtr += sizeof(TRI_array_shape_t); + qtr += n * sizeof(TRI_shape_sid_t); + aids = (TRI_shape_aid_t const*) qtr; + } TRI_df_marker_type_t type = static_cast(marker)->_type; bool isEdge = (type == TRI_DOC_MARKER_KEY_EDGE || type == TRI_WAL_MARKER_EDGE); @@ -414,7 +417,6 @@ static void KeysOfShapedJson (const v8::PropertyCallbackInfo& args) { } for (TRI_shape_size_t i = 0; i < n; ++i, ++aids) { - /// TODO: avoid strlen here! 
char const* att = shaper->lookupAttributeId(shaper, *aids); if (att != nullptr) { @@ -436,18 +438,6 @@ static void CopyAttributes (v8::Isolate* isolate, TRI_barrier_t* barrier = static_cast(v8::Handle::Cast(self->GetInternalField(SLOT_BARRIER))->Value()); TRI_document_collection_t* collection = barrier->_container->_collection; - // check for array shape - TRI_shaper_t* shaper = collection->getShaper(); // PROTECTED by BARRIER, checked by RUNTIME - - TRI_shape_sid_t sid; - TRI_EXTRACT_SHAPE_IDENTIFIER_MARKER(sid, marker); - - TRI_shape_t const* shape = shaper->lookupShapeId(shaper, sid); - - if (shape == nullptr || shape->_type != TRI_SHAPE_ARRAY) { - return; - } - // copy _key and _rev // _key @@ -468,6 +458,20 @@ static void CopyAttributes (v8::Isolate* isolate, self->ForceSet(_RevKey, TRI_V8_PAIR_STRING((char const*) buffer, (int) len)); // finally insert the dynamic attributes from the shaped json + + // check for array shape + TRI_shaper_t* shaper = collection->getShaper(); // PROTECTED by BARRIER, checked by RUNTIME + + TRI_shape_sid_t sid; + TRI_EXTRACT_SHAPE_IDENTIFIER_MARKER(sid, marker); + + TRI_shape_t const* shape = shaper->lookupShapeId(shaper, sid); + + if (shape == nullptr || shape->_type != TRI_SHAPE_ARRAY) { + LOG_WARNING("cannot find shape #%u", (unsigned int) sid); + return; + } + TRI_array_shape_t const* s; TRI_shape_aid_t const* aids; char const* qtr; diff --git a/arangod/VocBase/datafile.cpp b/arangod/VocBase/datafile.cpp index 51292b8e9a..df8415ee40 100644 --- a/arangod/VocBase/datafile.cpp +++ b/arangod/VocBase/datafile.cpp @@ -134,6 +134,28 @@ static int TruncateDatafile (TRI_datafile_t* const datafile, const off_t length) return TRI_ERROR_NO_ERROR; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief calculates the actual CRC of a marker, without bounds checks +//////////////////////////////////////////////////////////////////////////////// + +static TRI_voc_crc_t CalculateCrcValue 
(TRI_df_marker_t const* marker) { + TRI_voc_size_t zero = 0; + off_t o = offsetof(TRI_df_marker_t, _crc); + size_t n = sizeof(TRI_voc_crc_t); + + char const* ptr = (char const*) marker; + + TRI_voc_crc_t crc = TRI_InitialCrc32(); + + crc = TRI_BlockCrc32(crc, ptr, o); + crc = TRI_BlockCrc32(crc, (char*) &zero, n); + crc = TRI_BlockCrc32(crc, ptr + o + n, marker->_size - o - n); + + crc = TRI_FinalCrc32(crc); + + return crc; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief diagnoses a marker //////////////////////////////////////////////////////////////////////////////// @@ -168,12 +190,6 @@ static std::string DiagnoseMarker (TRI_df_marker_t const* marker, return result.str(); } - TRI_voc_size_t zero = 0; - off_t o = offsetof(TRI_df_marker_t, _crc); - size_t n = sizeof(TRI_voc_crc_t); - - char const* ptr = (char const*) marker; - if (marker->_size < sizeof(TRI_df_marker_t)) { result << "marker size is too small (" << marker->_size << "). expecting at least " << sizeof(TRI_df_marker_t) << " bytes"; return result.str(); @@ -184,13 +200,7 @@ static std::string DiagnoseMarker (TRI_df_marker_t const* marker, return result.str(); } - TRI_voc_crc_t crc = TRI_InitialCrc32(); - - crc = TRI_BlockCrc32(crc, ptr, o); - crc = TRI_BlockCrc32(crc, (char*) &zero, n); - crc = TRI_BlockCrc32(crc, ptr + o + n, marker->_size - o - n); - - crc = TRI_FinalCrc32(crc); + TRI_voc_crc_t crc = CalculateCrcValue(marker); if (marker->_crc == crc) { result << "crc checksum is correct"; @@ -203,17 +213,11 @@ static std::string DiagnoseMarker (TRI_df_marker_t const* marker, } //////////////////////////////////////////////////////////////////////////////// -/// @brief checks a CRC of a marker +/// @brief checks a CRC of a marker, with bounds checks //////////////////////////////////////////////////////////////////////////////// static bool CheckCrcMarker (TRI_df_marker_t const* marker, char const* end) { - TRI_voc_size_t zero = 0; - off_t o = 
offsetof(TRI_df_marker_t, _crc); - size_t n = sizeof(TRI_voc_crc_t); - - char const* ptr = (char const*) marker; - if (marker->_size < sizeof(TRI_df_marker_t)) { return false; } @@ -222,15 +226,8 @@ static bool CheckCrcMarker (TRI_df_marker_t const* marker, return false; } - TRI_voc_crc_t crc = TRI_InitialCrc32(); - - crc = TRI_BlockCrc32(crc, ptr, o); - crc = TRI_BlockCrc32(crc, (char*) &zero, n); - crc = TRI_BlockCrc32(crc, ptr + o + n, marker->_size - o - n); - - crc = TRI_FinalCrc32(crc); - - return marker->_crc == crc; + auto expected = CalculateCrcValue(marker); + return marker->_crc == expected; } //////////////////////////////////////////////////////////////////////////////// @@ -626,22 +623,156 @@ static TRI_df_scan_t ScanDatafile (TRI_datafile_t const* datafile) { return scan; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief tries to repair a datafile +//////////////////////////////////////////////////////////////////////////////// + +static bool TryRepairDatafile (TRI_datafile_t* datafile) { + // this function must not be called for non-physical datafiles + TRI_ASSERT(datafile->isPhysical(datafile)); + + char* ptr = datafile->_data; + char* end = datafile->_data + datafile->_currentSize; + + if (datafile->_currentSize == 0) { + end = datafile->_data + datafile->_maximalSize; + } + + TRI_voc_size_t currentSize = 0; + + while (ptr < end) { + TRI_df_marker_t* marker = reinterpret_cast(ptr); + + if (marker->_size == 0) { + // reached end + return true; + } + + if (marker->_size < sizeof(TRI_df_marker_t) || + ptr + marker->_size > end) { + // marker too small or too big + return false; + } + + if (! TRI_IsValidMarkerDatafile(marker)) { + // unknown marker type + return false; + } + + if (marker->_type != 0) { + if (! CheckCrcMarker(marker, end)) { + // CRC mismatch! 
+ auto next = reinterpret_cast(marker) + marker->_size; + auto p = next; + + if (p < end) { + // check if the rest of the datafile is only followed by NULL bytes + bool isFollowedByNullBytes = true; + while (p < end) { + if (*p != '\0') { + isFollowedByNullBytes = false; + break; + } + ++p; + } + + if (isFollowedByNullBytes) { + // only last marker in datafile was corrupt. fix the datafile in place + LOG_INFO("truncating datafile '%s' at position %lu", + datafile->getName(datafile), + (unsigned long) currentSize); + int res = TruncateAndSealDatafile(datafile, currentSize); + return (res == TRI_ERROR_NO_ERROR); + } + + // there is some other stuff following. now inspect it... + TRI_ASSERT(next <= end); + + if (next < end) { + // there is a next marker + auto nextMarker = reinterpret_cast(next); + + if (nextMarker->_type != 0 && + nextMarker->_size >= sizeof(TRI_df_marker_t) && + next + nextMarker->_size <= end && + TRI_IsValidMarkerDatafile(nextMarker) && + CheckCrcMarker(nextMarker, end)) { + // next marker looks good. + + // create a temporary buffer + auto buffer = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, marker->_size, false); + + if (buffer == nullptr) { + return false; + } + + // create a new marker in the temporary buffer + auto temp = reinterpret_cast(buffer); + TRI_InitMarkerDatafile(static_cast(buffer), TRI_DF_MARKER_BLANK, static_cast(marker->_size)); + temp->_crc = CalculateCrcValue(temp); + + // all done. 
now copy back the marker into the file + memcpy(static_cast(ptr), buffer, static_cast(marker->_size)); + + TRI_Free(TRI_UNKNOWN_MEM_ZONE, buffer); + + bool ok = datafile->sync(datafile, ptr, (ptr + marker->_size)); + + if (ok) { + LOG_INFO("zeroed single invalid marker in datafile '%s' at position %lu", + datafile->getName(datafile), + (unsigned long) currentSize); + } + else { + LOG_ERROR("could not zero single invalid marker in datafile '%s' at position %lu", + datafile->getName(datafile), + (unsigned long) currentSize); + return false; + } + } + else { + // next marker looks broken, too. + int res = TruncateAndSealDatafile(datafile, currentSize); + return (res == TRI_ERROR_NO_ERROR); + } + } + } + + } + } + + size_t size = TRI_DF_ALIGN_BLOCK(marker->_size); + currentSize += (TRI_voc_size_t) size; + + if (marker->_type == TRI_DF_MARKER_FOOTER) { + return true; + } + + ptr += size; + } + + return true; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief fixes a corrupted datafile //////////////////////////////////////////////////////////////////////////////// static bool FixDatafile (TRI_datafile_t* datafile, TRI_voc_size_t currentSize) { - LOG_WARNING("datafile '%s' is corrupted at position %llu. 
setting it to read-only", - datafile->getName(datafile), + LOG_WARNING("datafile '%s' is corrupted at position %llu", + datafile->getName(datafile), (unsigned long long) currentSize); + + LOG_WARNING("setting datafile '%s' to read-only and ignoring all data from this file beyond this position", + datafile->getName(datafile)); datafile->_currentSize = currentSize; datafile->_maximalSize = static_cast(currentSize); - datafile->_next = datafile->_data + datafile->_currentSize; - datafile->_full = true; - datafile->_state = TRI_DF_STATE_READ; - datafile->_isSealed = true; + datafile->_next = datafile->_data + datafile->_currentSize; + datafile->_full = true; + datafile->_state = TRI_DF_STATE_READ; + datafile->_isSealed = true; return true; } @@ -672,7 +803,7 @@ static bool CheckDatafile (TRI_datafile_t* datafile, }; while (ptr < end) { - TRI_df_marker_t* marker = (TRI_df_marker_t*) ptr; + TRI_df_marker_t* marker = reinterpret_cast(ptr); #ifdef DEBUG_DATAFILE LOG_TRACE("MARKER: size %lu, tick %lx, crc %lx, type %u", @@ -696,19 +827,45 @@ static bool CheckDatafile (TRI_datafile_t* datafile, } if (marker->_size < sizeof(TRI_df_marker_t)) { - datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); - datafile->_currentSize = currentSize; - datafile->_next = datafile->_data + datafile->_currentSize; - datafile->_state = TRI_DF_STATE_OPEN_ERROR; + if (ignoreFailures) { + return FixDatafile(datafile, currentSize); + } + else { + datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); + datafile->_currentSize = currentSize; + datafile->_next = datafile->_data + datafile->_currentSize; + datafile->_state = TRI_DF_STATE_OPEN_ERROR; - LOG_WARNING("marker in datafile '%s' too small, size %lu, should be at least %lu", - datafile->getName(datafile), - (unsigned long) marker->_size, - (unsigned long) sizeof(TRI_df_marker_t)); + LOG_WARNING("marker in datafile '%s' too small, size %lu, should be at least %lu", + datafile->getName(datafile), + 
(unsigned long) marker->_size, + (unsigned long) sizeof(TRI_df_marker_t)); - updateTick(maxTick); + updateTick(maxTick); - return false; + return false; + } + } + + // prevent reading over the end of the file + if (ptr + marker->_size > end) { + if (ignoreFailures) { + return FixDatafile(datafile, currentSize); + } + else { + datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); + datafile->_currentSize = currentSize; + datafile->_next = datafile->_data + datafile->_currentSize; + datafile->_state = TRI_DF_STATE_OPEN_ERROR; + + LOG_WARNING("marker in datafile '%s' points with size %lu beyond end of file", + datafile->getName(datafile), + (unsigned long) marker->_size); + + updateTick(maxTick); + + return false; + } } // the following sanity check offers some, but not 100% crash-protection when reading @@ -747,17 +904,22 @@ static bool CheckDatafile (TRI_datafile_t* datafile, bool ok = CheckCrcMarker(marker, end); if (! ok) { + // CRC mismatch! + bool nextMarkerOk = false; + if (marker->_size > 0) { auto next = reinterpret_cast(marker) + marker->_size; - if (next < end) { + auto p = next; + + if (p < end) { // check if the rest of the datafile is only followed by NULL bytes bool isFollowedByNullBytes = true; - while (next < end) { - if (*next != '\0') { + while (p < end) { + if (*p != '\0') { isFollowedByNullBytes = false; break; } - ++next; + ++p; } if (isFollowedByNullBytes) { @@ -765,6 +927,28 @@ static bool CheckDatafile (TRI_datafile_t* datafile, LOG_WARNING("datafile '%s' automatically truncated at last marker", datafile->getName(datafile)); ignoreFailures = true; } + else { + // there is some other stuff following. now inspect it... 
+ TRI_ASSERT(next <= end); + + if (next < end) { + // there is a next marker + auto nextMarker = reinterpret_cast(next); + + if (nextMarker->_type != 0 && + nextMarker->_size >= sizeof(TRI_df_marker_t) && + next + nextMarker->_size <= end && + TRI_IsValidMarkerDatafile(nextMarker) && + CheckCrcMarker(nextMarker, end)) { + // next marker looks good. + nextMarkerOk = true; + } + } + else { + // EOF + nextMarkerOk = true; + } + } } } @@ -777,8 +961,19 @@ static bool CheckDatafile (TRI_datafile_t* datafile, datafile->_next = datafile->_data + datafile->_currentSize; datafile->_state = TRI_DF_STATE_OPEN_ERROR; - LOG_WARNING("crc mismatch found in datafile '%s'", datafile->getName(datafile)); - + LOG_WARNING("crc mismatch found in datafile '%s' at position %lu. expected crc: %x, actual crc: %x", + datafile->getName(datafile), + (unsigned long) currentSize, + CalculateCrcValue(marker), + marker->_crc); + + if (nextMarkerOk) { + LOG_INFO("data directly following this marker looks ok so repairing the marker may recover it"); + } + else { + LOG_WARNING("data directly following this marker cannot be analyzed"); + } + updateTick(maxTick); return false; @@ -1166,8 +1361,6 @@ TRI_datafile_t* TRI_CreateAnonymousDatafile (TRI_voc_fid_t fid, TRI_datafile_t* TRI_CreatePhysicalDatafile (char const* filename, TRI_voc_fid_t fid, TRI_voc_size_t maximalSize) { - TRI_datafile_t* datafile; - ssize_t res; void* data; void* mmHandle; @@ -1181,7 +1374,7 @@ TRI_datafile_t* TRI_CreatePhysicalDatafile (char const* filename, } // memory map the data - res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, &mmHandle, 0, &data); + ssize_t res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, &mmHandle, 0, &data); if (res != TRI_ERROR_NO_ERROR) { TRI_set_errno(res); @@ -1195,7 +1388,7 @@ TRI_datafile_t* TRI_CreatePhysicalDatafile (char const* filename, } // create datafile structure - datafile = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, 
sizeof(TRI_datafile_t), false)); + auto datafile = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_datafile_t), false)); if (datafile == nullptr) { TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); @@ -1246,6 +1439,8 @@ char const* TRI_NameMarkerDatafile (TRI_df_marker_t const* marker) { switch (marker->_type) { // general markers case TRI_DF_MARKER_HEADER: case TRI_COL_MARKER_HEADER: return "header"; + case TRI_DF_MARKER_BLANK: + return "blank marker (used when repairing datafiles)"; case TRI_DF_MARKER_FOOTER: @@ -1709,20 +1904,17 @@ TRI_datafile_t* TRI_OpenDatafile (char const* filename, //////////////////////////////////////////////////////////////////////////////// TRI_datafile_t* TRI_ForcedOpenDatafile (char const* filename) { - TRI_datafile_t* datafile; - bool ok; - // this function must not be called for non-physical datafiles TRI_ASSERT(filename != nullptr); - datafile = OpenDatafile(filename, true); + TRI_datafile_t* datafile = OpenDatafile(filename, true); if (datafile == nullptr) { return nullptr; } // check the current marker - ok = CheckDatafile(datafile, true); + bool ok = CheckDatafile(datafile, true); if (! 
ok) { LOG_ERROR("datafile '%s' is corrupt", datafile->getName(datafile)); @@ -1930,6 +2122,30 @@ int TRI_TruncateDatafile (char const* path, return res; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief try to repair a datafile +//////////////////////////////////////////////////////////////////////////////// + +bool TRI_TryRepairDatafile (char const* path) { + // this function must not be called for non-physical datafiles + TRI_ASSERT(path != nullptr); + + TRI_datafile_t* datafile = OpenDatafile(path, true); + + if (datafile == nullptr) { + return false; + } + + // set to read/write access + TRI_ProtectMMFile(datafile->_data, datafile->_maximalSize, PROT_READ | PROT_WRITE, datafile->_fd, &datafile->_mmHandle); + + int result = TryRepairDatafile(datafile); + TRI_CloseDatafile(datafile); + TRI_FreeDatafile(datafile); + + return result; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief returns information about the datafile //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/VocBase/datafile.h b/arangod/VocBase/datafile.h index e60a5c9122..45f986f0f5 100644 --- a/arangod/VocBase/datafile.h +++ b/arangod/VocBase/datafile.h @@ -144,6 +144,8 @@ typedef enum { TRI_DF_MARKER_ATTRIBUTE = 1003, TRI_DF_MARKER_SHAPE = 1004, + TRI_DF_MARKER_BLANK = 1100, + TRI_COL_MARKER_HEADER = 2000, /* TRI_DOC_MARKER_HEADER = 3000, // deprecated. 
do not use @@ -741,6 +743,12 @@ bool TRI_RenameDatafile (TRI_datafile_t* datafile, char const* filename); int TRI_TruncateDatafile (char const* path, TRI_voc_size_t position); +//////////////////////////////////////////////////////////////////////////////// +/// @brief try to repair a datafile +//////////////////////////////////////////////////////////////////////////////// + +bool TRI_TryRepairDatafile (char const* path); + //////////////////////////////////////////////////////////////////////////////// /// @brief returns information about the datafile //////////////////////////////////////////////////////////////////////////////// diff --git a/js/server/arango-dfdb.js b/js/server/arango-dfdb.js index 165410015e..59ae149714 100644 --- a/js/server/arango-dfdb.js +++ b/js/server/arango-dfdb.js @@ -50,7 +50,7 @@ function UnloadCollection (collection) { var next = Math.round(internal.time()); - if (next != last) { + if (next !== last) { printf("Trying to unload collection '%s'\n", collection.name()); last = next; } @@ -71,6 +71,15 @@ function RemoveDatafile (collection, type, datafile) { printf("\n"); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief try to repair a datafile +//////////////////////////////////////////////////////////////////////////////// + +function TryRepairDatafile (collection, datafile) { + UnloadCollection(collection); + return collection.tryRepairDatafile(datafile); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief wipe entries //////////////////////////////////////////////////////////////////////////////// @@ -89,7 +98,7 @@ function WipeDatafile (collection, type, datafile, lastGoodPos) { function QueryWipeDatafile (collection, type, datafile, scan, lastGoodPos) { var entries = scan.entries; - if (entries.length == 0) { + if (entries.length === 0) { if (type === "journal" || type === "compactor") { printf("WARNING: The journal is empty. 
Even the header is missing. Going\n"); printf(" to remove the file.\n"); @@ -107,16 +116,18 @@ function QueryWipeDatafile (collection, type, datafile, scan, lastGoodPos) { } var ask = true; + var tryRepair = false; + if (type === "journal") { if (entries.length === lastGoodPos + 3 && entries[lastGoodPos + 2].status === 2) { printf("WARNING: The journal was not closed properly, the last entry is corrupted.\n"); - printf(" This might happen ArangoDB was killed and the last entry was not\n"); + printf(" This might happen if ArangoDB was killed and the last entry was not\n"); printf(" fully written to disk. Going to remove the last entry.\n"); ask = false; } else { printf("WARNING: The journal was not closed properly, the last entries are corrupted.\n"); - printf(" This might happen ArangoDB was killed and the last entries were not\n"); + printf(" This might happen if ArangoDB was killed and the last entries were not\n"); printf(" fully written to disk.\n"); } } @@ -124,13 +135,33 @@ function QueryWipeDatafile (collection, type, datafile, scan, lastGoodPos) { printf("WARNING: The datafile contains corrupt entries. This should never happen.\n"); printf(" Datafiles are append-only. Make sure your hard disk does not contain\n"); printf(" any hardware errors.\n"); + + tryRepair = true; } printf("\n"); + + var entry = entries[lastGoodPos]; if (ask) { + var line; + + if (tryRepair) { + printf("Try to repair the error(s) (Y/N)? "); + line = console.getline(); + + if (line === "yes" || line === "YES" || line === "y" || line === "Y") { + if (TryRepairDatafile(collection, datafile)) { + printf("Repair succeeded.\n"); + return; + } + + printf("Repair failed.\n"); + } + } + printf("Wipe the last entries (Y/N)? 
"); - var line = console.getline(); + line = console.getline(); if (line !== "yes" && line !== "YES" && line !== "y" && line !== "Y") { printf("ABORTING\n"); @@ -138,8 +169,6 @@ function QueryWipeDatafile (collection, type, datafile, scan, lastGoodPos) { } } - var entry = entries[lastGoodPos]; - WipeDatafile(collection, type, datafile, entry.position + entry.realSize); }