//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Dr. Frank Celler //////////////////////////////////////////////////////////////////////////////// #include "datafile.h" #include "ApplicationFeatures/PageSizeFeature.h" #include "Basics/FileUtils.h" #include "Basics/StaticStrings.h" #include "Basics/StringUtils.h" #include "Basics/files.h" #include "Basics/hashes.h" #include "Basics/memory-map.h" #include "Basics/tri-strings.h" #include "Logger/Logger.h" #include "VocBase/DatafileHelper.h" #include "VocBase/ticks.h" #include // #define DEBUG_DATAFILE 1 using namespace arangodb; using namespace arangodb::basics; namespace { /// @brief check if a marker appears to be created by ArangoDB 28 static TRI_voc_crc_t Crc28(TRI_voc_crc_t crc, void const* data, size_t length) { static TRI_voc_crc_t const CrcPolynomial = 0xEDB88320; unsigned char* current = (unsigned char*) data; while (length--) { crc ^= *current++; for (unsigned int i = 0; i < 8; ++i) { if (crc & 1) { crc = (crc >> 1) ^ CrcPolynomial; } else { crc = crc >> 1; } } } return crc; } static bool IsMarker28 (void const* marker) { struct Marker28 { TRI_voc_size_t _size; TRI_voc_crc_t _crc; uint32_t _type; #ifdef TRI_PADDING_32 char _padding_df_marker[4]; #endif TRI_voc_tick_t _tick; }; TRI_voc_size_t zero = 0; off_t o = offsetof(Marker28, _crc); size_t n = sizeof(TRI_voc_crc_t); char const* ptr = static_cast(marker); Marker28 const* m = static_cast(marker); TRI_voc_crc_t crc = TRI_InitialCrc32(); crc = Crc28(crc, ptr, o); crc = Crc28(crc, (char*) &zero, n); crc = Crc28(crc, ptr + o + n, m->_size - o - n); crc = TRI_FinalCrc32(crc); return crc == m->_crc; } /// @brief calculates the actual CRC of a marker, without bounds checks static TRI_voc_crc_t CalculateCrcValue(TRI_df_marker_t const* marker) { TRI_voc_size_t zero = 0; off_t o = marker->offsetOfCrc(); size_t n = sizeof(TRI_voc_crc_t); char const* ptr = reinterpret_cast(marker); TRI_voc_crc_t crc = TRI_InitialCrc32(); crc = TRI_BlockCrc32(crc, ptr, o); crc = TRI_BlockCrc32(crc, (char*)&zero, n); crc = TRI_BlockCrc32(crc, ptr + o + n, marker->getSize() - o - n); crc = TRI_FinalCrc32(crc); return crc; } /// @brief checks a CRC of a marker, with bounds checks static bool CheckCrcMarker(TRI_df_marker_t const* marker, char const* end) { TRI_voc_size_t const size = marker->getSize(); if (size < sizeof(TRI_df_marker_t)) { return false; } if (reinterpret_cast(marker) + size > end) { return false; } auto expected = CalculateCrcValue(marker); return marker->getCrc() == expected; } } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a new datafile /// /// returns the file descriptor or -1 if the file cannot be created //////////////////////////////////////////////////////////////////////////////// static int CreateDatafile(std::string const& filename, TRI_voc_size_t maximalSize) { TRI_ERRORBUF; // open the file int fd = TRI_CREATE(filename.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); TRI_IF_FAILURE("CreateDatafile1") { // intentionally fail TRI_CLOSE(fd); fd = -1; errno = ENOSPC; } if (fd < 0) { if (errno == ENOSPC) { TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL); LOG(ERR) << "cannot create datafile '" << filename << "': " << TRI_last_error(); } else { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); LOG(ERR) << "cannot create datafile '" << filename << "': " << TRI_GET_ERRORBUF; } return -1; } // fill file with zeros from FileNullBuffer size_t writeSize = TRI_GetNullBufferSizeFiles(); size_t written = 0; while (written < maximalSize) { if (writeSize + written > maximalSize) { writeSize = maximalSize - written; } ssize_t writeResult = TRI_WRITE(fd, TRI_GetNullBufferFiles(), static_cast(writeSize)); TRI_IF_FAILURE("CreateDatafile2") { // intentionally fail writeResult = -1; errno = ENOSPC; } if (writeResult < 0) { if (errno == ENOSPC) { TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL); LOG(ERR) << "cannot create datafile '" << filename << "': " << TRI_last_error(); } else { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); LOG(ERR) << "cannot create datafile '" << filename << "': " << TRI_GET_ERRORBUF; } TRI_CLOSE(fd); TRI_UnlinkFile(filename.c_str()); return -1; } written += static_cast(writeResult); } // go back to offset 0 TRI_lseek_t offset = TRI_LSEEK(fd, (TRI_lseek_t)0, SEEK_SET); if (offset == (TRI_lseek_t)-1) { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); TRI_CLOSE(fd); // remove empty file TRI_UnlinkFile(filename.c_str()); LOG(ERR) << "cannot seek in datafile '" << filename << "': '" << TRI_GET_ERRORBUF << "'"; return -1; } return fd; } //////////////////////////////////////////////////////////////////////////////// /// @brief truncates a datafile /// /// Create a truncated datafile, seal it and rename the old. //////////////////////////////////////////////////////////////////////////////// static int TruncateAndSealDatafile(TRI_datafile_t* datafile, TRI_voc_size_t vocSize) { TRI_ERRORBUF; void* data; void* mmHandle; // this function must not be called for non-physical datafiles TRI_ASSERT(datafile->isPhysical()); size_t pageSize = PageSizeFeature::getPageSize(); // use multiples of page-size size_t maximalSize = ((vocSize + sizeof(TRI_df_footer_marker_t) + pageSize - 1) / pageSize) * pageSize; // sanity check if (sizeof(TRI_df_header_marker_t) + sizeof(TRI_df_footer_marker_t) > maximalSize) { LOG(ERR) << "cannot create datafile '" << datafile->getName() << "', maximal size " << maximalSize << " is too small"; return TRI_set_errno(TRI_ERROR_ARANGO_MAXIMAL_SIZE_TOO_SMALL); } // open the file std::string filename = arangodb::basics::FileUtils::buildFilename(datafile->_filename, ".new"); int fd = TRI_CREATE(filename.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, S_IRUSR | S_IWUSR); if (fd < 0) { TRI_SYSTEM_ERROR(); LOG(ERR) << "cannot create new datafile '" << filename << "': " << TRI_GET_ERRORBUF; return TRI_set_errno(TRI_ERROR_SYS_ERROR); } // go back to the beginning of the file TRI_lseek_t offset = TRI_LSEEK(fd, (TRI_lseek_t)(maximalSize - 1), SEEK_SET); if (offset == (TRI_lseek_t)-1) { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); TRI_CLOSE(fd); // remove empty file TRI_UnlinkFile(filename.c_str()); LOG(ERR) << "cannot seek in new datafile '" << filename << "': " << TRI_GET_ERRORBUF; return TRI_ERROR_SYS_ERROR; } char zero = 0; int res = TRI_WRITE(fd, &zero, 1); if (res < 0) { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); TRI_CLOSE(fd); // remove empty file TRI_UnlinkFile(filename.c_str()); LOG(ERR) << "cannot create datafile '" << filename << "': " << TRI_GET_ERRORBUF; return TRI_ERROR_SYS_ERROR; } // memory map the data res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, &mmHandle, 0, &data); if (res != TRI_ERROR_NO_ERROR) { TRI_SYSTEM_ERROR(); TRI_set_errno(res); TRI_CLOSE(fd); // remove empty file TRI_UnlinkFile(filename.c_str()); LOG(ERR) << "cannot memory map file '" << filename << "': " << TRI_GET_ERRORBUF; LOG(ERR) << "The database directory might reside on a shared folder " "(VirtualBox, VMWare) or an NFS " "mounted volume which does not allow memory mapped files."; return TRI_errno(); } // copy the data memcpy(data, datafile->_data, vocSize); // patch the datafile structure res = TRI_UNMMFile(datafile->_data, datafile->_initSize, datafile->_fd, &datafile->_mmHandle); if (res < 0) { TRI_CLOSE(datafile->_fd); LOG(ERR) << "munmap failed with: " << res; return res; } // ............................................................................................. // For windows: Mem mapped files use handles // the datafile->_mmHandle handle object has been closed in the underlying // TRI_UNMMFile(...) call above so we do not need to close it for the // associated file below // ............................................................................................. TRI_CLOSE(datafile->_fd); datafile->_data = static_cast(data); datafile->_next = (char*)(data) + vocSize; datafile->_currentSize = vocSize; // do not change _initSize! TRI_ASSERT(datafile->_initSize == datafile->_maximalSize); datafile->_maximalSize = static_cast(maximalSize); datafile->_fd = fd; datafile->_mmHandle = mmHandle; datafile->_state = TRI_DF_STATE_CLOSED; datafile->_full = false; datafile->_isSealed = false; datafile->_synced = static_cast(data); datafile->_written = datafile->_next; // rename files std::string oldname = arangodb::basics::FileUtils::buildFilename(datafile->_filename, ".corrupted"); res = TRI_RenameFile(datafile->_filename.c_str(), oldname.c_str()); if (res != TRI_ERROR_NO_ERROR) { return res; } res = TRI_RenameFile(filename.c_str(), datafile->_filename.c_str()); if (res != TRI_ERROR_NO_ERROR) { return res; } // need to reset the datafile state here to write, otherwise the following // call will return an error datafile->_state = TRI_DF_STATE_WRITE; return datafile->seal(); } //////////////////////////////////////////////////////////////////////////////// /// @brief tries to repair a datafile //////////////////////////////////////////////////////////////////////////////// static bool TryRepairDatafile(TRI_datafile_t* datafile) { // this function must not be called for non-physical datafiles TRI_ASSERT(datafile->isPhysical()); char* ptr = datafile->_data; char* end = datafile->_data + datafile->_currentSize; if (datafile->_currentSize == 0) { end = datafile->_data + datafile->_maximalSize; } TRI_voc_size_t currentSize = 0; while (ptr < end) { TRI_df_marker_t* marker = reinterpret_cast(ptr); TRI_voc_size_t const size = marker->getSize(); if (size == 0) { // reached end return true; } if (size < sizeof(TRI_df_marker_t) || ptr + size > end) { // marker too small or too big return false; } if (!TRI_IsValidMarkerDatafile(marker)) { // unknown marker type return false; } if (marker->getType() != 0) { if (!CheckCrcMarker(marker, end)) { // CRC mismatch! auto next = reinterpret_cast(marker) + size; auto p = next; if (p < end) { // check if the rest of the datafile is only followed by NULL bytes bool isFollowedByNullBytes = true; while (p < end) { if (*p != '\0') { isFollowedByNullBytes = false; break; } ++p; } if (isFollowedByNullBytes) { // only last marker in datafile was corrupt. fix the datafile in // place LOG(INFO) << "truncating datafile '" << datafile->getName() << "' at position " << currentSize; int res = TruncateAndSealDatafile(datafile, currentSize); return (res == TRI_ERROR_NO_ERROR); } // there is some other stuff following. now inspect it... TRI_ASSERT(next <= end); if (next < end) { // there is a next marker auto nextMarker = reinterpret_cast(next); if (nextMarker->getType() != 0 && nextMarker->getSize() >= sizeof(TRI_df_marker_t) && next + nextMarker->getSize() <= end && TRI_IsValidMarkerDatafile(nextMarker) && CheckCrcMarker(nextMarker, end)) { // next marker looks good. // create a temporary buffer auto buffer = std::unique_ptr(new char[size]); // create a new marker in the temporary buffer auto temp = reinterpret_cast(buffer.get()); DatafileHelper::InitMarker( reinterpret_cast(buffer.get()), TRI_DF_MARKER_BLANK, static_cast(size)); temp->setCrc(CalculateCrcValue(temp)); // all done. now copy back the marker into the file memcpy(static_cast(ptr), buffer.get(), static_cast(size)); buffer.reset(); // don't need the buffer anymore bool ok = datafile->sync(ptr, (ptr + size)); if (ok) { LOG(INFO) << "zeroed single invalid marker in datafile '" << datafile->getName() << "' at position " << currentSize; } else { LOG(ERR) << "could not zero single invalid marker in datafile '" << datafile->getName() << "' at position " << currentSize; return false; } } else { // next marker looks broken, too. int res = TruncateAndSealDatafile(datafile, currentSize); return (res == TRI_ERROR_NO_ERROR); } } } } } size_t alignedSize = DatafileHelper::AlignedMarkerSize(marker); currentSize += static_cast(alignedSize); if (marker->getType() == TRI_DF_MARKER_FOOTER) { return true; } ptr += alignedSize; } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief fixes a corrupted datafile //////////////////////////////////////////////////////////////////////////////// static bool FixDatafile(TRI_datafile_t* datafile, TRI_voc_size_t currentSize) { LOG(WARN) << "datafile '" << datafile->getName() << "' is corrupted at position " << currentSize; LOG(WARN) << "setting datafile '" << datafile->getName() << "' to read-only and ignoring all data from this file beyond this position"; datafile->_currentSize = currentSize; TRI_ASSERT(datafile->_initSize == datafile->_maximalSize); datafile->_maximalSize = static_cast(currentSize); datafile->_next = datafile->_data + datafile->_currentSize; datafile->_full = true; datafile->_state = TRI_DF_STATE_READ; datafile->_isSealed = true; return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks a datafile //////////////////////////////////////////////////////////////////////////////// static bool CheckDatafile(TRI_datafile_t* datafile, bool ignoreFailures) { // this function must not be called for non-physical datafiles TRI_ASSERT(datafile->isPhysical()); char* ptr = datafile->_data; char* end = datafile->_data + datafile->_currentSize; TRI_voc_size_t currentSize = 0; if (datafile->_currentSize == 0) { LOG(WARN) << "current size is 0 in read-only datafile '" << datafile->getName() << "', trying to fix"; end = datafile->_data + datafile->_maximalSize; } TRI_voc_tick_t maxTick = 0; auto updateTick = [](TRI_voc_tick_t maxTick) -> void { TRI_UpdateTickServer(maxTick); }; while (ptr < end) { TRI_df_marker_t* marker = reinterpret_cast(ptr); TRI_voc_size_t const size = marker->getSize(); TRI_voc_tick_t const tick = marker->getTick(); TRI_df_marker_type_t const type = marker->getType(); #ifdef DEBUG_DATAFILE LOG(TRACE) << "MARKER: size " << size << ", tick " << tick << ", crc " << marker->getCrc() << ", type " << type; #endif if (size == 0) { LOG(DEBUG) << "reached end of datafile '" << datafile->getName() << "' data, current size " << currentSize; datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; updateTick(maxTick); return true; } if (size < sizeof(TRI_df_marker_t)) { if (ignoreFailures) { return FixDatafile(datafile, currentSize); } datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; datafile->_state = TRI_DF_STATE_OPEN_ERROR; LOG(WARN) << "marker in datafile '" << datafile->getName() << "' too small, size " << size << ", should be at least " << sizeof(TRI_df_marker_t); updateTick(maxTick); return false; } // prevent reading over the end of the file if (ptr + size > end) { if (ignoreFailures) { return FixDatafile(datafile, currentSize); } datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; datafile->_state = TRI_DF_STATE_OPEN_ERROR; LOG(WARN) << "marker in datafile '" << datafile->getName() << "' points with size " << size << " beyond end of file"; updateTick(maxTick); return false; } // the following sanity check offers some, but not 100% crash-protection // when reading // totally corrupted datafiles if (!TRI_IsValidMarkerDatafile(marker)) { if (type == 0 && size < 128) { // ignore markers with type 0 and a small size LOG(WARN) << "ignoring suspicious marker in datafile '" << datafile->getName() << "': type: " << type << ", size: " << size; } else { if (ignoreFailures) { return FixDatafile(datafile, currentSize); } datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; datafile->_state = TRI_DF_STATE_OPEN_ERROR; LOG(WARN) << "marker in datafile '" << datafile->getName() << "' is corrupt: type: " << type << ", size: " << size; updateTick(maxTick); return false; } } if (type != 0) { bool ok = CheckCrcMarker(marker, end); if (!ok) { // CRC mismatch! bool nextMarkerOk = false; if (size > 0) { auto next = reinterpret_cast(marker) + size; auto p = next; if (p < end) { // check if the rest of the datafile is only followed by NULL bytes bool isFollowedByNullBytes = true; while (p < end) { if (*p != '\0') { isFollowedByNullBytes = false; break; } ++p; } if (isFollowedByNullBytes) { // only last marker in datafile was corrupt. fix the datafile in // place LOG(WARN) << "datafile '" << datafile->getName() << "' automatically truncated at last marker"; ignoreFailures = true; } else { // there is some other stuff following. now inspect it... TRI_ASSERT(next <= end); if (next < end) { // there is a next marker auto nextMarker = reinterpret_cast(next); if (nextMarker->getType() != 0 && nextMarker->getSize() >= sizeof(TRI_df_marker_t) && next + nextMarker->getSize() <= end && TRI_IsValidMarkerDatafile(nextMarker) && CheckCrcMarker(nextMarker, end)) { // next marker looks good. nextMarkerOk = true; } } else { // EOF nextMarkerOk = true; } } } } if (ignoreFailures) { return FixDatafile(datafile, currentSize); } datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; datafile->_state = TRI_DF_STATE_OPEN_ERROR; LOG(WARN) << "crc mismatch found in datafile '" << datafile->getName() << "' at position " << currentSize << ". expected crc: " << CalculateCrcValue(marker) << ", actual crc: " << marker->getCrc(); if (nextMarkerOk) { LOG(INFO) << "data directly following this marker looks ok so repairing the marker may recover it"; } else { LOG(WARN) << "data directly following this marker cannot be analyzed"; } updateTick(maxTick); return false; } } if (tick > maxTick) { maxTick = tick; } size_t alignedSize = DatafileHelper::AlignedMarkerSize(marker); currentSize += static_cast(alignedSize); if (marker->getType() == TRI_DF_MARKER_FOOTER) { LOG(DEBUG) << "found footer, reached end of datafile '" << datafile->getName() << "', current size " << currentSize; datafile->_isSealed = true; datafile->_currentSize = currentSize; datafile->_next = datafile->_data + datafile->_currentSize; updateTick(maxTick); return true; } ptr += alignedSize; } updateTick(maxTick); return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief extract the numeric part from a filename /// the filename must look like this: /.*type-abc\.ending$/, where abc is /// a number, and type and ending are arbitrary letters //////////////////////////////////////////////////////////////////////////////// static uint64_t GetNumericFilenamePart(char const* filename) { char const* pos1 = strrchr(filename, '.'); if (pos1 == nullptr) { return 0; } char const* pos2 = strrchr(filename, '-'); if (pos2 == nullptr || pos2 > pos1) { return 0; } return StringUtils::uint64(pos2 + 1, pos1 - pos2 - 1); } //////////////////////////////////////////////////////////////////////////////// /// @brief create the initial datafile header marker //////////////////////////////////////////////////////////////////////////////// static int WriteInitialHeaderMarker(TRI_datafile_t* datafile, TRI_voc_fid_t fid, TRI_voc_size_t maximalSize) { // create the header TRI_df_header_marker_t header = DatafileHelper::CreateHeaderMarker( maximalSize, static_cast(fid)); // reserve space and write header to file TRI_df_marker_t* position; int res = TRI_ReserveElementDatafile(datafile, header.base.getSize(), &position, 0); if (res == TRI_ERROR_NO_ERROR) { TRI_ASSERT(position != nullptr); res = TRI_WriteCrcElementDatafile(datafile, position, &header.base, false); } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a new anonymous datafile /// /// this is only supported on certain platforms (Linux, MacOS) //////////////////////////////////////////////////////////////////////////////// #ifdef TRI_HAVE_ANONYMOUS_MMAP static TRI_datafile_t* CreateAnonymousDatafile(TRI_voc_fid_t fid, TRI_voc_size_t maximalSize) { #ifdef TRI_MMAP_ANONYMOUS // fd -1 is required for "real" anonymous regions int fd = -1; int flags = TRI_MMAP_ANONYMOUS | MAP_SHARED; #else // ugly workaround if MAP_ANONYMOUS is not available int fd = TRI_OPEN("/dev/zero", O_RDWR | TRI_O_CLOEXEC); if (fd == -1) { return nullptr; } int flags = MAP_PRIVATE; #endif // memory map the data void* data; void* mmHandle; ssize_t res = TRI_MMFile(nullptr, maximalSize, PROT_WRITE | PROT_READ, flags, fd, &mmHandle, 0, &data); #ifdef MAP_ANONYMOUS // nothing to do #else // close auxilliary file TRI_CLOSE(fd); fd = -1; #endif if (res != TRI_ERROR_NO_ERROR) { TRI_set_errno(res); LOG(ERR) << "cannot memory map anonymous region: " << TRI_last_error(); LOG(ERR) << "The database directory might reside on a shared folder " "(VirtualBox, VMWare) or an NFS " "mounted volume which does not allow memory mapped files."; return nullptr; } return new TRI_datafile_t(StaticStrings::Empty, fd, mmHandle, maximalSize, 0, fid, static_cast(data)); } #endif //////////////////////////////////////////////////////////////////////////////// /// @brief creates a new physical datafile //////////////////////////////////////////////////////////////////////////////// static TRI_datafile_t* CreatePhysicalDatafile(std::string const& filename, TRI_voc_fid_t fid, TRI_voc_size_t maximalSize) { TRI_ASSERT(!filename.empty()); int fd = CreateDatafile(filename, maximalSize); if (fd < 0) { // an error occurred return nullptr; } // memory map the data void* data; void* mmHandle; int flags = MAP_SHARED; #ifdef __linux__ // try populating the mapping already flags |= MAP_POPULATE; #endif ssize_t res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, flags, fd, &mmHandle, 0, &data); if (res != TRI_ERROR_NO_ERROR) { TRI_set_errno(res); TRI_CLOSE(fd); // remove empty file TRI_UnlinkFile(filename.c_str()); LOG(ERR) << "cannot memory map file '" << filename << "': '" << TRI_errno_string((int)res) << "'"; LOG(ERR) << "The database directory might reside on a shared folder " "(VirtualBox, VMWare) or an NFS " "mounted volume which does not allow memory mapped files."; return nullptr; } // create datafile structure try { return new TRI_datafile_t(filename, fd, mmHandle, maximalSize, 0, fid, static_cast(data)); } catch (...) { TRI_CLOSE(fd); return nullptr; } } //////////////////////////////////////////////////////////////////////////////// /// @brief opens a datafile //////////////////////////////////////////////////////////////////////////////// static TRI_datafile_t* OpenDatafile(std::string const& filename, bool ignoreErrors) { TRI_ERRORBUF; void* data; TRI_stat_t status; void* mmHandle; // this function must not be called for non-physical datafiles TRI_ASSERT(!filename.empty()); TRI_voc_fid_t fid = GetNumericFilenamePart(filename.c_str()); // .......................................................................... // attempt to open a datafile file // .......................................................................... int fd = TRI_OPEN(filename.c_str(), O_RDWR | TRI_O_CLOEXEC); if (fd < 0) { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); LOG(ERR) << "cannot open datafile '" << filename << "': '" << TRI_GET_ERRORBUF << "'"; return nullptr; } // compute the size of the file int res = TRI_FSTAT(fd, &status); if (res < 0) { TRI_SYSTEM_ERROR(); TRI_set_errno(TRI_ERROR_SYS_ERROR); TRI_CLOSE(fd); LOG(ERR) << "cannot get status of datafile '" << filename << "': " << TRI_GET_ERRORBUF; return nullptr; } // check that file is not too small TRI_voc_size_t size = static_cast(status.st_size); if (size < sizeof(TRI_df_header_marker_t) + sizeof(TRI_df_footer_marker_t)) { TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); TRI_CLOSE(fd); LOG(ERR) << "datafile '" << filename << "' is corrupt, size is only " << (unsigned int)size; return nullptr; } // read header from file char buffer[128]; memset(&buffer[0], 0, sizeof(buffer)); ssize_t len = sizeof(TRI_df_header_marker_t); ssize_t toRead = sizeof(buffer); if (toRead > static_cast(status.st_size)) { toRead = static_cast(status.st_size); } bool ok = TRI_ReadPointer(fd, &buffer[0], toRead); if (!ok) { LOG(ERR) << "cannot read datafile header from '" << filename << "': " << TRI_last_error(); TRI_CLOSE(fd); return nullptr; } char const* ptr = reinterpret_cast(&buffer[0]); char const* end = static_cast(ptr) + len; TRI_df_header_marker_t const* header = reinterpret_cast(&buffer[0]); // check CRC ok = CheckCrcMarker(reinterpret_cast(ptr), end); if (!ok) { if (IsMarker28(ptr)) { LOG(ERR) << "datafile found from older version of ArangoDB. " << "Please dump data from that version with arangodump " << "and reload it into this ArangoDB instance with arangorestore"; TRI_CLOSE(fd); TRI_set_errno(TRI_ERROR_NOT_IMPLEMENTED); FATAL_ERROR_EXIT(); } TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); LOG(ERR) << "corrupted datafile header read from '" << filename << "'"; if (!ignoreErrors) { TRI_CLOSE(fd); return nullptr; } } // check the datafile version if (ok) { if (header->_version != TRI_DF_VERSION) { TRI_set_errno(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE); LOG(ERR) << "unknown datafile version '" << header->_version << "' in datafile '" << filename << "'"; if (!ignoreErrors) { TRI_CLOSE(fd); return nullptr; } } } // check the maximal size if (size > header->_maximalSize) { LOG(DEBUG) << "datafile '" << filename << "' has size '" << size << "', but maximal size is '" << header->_maximalSize << "'"; } // map datafile into memory res = TRI_MMFile(0, size, PROT_READ, MAP_SHARED, fd, &mmHandle, 0, &data); if (res != TRI_ERROR_NO_ERROR) { TRI_set_errno(res); TRI_CLOSE(fd); LOG(ERR) << "cannot memory map datafile '" << filename << "': " << TRI_errno_string(res); LOG(ERR) << "The database directory might reside on a shared folder " "(VirtualBox, VMWare) or an NFS " "mounted volume which does not allow memory mapped files."; return nullptr; } // create datafile structure try { return new TRI_datafile_t(filename, fd, mmHandle, size, size, fid, static_cast(data)); } catch (...) { TRI_UNMMFile(data, size, fd, &mmHandle); TRI_CLOSE(fd); return nullptr; } } //////////////////////////////////////////////////////////////////////////////// /// @brief creates either an anonymous or a physical datafile //////////////////////////////////////////////////////////////////////////////// TRI_datafile_t* TRI_CreateDatafile(std::string const& filename, TRI_voc_fid_t fid, TRI_voc_size_t maximalSize, bool withInitialMarkers) { TRI_datafile_t* datafile; size_t pageSize = PageSizeFeature::getPageSize(); TRI_ASSERT(pageSize >= 256); // use multiples of page-size maximalSize = (TRI_voc_size_t)(((maximalSize + pageSize - 1) / pageSize) * pageSize); // sanity check maximal size if (sizeof(TRI_df_header_marker_t) + sizeof(TRI_df_footer_marker_t) > maximalSize) { LOG(ERR) << "cannot create datafile, maximal size '" << (unsigned int)maximalSize << "' is too small"; TRI_set_errno(TRI_ERROR_ARANGO_MAXIMAL_SIZE_TOO_SMALL); return nullptr; } // create either an anonymous or a physical datafile if (filename.empty()) { #ifdef TRI_HAVE_ANONYMOUS_MMAP datafile = CreateAnonymousDatafile(fid, maximalSize); #else // system does not support anonymous mmap return nullptr; #endif } else { datafile = CreatePhysicalDatafile(filename, fid, maximalSize); } if (datafile == nullptr) { // an error occurred during creation return nullptr; } datafile->_state = TRI_DF_STATE_WRITE; if (withInitialMarkers) { int res = WriteInitialHeaderMarker(datafile, fid, maximalSize); if (res != TRI_ERROR_NO_ERROR) { LOG(ERR) << "cannot write header to datafile '" << datafile->getName() << "'"; TRI_UNMMFile(datafile->_data, datafile->_maximalSize, datafile->_fd, &datafile->_mmHandle); datafile->close(); delete datafile; return nullptr; } } LOG(DEBUG) << "created datafile '" << datafile->getName() << "' of size " << maximalSize << " and page-size " << pageSize; return datafile; } //////////////////////////////////////////////////////////////////////////////// /// @brief returns the name for a marker //////////////////////////////////////////////////////////////////////////////// char const* TRI_NameMarkerDatafile(TRI_df_marker_t const* marker) { switch (marker->getType()) { // general markers case TRI_DF_MARKER_HEADER: return "datafile header"; case TRI_DF_MARKER_FOOTER: return "footer"; case TRI_DF_MARKER_BLANK: return "blank marker (used when repairing datafiles)"; case TRI_DF_MARKER_COL_HEADER: return "collection header"; case TRI_DF_MARKER_PROLOGUE: return "prologue"; case TRI_DF_MARKER_VPACK_DOCUMENT: return "document"; case TRI_DF_MARKER_VPACK_REMOVE: return "remove document"; case TRI_DF_MARKER_VPACK_CREATE_COLLECTION: return "create collection"; case TRI_DF_MARKER_VPACK_DROP_COLLECTION: return "drop collection"; case TRI_DF_MARKER_VPACK_RENAME_COLLECTION: return "rename collection"; case TRI_DF_MARKER_VPACK_CHANGE_COLLECTION: return "change collection"; case TRI_DF_MARKER_VPACK_CREATE_INDEX: return "create index"; case TRI_DF_MARKER_VPACK_DROP_INDEX: return "drop index"; case TRI_DF_MARKER_VPACK_CREATE_DATABASE: return "create database"; case TRI_DF_MARKER_VPACK_DROP_DATABASE: return "drop database"; case TRI_DF_MARKER_VPACK_BEGIN_TRANSACTION: return "begin transaction"; case TRI_DF_MARKER_VPACK_COMMIT_TRANSACTION: return "commit transaction"; case TRI_DF_MARKER_VPACK_ABORT_TRANSACTION: return "abort transaction"; default: return "unused/unknown"; } } //////////////////////////////////////////////////////////////////////////////// /// @brief checks whether a marker is valid //////////////////////////////////////////////////////////////////////////////// bool TRI_IsValidMarkerDatafile(TRI_df_marker_t const* marker) { if (marker == nullptr) { return false; } // check marker type TRI_df_marker_type_t const type = marker->getType(); if (type <= TRI_DF_MARKER_MIN) { // marker type is less than minimum allowed type value return false; } if (type >= TRI_DF_MARKER_MAX) { // marker type is greater than maximum allowed type value return false; } if (marker->getSize() >= DatafileHelper::MaximalMarkerSize()) { // a single marker bigger than 256 MB seems unreasonable // note: this is an arbitrary limit return false; } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief reserves room for an element, advances the pointer /// /// note: maximalJournalSize is the collection's maximalJournalSize property, /// which may be different from the size of the current datafile /// some callers do not set the value of maximalJournalSize //////////////////////////////////////////////////////////////////////////////// int TRI_ReserveElementDatafile(TRI_datafile_t* datafile, TRI_voc_size_t size, TRI_df_marker_t** position, TRI_voc_size_t maximalJournalSize) { *position = nullptr; size = DatafileHelper::AlignedSize(size); if (datafile->_state != TRI_DF_STATE_WRITE) { if (datafile->_state == TRI_DF_STATE_READ) { LOG(ERR) << "cannot reserve marker, datafile is read-only"; return TRI_set_errno(TRI_ERROR_ARANGO_READ_ONLY); } return TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_STATE); } // check the maximal size if (size + DatafileHelper::JournalOverhead() > datafile->_maximalSize) { // marker is bigger than journal size. // adding the marker to this datafile will not work if (maximalJournalSize <= datafile->_maximalSize) { // the collection property 'maximalJournalSize' is equal to // or smaller than the size of this datafile // creating a new file and writing the marker into it will not work either return TRI_set_errno(TRI_ERROR_ARANGO_DOCUMENT_TOO_LARGE); } // if we get here, the collection's 'maximalJournalSize' property is // higher than the size of this datafile. // maybe the marker will fit into a new datafile with the bigger size? if (size + DatafileHelper::JournalOverhead() > maximalJournalSize) { // marker still won't fit return TRI_set_errno(TRI_ERROR_ARANGO_DOCUMENT_TOO_LARGE); } // fall-through intentional } // add the marker, leave enough room for the footer if (datafile->_currentSize + size + datafile->_footerSize > datafile->_maximalSize) { datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_DATAFILE_FULL); datafile->_full = true; LOG(TRACE) << "cannot write marker, not enough space"; return TRI_ERROR_ARANGO_DATAFILE_FULL; } *position = reinterpret_cast(datafile->_next); TRI_ASSERT(*position != nullptr); datafile->_next += size; datafile->_currentSize += size; return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief writes a marker to the datafile /// this function will write the marker as-is, without any CRC or tick updates //////////////////////////////////////////////////////////////////////////////// int TRI_WriteElementDatafile(TRI_datafile_t* datafile, void* position, TRI_df_marker_t const* marker, bool forceSync) { TRI_ASSERT(marker->getTick() > 0); TRI_ASSERT(marker->getSize() > 0); TRI_UpdateTicksDatafile(datafile, marker); if (datafile->_state != TRI_DF_STATE_WRITE) { if (datafile->_state == TRI_DF_STATE_READ) { LOG(ERR) << "cannot write marker, datafile is read-only"; return TRI_set_errno(TRI_ERROR_ARANGO_READ_ONLY); } return TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_STATE); } TRI_ASSERT(position != nullptr); // out of bounds check for writing into a datafile if (position == nullptr || position < (void*)datafile->_data || position >= (void*)(datafile->_data + datafile->_maximalSize)) { LOG(ERR) << "logic error. writing out of bounds of datafile '" << datafile->getName() << "'"; return TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_STATE); } memcpy(position, marker, static_cast(marker->getSize())); if (forceSync) { bool ok = datafile->sync(static_cast(position), reinterpret_cast(position) + marker->getSize()); if (!ok) { datafile->_state = TRI_DF_STATE_WRITE_ERROR; if (errno == ENOSPC) { datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL); } else { datafile->_lastError = TRI_set_errno(TRI_ERROR_SYS_ERROR); } LOG(ERR) << "msync failed with: " << TRI_last_error(); return datafile->_lastError; } else { LOG(TRACE) << "msync succeeded " << (void*) position << ", size " << marker->getSize(); } } return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief update tick values for a datafile //////////////////////////////////////////////////////////////////////////////// void TRI_UpdateTicksDatafile(TRI_datafile_t* datafile, TRI_df_marker_t const* marker) { TRI_df_marker_type_t const type = marker->getType(); if (type != TRI_DF_MARKER_HEADER && type != TRI_DF_MARKER_FOOTER && type != TRI_DF_MARKER_COL_HEADER) { // every marker but headers / footers counts TRI_voc_tick_t tick = marker->getTick(); if (datafile->_tickMin == 0) { datafile->_tickMin = tick; } if (datafile->_tickMax < tick) { datafile->_tickMax = tick; } if (datafile->_dataMin == 0) { datafile->_dataMin = tick; } if (datafile->_dataMax < tick) { datafile->_dataMax = tick; } } } //////////////////////////////////////////////////////////////////////////////// /// @brief checksums and writes a marker to the datafile //////////////////////////////////////////////////////////////////////////////// int TRI_WriteCrcElementDatafile(TRI_datafile_t* datafile, void* position, TRI_df_marker_t* marker, bool forceSync) { TRI_ASSERT(marker->getTick() != 0); if (datafile->isPhysical()) { TRI_voc_crc_t crc = TRI_InitialCrc32(); crc = TRI_BlockCrc32(crc, (char const*)marker, marker->getSize()); marker->setCrc(TRI_FinalCrc32(crc)); } return TRI_WriteElementDatafile(datafile, position, marker, forceSync); } //////////////////////////////////////////////////////////////////////////////// /// @brief iterates over a datafile /// also may set datafile's min/max tick values /// deprecated //////////////////////////////////////////////////////////////////////////////// bool TRI_IterateDatafile(TRI_datafile_t* datafile, bool (*iterator)(TRI_df_marker_t const*, void*, TRI_datafile_t*), void* data) { TRI_ASSERT(iterator != nullptr); LOG(TRACE) << "iterating over datafile '" << datafile->getName() << "', fid: " << datafile->_fid; char const* ptr = datafile->_data; char const* end = datafile->_data + datafile->_currentSize; if (datafile->_state != TRI_DF_STATE_READ && datafile->_state != TRI_DF_STATE_WRITE) { TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_STATE); return false; } while (ptr < end) { auto const* marker = reinterpret_cast(ptr); if (marker->getSize() == 0) { return true; } // update the tick statistics TRI_UpdateTicksDatafile(datafile, marker); if (!iterator(marker, data, datafile)) { return false; } ptr += DatafileHelper::AlignedMarkerSize(marker); } return true; } /// @brief iterates over a datafile /// also may set datafile's min/max tick values bool TRI_IterateDatafile(TRI_datafile_t* datafile, std::function const& cb) { LOG(TRACE) << "iterating over datafile '" << datafile->getName() << "', fid: " << datafile->_fid; char const* ptr = datafile->_data; char const* end = datafile->_data + datafile->_currentSize; if (datafile->_state != TRI_DF_STATE_READ && datafile->_state != TRI_DF_STATE_WRITE) { TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_STATE); return false; } while (ptr < end) { auto const* marker = reinterpret_cast(ptr); if (marker->getSize() == 0) { return true; } // update the tick statistics TRI_UpdateTicksDatafile(datafile, marker); if (!cb(marker, datafile)) { return false; } ptr += DatafileHelper::AlignedMarkerSize(marker); } return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief opens an existing datafile /// /// The datafile will be opened read-only if a footer is found //////////////////////////////////////////////////////////////////////////////// TRI_datafile_t* TRI_OpenDatafile(std::string const& filename, bool ignoreFailures) { // this function must not be called for non-physical datafiles TRI_ASSERT(!filename.empty()); TRI_datafile_t* datafile = OpenDatafile(filename, false); if (datafile == nullptr) { return nullptr; } // check the datafile by scanning markers bool ok = CheckDatafile(datafile, ignoreFailures); if (!ok) { TRI_UNMMFile(datafile->_data, datafile->_maximalSize, datafile->_fd, &datafile->_mmHandle); TRI_CLOSE(datafile->_fd); LOG(ERR) << "datafile '" << datafile->getName() << "' is corrupt"; // must free datafile here delete datafile; return nullptr; } // change to read-write if no footer has been found if (!datafile->_isSealed) { datafile->_state = TRI_DF_STATE_WRITE; TRI_ProtectMMFile(datafile->_data, datafile->_maximalSize, PROT_READ | PROT_WRITE, datafile->_fd, &datafile->_mmHandle); } // Advise on sequential use: TRI_MMFileAdvise(datafile->_data, datafile->_maximalSize, TRI_MADVISE_SEQUENTIAL); TRI_MMFileAdvise(datafile->_data, datafile->_maximalSize, TRI_MADVISE_WILLNEED); return datafile; } //////////////////////////////////////////////////////////////////////////////// /// @brief renames a datafile //////////////////////////////////////////////////////////////////////////////// int TRI_RenameDatafile(TRI_datafile_t* datafile, char const* filename) { // this function must not be called for non-physical datafiles TRI_ASSERT(datafile->isPhysical()); TRI_ASSERT(filename != nullptr); if (TRI_ExistsFile(filename)) { LOG(ERR) << "cannot overwrite datafile '" << filename << "'"; datafile->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_DATAFILE_ALREADY_EXISTS); return TRI_ERROR_ARANGO_DATAFILE_ALREADY_EXISTS; } int res = TRI_RenameFile(datafile->_filename.c_str(), filename); if (res != TRI_ERROR_NO_ERROR) { datafile->_state = TRI_DF_STATE_RENAME_ERROR; datafile->_lastError = TRI_set_errno(TRI_ERROR_SYS_ERROR); return res; } datafile->_filename = filename; return TRI_ERROR_NO_ERROR; } /// @brief seals a datafile, writes a footer, sets it to read-only int TRI_datafile_t::seal() { if (_state == TRI_DF_STATE_READ) { return TRI_ERROR_ARANGO_READ_ONLY; } if (_state != TRI_DF_STATE_WRITE) { return TRI_ERROR_ARANGO_ILLEGAL_STATE; } if (_isSealed) { return TRI_ERROR_ARANGO_DATAFILE_SEALED; } // set a proper tick value if (_tickMax == 0) { _tickMax = TRI_NewTickServer(); } // create the footer TRI_df_footer_marker_t footer = DatafileHelper::CreateFooterMarker(_tickMax); // reserve space and write footer to file _footerSize = 0; TRI_df_marker_t* position; int res = TRI_ReserveElementDatafile(this, footer.base.getSize(), &position, 0); if (res == TRI_ERROR_NO_ERROR) { TRI_ASSERT(position != nullptr); res = TRI_WriteCrcElementDatafile(this, position, &footer.base, false); } if (res != TRI_ERROR_NO_ERROR) { return res; } // sync file bool ok = sync(_synced, reinterpret_cast(_data) + _currentSize); if (!ok) { _state = TRI_DF_STATE_WRITE_ERROR; if (errno == ENOSPC) { _lastError = TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL); } else { _lastError = TRI_errno(); } LOG(ERR) << "msync failed with: " << TRI_last_error(); } // everything is now synced _synced = _written; TRI_ProtectMMFile(_data, _maximalSize, PROT_READ, _fd, &_mmHandle); // seal datafile if (ok) { _isSealed = true; _state = TRI_DF_STATE_READ; // note: _initSize must remain constant TRI_ASSERT(_initSize == _maximalSize); _maximalSize = _currentSize; } if (!ok) { return _lastError; } if (isPhysical()) { // From now on we predict random access (until collection or compaction): TRI_MMFileAdvise(_data, _maximalSize, TRI_MADVISE_RANDOM); } return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief truncates a datafile and seals it /// this is called from the recovery procedure only //////////////////////////////////////////////////////////////////////////////// int TRI_TruncateDatafile(std::string const& path, TRI_voc_size_t position) { // this function must not be called for non-physical datafiles TRI_ASSERT(!path.empty()); TRI_datafile_t* datafile = OpenDatafile(path, true); if (datafile == nullptr) { return TRI_ERROR_ARANGO_DATAFILE_UNREADABLE; } int res = TruncateAndSealDatafile(datafile, position); delete datafile; return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief try to repair a datafile //////////////////////////////////////////////////////////////////////////////// bool TRI_TryRepairDatafile(char const* path) { // this function must not be called for non-physical datafiles TRI_ASSERT(path != nullptr); TRI_datafile_t* datafile = OpenDatafile(path, true); if (datafile == nullptr) { return false; } // set to read/write access TRI_ProtectMMFile(datafile->_data, datafile->_maximalSize, PROT_READ | PROT_WRITE, datafile->_fd, &datafile->_mmHandle); bool result = TryRepairDatafile(datafile); delete datafile; return result; } //////////////////////////////////////////////////////////////////////////////// /// @brief diagnoses a marker //////////////////////////////////////////////////////////////////////////////// static std::string DiagnoseMarker(TRI_df_marker_t const* marker, char const* end) { std::ostringstream result; if (marker == nullptr) { return "marker is undefined. should not happen"; } // check marker type TRI_df_marker_type_t type = marker->getType(); if (type <= TRI_DF_MARKER_MIN) { // marker type is less than minimum allowed type value result << "marker type value (" << static_cast(type) << ") is wrong. expecting value higher than " << TRI_DF_MARKER_MIN; return result.str(); } if (type >= TRI_DF_MARKER_MAX) { // marker type is greater than maximum allowed type value result << "marker type value (" << static_cast(type) << ") is wrong. expecting value less than " << TRI_DF_MARKER_MAX; return result.str(); } TRI_voc_size_t size = marker->getSize(); if (size >= DatafileHelper::MaximalMarkerSize()) { // a single marker bigger than this size seems unreasonable // note: this is an arbitrary limit result << "marker size value (" << size << ") is wrong. expecting value less than " << DatafileHelper::MaximalMarkerSize(); return result.str(); } if (size < sizeof(TRI_df_marker_t)) { result << "marker size is too small (" << size << "). expecting at least " << sizeof(TRI_df_marker_t) << " bytes"; return result.str(); } if (reinterpret_cast(marker) + size > end) { return "marker size is beyond end of datafile"; } TRI_voc_crc_t crc = CalculateCrcValue(marker); if (marker->getCrc() == crc) { return "crc checksum is correct"; } result << "crc checksum (hex " << std::hex << marker->getCrc() << ") is wrong. expecting (hex " << std::hex << crc << ")"; return result.str(); } //////////////////////////////////////////////////////////////////////////////// /// @brief scans a datafile //////////////////////////////////////////////////////////////////////////////// static DatafileScan ScanDatafile(TRI_datafile_t const* datafile) { // this function must not be called for non-physical datafiles TRI_ASSERT(datafile->isPhysical()); char* ptr = datafile->_data; char* end = datafile->_data + datafile->_currentSize; TRI_voc_size_t currentSize = 0; DatafileScan scan; scan.currentSize = datafile->_currentSize; scan.maximalSize = datafile->_maximalSize; if (datafile->_currentSize == 0) { end = datafile->_data + datafile->_maximalSize; } while (ptr < end) { TRI_df_marker_t* marker = reinterpret_cast(ptr); DatafileScanEntry entry; entry.position = static_cast(ptr - datafile->_data); entry.size = marker->getSize(); entry.realSize = static_cast(DatafileHelper::AlignedMarkerSize(marker)); entry.tick = marker->getTick(); entry.type = marker->getType(); entry.status = 1; entry.typeName = TRI_NameMarkerDatafile(marker); if (marker->getSize() == 0 && marker->getCrc() == 0 && marker->getType() == 0 && marker->getTick() == 0) { entry.status = 2; scan.endPosition = currentSize; scan.entries.emplace_back(entry); return scan; } ++scan.numberMarkers; if (marker->getSize() == 0) { entry.status = 3; scan.status = 2; scan.endPosition = currentSize; scan.entries.emplace_back(entry); return scan; } if (marker->getSize() < sizeof(TRI_df_marker_t)) { entry.status = 4; entry.diagnosis = DiagnoseMarker(marker, end); scan.endPosition = currentSize; scan.status = 3; scan.entries.emplace_back(entry); return scan; } if (!TRI_IsValidMarkerDatafile(marker)) { entry.status = 4; entry.diagnosis = DiagnoseMarker(marker, end); scan.endPosition = currentSize; scan.status = 3; scan.entries.emplace_back(entry); return scan; } bool ok = CheckCrcMarker(marker, end); if (!ok) { entry.status = 5; entry.diagnosis = DiagnoseMarker(marker, end); scan.status = 4; } TRI_df_marker_type_t const type = marker->getType(); if (type == TRI_DF_MARKER_VPACK_DOCUMENT || type == TRI_DF_MARKER_VPACK_REMOVE) { VPackSlice const slice(reinterpret_cast(marker) + DatafileHelper::VPackOffset(type)); TRI_ASSERT(slice.isObject()); entry.key = slice.get(StaticStrings::KeyString).copyString(); } scan.entries.emplace_back(entry); size_t size = DatafileHelper::AlignedMarkerSize(marker); currentSize += static_cast(size); if (marker->getType() == TRI_DF_MARKER_FOOTER) { scan.endPosition = currentSize; scan.isSealed = true; return scan; } ptr += size; } return scan; } //////////////////////////////////////////////////////////////////////////////// /// @brief returns information about the datafile //////////////////////////////////////////////////////////////////////////////// DatafileScan TRI_ScanDatafile(char const* path) { DatafileScan scan; // this function must not be called for non-physical datafiles TRI_ASSERT(path != nullptr); TRI_datafile_t* datafile = OpenDatafile(path, true); if (datafile != nullptr) { scan = ScanDatafile(datafile); delete datafile; } else { scan.currentSize = 0; scan.maximalSize = 0; scan.endPosition = 0; scan.numberMarkers = 0; scan.status = 5; scan.isSealed = false; } return scan; } TRI_datafile_t::TRI_datafile_t(std::string const& filename, int fd, void* mmHandle, TRI_voc_size_t maximalSize, TRI_voc_size_t currentSize, TRI_voc_fid_t fid, char* data) : _fid(fid), _state(TRI_DF_STATE_READ), _fd(fd), _mmHandle(mmHandle), _initSize(maximalSize), _maximalSize(maximalSize), _currentSize(currentSize), _footerSize(sizeof(TRI_df_footer_marker_t)), _data(data), _next(data + currentSize), _tickMin(0), _tickMax(0), _dataMin(0), _dataMax(0), _filename(filename), _lastError(TRI_ERROR_NO_ERROR), _full(false), _isSealed(false), _synced(data), _written(nullptr) { // filename is a string for physical datafiles, and NULL for anonymous regions // fd is a positive value for physical datafiles, and -1 for anonymous regions if (filename.empty()) { TRI_ASSERT(fd == -1); } else { TRI_ASSERT(fd >= 0); // Advise OS that sequential access is going to happen: TRI_MMFileAdvise(_data, _maximalSize, TRI_MADVISE_SEQUENTIAL); } } TRI_datafile_t::~TRI_datafile_t() { try { this->close(); } catch (...) { // silently continue as this is the destructor } } /// @brief return the name of a datafile std::string TRI_datafile_t::getName() const { if (_filename.empty()) { // anonymous regions do not have a filename return "anonymous region"; } // return name of the physical file return _filename; } /// @brief close a datafile int TRI_datafile_t::close() { if (_state == TRI_DF_STATE_READ || _state == TRI_DF_STATE_WRITE) { int res = TRI_UNMMFile(_data, _initSize, _fd, &_mmHandle); if (res != TRI_ERROR_NO_ERROR) { LOG(ERR) << "munmap failed with: " << res; _state = TRI_DF_STATE_WRITE_ERROR; _lastError = res; return res; } if (isPhysical()) { int res = TRI_CLOSE(_fd); if (res != TRI_ERROR_NO_ERROR) { LOG(ERR) << "unable to close datafile '" << getName() << "': " << res; } } _state = TRI_DF_STATE_CLOSED; _data = nullptr; _next = nullptr; _fd = -1; return TRI_ERROR_NO_ERROR; } if (_state == TRI_DF_STATE_CLOSED) { LOG(WARN) << "closing an already closed datafile '" << getName() << "'"; return TRI_ERROR_NO_ERROR; } return TRI_ERROR_ARANGO_ILLEGAL_STATE; } /// @brief sync the data of a datafile bool TRI_datafile_t::sync(char const* begin, char const* end) { if (!isPhysical()) { // anonymous regions do not need to be synced return true; } TRI_ASSERT(_fd >= 0); if (begin == end) { // no need to sync return true; } return TRI_MSync(_fd, begin, end); }