1
0
Fork 0
arangodb/arangod/RestHandler/RestExportHandler.cpp

525 lines
20 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// @brief export request handler
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2010-2014, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "RestExportHandler.h"
#include "Basics/Exceptions.h"
#include "Basics/json.h"
#include "Basics/MutexLocker.h"
#include "Utils/CollectionExport.h"
#include "Utils/Cursor.h"
#include "Utils/CursorRepository.h"
#include "Wal/LogfileManager.h"
using namespace triagens::arango;
using namespace triagens::rest;
// -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor
////////////////////////////////////////////////////////////////////////////////
RestExportHandler::RestExportHandler (HttpRequest* request)
: RestVocbaseBaseHandler(request),
_restrictions() {
}
// -----------------------------------------------------------------------------
// --SECTION-- Handler methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// {@inheritDoc}
////////////////////////////////////////////////////////////////////////////////
HttpHandler::status_t RestExportHandler::execute () {
if (ServerState::instance()->isCoordinator()) {
generateError(HttpResponse::NOT_IMPLEMENTED,
TRI_ERROR_CLUSTER_UNSUPPORTED,
"'/_api/export' is not yet supported in a cluster");
return status_t(HANDLER_DONE);
}
// extract the sub-request type
HttpRequest::HttpRequestType type = _request->requestType();
if (type == HttpRequest::HTTP_REQUEST_POST) {
createCursor();
return status_t(HANDLER_DONE);
}
if (type == HttpRequest::HTTP_REQUEST_PUT) {
modifyCursor();
return status_t(HANDLER_DONE);
}
if (type == HttpRequest::HTTP_REQUEST_DELETE) {
deleteCursor();
return status_t(HANDLER_DONE);
}
generateError(HttpResponse::METHOD_NOT_ALLOWED, TRI_ERROR_HTTP_METHOD_NOT_ALLOWED);
return status_t(HANDLER_DONE);
}
// -----------------------------------------------------------------------------
// --SECTION-- private methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief build options for the query as JSON
////////////////////////////////////////////////////////////////////////////////
triagens::basics::Json RestExportHandler::buildOptions (TRI_json_t const* json) {
auto getAttribute = [&json] (char const* name) {
return TRI_LookupObjectJson(json, name);
};
triagens::basics::Json options(triagens::basics::Json::Object);
auto attribute = getAttribute("count");
options.set("count", triagens::basics::Json(TRI_IsBooleanJson(attribute) ? attribute->_value._boolean : false));
attribute = getAttribute("batchSize");
options.set("batchSize", triagens::basics::Json(TRI_IsNumberJson(attribute) ? attribute->_value._number : 1000.0));
if (TRI_IsNumberJson(attribute) && static_cast<size_t>(attribute->_value._number) == 0) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_TYPE_ERROR, "expecting non-zero value for 'batchSize'");
}
attribute = getAttribute("limit");
if (TRI_IsNumberJson(attribute)) {
options.set("limit", triagens::basics::Json(attribute->_value._number));
}
attribute = getAttribute("flush");
options.set("flush", triagens::basics::Json(TRI_IsBooleanJson(attribute) ? attribute->_value._boolean : false));
if (! options.has("ttl")) {
attribute = getAttribute("ttl");
options.set("ttl", triagens::basics::Json(TRI_IsNumberJson(attribute) ? attribute->_value._number : 30.0));
}
attribute = getAttribute("flushWait");
options.set("flushWait", triagens::basics::Json(TRI_IsNumberJson(attribute) ? attribute->_value._number : 10.0));
// handle "restrict" parameter
attribute = getAttribute("restrict");
if (attribute != nullptr) {
if (! TRI_IsObjectJson(attribute)) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_TYPE_ERROR, "expecting object for 'restrict'");
}
// "restrict"."type"
auto type = TRI_LookupObjectJson(attribute, "type");
if (! TRI_IsStringJson(type)) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, "expecting string for 'restrict.type'");
}
std::string typeString = std::string(type->_value._string.data, type->_value._string.length - 1);
if (typeString == "include") {
_restrictions.type = CollectionExport::Restrictions::RESTRICTION_INCLUDE;
}
else if (typeString == "exclude") {
_restrictions.type = CollectionExport::Restrictions::RESTRICTION_EXCLUDE;
}
else {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, "expecting either 'include' or 'exclude' for 'restrict.type'");
}
// "restrict"."fields"
auto fields = TRI_LookupObjectJson(attribute, "fields");
if (! TRI_IsArrayJson(fields)) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, "expecting array for 'restrict.fields'");
}
size_t const n = TRI_LengthArrayJson(fields);
for (size_t i = 0; i < n; ++i) {
auto name = TRI_LookupArrayJson(fields, i);
if (TRI_IsStringJson(name)) {
_restrictions.fields.emplace(std::string(name->_value._string.data, name->_value._string.length - 1));
}
}
}
return options;
}
////////////////////////////////////////////////////////////////////////////////
/// @startDocuBlock JSF_post_api_export
/// @brief export all documents from a collection, using a cursor
///
/// @RESTHEADER{POST /_api/export, Create export cursor}
///
/// @RESTBODYPARAM{options,json,optional}
/// A JSON object with export options.
///
/// @RESTQUERYPARAMETERS
///
/// @RESTQUERYPARAM{collection,string,required}
/// The name of the collection to export.
///
/// @RESTDESCRIPTION
/// A call to this method creates a cursor containing all documents in the
/// specified collection. In contrast to other data-producing APIs, the internal
/// data structures produced by the export API are more lightweight, so it is
/// the preferred way to retrieve all documents from a collection.
///
/// Documents are returned in a similar manner as in the `/_api/cursor` REST API.
/// If all documents of the collection fit into the first batch, then no cursor
/// will be created, and the result object's *hasMore* attribute will be set to
/// *false*. If not all documents fit into the first batch, then the result
/// object's *hasMore* attribute will be set to *true*, and the *id* attribute
/// of the result will contain a cursor id.
///
/// The order in which the documents are returned is not specified.
///
/// By default, only those documents from the collection will be returned that are
/// stored in the collection's datafiles. Documents that are present in the write-ahead
/// log (WAL) at the time the export is run will not be exported.
///
/// To export these documents as well, the caller can issue a WAL flush request
/// before calling the export API or set the *flush* attribute. Setting the *flush*
/// option will trigger a WAL flush before the export so documents get copied from
/// the WAL to the collection datafiles.
///
/// The following attributes can be used inside the JSON request object to control
/// the export behavior:
///
/// - *flush*: if set to *true*, a WAL flush operation will be executed prior to the
/// export. The flush operation will start copying documents from the WAL to the
/// collection's datafiles. There will be an additional wait time of up
/// to *flushWait* seconds after the flush to allow the WAL collector to change
/// the adjusted document meta-data to point into the datafiles, too.
/// The default value is *false* (i.e. no flush) so most recently inserted or updated
/// documents from the collection might be missing in the export.
///
/// - *flushWait*: maximum wait time in seconds after a flush operation. The default
/// value is 10. This option only has an effect when *flush* is set to *true*.
///
/// - *count*: boolean flag that indicates whether the number of documents
/// in the result set should be returned in the "count" attribute of the result (optional).
/// Calculating the "count" attribute might in the future have a performance
/// impact so this option is turned off by default, and "count" is only returned
/// when requested.
///
/// - *batchSize*: maximum number of result documents to be transferred from
/// the server to the client in one roundtrip (optional). If this attribute is
/// not set, a server-controlled default value will be used.
///
/// - *limit*: an optional limit value, determining the maximum number of documents to
/// be included in the cursor. Omitting the *limit* attribute or setting it to 0 will
/// lead to no limit being used. If a limit is used, it is undefined which documents
/// from the collection will be included in the export and which will be excluded.
/// This is because there is no natural order of documents in a collection.
///
/// - *ttl*: an optional time-to-live for the cursor (in seconds). The cursor will be
/// removed on the server automatically after the specified amount of time. This
/// is useful to ensure garbage collection of cursors that are not fully fetched
/// by clients. If not set, a server-defined value will be used.
///
/// - *restrict*: an optional object containing an array of attribute names that will be
/// included or excluded when returning result documents. If specified, *fields* must
/// be an object and contain a *type* attribute which must be set to either *include*
/// or *exclude*. It must also contain a *fields* attribute containing an array of
/// attribute names to include or exclude. Matching of attribute names for inclusion
/// or exclusion will be done on the top level only. Specifying names of nested attributes
/// is not supported at the moment.
///
/// Not specifying *restrict* will by default return all attributes of each document.
///
/// If the result set can be created by the server, the server will respond with
/// *HTTP 201*. The body of the response will contain a JSON object with the
/// result set.
///
/// The returned JSON object has the following properties:
///
/// - *error*: boolean flag to indicate that an error occurred (*false*
/// in this case)
///
/// - *code*: the HTTP status code
///
/// - *result*: an array of result documents (might be empty if the collection was empty)
///
/// - *hasMore*: a boolean indicator whether there are more results
/// available for the cursor on the server
///
/// - *count*: the total number of result documents available (only
/// available if the query was executed with the *count* attribute set)
///
/// - *id*: id of temporary cursor created on the server (optional, see above)
///
/// If the JSON representation is malformed or the query specification is
/// missing from the request, the server will respond with *HTTP 400*.
///
/// The body of the response will contain a JSON object with additional error
/// details. The object has the following attributes:
///
/// - *error*: boolean flag to indicate that an error occurred (*true* in this case)
///
/// - *code*: the HTTP status code
///
/// - *errorNum*: the server error number
///
/// - *errorMessage*: a descriptive error message
///
/// Clients should always delete an export cursor result as early as possible because a
/// lingering export cursor will prevent the underlying collection from being
/// compacted or unloaded. By default, unused cursors will be deleted automatically
/// after a server-defined idle time, and clients can adjust this idle time by setting
/// the *ttl* value.
///
/// Note: this API is currently not supported on cluster coordinators.
///
/// @RESTRETURNCODES
///
/// @RESTRETURNCODE{201}
/// is returned if the result set can be created by the server.
///
/// @RESTRETURNCODE{400}
/// is returned if the JSON representation is malformed or the query specification is
/// missing from the request.
///
/// @RESTRETURNCODE{404}
/// The server will respond with *HTTP 404* in case a non-existing collection is
/// accessed in the query.
///
/// @RESTRETURNCODE{405}
/// The server will respond with *HTTP 405* if an unsupported HTTP method is used.
///
/// @RESTRETURNCODE{501}
/// The server will respond with *HTTP 501* if this API is called on a cluster
/// coordinator.
///
/// @endDocuBlock
////////////////////////////////////////////////////////////////////////////////
void RestExportHandler::createCursor () {
std::vector<std::string> const& suffix = _request->suffix();
if (suffix.size() != 0) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"expecting POST /_api/export");
return;
}
// extract the cid
bool found;
char const* name = _request->value("collection", found);
if (! found || *name == '\0') {
generateError(HttpResponse::BAD,
TRI_ERROR_ARANGO_COLLECTION_PARAMETER_MISSING,
"'collection' is missing, expecting " + EXPORT_PATH + "?collection=<identifier>");
return;
}
try {
std::unique_ptr<TRI_json_t> json(parseJsonBody());
if (json.get() == nullptr) {
return;
}
triagens::basics::Json options;
if (json.get() != nullptr) {
if (! TRI_IsObjectJson(json.get())) {
generateError(HttpResponse::BAD, TRI_ERROR_QUERY_EMPTY);
return;
}
options = buildOptions(json.get());
}
else {
// create an empty options object
options = triagens::basics::Json(triagens::basics::Json::Object);
}
uint64_t waitTime = 0;
bool flush = triagens::basics::JsonHelper::getBooleanValue(options.json(), "flush", false);
if (flush) {
// flush the logfiles so the export can fetch all documents
int res = triagens::wal::LogfileManager::instance()->flush(true, true, false);
if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION(res);
}
double flushWait = triagens::basics::JsonHelper::getNumericValue<double>(options.json(), "flushWait", 10.0);
waitTime = static_cast<uint64_t>(flushWait * 1000 * 1000); // flushWait is specified in s, but we need ns
}
size_t limit = triagens::basics::JsonHelper::getNumericValue<size_t>(options.json(), "limit", 0);
// this may throw!
std::unique_ptr<CollectionExport> collectionExport(new CollectionExport(_vocbase, name, _restrictions));
collectionExport->run(waitTime, limit);
{
size_t batchSize = triagens::basics::JsonHelper::getNumericValue<size_t>(options.json(), "batchSize", 1000);
double ttl = triagens::basics::JsonHelper::getNumericValue<double>(options.json(), "ttl", 30);
bool count = triagens::basics::JsonHelper::getBooleanValue(options.json(), "count", false);
_response = createResponse(HttpResponse::CREATED);
_response->setContentType("application/json; charset=utf-8");
auto cursors = static_cast<triagens::arango::CursorRepository*>(_vocbase->_cursorRepository);
TRI_ASSERT(cursors != nullptr);
// create a cursor from the result
triagens::arango::ExportCursor* cursor = cursors->createFromExport(collectionExport.get(), batchSize, ttl, count);
collectionExport.release();
try {
_response->body().appendChar('{');
cursor->dump(_response->body());
_response->body().appendText(",\"error\":false,\"code\":");
_response->body().appendInteger(static_cast<uint32_t>(_response->responseCode()));
_response->body().appendChar('}');
cursors->release(cursor);
}
catch (...) {
cursors->release(cursor);
throw;
}
}
}
catch (triagens::basics::Exception const& ex) {
generateError(HttpResponse::responseCode(ex.code()), ex.code(), ex.what());
}
catch (...) {
generateError(HttpResponse::SERVER_ERROR, TRI_ERROR_INTERNAL);
}
}
void RestExportHandler::modifyCursor () {
std::vector<std::string> const& suffix = _request->suffix();
if (suffix.size() != 1) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"expecting PUT /_api/export/<cursor-id>");
return;
}
std::string const& id = suffix[0];
auto cursors = static_cast<triagens::arango::CursorRepository*>(_vocbase->_cursorRepository);
TRI_ASSERT(cursors != nullptr);
auto cursorId = static_cast<triagens::arango::CursorId>(triagens::basics::StringUtils::uint64(id));
bool busy;
auto cursor = cursors->find(cursorId, busy);
if (cursor == nullptr) {
if (busy) {
generateError(HttpResponse::responseCode(TRI_ERROR_CURSOR_BUSY), TRI_ERROR_CURSOR_BUSY);
}
else {
generateError(HttpResponse::responseCode(TRI_ERROR_CURSOR_NOT_FOUND), TRI_ERROR_CURSOR_NOT_FOUND);
}
return;
}
try {
_response = createResponse(HttpResponse::OK);
_response->setContentType("application/json; charset=utf-8");
_response->body().appendChar('{');
cursor->dump(_response->body());
_response->body().appendText(",\"error\":false,\"code\":");
_response->body().appendInteger(static_cast<uint32_t>(_response->responseCode()));
_response->body().appendChar('}');
cursors->release(cursor);
}
catch (triagens::basics::Exception const& ex) {
cursors->release(cursor);
generateError(HttpResponse::responseCode(ex.code()), ex.code(), ex.what());
}
catch (...) {
cursors->release(cursor);
generateError(HttpResponse::SERVER_ERROR, TRI_ERROR_INTERNAL);
}
}
void RestExportHandler::deleteCursor () {
std::vector<std::string> const& suffix = _request->suffix();
if (suffix.size() != 1) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"expecting DELETE /_api/export/<cursor-id>");
return;
}
std::string const& id = suffix[0];
auto cursors = static_cast<triagens::arango::CursorRepository*>(_vocbase->_cursorRepository);
TRI_ASSERT(cursors != nullptr);
auto cursorId = static_cast<triagens::arango::CursorId>(triagens::basics::StringUtils::uint64(id));
bool found = cursors->remove(cursorId);
if (! found) {
generateError(HttpResponse::NOT_FOUND, TRI_ERROR_CURSOR_NOT_FOUND);
return;
}
_response = createResponse(HttpResponse::ACCEPTED);
_response->setContentType("application/json; charset=utf-8");
triagens::basics::Json json(triagens::basics::Json::Object);
json.set("id", triagens::basics::Json(id)); // id as a string!
json.set("error", triagens::basics::Json(false));
json.set("code", triagens::basics::Json(static_cast<double>(_response->responseCode())));
json.dump(_response->body());
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End: