////////////////////////////////////////////////////////////////////////////////
/// @brief import request handler
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
/// @author Copyright 2010-2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "RestImportHandler.h"
#include "Basics/JsonHelper.h"
#include "Basics/StringUtils.h"
#include "Basics/tri-strings.h"
#include "Rest/HttpRequest.h"
#include "VocBase/document-collection.h"
#include "VocBase/edge-collection.h"
#include "VocBase/vocbase.h"
using namespace std;
using namespace triagens::basics;
using namespace triagens::rest;
using namespace triagens::arango;
// -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor
////////////////////////////////////////////////////////////////////////////////
RestImportHandler::RestImportHandler (HttpRequest* request)
: RestVocbaseBaseHandler(request) {
}
// -----------------------------------------------------------------------------
// --SECTION-- Handler methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// {@inheritDoc}
////////////////////////////////////////////////////////////////////////////////
HttpHandler::status_t RestImportHandler::execute () {
if (ServerState::instance()->isCoordinator()) {
generateError(HttpResponse::NOT_IMPLEMENTED,
TRI_ERROR_CLUSTER_UNSUPPORTED,
"'/_api/import' is not yet supported in a cluster");
return status_t(HANDLER_DONE);
}
// extract the sub-request type
HttpRequest::HttpRequestType type = _request->requestType();
switch (type) {
case HttpRequest::HTTP_REQUEST_POST: {
// extract the import type
bool found;
string const documentType = _request->value("type", found);
if (found &&
(documentType == "documents" ||
documentType == "array" ||
documentType == "list" ||
documentType == "auto")) {
createFromJson(documentType);
}
else {
// CSV
createFromKeyValueList();
}
break;
}
default:
generateNotImplemented("ILLEGAL " + DOCUMENT_IMPORT_PATH);
break;
}
// this handler is done
return status_t(HANDLER_DONE);
}
// -----------------------------------------------------------------------------
// --SECTION-- private methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief extracts the "overwrite" value
////////////////////////////////////////////////////////////////////////////////
bool RestImportHandler::extractOverwrite () const {
bool found;
char const* overwrite = _request->value("overwrite", found);
if (found) {
return StringUtils::boolean(overwrite);
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief extracts the "complete" value
////////////////////////////////////////////////////////////////////////////////
bool RestImportHandler::extractComplete () const {
bool found;
char const* forceStr = _request->value("complete", found);
if (found) {
return StringUtils::boolean(forceStr);
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create a position string
////////////////////////////////////////////////////////////////////////////////
std::string RestImportHandler::positionise (size_t i) const {
return string("at position " + StringUtils::itoa(i) + ": ");
}
////////////////////////////////////////////////////////////////////////////////
/// @brief register an error
////////////////////////////////////////////////////////////////////////////////
void RestImportHandler::registerError (RestImportResult& result,
std::string const& errorMsg) {
++result._numErrors;
result._errors.push_back(errorMsg);
LOG_WARNING("%s", errorMsg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
/// @brief process a single JSON document
////////////////////////////////////////////////////////////////////////////////
int RestImportHandler::handleSingleDocument (RestImportTransaction& trx,
TRI_json_t const* json,
string& errorMsg,
bool isEdgeCollection,
bool waitForSync,
size_t i) {
if (! TRI_IsArrayJson(json)) {
errorMsg = positionise(i) + "invalid JSON type (expecting array)";
return TRI_ERROR_ARANGO_DOCUMENT_TYPE_INVALID;
}
// document ok, now import it
TRI_doc_mptr_copy_t document;
int res = TRI_ERROR_NO_ERROR;
if (isEdgeCollection) {
char const* from = extractJsonStringValue(json, TRI_VOC_ATTRIBUTE_FROM);
char const* to = extractJsonStringValue(json, TRI_VOC_ATTRIBUTE_TO);
if (from == 0 || to == 0) {
errorMsg = positionise(i) + "missing '_from' or '_to' attribute";
return TRI_ERROR_ARANGO_INVALID_EDGE_ATTRIBUTE;
}
TRI_document_edge_t edge;
edge._fromCid = 0;
edge._toCid = 0;
edge._fromKey = nullptr;
edge._toKey = nullptr;
// Note that in a DBserver in a cluster the following two calls will
// parse the first part as a cluster-wide collection name:
int res1 = parseDocumentId(trx.resolver(), from, edge._fromCid, edge._fromKey);
int res2 = parseDocumentId(trx.resolver(), to, edge._toCid, edge._toKey);
if (res1 == TRI_ERROR_NO_ERROR && res2 == TRI_ERROR_NO_ERROR) {
res = trx.createEdge(&document, json, waitForSync, &edge);
}
else {
res = (res1 != TRI_ERROR_NO_ERROR ? res1 : res2);
}
if (edge._fromKey != 0) {
TRI_Free(TRI_CORE_MEM_ZONE, edge._fromKey);
}
if (edge._toKey != 0) {
TRI_Free(TRI_CORE_MEM_ZONE, edge._toKey);
}
}
else {
// do not acquire an extra lock
res = trx.createDocument(&document, json, waitForSync);
}
if (res != TRI_ERROR_NO_ERROR) {
string part = JsonHelper::toString(json);
if (part.size() > 255) {
// UTF-8 chars in string will be escaped so we can truncate it at any point
part = part.substr(0, 255) + "...";
}
errorMsg = positionise(i) +
"creating document failed with error '" + TRI_errno_string(res) +
"', offending document: " + part;
}
return res;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief imports documents from JSON
///
/// @RESTHEADER{POST /_api/import,imports documents from JSON}
///
/// @RESTBODYPARAM{documents,string,required}
/// The body must either be a JSON-encoded list of documents or a string with
/// multiple JSON documents separated by newlines.
///
/// @RESTQUERYPARAMETERS
///
/// @RESTQUERYPARAM{type,string,required}
/// Determines how the body of the request will be interpreted. `type` can have
/// the following values:
/// - `documents`: when this type is used, each line in the request body is
/// expected to be an individual JSON-encoded document. Multiple JSON documents
/// in the request body need to be separated by newlines.
/// - `list`: when this type is used, the request body must contain a single
/// JSON-encoded list of individual documents to import.
/// - `auto`: if set, this will automatically determine the body type (either
/// `documents` or `list`).
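///
/// For illustration (the attribute names are assumptions, not part of this API),
/// the same two documents would be encoded as follows for each body type:
///
/// - `documents`: one JSON object per line, e.g. `{ "value": 1 }` on the first
/// line and `{ "value": 2 }` on the second line.
/// - `list`: a single JSON list, e.g. `[ { "value": 1 }, { "value": 2 } ]`.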
///
/// @RESTQUERYPARAM{collection,string,required}
/// The collection name.
///
/// @RESTQUERYPARAM{createCollection,boolean,optional}
/// If this parameter has a value of `true` or `yes`, then the collection is
/// created if it does not yet exist. For any other value, the collection must
/// already exist for the operation to succeed.
///
/// @RESTQUERYPARAM{overwrite,boolean,optional}
/// If this parameter has a value of `true` or `yes`, then all data in the
/// collection will be removed prior to the import. Note that any existing
/// index definitions will be preserved.
///
/// @RESTQUERYPARAM{waitForSync,boolean,optional}
/// Wait until documents have been synced to disk before returning.
///
/// @RESTQUERYPARAM{complete,boolean,optional}
/// If set to `true` or `yes`, it will make the whole import fail if any error
/// occurs. Otherwise the import will continue even if some documents cannot
/// be imported.
///
/// @RESTQUERYPARAM{details,boolean,optional}
/// If set to `true` or `yes`, the result will include an attribute `details`
/// with details about documents that could not be imported.
///
/// @RESTDESCRIPTION
/// Creates documents in the collection identified by `collection-name`.
/// The JSON representations of the documents must be passed as the body of the
/// POST request. The request body can either consist of multiple lines, with
/// each line being a single stand-alone JSON document, or a JSON list.
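///
/// As a sketch of a typical invocation (server address, file name and collection
/// name are illustrative assumptions), a newline-delimited import could be sent
/// with a plain HTTP client, for example:
/// `curl -X POST --data-binary @documents.json "http://localhost:8529/_api/import?collection=products&type=documents"`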
///
/// The response is a JSON object with the following attributes:
///
/// - `created`: number of documents imported.
///
/// - `errors`: number of documents that were not imported due to an error.
///
/// - `empty`: number of empty lines found in the input (will only contain a
/// value greater than zero for types `documents` or `auto`).
///
/// - `details`: if URL parameter `details` is set to true, the result will
/// contain a `details` attribute which is a list with more detailed
/// information about which documents could not be inserted.
///
/// @RESTRETURNCODES
///
/// @RESTRETURNCODE{201}
/// is returned if all documents could be imported successfully.
///
/// @RESTRETURNCODE{400}
/// is returned if `type` contains an invalid value, no `collection` is
/// specified, the documents are incorrectly encoded, or the request
/// is malformed.
///
/// @RESTRETURNCODE{404}
/// is returned if `collection` or the `_from` or `_to` attributes of an
/// imported edge refer to an unknown collection.
///
/// @RESTRETURNCODE{409}
/// is returned if the import would trigger a unique key violation and
/// `complete` is set to `true`.
///
/// @RESTRETURNCODE{500}
/// is returned if the server cannot auto-generate a document key (out of keys
/// error) for a document with no user-defined key.
///
/// @EXAMPLES
///
/// Importing documents with heterogeneous attributes from a JSON list:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonList}
/// db._flushCache();
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = [
/// { _key: "abc", value1: 25, value2: "test", allowed: true },
/// { _key: "foo", name: "baz" },
/// { name: { detailed: "detailed name", short: "short name" } }
/// ];
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=list", JSON.stringify(body));
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 3);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing documents from individual JSON lines:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonLines}
/// db._flushCache();
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = '{ "_key": "abc", "value1": 25, "value2": "test", "allowed": true }\n{ "_key": "foo", "name": "baz" }\n\n{ "name": { "detailed": "detailed name", "short": "short name" } }\n';
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=documents", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 3);
/// assert(r.errors === 0);
/// assert(r.empty === 1);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Using the auto type detection:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonType}
/// db._flushCache();
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = [
/// { _key: "abc", value1: 25, value2: "test", allowed: true },
/// { _key: "foo", name: "baz" },
/// { name: { detailed: "detailed name", short: "short name" } }
/// ];
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=auto", JSON.stringify(body));
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 3);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing documents into a new collection from a JSON list:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonCreate}
/// db._flushCache();
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = [
/// { id: "12553", active: true },
/// { id: "4433", active: false },
/// { id: "55932", count: 4334 },
/// ];
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&createCollection=true&type=list", JSON.stringify(body));
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 3);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing into an edge collection, with attributes `_from`, `_to` and `name`:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonEdge}
/// db._flushCache();
/// var cn = "links";
/// db._drop(cn);
/// db._createEdgeCollection(cn);
/// db._drop("products");
/// db._create("products");
/// db._flushCache();
///
/// var body = '{ "_from": "products/123", "_to": "products/234" }\n{ "_from": "products/332", "_to": "products/abc", "name": "other name" }';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=documents", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 2);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// db._drop("products");
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing into an edge collection, omitting `_from` or `_to`:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonEdgeInvalid}
/// db._flushCache();
/// var cn = "links";
/// db._drop(cn);
/// db._createEdgeCollection(cn);
/// db._flushCache();
///
/// var body = [ { name: "some name" } ];
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=list&details=true", JSON.stringify(body));
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 0);
/// assert(r.errors === 1);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Violating a unique constraint, but allow partial imports:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonUniqueContinue}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = '{ "_key": "abc", "value1": 25, "value2": "test" }\n{ "_key": "abc", "value1": "bar", "value2": "baz" }';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=documents&details=true", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body);
/// assert(r.created === 1);
/// assert(r.errors === 1);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Violating a unique constraint, not allowing partial imports:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonUniqueFail}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = '{ "_key": "abc", "value1": 25, "value2": "test" }\n{ "_key": "abc", "value1": "bar", "value2": "baz" }';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=documents&complete=true", body);
///
/// assert(response.code === 409);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Using a non-existing collection:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonInvalidCollection}
/// var cn = "products";
/// db._drop(cn);
///
/// var body = '{ "name": "test" }';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=documents", body);
///
/// assert(response.code === 404);
///
/// logJsonResponse(response);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Using a malformed body:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportJsonInvalidBody}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
/// db._flushCache();
///
/// var body = '{ }';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&type=list", body);
///
/// assert(response.code === 400);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
////////////////////////////////////////////////////////////////////////////////
bool RestImportHandler::createFromJson (string const& type) {
RestImportResult result;
vector<string> const& suffix = _request->suffix();
if (suffix.size() != 0) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_SUPERFLUOUS_SUFFICES,
"superfluous suffix, expecting " + DOCUMENT_IMPORT_PATH + "?collection=<identifier>");
return false;
}
bool const waitForSync = extractWaitForSync();
bool const complete = extractComplete();
bool const overwrite = extractOverwrite();
// extract the collection name
bool found;
string const& collection = _request->value("collection", found);
if (! found || collection.empty()) {
generateError(HttpResponse::BAD,
TRI_ERROR_ARANGO_COLLECTION_PARAMETER_MISSING,
"'collection' is missing, expecting " + DOCUMENT_IMPORT_PATH + "?collection=<identifier>");
return false;
}
if (! checkCreateCollection(collection, TRI_COL_TYPE_DOCUMENT)) {
return false;
}
bool linewise;
if (type == "documents") {
// linewise import
linewise = true;
}
else if (type == "array" || type == "list") {
// non-linewise import
linewise = false;
}
else if (type == "auto") {
linewise = true;
// auto detect import type by peeking at first character
char const* ptr = _request->body();
char const* end = ptr + _request->bodySize();
while (ptr < end) {
char const c = *ptr;
if (c == '\r' || c == '\n' || c == '\t' || c == ' ') {
ptr++;
continue;
}
else if (c == '[') {
linewise = false;
}
break;
}
}
else {
generateError(HttpResponse::BAD,
TRI_ERROR_BAD_PARAMETER,
"invalid value for 'type'");
return false;
}
// find and load collection given by name or identifier
RestImportTransaction trx(_vocbase, collection);
// .............................................................................
// inside write transaction
// .............................................................................
int res = trx.begin();
if (res != TRI_ERROR_NO_ERROR) {
generateTransactionError(collection, res);
return false;
}
TRI_document_collection_t* document = trx.documentCollection();
bool const isEdgeCollection = (document->_info._type == TRI_COL_TYPE_EDGE);
trx.lockWrite();
if (overwrite) {
// truncate collection first
trx.truncate(false);
}
if (linewise) {
// each line is a separate JSON document
char const* ptr = _request->body();
char const* end = ptr + _request->bodySize();
string line;
size_t i = 0;
while (ptr < end) {
// read line until done
i++;
char const* pos = strchr(ptr, '\n');
if (pos == 0) {
line.assign(ptr, (size_t) (end - ptr));
ptr = end;
}
else {
line.assign(ptr, (size_t) (pos - ptr));
ptr = pos + 1;
}
StringUtils::trimInPlace(line, "\r\n\t ");
if (line.length() == 0) {
++result._numEmpty;
continue;
}
TRI_json_t* json = parseJsonLine(line);
string errorMsg;
res = handleSingleDocument(trx, json, errorMsg, isEdgeCollection, waitForSync, i);
if (json != 0) {
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
}
if (res == TRI_ERROR_NO_ERROR) {
++result._numCreated;
}
else {
registerError(result, errorMsg);
if (complete) {
// only perform a full import: abort
break;
}
// perform partial import: continue
res = TRI_ERROR_NO_ERROR;
}
}
}
else {
// the entire request body is one JSON document
TRI_json_t* documents = TRI_Json2String(TRI_UNKNOWN_MEM_ZONE, _request->body(), 0);
if (! TRI_IsListJson(documents)) {
if (documents != 0) {
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, documents);
}
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"expecting a JSON list in the request");
return false;
}
size_t const n = documents->_value._objects._length;
for (size_t i = 0; i < n; ++i) {
TRI_json_t const* json = (TRI_json_t const*) TRI_AtVector(&documents->_value._objects, i);
string errorMsg;
res = handleSingleDocument(trx, json, errorMsg, isEdgeCollection, waitForSync, i + 1);
if (res == TRI_ERROR_NO_ERROR) {
++result._numCreated;
}
else {
registerError(result, errorMsg);
if (complete) {
// only perform a full import: abort
break;
}
// perform partial import: continue
res = TRI_ERROR_NO_ERROR;
}
}
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, documents);
}
// this may commit, even if previous errors occurred
res = trx.finish(res);
// .............................................................................
// outside write transaction
// .............................................................................
if (res != TRI_ERROR_NO_ERROR) {
generateTransactionError(collection, res);
}
else {
// generate result
generateDocumentsCreated(result);
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief imports documents from JSON-encoded key-value lists
///
/// @RESTHEADER{POST /_api/import,imports document values}
///
/// @RESTBODYPARAM{documents,string,required}
/// The body must consist of JSON-encoded lists of attribute values, with one
/// line per document. The first row of the request must be a JSON-encoded
/// list of attribute names. These attribute names are used for the data in the
/// subsequent rows.
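///
/// For illustration (attribute names and values are assumptions), a body that
/// imports two documents with attributes `name` and `count` would consist of
/// the header line `[ "name", "count" ]` followed by the value lines
/// `[ "foo", 1 ]` and `[ "bar", 2 ]`, producing the documents
/// `{ "name": "foo", "count": 1 }` and `{ "name": "bar", "count": 2 }`.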
///
/// @RESTQUERYPARAMETERS
///
/// @RESTQUERYPARAM{collection,string,required}
/// The collection name.
///
/// @RESTQUERYPARAM{createCollection,boolean,optional}
/// If this parameter has a value of `true` or `yes`, then the collection is
/// created if it does not yet exist. For any other value, the collection must
/// already exist for the operation to succeed.
///
/// @RESTQUERYPARAM{overwrite,boolean,optional}
/// If this parameter has a value of `true` or `yes`, then all data in the
/// collection will be removed prior to the import. Note that any existing
/// index definitions will be preserved.
///
/// @RESTQUERYPARAM{waitForSync,boolean,optional}
/// Wait until documents have been synced to disk before returning.
///
/// @RESTQUERYPARAM{complete,boolean,optional}
/// If set to `true` or `yes`, it will make the whole import fail if any error
/// occurs. Otherwise the import will continue even if some documents cannot
/// be imported.
///
/// @RESTQUERYPARAM{details,boolean,optional}
/// If set to `true` or `yes`, the result will include an attribute `details`
/// with details about documents that could not be imported.
///
/// @RESTDESCRIPTION
/// Creates documents in the collection identified by `collection-name`.
/// The first line of the request body must contain a JSON-encoded list of
/// attribute names. All following lines in the request body must contain
/// JSON-encoded lists of attribute values. Each line is interpreted as a
/// separate document, and the values specified will be mapped to the list
/// of attribute names specified in the first header line.
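///
/// As a sketch (file and collection names are illustrative assumptions), such a
/// body could be sent with a plain HTTP client, for example:
/// `curl -X POST --data-binary @data.txt "http://localhost:8529/_api/import?collection=products"`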
///
/// The response is a JSON object with the following attributes:
///
/// - `created`: number of documents imported.
///
/// - `errors`: number of documents that were not imported due to an error.
///
/// - `empty`: number of empty lines found in the input (will only contain a
/// value greater than zero for types `documents` or `auto`).
///
/// - `details`: if URL parameter `details` is set to true, the result will
/// contain a `details` attribute which is a list with more detailed
/// information about which documents could not be inserted.
///
/// @RESTRETURNCODES
///
/// @RESTRETURNCODE{201}
/// is returned if all documents could be imported successfully.
///
/// @RESTRETURNCODE{400}
/// is returned if `type` contains an invalid value, no `collection` is
/// specified, the documents are incorrectly encoded, or the request
/// is malformed.
///
/// @RESTRETURNCODE{404}
/// is returned if `collection` or the `_from` or `_to` attributes of an
/// imported edge refer to an unknown collection.
///
/// @RESTRETURNCODE{409}
/// is returned if the import would trigger a unique key violation and
/// `complete` is set to `true`.
///
/// @RESTRETURNCODE{500}
/// is returned if the server cannot auto-generate a document key (out of keys
/// error) for a document with no user-defined key.
///
/// @EXAMPLES
///
/// Importing two documents, with attributes `_key`, `value1` and `value2` each. One
/// line in the import data is empty:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvExample}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
///
/// var body = '[ "_key", "value1", "value2" ]\n[ "abc", 25, "test" ]\n\n[ "foo", "bar", "baz" ]';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn, body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body)
/// assert(r.created === 2);
/// assert(r.errors === 0);
/// assert(r.empty === 1);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing three documents into a new collection:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvCreate}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
///
/// var body = '[ "value1", "value2" ]\n[ 1234, null ]\n[ "foo", "bar" ]\n[ 534.55, true ]';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&createCollection=true", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body)
/// assert(r.created === 3);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing into an edge collection, with attributes `_from`, `_to` and `name`:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvEdge}
/// var cn = "links";
/// db._drop(cn);
/// db._createEdgeCollection(cn);
/// db._drop("products");
/// db._create("products");
///
/// var body = '[ "_from", "_to", "name" ]\n[ "products/123", "products/234", "some name" ]\n[ "products/332", "products/abc", "other name" ]';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn, body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body)
/// assert(r.created === 2);
/// assert(r.errors === 0);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// db._drop("products");
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Importing into an edge collection, omitting `_from` or `_to`:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvEdgeInvalid}
/// var cn = "links";
/// db._drop(cn);
/// db._createEdgeCollection(cn);
///
/// var body = '[ "name" ]\n[ "some name" ]\n[ "other name" ]';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&details=true", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body)
/// assert(r.created === 0);
/// assert(r.errors === 2);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Violating a unique constraint, but allow partial imports:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvUniqueContinue}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
///
/// var body = '[ "_key", "value1", "value2" ]\n[ "abc", 25, "test" ]\n[ "abc", "bar", "baz" ]';
///
/// var response = logCurlRequestRaw('POST', "/_api/import?collection=" + cn + "&details=true", body);
///
/// assert(response.code === 201);
/// var r = JSON.parse(response.body)
/// assert(r.created === 1);
/// assert(r.errors === 1);
/// assert(r.empty === 0);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Violating a unique constraint, not allowing partial imports:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvUniqueFail}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
///
/// var body = '[ "_key", "value1", "value2" ]\n[ "abc", 25, "test" ]\n[ "abc", "bar", "baz" ]';
///
/// var response = logCurlRequest('POST', "/_api/import?collection=" + cn + "&complete=true", body);
///
/// assert(response.code === 409);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Using a non-existing collection:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvInvalidCollection}
/// var cn = "products";
/// db._drop(cn);
///
/// var body = '[ "_key", "value1", "value2" ]\n[ "abc", 25, "test" ]\n[ "foo", "bar", "baz" ]';
///
/// var response = logCurlRequest('POST', "/_api/import?collection=" + cn, body);
///
/// assert(response.code === 404);
///
/// logJsonResponse(response);
/// @END_EXAMPLE_ARANGOSH_RUN
///
/// Using a malformed body:
///
/// @EXAMPLE_ARANGOSH_RUN{RestImportCsvInvalidBody}
/// var cn = "products";
/// db._drop(cn);
/// db._create(cn);
///
/// var body = '{ "_key": "foo", "value1": "bar" }';
///
/// var response = logCurlRequest('POST', "/_api/import?collection=" + cn, body);
///
/// assert(response.code === 400);
///
/// logJsonResponse(response);
/// db._drop(cn);
/// @END_EXAMPLE_ARANGOSH_RUN
////////////////////////////////////////////////////////////////////////////////
bool RestImportHandler::createFromKeyValueList () {
RestImportResult result;
vector<string> const& suffix = _request->suffix();
if (suffix.size() != 0) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_SUPERFLUOUS_SUFFICES,
"superfluous suffix, expecting " + DOCUMENT_IMPORT_PATH + "?collection=<identifier>");
return false;
}
bool const waitForSync = extractWaitForSync();
bool const complete = extractComplete();
bool const overwrite = extractOverwrite();
// extract the collection name
bool found;
string const& collection = _request->value("collection", found);
if (! found || collection.empty()) {
generateError(HttpResponse::BAD,
TRI_ERROR_ARANGO_COLLECTION_PARAMETER_MISSING,
"'collection' is missing, expecting " + DOCUMENT_IMPORT_PATH + "?collection=<identifier>");
return false;
}
if (! checkCreateCollection(collection, TRI_COL_TYPE_DOCUMENT)) {
return false;
}
// read line number (optional)
int64_t lineNumber = 0;
string const& lineNumValue = _request->value("line", found);
if (found) {
lineNumber = StringUtils::int64(lineNumValue);
}
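// the optional "line" value is used as an offset for the positions reported
// in error messages below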
size_t start = 0;
string body(_request->body(), _request->bodySize());
size_t next = body.find('\n', start);
if (next == string::npos) {
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"no JSON list found in second line");
return false;
}
string line = body.substr(start, next);
StringUtils::trimInPlace(line, "\r\n\t ");
// get first line
TRI_json_t* keys = 0;
if (line != "") {
keys = parseJsonLine(line);
}
if (! checkKeys(keys)) {
LOG_WARNING("no JSON string list in first line found");
generateError(HttpResponse::BAD,
TRI_ERROR_HTTP_BAD_PARAMETER,
"no JSON string list in first line found");
if (keys != 0) {
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, keys);
}
return false;
}
start = next + 1;
// find and load collection given by name or identifier
RestImportTransaction trx(_vocbase, collection);
// .............................................................................
// inside write transaction
// .............................................................................
int res = trx.begin();
if (res != TRI_ERROR_NO_ERROR) {
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, keys);
generateTransactionError(collection, res);
return false;
}
TRI_document_collection_t* document = trx.documentCollection();
bool const isEdgeCollection = (document->_info._type == TRI_COL_TYPE_EDGE);
trx.lockWrite();
if (overwrite) {
// truncate collection first
trx.truncate(false);
}
size_t i = (size_t) lineNumber;
while (next != string::npos && start < body.length()) {
i++;
next = body.find('\n', start);
if (next == string::npos) {
line = body.substr(start);
}
else {
line = body.substr(start, next - start);
start = next + 1;
}
StringUtils::trimInPlace(line, "\r\n\t ");
if (line.length() == 0) {
++result._numEmpty;
continue;
}
TRI_json_t* values = parseJsonLine(line);
if (values != 0) {
// build the json object from the list
string errorMsg;
TRI_json_t* json = createJsonObject(keys, values, errorMsg, line, i);
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, values);
if (json != 0) {
res = handleSingleDocument(trx, json, errorMsg, isEdgeCollection, waitForSync, i);
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
}
else {
// raise any error
res = TRI_ERROR_INTERNAL;
}
if (res == TRI_ERROR_NO_ERROR) {
++result._numCreated;
}
else {
registerError(result, errorMsg);
if (complete) {
// only perform a full import: abort
break;
}
// perform partial import: continue
res = TRI_ERROR_NO_ERROR;
}
}
else {
string errorMsg = positionise(i) + "no valid JSON data";
registerError(result, errorMsg);
}
}
// we'll always commit, even if previous errors occurred
res = trx.finish(res);
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, keys);
// .............................................................................
// outside write transaction
// .............................................................................
if (res != TRI_ERROR_NO_ERROR) {
generateTransactionError(collection, res);
}
else {
// generate result
generateDocumentsCreated(result);
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create response for number of documents created / failed
////////////////////////////////////////////////////////////////////////////////
void RestImportHandler::generateDocumentsCreated (RestImportResult const& result) {
_response = createResponse(HttpResponse::CREATED);
_response->setContentType("application/json; charset=utf-8");
TRI_json_t json;
TRI_InitArrayJson(TRI_CORE_MEM_ZONE, &json);
TRI_Insert3ArrayJson(TRI_CORE_MEM_ZONE, &json, "error", TRI_CreateBooleanJson(TRI_CORE_MEM_ZONE, false));
TRI_Insert3ArrayJson(TRI_CORE_MEM_ZONE, &json, "created", TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) result._numCreated));
TRI_Insert3ArrayJson(TRI_CORE_MEM_ZONE, &json, "errors", TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) result._numErrors));
TRI_Insert3ArrayJson(TRI_CORE_MEM_ZONE, &json, "empty", TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) result._numEmpty));
bool found;
char const* detailsStr = _request->value("details", found);
// include failure details?
if (found && StringUtils::boolean(detailsStr)) {
TRI_json_t* messages = TRI_CreateListJson(TRI_CORE_MEM_ZONE);
for (size_t i = 0, n = result._errors.size(); i < n; ++i) {
string const& msg = result._errors[i];
TRI_PushBack3ListJson(TRI_CORE_MEM_ZONE, messages, TRI_CreateString2CopyJson(TRI_CORE_MEM_ZONE, msg.c_str(), msg.size()));
}
TRI_Insert3ArrayJson(TRI_CORE_MEM_ZONE, &json, "details", messages);
}
generateResult(HttpResponse::CREATED, &json);
TRI_DestroyJson(TRI_CORE_MEM_ZONE, &json);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief parse a single document line
////////////////////////////////////////////////////////////////////////////////
TRI_json_t* RestImportHandler::parseJsonLine (string const& line) {
char* errmsg = 0;
TRI_json_t* json = TRI_Json2String(TRI_UNKNOWN_MEM_ZONE, line.c_str(), &errmsg);
if (errmsg != 0) {
// must free this error message, otherwise we'll have a memleak
TRI_FreeString(TRI_CORE_MEM_ZONE, errmsg);
}
return json;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create a JSON object from a line containing a document
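///
/// For example (illustrative values), the keys `[ "_key", "value" ]` combined
/// with the values `[ "abc", 25 ]` yield the document
/// `{ "_key": "abc", "value": 25 }`. Non-string keys and null values are
/// skipped.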
////////////////////////////////////////////////////////////////////////////////
TRI_json_t* RestImportHandler::createJsonObject (TRI_json_t const* keys,
TRI_json_t const* values,
string& errorMsg,
string const& line,
size_t lineNumber) {
if (values->_type != TRI_JSON_LIST) {
errorMsg = positionise(lineNumber) + "no valid JSON list data";
return 0;
}
size_t const n = keys->_value._objects._length;
if (n != values->_value._objects._length) {
errorMsg = positionise(lineNumber) + "wrong number of JSON values";
return 0;
}
TRI_json_t* result = TRI_CreateArray2Json(TRI_UNKNOWN_MEM_ZONE, n);
if (result == 0) {
LOG_ERROR("out of memory");
return 0;
}
for (size_t i = 0; i < n; ++i) {
TRI_json_t const* key = static_cast<TRI_json_t const*>(TRI_AtVector(&keys->_value._objects, i));
TRI_json_t const* value = static_cast<TRI_json_t const*>(TRI_AtVector(&values->_value._objects, i));
if (JsonHelper::isString(key) && value->_type > TRI_JSON_NULL) {
TRI_InsertArrayJson(TRI_UNKNOWN_MEM_ZONE, result, key->_value._string.data, value);
}
}
return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief validate keys
////////////////////////////////////////////////////////////////////////////////
bool RestImportHandler::checkKeys (TRI_json_t const* keys) {
if (! TRI_IsListJson(keys)) {
return false;
}
size_t const n = keys->_value._objects._length;
if (n == 0) {
return false;
}
for (size_t i = 0; i < n; ++i) {
TRI_json_t* key = (TRI_json_t*) TRI_AtVector(&keys->_value._objects, i);
if (! JsonHelper::isString(key)) {
return false;
}
}
return true;
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
// End: