diff --git a/Documentation/Books/Users/Arangoimp/README.mdpp b/Documentation/Books/Users/Arangoimp/README.mdpp index 083d1ad68a..76abc2abc4 100644 --- a/Documentation/Books/Users/Arangoimp/README.mdpp +++ b/Documentation/Books/Users/Arangoimp/README.mdpp @@ -152,8 +152,8 @@ or the null value, don't enclose the value into the quotes in your file. We'll be using the following import for the CSV import: -```js -"first","name","age","active","dob" +``` +"first","last","age","active","dob" "John","Connor",25,true, "Jim","O'Brady",19,, "Lisa","Jones",,,"1981-04-09" @@ -163,9 +163,30 @@ The command line to execute the import then is: unix> arangoimp --file "data.csv" --type csv --collection "users" +String values containing the quote character or the separator must be enclosed +with quote characters. Within a string, the quote character itself must be +escaped with another quote character. + Note that the quote and separator characters can be adjusted via the -*--quote* and *--separator* arguments when invoking _arangoimp_. The importer -supports Windows (CRLF) and Unix (LF) line breaks. +*--quote* and *--separator* arguments when invoking _arangoimp_. The quote +character defaults to the double quote (*"*). To use a literal quote in a +string, you can use two quote characters. Using a backslash to escape a quote +character is currently not supported by arangoimp. + +The importer supports Windows (CRLF) and Unix (LF) line breaks. Line breaks might +also occur inside values that are enclosed with the quote character. + +Here's an example of using literal quotes and newlines inside values: + +``` +"name","password" +"Foo","r4ndom""123!" +"Bar","wow! +this is a +multiline password!" 
+"Bartholomew ""Bart"" Simpson","Milhouse" +``` + !SUBSECTION Importing TSV Data diff --git a/UnitTests/HttpInterface/api-database-spec.rb b/UnitTests/HttpInterface/api-database-spec.rb index fff5fbe7f1..be5552fd4f 100644 --- a/UnitTests/HttpInterface/api-database-spec.rb +++ b/UnitTests/HttpInterface/api-database-spec.rb @@ -149,6 +149,28 @@ describe ArangoDB do response["errorNum"].should eq(1207) end + it "creates a database with users = null" do + body = "{\"name\" : \"#{name}\", \"users\" : null }" + doc = ArangoDB.log_post("#{prefix}-create-no-users1", api, :body => body) + + doc.code.should eq(201) + doc.headers['content-type'].should eq("application/json; charset=utf-8") + response = doc.parsed_response + response["result"].should eq(true) + response["error"].should eq(false) + end + + it "creates a database with users = [ ]" do + body = "{\"name\" : \"#{name}\", \"users\" : [ ] }" + doc = ArangoDB.log_post("#{prefix}-create-no-users2", api, :body => body) + + doc.code.should eq(201) + doc.headers['content-type'].should eq("application/json; charset=utf-8") + response = doc.parsed_response + response["result"].should eq(true) + response["error"].should eq(false) + end + it "drops an existing database" do cmd = api + "/#{name}" body = "{\"name\" : \"#{name}\" }" diff --git a/UnitTests/HttpInterface/api-replication-spec.rb b/UnitTests/HttpInterface/api-replication-spec.rb index 35f19d4036..d0c5bda75d 100644 --- a/UnitTests/HttpInterface/api-replication-spec.rb +++ b/UnitTests/HttpInterface/api-replication-spec.rb @@ -161,25 +161,39 @@ describe ArangoDB do ################################################################################ it "fetches the empty follow log" do - sleep 1 + # retry a bounded number of times: another client may write to the log between the two requests + attempts = 0 + loop do + cmd = api + "/logger-state" + doc = ArangoDB.log_get("#{prefix}-follow-empty", cmd, :body => "") + doc.code.should eq(200) + doc.parsed_response["state"]["running"].should eq(true) + fromTick = doc.parsed_response["state"]["lastLogTick"] - cmd = api + "/logger-state" - 
doc = ArangoDB.log_get("#{prefix}-follow-empty", cmd, :body => "") - doc.code.should eq(200) - doc.parsed_response["state"]["running"].should eq(true) - fromTick = doc.parsed_response["state"]["lastLogTick"] + cmd = api + "/logger-follow?from=" + fromTick + doc = ArangoDB.log_get("#{prefix}-follow-empty", cmd, :body => "", :format => :plain) - cmd = api + "/logger-follow?from=" + fromTick - doc = ArangoDB.log_get("#{prefix}-follow-empty", cmd, :body => "", :format => :plain) - doc.code.should eq(204) + if doc.code != 204 + # someone else did something else + doc.code.should eq(200) + # give up after 30 attempts instead of hanging the test run forever + attempts += 1 + attempts.should be < 30 + # sleep for a second and try again + sleep 1 + else + doc.code.should eq(204) - doc.headers["x-arango-replication-checkmore"].should eq("false") - doc.headers["x-arango-replication-lastincluded"].should match(/^\d+$/) - doc.headers["x-arango-replication-lastincluded"].should eq("0") - doc.headers["content-type"].should eq("application/x-arango-dump; charset=utf-8") + doc.headers["x-arango-replication-checkmore"].should eq("false") + doc.headers["x-arango-replication-lastincluded"].should match(/^\d+$/) + doc.headers["x-arango-replication-lastincluded"].should eq("0") + doc.headers["content-type"].should eq("application/x-arango-dump; charset=utf-8") - body = doc.response.body - body.should eq(nil) + body = doc.response.body + body.should eq(nil) + break + end + end end it "fetches a create collection action from the follow log" do diff --git a/UnitTests/Makefile.unittests b/UnitTests/Makefile.unittests index 1f61f360aa..eb554d82d5 100755 --- a/UnitTests/Makefile.unittests +++ b/UnitTests/Makefile.unittests @@ -725,7 +725,7 @@ unittests-import: $(VALGRIND) @builddir@/bin/arangosh $(CLIENT_OPT) --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --javascript.unit-tests @top_srcdir@/js/server/tests/import-setup.js || test "x$(FORCE)" == "x1" for i in 1 2 3 4; do $(VALGRIND) @builddir@/bin/arangoimp --server.username "$(USERNAME)" 
--server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --file UnitTests/import-$$i.json --collection UnitTestsImportJson$$i --type json || test "x$(FORCE)" == "x1"; done - for i in 1 2; do $(VALGRIND) @builddir@/bin/arangoimp --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --file UnitTests/import-$$i.csv --collection UnitTestsImportCsv$$i --create-collection true --type csv || test "x$(FORCE)" == "x1"; done + for i in 1 2 3; do $(VALGRIND) @builddir@/bin/arangoimp --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --file UnitTests/import-$$i.csv --collection UnitTestsImportCsv$$i --create-collection true --type csv || test "x$(FORCE)" == "x1"; done for i in 1 2; do $(VALGRIND) @builddir@/bin/arangoimp --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --file UnitTests/import-$$i.tsv --collection UnitTestsImportTsv$$i --create-collection true --type tsv || test "x$(FORCE)" == "x1"; done $(VALGRIND) @builddir@/bin/arangoimp --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --file UnitTests/import-edges.json --collection UnitTestsImportEdge --create-collection false --type json || test "x$(FORCE)" == "x1" $(VALGRIND) @builddir@/bin/arangosh $(CLIENT_OPT) --server.username "$(USERNAME)" --server.password "$(PASSWORD)" --server.endpoint unix://$(VOCDIR)/arango.sock --javascript.unit-tests @top_srcdir@/js/server/tests/import.js || test "x$(FORCE)" == "x1" diff --git a/arangod/Utils/SingleCollectionTransaction.h b/arangod/Utils/SingleCollectionTransaction.h index 4b931642dd..3ccf967cca 100644 --- a/arangod/Utils/SingleCollectionTransaction.h +++ b/arangod/Utils/SingleCollectionTransaction.h @@ -279,6 +279,19 @@ namespace triagens { return this->readIncremental(this->trxCollection(), docs, internalSkip, 
batchSize, skip, limit, total); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief selects documents from a collection, hashing the document key and +/// only returning these documents which fall into a specific partition +//////////////////////////////////////////////////////////////////////////////// + + int readPartition (std::vector& docs, + uint64_t partitionId, + uint64_t numberOfPartitions, + uint32_t* total) { + + return this->readNth(this->trxCollection(), docs, partitionId, numberOfPartitions, total); + } + // ----------------------------------------------------------------------------- // --SECTION-- private variables // ----------------------------------------------------------------------------- diff --git a/arangod/Utils/Transaction.h b/arangod/Utils/Transaction.h index 12137b1d7c..0ef4f9ddba 100644 --- a/arangod/Utils/Transaction.h +++ b/arangod/Utils/Transaction.h @@ -1003,6 +1003,64 @@ namespace triagens { return TRI_ERROR_NO_ERROR; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief selects documents from a collection, hashing the document key and +/// only returning these documents which fall into a specific partition +//////////////////////////////////////////////////////////////////////////////// + + int readNth (TRI_transaction_collection_t* trxCollection, + std::vector& docs, + uint64_t partitionId, + uint64_t numberOfPartitions, + uint32_t* total) { + + TRI_document_collection_t* document = documentCollection(trxCollection); + + // READ-LOCK START + int res = this->lock(trxCollection, TRI_TRANSACTION_READ); + + if (res != TRI_ERROR_NO_ERROR) { + return res; + } + + if (document->_primaryIndex._nrUsed == 0) { + // nothing to do + this->unlock(trxCollection, TRI_TRANSACTION_READ); + + // READ-LOCK END + return TRI_ERROR_NO_ERROR; + } + + if (orderBarrier(trxCollection) == nullptr) { + // no barrier available: release the read lock before bailing out, like the early return above + this->unlock(trxCollection, TRI_TRANSACTION_READ); + // READ-LOCK END + return TRI_ERROR_OUT_OF_MEMORY; + } + + void** beg = 
document->_primaryIndex._table; + void** end = beg + document->_primaryIndex._nrAlloc; + void** ptr = beg; + *total = (uint32_t) document->_primaryIndex._nrUsed; + + // fetch documents, taking partition into account + for (; ptr < end; ++ptr) { + if (*ptr) { + TRI_doc_mptr_t* d = (TRI_doc_mptr_t*) *ptr; + + if (d->_hash % numberOfPartitions == partitionId) { + // correct partition + docs.emplace_back(*d); + } + } + } + + this->unlock(trxCollection, TRI_TRANSACTION_READ); + // READ-LOCK END + + return TRI_ERROR_NO_ERROR; + } + //////////////////////////////////////////////////////////////////////////////// /// @brief create a single document, using shaped json //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Utils/transactions.h b/arangod/Utils/transactions.h index 58856bf36d..f57aa25adf 100644 --- a/arangod/Utils/transactions.h +++ b/arangod/Utils/transactions.h @@ -51,7 +51,6 @@ #define RestImportTransaction triagens::arango::SingleCollectionWriteTransaction - #endif // ----------------------------------------------------------------------------- diff --git a/arangod/V8Server/v8-query.cpp b/arangod/V8Server/v8-query.cpp index a0dd6b931b..d24e25f4e3 100644 --- a/arangod/V8Server/v8-query.cpp +++ b/arangod/V8Server/v8-query.cpp @@ -1674,6 +1674,81 @@ static v8::Handle JS_AllQuery (v8::Arguments const& argv) { return scope.Close(result); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief selects documents from a collection, hashing the document key and +/// only returning these documents which fall into a specific partition +//////////////////////////////////////////////////////////////////////////////// + +static v8::Handle JS_NthQuery (v8::Arguments const& argv) { + v8::HandleScope scope; + + // expecting two arguments + if (argv.Length() != 2 || ! argv[0]->IsNumber() || ! 
argv[1]->IsNumber()) { + TRI_V8_EXCEPTION_USAGE(scope, "NTH(<partitionId>, <numberOfPartitions>)"); + } + + TRI_vocbase_col_t const* col; + col = TRI_UnwrapClass(argv.Holder(), TRI_GetVocBaseColType()); + + if (col == nullptr) { + TRI_V8_EXCEPTION_INTERNAL(scope, "cannot extract collection"); + } + + TRI_SHARDING_COLLECTION_NOT_YET_IMPLEMENTED(scope, col); + + uint64_t const partitionId = TRI_ObjectToUInt64(argv[0], false); + uint64_t const numberOfPartitions = TRI_ObjectToUInt64(argv[1], false); + + if (partitionId >= numberOfPartitions || numberOfPartitions == 0) { + TRI_V8_EXCEPTION_PARAMETER(scope, "invalid value for <partitionId> or <numberOfPartitions>"); + } + + uint32_t total = 0; + vector docs; + + V8ReadTransaction trx(col->_vocbase, col->_cid); + + int res = trx.begin(); + + if (res != TRI_ERROR_NO_ERROR) { + TRI_V8_EXCEPTION(scope, res); + } + + res = trx.readPartition(docs, partitionId, numberOfPartitions, &total); + TRI_ASSERT(docs.empty() || trx.hasBarrier()); + + res = trx.finish(res); + + if (res != TRI_ERROR_NO_ERROR) { + TRI_V8_EXCEPTION(scope, res); + } + + size_t const n = docs.size(); + uint32_t count = 0; + + // setup result + v8::Handle result = v8::Object::New(); + v8::Handle documents = v8::Array::New((int) n); + // reserve full capacity in one go + result->Set(v8::String::New("documents"), documents); + + for (size_t i = 0; i < n; ++i) { + v8::Handle document = WRAP_SHAPED_JSON(trx, col->_cid, &docs[i]); + + if (document.IsEmpty()) { + TRI_V8_EXCEPTION_MEMORY(scope); + } + else { + documents->Set(count++, document); + } + } + + result->Set(v8::String::New("total"), v8::Number::New(total)); + result->Set(v8::String::New("count"), v8::Number::New(count)); + + return scope.Close(result); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief selects documents from a collection, using an offset into the /// primary index. 
this can be used for incremental access @@ -2968,6 +3043,9 @@ void TRI_InitV8Queries (v8::Handle context) { TRI_AddMethodVocbase(rt, "INEDGES", JS_InEdgesQuery, true); TRI_AddMethodVocbase(rt, "LAST", JS_LastQuery, true); TRI_AddMethodVocbase(rt, "NEAR", JS_NearQuery, true); + + // internal method. not intended to be used by end-users + TRI_AddMethodVocbase(rt, "NTH", JS_NthQuery, true); // internal method. not intended to be used by end-users TRI_AddMethodVocbase(rt, "OFFSET", JS_OffsetQuery, true); diff --git a/js/actions/api-database.js b/js/actions/api-database.js index 39e6d1b6ea..501ec221c8 100644 --- a/js/actions/api-database.js +++ b/js/actions/api-database.js @@ -349,7 +349,7 @@ function post_api_database (req, res) { var users = json.users; - if (users === undefined) { + if (users === undefined || users === null) { users = [ ]; } else if (! Array.isArray(users)) { diff --git a/js/server/modules/org/arangodb/testing.js b/js/server/modules/org/arangodb/testing.js index 883f204bd2..8d21a8b3be 100644 --- a/js/server/modules/org/arangodb/testing.js +++ b/js/server/modules/org/arangodb/testing.js @@ -682,6 +682,8 @@ var impTodo = [ coll: "UnitTestsImportCsv1", type: "csv", create: "true"}, {id: "csv2", data: makePath("UnitTests/import-2.csv"), coll: "UnitTestsImportCsv2", type: "csv", create: "true"}, + {id: "csv3", data: makePath("UnitTests/import-3.csv"), + coll: "UnitTestsImportCsv3", type: "csv", create: "true"}, {id: "tsv1", data: makePath("UnitTests/import-1.tsv"), coll: "UnitTestsImportTsv1", type: "tsv", create: "true"}, {id: "tsv2", data: makePath("UnitTests/import-2.tsv"), diff --git a/js/server/tests/import-setup.js b/js/server/tests/import-setup.js index c6672dd19f..651027aace 100644 --- a/js/server/tests/import-setup.js +++ b/js/server/tests/import-setup.js @@ -34,6 +34,7 @@ db._drop("UnitTestsImportJson4"); db._drop("UnitTestsImportCsv1"); db._drop("UnitTestsImportCsv2"); + db._drop("UnitTestsImportCsv3"); db._drop("UnitTestsImportTsv1"); 
db._drop("UnitTestsImportTsv2"); db._drop("UnitTestsImportVertex"); diff --git a/js/server/tests/import-teardown.js b/js/server/tests/import-teardown.js index 990ce85bd1..7634477d11 100644 --- a/js/server/tests/import-teardown.js +++ b/js/server/tests/import-teardown.js @@ -34,6 +34,7 @@ db._drop("UnitTestsImportJson4"); db._drop("UnitTestsImportCsv1"); db._drop("UnitTestsImportCsv2"); + db._drop("UnitTestsImportCsv3"); db._drop("UnitTestsImportTsv1"); db._drop("UnitTestsImportTsv2"); db._drop("UnitTestsImportVertex"); diff --git a/js/server/tests/import.js b/js/server/tests/import.js index 8eefa46628..a6c3916f72 100644 --- a/js/server/tests/import.js +++ b/js/server/tests/import.js @@ -171,6 +171,21 @@ function importTestSuite () { assertEqual(errors.ERROR_ARANGO_COLLECTION_NOT_FOUND.code, getErrorCode(function() { executeQuery("FOR i IN UnitTestsImportCsv2 SORT i.id RETURN i"); } )); }, +//////////////////////////////////////////////////////////////////////////////// +/// @brief test csv import of values containing quote characters and line breaks +//////////////////////////////////////////////////////////////////////////////// + + testCsvImport3 : function () { + var expected = [ + { name: "Bar", password: "wow!\nthis is a\nmultine password!" }, + { name: "Bartholomew \"Bart\" Simpson", password: "Milhouse" }, + { name: "Foo", password: "r4ndom\"123!" 
} + ]; + + var actual = getQueryResults("FOR i IN UnitTestsImportCsv3 SORT i.name RETURN i"); + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test tsv import //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/BasicsC/csv.c b/lib/BasicsC/csv.c index 8e9bfb65b3..f9461237a2 100644 --- a/lib/BasicsC/csv.c +++ b/lib/BasicsC/csv.c @@ -346,7 +346,7 @@ int TRI_ParseCsvString2 (TRI_csv_parser_t* parser, char const* line, size_t leng *qtr++ = *ptr++; } - // found quote, need at least another quote, a separator, or a eol + // found quote, need at least another quote, a separator, or an eol if (ptr + 1 < parser->_stop) { ++ptr;