1
0
Fork 0

Merge branch 'devel' of github.com:triAGENS/ArangoDB into 1.2

This commit is contained in:
Frank Celler 2013-01-22 16:07:32 +01:00
commit f4814571ef
16 changed files with 229 additions and 14 deletions

View File

@ -1,6 +1,19 @@
v1.2.alpha (XXXX-XX-XX)
-----------------------
* added --progress option for arangoimp. This will show the percentage of the input
file that has been processed by arangoimp while the import is still running. It can
be used as a rough indicator of progress for the entire import.
* make the server log documents that cannot be imported via /_api/import into the
logfile using the warning log level. This may help finding illegal documents in big
import runs.
* check on server startup whether the database directory and all collection directories
are writable. if not, the server startup will be aborted. this prevents serious
problems with collections being non-writable and this being detected at some point
after the server has been started
* allow the following AQL constructs: FUNC(...)[...], FUNC(...).attribute
* fixed issue #361: Bug in Admin Interface. Header disappears when clicking new collection

View File

@ -615,6 +615,7 @@ TRI_associative_pointer_t* TRI_InitialiseFunctionsAql (void) {
REGISTER_FUNCTION("MERGE", "MERGE", true, false, "a,a|+", NULL);
REGISTER_FUNCTION("MERGE_RECURSIVE", "MERGE_RECURSIVE", true, false, "a,a|+", NULL);
REGISTER_FUNCTION("DOCUMENT", "DOCUMENT", false, false, "h,sl", NULL);
REGISTER_FUNCTION("MATCHES", "MATCHES", true, false, ".,l", NULL);
// geo functions
REGISTER_FUNCTION("NEAR", "GEO_NEAR", false, false, "h,n,n,n|s", NULL);

View File

@ -80,7 +80,12 @@
/// @LIT{./arangoimp \-\-file "data.json" \-\-type json \-\-collection "users"}
///
/// This will transfer the data to the server, import the records, and print a
/// status summary.
/// status summary. To show the intermediate progress during the import process,
/// the option @LIT{\-\-progress} can be added. This option will show the
/// percentage of the input file that has been sent to the server. This will only
/// be useful for big import files.
///
/// @LIT{./arangoimp \-\-file "data.json" \-\-type json \-\-collection "users" \-\-progress true}
///
/// By default, the endpoint @LIT{tcp://127.0.0.1:8529} will be used.
/// If you want to specify a different endpoint, you can use the \-\-server.endpoint

View File

@ -162,6 +162,21 @@ HttpHandler::status_e RestImportHandler::execute () {
/// @{
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// @brief log an error document
////////////////////////////////////////////////////////////////////////////////
void RestImportHandler::logDocument (const TRI_json_t* const json) const {
  TRI_string_buffer_t buffer;

  TRI_InitStringBuffer(&buffer, TRI_UNKNOWN_MEM_ZONE);

  // stringify the document so it can be written to the log
  int res = TRI_StringifyJson(&buffer, json);

  if (res == TRI_ERROR_NO_ERROR) {
    // ": " separator added so the document is not glued to the message text
    LOGGER_WARNING << "offending document: " << buffer._buffer;
  }

  // buffer must be destroyed even if stringification failed
  TRI_DestroyStringBuffer(&buffer);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief creates documents
///
@ -251,6 +266,8 @@ bool RestImportHandler::createByDocumentsLines () {
++numCreated;
}
else {
LOGGER_WARNING << "creating document failed with error: " << TRI_errno_string(res);
logDocument(values);
++numError;
}
TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, values);
@ -366,6 +383,7 @@ bool RestImportHandler::createByDocumentsList () {
TRI_json_t* values = (TRI_json_t*) TRI_AtVector(&documents->_value._objects, i);
if (values == 0 || values->_type != TRI_JSON_ARRAY) {
LOGGER_WARNING << "invalid JSON type (expecting array) at position " << i;
++numError;
}
else {
@ -377,6 +395,8 @@ bool RestImportHandler::createByDocumentsList () {
++numCreated;
}
else {
LOGGER_WARNING << "creating document failed with error: " << TRI_errno_string(res);
logDocument(values);
++numError;
}
}
@ -556,6 +576,8 @@ bool RestImportHandler::createByKeyValueList () {
++numCreated;
}
else {
LOGGER_WARNING << "creating document failed with error: " << TRI_errno_string(res);
logDocument(json);
++numError;
}

View File

@ -117,6 +117,12 @@ namespace triagens {
protected:
////////////////////////////////////////////////////////////////////////////////
/// @brief log an error document
////////////////////////////////////////////////////////////////////////////////
void logDocument (const TRI_json_t* const) const;
////////////////////////////////////////////////////////////////////////////////
/// @brief creates documents by JSON objects
/// each line of the input stream contains an individual JSON object

View File

@ -582,7 +582,7 @@ static int ScanPath (TRI_vocbase_t* vocbase, char const* path) {
file = TRI_Concatenate2File(path, name);
if (!file) {
if (file == NULL) {
LOG_FATAL("out of memory");
regfree(&re);
return TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
@ -590,6 +590,15 @@ static int ScanPath (TRI_vocbase_t* vocbase, char const* path) {
if (TRI_IsDirectory(file)) {
TRI_col_info_t info;
if (! TRI_IsWritable(file)) {
// the collection directory we found is not writable for the current user
// this can cause serious trouble so we will abort the server start if we
// encounter this situation
LOG_ERROR("database subdirectory '%s' is not writable for current user", file);
return TRI_set_errno(TRI_ERROR_ARANGO_DATADIR_NOT_WRITABLE);
}
// no need to lock as we are scanning
res = TRI_LoadCollectionInfo(file, &info);
@ -1119,6 +1128,14 @@ TRI_vocbase_t* TRI_OpenVocBase (char const* path) {
return NULL;
}
if (! TRI_IsWritable(path)) {
// database directory is not writable for the current user... bad luck
LOG_ERROR("database directory '%s' is not writable for current user", path);
TRI_set_errno(TRI_ERROR_ARANGO_DATADIR_NOT_WRITABLE);
return NULL;
}
// .............................................................................
// check that the database is not locked and lock it
// .............................................................................

View File

@ -39,8 +39,10 @@
#include "ImportHelper.h"
#include <sstream>
#include <iomanip>
#include "Basics/StringUtils.h"
#include "BasicsC/files.h"
#include "BasicsC/json.h"
#include "BasicsC/strings.h"
#include "Rest/HttpRequest.h"
@ -68,6 +70,7 @@ namespace triagens {
_separator = ",";
_eol = "\\n";
_createCollection = false;
_progress = false;
regcomp(&_doubleRegex, "^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-8]+)?$", REG_EXTENDED);
regcomp(&_intRegex, "^[-+]?([0-9]+)$", REG_EXTENDED);
_hasError = false;
@ -101,11 +104,16 @@ namespace triagens {
// read and convert
int fd;
int64_t totalLength;
if (fileName == "-") {
// we don't have a filesize
totalLength = 0;
fd = STDIN_FILENO;
}
else {
// read filesize
totalLength = TRI_SizeFile(fileName.c_str());
fd = TRI_OPEN(fileName.c_str(), O_RDONLY);
}
@ -113,6 +121,10 @@ namespace triagens {
_errorMessage = TRI_LAST_ERROR_STR;
return false;
}
// progress display control variables
int64_t totalRead = 0;
double nextProgress = ProgressStep;
size_t separatorLength;
char* separator = TRI_UnescapeUtf8StringZ(TRI_UNKNOWN_MEM_ZONE, _separator.c_str(), _separator.size(), &separatorLength);
@ -149,11 +161,9 @@ namespace triagens {
}
parser._dataAdd = this;
char buffer[16384];
char buffer[32768];
while (! _hasError) {
v8::HandleScope scope;
ssize_t n = TRI_READ(fd, buffer, sizeof(buffer));
if (n < 0) {
@ -166,7 +176,10 @@ namespace triagens {
else if (n == 0) {
break;
}
totalRead += (int64_t) n;
reportProgress(totalLength, totalRead, nextProgress);
TRI_ParseCsvString2(&parser, buffer, n);
}
@ -198,12 +211,17 @@ namespace triagens {
// read and convert
int fd;
int64_t totalLength;
if (fileName == "-") {
// we don't have a filesize
totalLength = 0;
fd = STDIN_FILENO;
}
else {
fd = TRI_OPEN(fileName.c_str(), O_RDONLY);
// read filesize
totalLength = TRI_SizeFile(fileName.c_str());
fd = TRI_OPEN(fileName.c_str(), O_RDONLY);
}
if (fd < 0) {
@ -211,8 +229,13 @@ namespace triagens {
return false;
}
char buffer[16384];
char buffer[32768];
bool isArray = false;
bool checkedFront = false;
// progress display control variables
int64_t totalRead = 0;
double nextProgress = ProgressStep;
while (! _hasError) {
ssize_t n = TRI_READ(fd, buffer, sizeof(buffer));
@ -222,17 +245,22 @@ namespace triagens {
return false;
}
else if (n == 0) {
// we're done
break;
}
if (_outputBuffer.length() == 0) {
if (! checkedFront) {
// detect the import file format (single lines with individual JSON objects
// or a JSON array with all documents)
const string firstChar = StringUtils::lTrim(string(buffer, n), "\r\n\t\f\b ").substr(0, 1);
isArray = (firstChar == "[");
checkedFront = true;
}
_outputBuffer.appendText(buffer, n);
totalRead += (int64_t) n;
reportProgress(totalLength, totalRead, nextProgress);
if (_outputBuffer.length() > _maxUploadSize) {
if (isArray) {
@ -249,7 +277,6 @@ namespace triagens {
sendJsonBuffer(first, len, isArray);
_outputBuffer.erase_front(len);
}
}
}
@ -271,6 +298,20 @@ namespace triagens {
////////////////////////////////////////////////////////////////////////////////
/// private functions
////////////////////////////////////////////////////////////////////////////////
void ImportHelper::reportProgress (const int64_t totalLength,
                                   const int64_t totalRead,
                                   double& nextProgress) {
  // nothing to report if progress display is off or the input size is unknown
  // (totalLength is 0 when reading from stdin)
  if (_progress && totalLength != 0) {
    const double percentage = ((double) totalRead / (double) totalLength) * 100.0;

    if (percentage < nextProgress) {
      // threshold for the next report not yet reached
      return;
    }

    LOGGER_INFO << "processed " << totalRead << " bytes (" << std::fixed << std::setprecision(2) << percentage << " %) of input file";

    // advance the threshold so we report roughly every ProgressStep percent
    nextProgress = percentage + ProgressStep;
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the collection-related URL part
@ -293,7 +334,9 @@ namespace triagens {
void ImportHelper::ProcessCsvBegin (TRI_csv_parser_t* parser, size_t row) {
  ImportHelper* ih = reinterpret_cast<ImportHelper*> (parser->_dataAdd);

  // the parser's custom data pointer may be unset; guard before dispatching.
  // note: the displayed text invoked beginLine(row) twice (leftover of the
  // old one-line guard next to its braced replacement) — call it exactly once
  if (ih) {
    ih->beginLine(row);
  }
}
void ImportHelper::beginLine(size_t row) {
@ -373,7 +416,9 @@ namespace triagens {
void ImportHelper::ProcessCsvEnd (TRI_csv_parser_t* parser, char const* field, size_t row, size_t column, bool escaped) {
  ImportHelper* ih = reinterpret_cast<ImportHelper*> (parser->_dataAdd);

  // forward the final field of the CSV row to the instance, if one is attached.
  // note: the displayed text invoked addLastField(...) twice (old one-line
  // guard fused with its braced replacement) — dispatch exactly once
  if (ih) {
    ih->addLastField(field, row, column, escaped);
  }
}
void ImportHelper::addLastField (char const* field, size_t row, size_t column, bool escaped) {

View File

@ -150,6 +150,14 @@ namespace triagens {
_createCollection = value;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief set the progress indicator
///
/// toggles the _progress flag; when enabled, the import routines emit
/// intermediate progress information (see reportProgress)
////////////////////////////////////////////////////////////////////////////////
void setProgress (const bool value) {
_progress = value;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get the number of read lines
///
@ -194,6 +202,8 @@ namespace triagens {
static void ProcessCsvBegin (TRI_csv_parser_t* , size_t );
static void ProcessCsvAdd (TRI_csv_parser_t* parser, char const* field, size_t row, size_t column, bool escaped);
static void ProcessCsvEnd (TRI_csv_parser_t* parser, char const* field, size_t row, size_t column, bool escaped);
void reportProgress (const int64_t, const int64_t, double&);
string getCollectionUrlPart ();
void beginLine (size_t row);
@ -213,7 +223,7 @@ namespace triagens {
string _eol;
bool _createCollection;
bool _useIds;
bool _progress;
size_t _numberLines;
size_t _numberOk;
@ -229,6 +239,8 @@ namespace triagens {
bool _hasError;
string _errorMessage;
static const double ProgressStep = 2.0;
};
}
}

View File

@ -124,6 +124,12 @@ static string TypeImport = "json";
static bool CreateCollection = false;
////////////////////////////////////////////////////////////////////////////////
/// @brief progress
////////////////////////////////////////////////////////////////////////////////
static bool Progress = false;
////////////////////////////////////////////////////////////////////////////////
/// @}
////////////////////////////////////////////////////////////////////////////////
@ -153,6 +159,7 @@ static void ParseProgramOptions (int argc, char* argv[]) {
("quote", &Quote, "quote character(s)")
("eol", &Eol, "end of line character(s)")
("separator", &Separator, "separator")
("progress", &Progress, "show progress")
;
BaseClient.setupGeneral(description);
@ -324,6 +331,11 @@ int main (int argc, char* argv[]) {
return EXIT_FAILURE;
}
// progress
if (Progress) {
ih.setProgress(true);
}
// import type
bool ok;

View File

@ -77,6 +77,7 @@
"ERROR_ARANGO_DOCUMENT_KEY_BAD" : { "code" : 1221, "message" : "illegal document key" },
"ERROR_ARANGO_DOCUMENT_KEY_UNEXPECTED" : { "code" : 1222, "message" : "unexpected document key" },
"ERROR_ARANGO_INDEX_NEEDS_RESIZE" : { "code" : 1223, "message" : "index needs resizing" },
"ERROR_ARANGO_DATADIR_NOT_WRITABLE" : { "code" : 1224, "message" : "database directory not writable" },
"ERROR_ARANGO_DATAFILE_FULL" : { "code" : 1300, "message" : "datafile full" },
"ERROR_QUERY_KILLED" : { "code" : 1500, "message" : "query killed" },
"ERROR_QUERY_PARSE" : { "code" : 1501, "message" : "%s" },

View File

@ -2579,6 +2579,49 @@ function AHUACATL_MERGE_RECURSIVE () {
return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compare an object against a list of examples and return whether the
/// object matches at least one of the examples
////////////////////////////////////////////////////////////////////////////////
function AHUACATL_MATCHES () {
  var element = arguments[0];

  // only values of document type can match an example
  if (AHUACATL_TYPEWEIGHT(element) !== AHUACATL_TYPEWEIGHT_DOCUMENT) {
    return false;
  }

  // normalise: a single example is handled as a one-element list
  var examples = arguments[1];
  if (! Array.isArray(examples)) {
    examples = [ examples ];
  }

  var numExamples = examples.length;
  for (var i = 0; i < numExamples; ++i) {
    var example = examples[i];

    // each example must itself be a document
    if (AHUACATL_TYPEWEIGHT(example) !== AHUACATL_TYPEWEIGHT_DOCUMENT) {
      AHUACATL_THROW(INTERNAL.errors.ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "MATCHES");
    }

    // compare all attributes of the example against the element;
    // stop at the first mismatching attribute
    var keys = AHUACATL_KEYS(example);
    var mismatch = false;
    var j = 0;
    while (j < keys.length && ! mismatch) {
      var key = keys[j];
      if (! AHUACATL_RELATIONAL_EQUAL(element[key], example[key])) {
        mismatch = true;
      }
      ++j;
    }

    // all attributes equal: the element matches this example
    if (! mismatch) {
      return true;
    }
  }

  // no example matched
  return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief passthru the argument
///

View File

@ -95,6 +95,7 @@ ERROR_ARANGO_PARSER_FAILED,1220,"parser failed","Will be raised when the parsing
ERROR_ARANGO_DOCUMENT_KEY_BAD,1221,"illegal document key","Will be raised when a document key is corrupt."
ERROR_ARANGO_DOCUMENT_KEY_UNEXPECTED,1222,"unexpected document key","Will be raised when a user-defined document key is supplied for collections with auto key generation."
ERROR_ARANGO_INDEX_NEEDS_RESIZE,1223,"index needs resizing","Will be raised when an index is full and should be resized to contain more data."
ERROR_ARANGO_DATADIR_NOT_WRITABLE,1224,"database directory not writable","Will be raised when the database directory is not writable for the current user."
################################################################################
## ArangoDB storage errors

View File

@ -271,6 +271,22 @@ int64_t TRI_SizeFile (char const* path) {
return (int64_t) stbuf.st_size;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief checks if file or directory is writable
////////////////////////////////////////////////////////////////////////////////
#ifdef _WIN32

bool TRI_IsWritable (char const* path) {
#error "TRI_IsWritable needs to be implemented for Windows"
// a Windows implementation seems to be non-trivial; the #error above aborts
// compilation until one is provided, so the return below is never reached
return true;
}

#else

bool TRI_IsWritable (char const* path) {
// we can use POSIX access() from unistd.h to check for write permissions;
// note that access() checks against the process's real (not effective)
// user and group IDs, and returns 0 only if the path exists and is writable
return (access(path, W_OK) == 0);
}

#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief checks if path is a directory

View File

@ -63,6 +63,12 @@ bool TRI_SetCloseOnExecFile (socket_t fd);
int64_t TRI_SizeFile (char const* path);
////////////////////////////////////////////////////////////////////////////////
/// @brief checks if file or directory is writable
////////////////////////////////////////////////////////////////////////////////
bool TRI_IsWritable (char const* path);
////////////////////////////////////////////////////////////////////////////////
/// @brief checks if path is a directory
////////////////////////////////////////////////////////////////////////////////

View File

@ -73,6 +73,7 @@ void TRI_InitialiseErrorMessages (void) {
REG_ERROR(ERROR_ARANGO_DOCUMENT_KEY_BAD, "illegal document key");
REG_ERROR(ERROR_ARANGO_DOCUMENT_KEY_UNEXPECTED, "unexpected document key");
REG_ERROR(ERROR_ARANGO_INDEX_NEEDS_RESIZE, "index needs resizing");
REG_ERROR(ERROR_ARANGO_DATADIR_NOT_WRITABLE, "database directory not writable");
REG_ERROR(ERROR_ARANGO_DATAFILE_FULL, "datafile full");
REG_ERROR(ERROR_QUERY_KILLED, "query killed");
REG_ERROR(ERROR_QUERY_PARSE, "%s");

View File

@ -149,6 +149,9 @@ extern "C" {
/// - 1223: @LIT{index needs resizing}
/// Will be raised when an index is full and should be resized to contain
/// more data.
/// - 1224: @LIT{database directory not writable}
/// Will be raised when the database directory is not writable for the
/// current user.
/// - 1300: @LIT{datafile full}
/// Will be raised when the datafile reaches its limit.
/// - 1500: @LIT{query killed}
@ -983,6 +986,17 @@ void TRI_InitialiseErrorMessages (void);
#define TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE (1223)
////////////////////////////////////////////////////////////////////////////////
/// @brief 1224: ERROR_ARANGO_DATADIR_NOT_WRITABLE
///
/// database directory not writable
///
/// Will be raised when the database directory is not writable for the current
/// user.
////////////////////////////////////////////////////////////////////////////////
#define TRI_ERROR_ARANGO_DATADIR_NOT_WRITABLE (1224)
////////////////////////////////////////////////////////////////////////////////
/// @brief 1300: ERROR_ARANGO_DATAFILE_FULL
///