
Merge branch 'devel' of https://github.com/arangodb/arangodb into feature/add-aql-function-levenshteindistance

This commit is contained in:
jsteemann 2018-07-19 10:54:30 +02:00
commit 425050a8e7
331 changed files with 8079 additions and 3945 deletions

View File

@ -1,5 +1,13 @@
# Rocksdb Change Log
## Unreleased
## 5.14.2 (7/3/2018)
### Bug Fixes
* Change default value of `bytes_max_delete_chunk` to 0 in NewSstFileManager() as it doesn't work well with checkpoints.
* Set DEBUG_LEVEL=0 for RocksJava Mac Release build.
## 5.14.1 (6/20/2018)
### Bug Fixes
* Fix block-based table reader pinning blocks throughout its lifetime, causing memory usage increase.
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an existent key as missing. The bug could be triggered if prefix_extractor is set and partition filters are enabled.
## 5.14.0 (5/16/2018)
### Public API Change
@ -26,6 +34,7 @@
* Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined.
* Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters
* Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true.
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an existent key as missing. The bug could be triggered if prefix_extractor is set and partition filters are enabled.
### Java API Changes
* Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.

View File

@ -85,6 +85,10 @@ ifeq ($(MAKECMDGOALS),rocksdbjavastaticrelease)
DEBUG_LEVEL=0
endif
ifeq ($(MAKECMDGOALS),rocksdbjavastaticreleasedocker)
DEBUG_LEVEL=0
endif
ifeq ($(MAKECMDGOALS),rocksdbjavastaticpublish)
DEBUG_LEVEL=0
endif

View File

@ -28,7 +28,7 @@ rocksdb_compiler_flags = [
# Added missing flags from output of build_detect_platform
"-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX",
"-DROCKSDB_BACKTRACE",
"-Wshorten-64-to-32",
"-Wnarrowing",
]
rocksdb_external_deps = [
@ -556,13 +556,13 @@ ROCKS_TESTS = [
"serial",
],
[
"db_iter_test",
"db/db_iter_test.cc",
"db_iter_stress_test",
"db/db_iter_stress_test.cc",
"serial",
],
[
"db_iter_stress_test",
"db/db_iter_stress_test.cc",
"db_iter_test",
"db/db_iter_test.cc",
"serial",
],
[

View File

@ -32,7 +32,7 @@ rocksdb_compiler_flags = [
# Added missing flags from output of build_detect_platform
"-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX",
"-DROCKSDB_BACKTRACE",
"-Wshorten-64-to-32",
"-Wnarrowing",
]
rocksdb_external_deps = [

View File

@ -36,7 +36,9 @@ class TestIterator : public InternalIterator {
valid_(false),
sequence_number_(0),
iter_(0),
cmp(comparator) {}
cmp(comparator) {
data_.reserve(16);
}
void AddPut(std::string argkey, std::string argvalue) {
Add(argkey, kTypeValue, argvalue);
@ -2605,7 +2607,7 @@ TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace1) {
// MergeIterator::Prev() realized the mem table iterator is at its end
// and before an SeekToLast() is called.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"MergeIterator::Prev:BeforeSeekToLast",
"MergeIterator::Prev:BeforePrev",
[&](void* /*arg*/) { internal_iter2_->Add("z", kTypeValue, "7", 12u); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
@ -2641,7 +2643,7 @@ TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace2) {
// mem table after MergeIterator::Prev() realized the mem table iterator is at
// its end and before an SeekToLast() is called.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"MergeIterator::Prev:BeforeSeekToLast", [&](void* /*arg*/) {
"MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) {
internal_iter2_->Add("z", kTypeValue, "7", 12u);
internal_iter2_->Add("z", kTypeValue, "7", 11u);
});
@ -2679,7 +2681,7 @@ TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace3) {
// mem table after MergeIterator::Prev() realized the mem table iterator is at
// its end and before an SeekToLast() is called.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"MergeIterator::Prev:BeforeSeekToLast", [&](void* /*arg*/) {
"MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) {
internal_iter2_->Add("z", kTypeValue, "7", 16u, true);
internal_iter2_->Add("z", kTypeValue, "7", 15u, true);
internal_iter2_->Add("z", kTypeValue, "7", 14u, true);

View File

@ -2043,6 +2043,43 @@ TEST_P(DBIteratorTest, CreationFailure) {
delete iter;
}
TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
Options options = CurrentOptions();
options.max_sequential_skip_in_iterations = 3;
DestroyAndReopen(options);
// write a bunch of kvs to the database.
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("y", "1"));
ASSERT_OK(Put("y1", "1"));
ASSERT_OK(Put("y2", "1"));
ASSERT_OK(Put("y3", "1"));
ASSERT_OK(Put("z", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("z", "1"));
ASSERT_OK(Put("bar", "1"));
ASSERT_OK(Put("foo", "1"));
std::string upper_bound = "x";
Slice ub_slice(upper_bound);
ReadOptions ro;
ro.iterate_upper_bound = &ub_slice;
ro.max_skippable_internal_keys = 1000;
Iterator* iter = NewIterator(ro);
iter->Seek("foo");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("foo", iter->key().ToString());
iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
ASSERT_EQ("bar", iter->key().ToString());
delete iter;
}
TEST_P(DBIteratorTest, TableFilter) {
ASSERT_OK(Put("a", "1"));
dbfull()->Flush(FlushOptions());
@ -2109,6 +2146,47 @@ TEST_P(DBIteratorTest, TableFilter) {
}
}
TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
Options options = CurrentOptions();
options.max_sequential_skip_in_iterations = 3;
DestroyAndReopen(options);
// write a bunch of kvs to the database.
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("y", "1"));
ASSERT_OK(Put("z", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("z", "1"));
ASSERT_OK(Put("bar", "1"));
ASSERT_OK(Put("foo", "1"));
ASSERT_OK(Put("foo", "2"));
ASSERT_OK(Put("foo", "3"));
ASSERT_OK(Put("foo", "4"));
ASSERT_OK(Put("foo", "5"));
const Snapshot* snapshot = db_->GetSnapshot();
ASSERT_OK(Put("foo", "6"));
std::string upper_bound = "x";
Slice ub_slice(upper_bound);
ReadOptions ro;
ro.snapshot = snapshot;
ro.iterate_upper_bound = &ub_slice;
Iterator* iter = NewIterator(ro);
iter->SeekForPrev("goo");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("foo", iter->key().ToString());
iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("bar", iter->key().ToString());
delete iter;
db_->ReleaseSnapshot(snapshot);
}
TEST_P(DBIteratorTest, SkipStatistics) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();

View File

@ -98,12 +98,13 @@ class SstFileManager {
// DeleteScheduler immediately
// @param bytes_max_delete_chunk: if a single file is larger than delete chunk,
// ftruncate the file by this size each time, rather than dropping the whole
// file. 0 means to always delete the whole file.
// file. 0 means to always delete the whole file. NOTE this option may not
// work well with checkpoints, which rely on file system hard links.
extern SstFileManager* NewSstFileManager(
Env* env, std::shared_ptr<Logger> info_log = nullptr,
std::string trash_dir = "", int64_t rate_bytes_per_sec = 0,
bool delete_existing_trash = true, Status* status = nullptr,
double max_trash_db_ratio = 0.25,
uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024);
uint64_t bytes_max_delete_chunk = 0);
} // namespace rocksdb

View File

@ -6,7 +6,7 @@
#define ROCKSDB_MAJOR 5
#define ROCKSDB_MINOR 14
#define ROCKSDB_PATCH 0
#define ROCKSDB_PATCH 2
// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these

View File

@ -3230,7 +3230,7 @@ Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle(
JNIEnv* /*env*/, jobject /*jobj*/, jlong jopt_handle,
jlong jcompactionfilterfactory_handle) {
auto* cff_factory =
reinterpret_cast<std::shared_ptr<rocksdb::CompactionFilterFactory>*>(
reinterpret_cast<std::shared_ptr<rocksdb::CompactionFilterFactoryJniCallback>*>(
jcompactionfilterfactory_handle);
reinterpret_cast<rocksdb::ColumnFamilyOptions*>(jopt_handle)
->compaction_filter_factory = *cff_factory;

View File

@ -4292,25 +4292,12 @@ class JniUtil {
* @param bytes The bytes to copy
*
* @return the Java byte[] or nullptr if an exception occurs
*
* @throws RocksDBException thrown
* if memory size to copy exceeds general java specific array size limitation.
*/
static jbyteArray copyBytes(JNIEnv* env, std::string bytes) {
const jsize jlen = static_cast<jsize>(bytes.size());
jbyteArray jbytes = env->NewByteArray(jlen);
if(jbytes == nullptr) {
// exception thrown: OutOfMemoryError
return nullptr;
}
env->SetByteArrayRegion(jbytes, 0, jlen,
const_cast<jbyte*>(reinterpret_cast<const jbyte*>(bytes.c_str())));
if(env->ExceptionCheck()) {
// exception thrown: ArrayIndexOutOfBoundsException
env->DeleteLocalRef(jbytes);
return nullptr;
}
return jbytes;
return createJavaByteArrayWithSizeCheck(env, bytes.c_str(), bytes.size());
}
/**
@ -4473,6 +4460,47 @@ class JniUtil {
return jbyte_strings;
}
/**
* Copies bytes to a new jByteArray, with a check of the Java array size limitation.
*
* @param bytes pointer to memory to copy to a new jByteArray
* @param size number of bytes to copy
*
* @return the Java byte[] or nullptr if an exception occurs
*
* @throws RocksDBException thrown
* if memory size to copy exceeds general java array size limitation to avoid overflow.
*/
static jbyteArray createJavaByteArrayWithSizeCheck(JNIEnv* env, const char* bytes, const size_t size) {
// Limitation for java array size is vm specific
// In general it cannot exceed Integer.MAX_VALUE (2^31 - 1)
// Current HotSpot VM limitation for array size is Integer.MAX_VALUE - 5 (2^31 - 1 - 5)
// It means that the next call to env->NewByteArray can still end with
// OutOfMemoryError("Requested array size exceeds VM limit") coming from VM
static const size_t MAX_JARRAY_SIZE = (static_cast<size_t>(1)) << 31;
if(size > MAX_JARRAY_SIZE) {
rocksdb::RocksDBExceptionJni::ThrowNew(env, "Requested array size exceeds VM limit");
return nullptr;
}
const jsize jlen = static_cast<jsize>(size);
jbyteArray jbytes = env->NewByteArray(jlen);
if(jbytes == nullptr) {
// exception thrown: OutOfMemoryError
return nullptr;
}
env->SetByteArrayRegion(jbytes, 0, jlen,
const_cast<jbyte*>(reinterpret_cast<const jbyte*>(bytes)));
if(env->ExceptionCheck()) {
// exception thrown: ArrayIndexOutOfBoundsException
env->DeleteLocalRef(jbytes);
return nullptr;
}
return jbytes;
}
/**
* Copies bytes from a rocksdb::Slice to a jByteArray
@ -4481,25 +4509,12 @@ class JniUtil {
* @param bytes The bytes to copy
*
* @return the Java byte[] or nullptr if an exception occurs
*
* @throws RocksDBException thrown
* if memory size to copy exceeds general java specific array size limitation.
*/
static jbyteArray copyBytes(JNIEnv* env, const Slice& bytes) {
const jsize jlen = static_cast<jsize>(bytes.size());
jbyteArray jbytes = env->NewByteArray(jlen);
if(jbytes == nullptr) {
// exception thrown: OutOfMemoryError
return nullptr;
}
env->SetByteArrayRegion(jbytes, 0, jlen,
const_cast<jbyte*>(reinterpret_cast<const jbyte*>(bytes.data())));
if(env->ExceptionCheck()) {
// exception thrown: ArrayIndexOutOfBoundsException
env->DeleteLocalRef(jbytes);
return nullptr;
}
return jbytes;
return createJavaByteArrayWithSizeCheck(env, bytes.data(), bytes.size());
}
/*

View File

@ -138,7 +138,7 @@ jobject Java_org_rocksdb_SstFileManager_getTrackedFiles(JNIEnv* env,
const rocksdb::HashMapJni::FnMapKV<const std::string, const uint64_t>
fn_map_kv =
[env, &tracked_files](
[env](
const std::pair<const std::string, const uint64_t>& pair) {
const jstring jtracked_file_path =
env->NewStringUTF(pair.first.c_str());

View File

@ -48,7 +48,7 @@ void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__JJ(
jlong jtxn_notifier_handle) {
auto* txn = reinterpret_cast<rocksdb::Transaction*>(jhandle);
auto* txn_notifier =
reinterpret_cast<std::shared_ptr<rocksdb::TransactionNotifier>*>(
reinterpret_cast<std::shared_ptr<rocksdb::TransactionNotifierJniCallback>*>(
jtxn_notifier_handle);
txn->SetSnapshotOnNextOperation(*txn_notifier);
}

View File

@ -28,8 +28,10 @@ public class ColumnFamilyHandle extends RocksObject {
* Gets the name of the Column Family.
*
* @return The name of the Column Family.
*
* @throws RocksDBException if an error occurs whilst retrieving the name.
*/
public byte[] getName() {
public byte[] getName() throws RocksDBException {
return getName(nativeHandle_);
}
@ -71,14 +73,22 @@ public class ColumnFamilyHandle extends RocksObject {
}
final ColumnFamilyHandle that = (ColumnFamilyHandle) o;
return rocksDB_.nativeHandle_ == that.rocksDB_.nativeHandle_ &&
getID() == that.getID() &&
Arrays.equals(getName(), that.getName());
try {
return rocksDB_.nativeHandle_ == that.rocksDB_.nativeHandle_ &&
getID() == that.getID() &&
Arrays.equals(getName(), that.getName());
} catch (RocksDBException e) {
throw new RuntimeException("Cannot compare column family handles", e);
}
}
@Override
public int hashCode() {
return Objects.hash(getName(), getID(), rocksDB_.nativeHandle_);
try {
return Objects.hash(getName(), getID(), rocksDB_.nativeHandle_);
} catch (RocksDBException e) {
throw new RuntimeException("Cannot calculate hash code of column family handle", e);
}
}
/**
@ -96,7 +106,7 @@ public class ColumnFamilyHandle extends RocksObject {
}
}
private native byte[] getName(final long handle);
private native byte[] getName(final long handle) throws RocksDBException;
private native int getID(final long handle);
private native ColumnFamilyDescriptor getDescriptor(final long handle) throws RocksDBException;
@Override protected final native void disposeInternal(final long handle);

View File

@ -65,8 +65,11 @@ public class WriteBatch extends AbstractWriteBatch {
* Retrieve the serialized version of this batch.
*
* @return the serialized representation of this write batch.
*
* @throws RocksDBException if an error occurs whilst retrieving
* the serialized batch data.
*/
public byte[] data() {
public byte[] data() throws RocksDBException {
return data(nativeHandle_);
}
@ -253,7 +256,7 @@ public class WriteBatch extends AbstractWriteBatch {
final int serializedLength);
private native void iterate(final long handle, final long handlerHandle)
throws RocksDBException;
private native byte[] data(final long nativeHandle);
private native byte[] data(final long nativeHandle) throws RocksDBException;
private native long getDataSize(final long nativeHandle);
private native boolean hasPut(final long nativeHandle);
private native boolean hasDelete(final long nativeHandle);

View File

@ -4,9 +4,11 @@
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;
import org.junit.Assume;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
import java.util.*;
@ -143,6 +145,39 @@ public class RocksDBTest {
}
}
@Rule
public ExpectedException thrown = ExpectedException.none();
@Test
public void getOutOfArrayMaxSizeValue() throws RocksDBException {
final int numberOfValueSplits = 10;
final int splitSize = Integer.MAX_VALUE / numberOfValueSplits;
Runtime runtime = Runtime.getRuntime();
long neededMemory = ((long)(splitSize)) * (((long)numberOfValueSplits) + 3);
boolean isEnoughMemory = runtime.maxMemory() - runtime.totalMemory() > neededMemory;
Assume.assumeTrue(isEnoughMemory);
final byte[] valueSplit = new byte[splitSize];
final byte[] key = "key".getBytes();
thrown.expect(RocksDBException.class);
thrown.expectMessage("Requested array size exceeds VM limit");
// merge (numberOfValueSplits + 1) valueSplit's to get value size exceeding Integer.MAX_VALUE
try (final StringAppendOperator stringAppendOperator = new StringAppendOperator();
final Options opt = new Options()
.setCreateIfMissing(true)
.setMergeOperator(stringAppendOperator);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
db.put(key, valueSplit);
for (int i = 0; i < numberOfValueSplits; i++) {
db.merge(key, valueSplit);
}
db.get(key);
}
}
@Test
public void multiGet() throws RocksDBException, InterruptedException {
try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath());

View File

@ -242,7 +242,7 @@ class BlockIter final : public InternalIterator {
}
// Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do
// nothing.
// nothing. Calls cleanup functions.
void Invalidate(Status s) {
// Assert that the BlockIter is never deleted while Pinning is Enabled.
assert(!pinned_iters_mgr_ ||
@ -252,6 +252,9 @@ class BlockIter final : public InternalIterator {
current_ = restarts_;
status_ = s;
// Call cleanup callbacks.
Cleanable::Reset();
// Clear prev entries cache.
prev_entries_keys_buff_.clear();
prev_entries_.clear();

View File

@ -72,8 +72,14 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
}
}
void FullFilterBlockBuilder::Reset() {
last_whole_key_recorded_ = false;
last_prefix_recorded_ = false;
}
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
Reset();
// In this impl we ignore BlockHandle
*status = Status::OK();
if (num_added_ != 0) {

View File

@ -52,6 +52,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
protected:
virtual void AddKey(const Slice& key);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
virtual void Reset();
private:
// important: all of these might point to invalid addresses

View File

@ -228,28 +228,12 @@ class MergingIterator : public InternalIterator {
Slice target = key();
for (auto& child : children_) {
if (&child != current_) {
if (!prefix_seek_mode_) {
child.Seek(target);
if (child.Valid()) {
// Child is at first entry >= key(). Step back one to be < key()
TEST_SYNC_POINT_CALLBACK("MergeIterator::Prev:BeforePrev",
&child);
assert(child.status().ok());
child.Prev();
} else {
// Child has no entries >= key(). Position at last entry.
TEST_SYNC_POINT("MergeIterator::Prev:BeforeSeekToLast");
considerStatus(child.status());
child.SeekToLast();
}
child.SeekForPrev(target);
TEST_SYNC_POINT_CALLBACK("MergeIterator::Prev:BeforePrev", &child);
considerStatus(child.status());
if (child.Valid() && comparator_->Equal(target, child.key())) {
child.Prev();
considerStatus(child.status());
} else {
child.SeekForPrev(target);
considerStatus(child.status());
if (child.Valid() && comparator_->Equal(target, child.key())) {
child.Prev();
considerStatus(child.status());
}
}
}
if (child.Valid()) {

View File

@ -48,6 +48,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter});
filters_in_partition_ = 0;
Reset();
}
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {

CHANGELOG
View File

@ -1,17 +1,37 @@
devel
-----
* added collection.indexes() as an alias for collection.getIndexes()
* disable V8 engine and JavaScript APIs for agency nodes
* renamed MMFiles engine compactor thread from "Compactor" to "MMFilesCompactor".
This change will be visible only on systems which allow assigning names to
threads.
* added configuration option `--rocksdb.sync-interval`
This option specifies the interval (in milliseconds) that ArangoDB will use to
automatically synchronize data in RocksDB's write-ahead log (WAL) files to
disk. Automatic syncs will only be performed for not-yet synchronized data,
and only for operations that have been executed without the *waitForSync*
attribute.
Automatic synchronization is performed by a background thread. The default
sync interval is 100 milliseconds.
* added AQL functions `TO_BASE64`, `TO_HEX`, `ENCODE_URI_COMPONENT` and `SOUNDEX`
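For illustration, a minimal AQL sketch of the new functions; the input strings and the commented results are hypothetical examples:
```
RETURN {
  base64: TO_BASE64("foobar"),          // "Zm9vYmFy"
  hex: TO_HEX("foobar"),                // "666f6f626172"
  uri: ENCODE_URI_COMPONENT("a b&c"),   // "a%20b%26c"
  soundex: SOUNDEX("example")           // "E251"
}
```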
* PR5857: rocksdb engine would frequently request a new DelayToken. This caused
* PR #5857: RocksDB engine would frequently request a new DelayToken. This caused
excessive write delay on the next Put() call. Alternate approach taken.
* changed the thread handling in the scheduler. `--server.threads` will be
the maximum number of threads for the scheduler.
The hidden option `--server.maximal-threads` is now obsolete.
* The hidden option `--server.maximal-threads` is now obsolete.
The hidden options `--server.queue-size`, `--server.prioN-size` (N = 1 or 2)
* The hidden options `--server.queue-size`, `--server.prioN-size` (N = 1 or 2)
control the size of the internal queues. The defaults are sensible, do not
change these settings.
@ -24,7 +44,7 @@ devel
Queries directly referencing the document key benefit from this:
UPDATE {_key: '1'} WITH {foo: 'bar'} IN collection RETURN OLD
UPDATE {_key: '1'} WITH {foo: 'bar'} IN collection RETURN OLD
* Added load balancer support and user-restriction to cursor API.
@ -137,7 +157,7 @@ devel
should change the protocol from SSLv2 to TLSv12 if possible, by adjusting
the value of the `--ssl.protocol` startup option.
* added `overwrite` option to the `document rest-handler` to allow for easier syncing.
* added `overwrite` option to document insert operations to allow for easier syncing.
This almost implements the much-requested UPSERT. In reality it is a REPSERT
(replace/insert) because only replacement and not modification of documents
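As a hedged sketch only, assuming the flag is also exposed to AQL insert operations via `OPTIONS` (collection name and document are hypothetical):
```
INSERT { _key: "1", name: "test" } INTO collection
  OPTIONS { overwrite: true }   // replace an existing document with the same _key
RETURN NEW
```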
@ -218,7 +238,7 @@ devel
* fixed issue #3811: gharial api is now checking existence of `_from` and `_to` vertices
during edge creation
* There is new method `_profileQuery` on the database object to execute a query and
* There is a new method `_profileQuery` on the database object to execute a query and
print an explain with annotated runtime information.
* Query cursors can now be created with option `profile`, with a value of 0, 1 or 2.
@ -253,8 +273,6 @@ devel
* removed remainders of dysfunctional `/_admin/cluster-test` and `/_admin/clusterCheckPort`
API endpoints and removed them from documentation
* remove `_admin/echo` handler
* added new query option `stream` to enable streaming query execution via the
`POST /_api/cursor` rest interface.
@ -347,7 +365,263 @@ devel
* fixed agency restart from compaction without data
v3.3.8 (XXXX-XX-XX)
* fixed agency's log compaction for internal issue #2249
* only load Plan and Current from agency when actually needed
v3.3.13 (XXXX-XX-XX)
--------------------
* fixed issue #5827: Batch request handling incompatible with .NET's default
ContentType format
v3.3.12 (2018-07-12)
--------------------
* issue #5854: RocksDB engine would frequently request a new DelayToken. This caused
excessive write delay on the next Put() call. Alternate approach taken.
* fixed graph creation under some circumstances failing with 'edge collection
already used in edge def' despite the edge definitions being identical
* fixed issue #5727: Edge document with user provided key is inserted as many
times as the number of shards, violating the primary index
* fixed internal issue #2658: AQL modification queries did not allow `_rev`
checking. There is now a new option `ignoreRevs` which can be set to `false`
in order to force AQL modification queries to match revision ids before
doing any modifications
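A minimal sketch of the new option; the collection name and the `@expectedRev` bind parameter are hypothetical:
```
UPDATE { _key: "1", _rev: @expectedRev } WITH { foo: "bar" }
  IN collection
  OPTIONS { ignoreRevs: false }   // fail with a conflict if _rev does not match
```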
* fixed issue #5679: Replication applier restrictions will crash synchronisation
after initial sync
* fixed potential issue in RETURN DISTINCT CollectBlock implementation
that led to the block producing an empty result
* changed communication tasks to use boost strands instead of locks,
this fixes a race condition with parallel VST communication over
SSL
* fixed agency restart from compaction without data
* fixed for agent coming back to agency with changed endpoint and
total data loss
* more patient agency tests to allow for ASAN tests to successfully finish
v3.3.11 (2018-06-26)
--------------------
* upgraded arangosync version to 0.5.3
* upgraded arangodb starter version to 0.12.0
* fixed internal issue #2559: "unexpected document key" error when custom
shard keys are used and the "allowUserKeys" key generator option is set
to false
* fixed AQL DOCUMENT lookup function for documents for sharded collections with
more than a single shard and using a custom shard key (i.e. some shard
key attribute other than `_key`).
The previous implementation of DOCUMENT restricted the lookup to a single
shard in all cases, though this restriction was invalid. That led to
`DOCUMENT` not finding documents in cases where the wrong shard was contacted. The
fixed implementation in 3.3.11 will reach out to all shards to find the
document, meaning it will produce the correct result, but will cause more
cluster-internal traffic. This increase in traffic may be high if the number
of shards is also high, because each invocation of `DOCUMENT` will have to
contact all shards.
There will be no performance difference for non-sharded collections or
collections that are sharded by `_key` or that only have a single shard.
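For reference, a lookup of the following shape is affected; the collection name and key are hypothetical, and the query itself does not need to change:
```
// `users` is sharded by an attribute other than `_key`
RETURN DOCUMENT("users/12345")
```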
* reimplemented replication view in web UI
* fixed internal issue #2256: ui, document id not showing up when deleting a document
* fixed internal issue #2163: wrong labels within foxx validation of service
input parameters
* fixed internal issue #2160: fixed misplaced tooltips in indices view
* added new arangoinspect client tool, to help users and customers easily collect
information of any ArangoDB server setup, and facilitate troubleshooting for the
ArangoDB Support Team
v3.3.10 (2018-06-04)
--------------------
* make optimizer rule "remove-filter-covered-by-index" not stop after removing
a sub-condition from a FILTER statement, but pass the optimized FILTER
statement again into the optimizer rule for further optimizations.
This allows optimizing away some more FILTER conditions than before.
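A rough sketch of an affected query shape, assuming a hypothetical collection with a hash index on `value1`:
```
FOR doc IN collection
  // `doc.value1 == 42` is covered by the index; the remaining condition is
  // now passed through the optimizer rule again
  FILTER doc.value1 == 42 && doc.value2 > 10
  RETURN doc
```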
* allow accessing /_admin/status URL on followers too in active failover setup
* fix cluster COLLECT optimization for attributes that were in "sorted" variant of
COLLECT and that were provided by a sorted index on the collected attribute
* apply fulltext index optimization rule for multiple fulltext searches in
the same query
this fixes https://stackoverflow.com/questions/50496274/two-fulltext-searches-on-arangodb-cluster-v8-is-involved
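A hedged sketch of a query shape that now benefits; collection, attribute and search terms are hypothetical:
```
// both FULLTEXT() invocations can now use the fulltext index
FOR d1 IN FULLTEXT(articles, "text", "graph")
  FOR d2 IN FULLTEXT(articles, "text", "database")
    RETURN [d1._key, d2._key]
```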
* validate `_from` and `_to` values of edges on updates consistently
* fixed issue #5400: Unexpected AQL Result
* fixed issue #5429: Frequent 'updated local foxx repository' messages
* fixed issue #5252: Empty result if FULLTEXT() is used together with LIMIT offset
* fixed issue #5035: fixed a vulnerability issue within the web ui's index view
* inception was ignoring leader's configuration
v3.3.9 (2018-05-17)
-------------------
* added `/_admin/repair/distributeShardsLike` that repairs collections with
distributeShardsLike where the shards aren't actually distributed like in the
prototype collection, as could happen due to internal issue #1770
* fixed Foxx queues bug when queues are created in a request handler with an
ArangoDB authentication header
* upgraded arangosync version to 0.5.1
* upgraded arangodb starter version to 0.11.3
* fix cluster upgrading issue introduced in 3.3.8
the issue made arangod crash when starting a DB server with option
`--database.auto-upgrade true`
* fix C++ implementation of AQL ZIP function to return each distinct attribute
name only once. The previous implementation added non-unique attribute names
multiple times, which led to follow-up issues.
Now if an attribute name occurs multiple times in the input list of attribute
names, it will only be incorporated once into the result object, with the
value that corresponds to the first occurrence.
This fix also changes the V8 implementation of the ZIP function, which now
will always return the first value for non-unique attribute names and not the
last occurring value.
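For example (input values hypothetical), both implementations should now agree on keeping the first value for a duplicate attribute name:
```
RETURN ZIP(["a", "a", "b"], [1, 2, 3])   // { "a": 1, "b": 3 }
```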
* self heal during a Foxx service install, upgrade or replace no longer breaks
the respective operation
* make /_api/index, /_api/database and /_api/user REST handlers use the scheduler's
internal queue, so they do not run in an I/O handling thread
* fixed issue #4919: C++ implementation of LIKE function now matches the old and
correct behavior of the JavaScript implementation.
* added REST API endpoint /_admin/server/availability for monitoring purposes
* UI: fixed an unreasonable event bug within the modal view engine
* fixed issue #3811: gharial api is now checking existence of _from and _to vertices
during edge creation
* fixed internal issue #2149: number of documents in the UI is not adjusted after
moving them
* fixed internal issue #2150: UI - loading a saved query does not update the list
of bind parameters
* fixed internal issue #2147 - fixed database filter in UI
* fixed issue #4934: Wrong used GeoIndex depending on FILTER order
* added `query` and `aql.literal` helpers to `@arangodb` module.
* remove post-sort from GatherNode in cluster AQL queries that do use indexes
for filtering but that do not require a sorted result
This optimization can speed up gathering data from multiple shards, because
it allows to remove a merge sort of the individual shards' results.
* extend the already existing "reduce-extraction-to-projection" AQL optimizer
rule for RocksDB to provide projections of up to 5 document attributes. The
previous implementation only supported a projection for a single document
attribute. The new implementation will extract up to 5 document attributes from
a document while scanning a collection via an EnumerateCollectionNode.
Additionally the new version of the optimizer rule can also produce projections
when scanning an index via an IndexNode.
The optimization is beneficial especially for huge documents because it will copy
out only the projected attributes from the document instead of copying the entire
document data from the storage engine.
When applied, the explainer will show the projected attributes in a `projections`
remark for an EnumerateCollectionNode or IndexNode. The optimization is limited
to the RocksDB storage engine.
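A hedged sketch of a query that can now use projections; collection and attribute names are hypothetical:
```
FOR doc IN orders
  FILTER doc.status == "open"
  // only `status`, `customer` and `total` are extracted from each document
  RETURN { customer: doc.customer, total: doc.total }
```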
* added index-only optimization for AQL queries that can satisfy the retrieval of
all required document attributes directly from an index.
This optimization will be triggered for the RocksDB engine if an index is used
that covers all required attributes of the document used later on in the query.
If applied, it will save retrieving the actual document data (which would require
an extra lookup in RocksDB), but will instead build the document data solely
from the index values found. It will only be applied when using up to 5 attributes
from the document, and only if the rest of the document data is not used later
on in the query.
The optimization is currently available for the RocksDB engine for the index types
primary, edge, hash, skiplist and persistent.
If the optimization is applied, it will show up as "index only" in an AQL
query's execution plan for an IndexNode.
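A minimal sketch, assuming a hypothetical collection with a persistent index on `value`:
```
FOR doc IN collection
  FILTER doc.value >= 10 && doc.value <= 20
  RETURN doc.value   // covered by the index, so no document lookup is needed
```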
* added scan-only optimization for AQL queries that iterate over collections or
indexes and that do not need to return the actual document values.
Not fetching the document values from the storage engine will provide a
considerable speedup when using the RocksDB engine, but may also help a bit
in case of the MMFiles engine. The optimization will only be applied when
full-scanning or index-scanning a collection without referring to any of its
documents later on, and, for an IndexNode, if all filter conditions for the
documents of the collection are covered by the index.
If the optimization is applied, it will show up as "scan only" in an AQL
query's execution plan for an EnumerateCollectionNode or an IndexNode.
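A hedged sketch of a query that qualifies (collection name hypothetical), as no document attributes are accessed:
```
FOR doc IN collection
  COLLECT WITH COUNT INTO cnt
  RETURN cnt
```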
* extend existing "collect-in-cluster" optimizer rule to run grouping, counting
and deduplication on the DB servers in several cases, so that the coordinator
will only need to sum up the potentially smaller results from the individual shards.
The following types of COLLECT queries are covered now:
- RETURN DISTINCT expr
- COLLECT WITH COUNT INTO ...
- COLLECT var1 = expr1, ..., varn = exprn (WITH COUNT INTO ...), without INTO or KEEP
- COLLECT var1 = expr1, ..., varn = exprn AGGREGATE ..., without INTO or KEEP, for
aggregate functions COUNT/LENGTH, SUM, MIN and MAX.
* honor specified COLLECT method in AQL COLLECT options
for example, when the user explicitly asks for the COLLECT method
to be `sorted`, the optimizer will now not produce an alternative
version of the plan using the hash method.
additionally, if the user explicitly asks for the COLLECT method to
be `hash`, the optimizer will now change the existing plan to use
the hash method if possible instead of just creating an alternative
plan.
`COLLECT ... OPTIONS { method: 'sorted' }` => always use sorted method
`COLLECT ... OPTIONS { method: 'hash' }` => use hash if this is technically possible
`COLLECT ...` (no options) => create a plan using sorted, and another plan using hash method
* added bulk document lookups for MMFiles engine, which will improve the performance
of document lookups from inside an index in case the index lookup produces many
documents
v3.3.8 (2018-04-24)
-------------------
* included version of ArangoDB Starter (`arangodb` binary) updated to v0.10.11,
@ -391,6 +665,7 @@ v3.3.8 (XXXX-XX-XX)
* supervision can be put into maintenance mode
v3.3.7 (2018-04-11)
-------------------
@ -438,7 +713,6 @@ v3.3.7 (2018-04-11)
* fixed internal issue #2215's FailedLeader timeout bug
v3.3.5 (2018-03-28)
-------------------
@ -755,7 +1029,7 @@ v3.3.rc2 (2017-11-22)
* UI: the graph viewer now displays updated label values correctly.
Additionally the included node/edge editor now closes automatically
after a successful node/edge update.
after a successful node/edge update.
* removed `--recycle-ids` option for arangorestore
@ -878,6 +1152,303 @@ v3.3.rc1 (2017-11-17)
* Foxx now warns about malformed configuration/dependency names and aliases in the manifest.
v3.2.16 (2018-07-12)
--------------------
* make edge cache initialization and invalidation more portable by avoiding memset
on non-POD types
* fixed internal issue #2256: ui, document id not showing up when deleting a document
* fixed issue #5400: Unexpected AQL Result
* Fixed issue #5035: fixed a vulnerability issue within the web ui's index view
* issue one HTTP call less per cluster AQL query
* self heal during a Foxx service install, upgrade or replace no longer breaks
the respective operation
* inception was ignoring leader's configuration
* more patient agency tests to allow for ASAN tests to successfully finish
* fixed for agent coming back to agency with changed endpoint and
total data loss
* fixed agency restart from compaction without data
v3.2.15 (2018-05-13)
--------------------
* upgraded arangodb starter version to 0.11.2
* make /_api/index and /_api/database REST handlers use the scheduler's internal
queue, so they do not run in an I/O handling thread
* fixed issue #3811: gharial api is now checking existence of _from and _to vertices
during edge creation
v3.2.14 (2018-04-20)
--------------------
* field "$schema" in Foxx manifest.json files no longer produce warnings
* added `@arangodb/locals` module to expose the Foxx service context as an
alternative to using `module.context` directly.
* the internal implementation of REST API `/_api/simple/by-example` now uses
C++ instead of JavaScript
* supervision can be switched to maintenance mode f.e. for rolling upgrades
v3.2.13 (2018-04-13)
--------------------
* improve startup resilience in case there are datafile errors (MMFiles)
also allow repairing broken VERSION files automatically on startup by
specifying the option `--database.ignore-datafile-errors true`
* fix issue #4582: UI query editor now supports usage of empty string as bind parameter value
* fix issue #4924: removeFollower now prefers to remove the last follower(s)
* fixed issue #4934: Wrong used GeoIndex depending on FILTER order
* fixed the behavior of clusterinfo when waiting for current to catch
up with plan in create collection.
* fix for internal issue #2215. supervision will now wait for agent to
fully prepare before adding 10 second grace period after leadership change
* fixed internal issue #2215 FailedLeader timeout bug
v3.2.12 (2018-02-27)
--------------------
* remove long dysfunctional admin/long_echo handler
* fixed Foxx API:
* PUT /_api/foxx/service: Respect force flag
* PATCH /_api/foxx/service: Check whether a service under given mount exists
* fix issue #4457: create /var/tmp/arangod with correct user in supervisor mode
* fix internal issue #1848
AQL optimizer was trying to resolve attribute accesses
to attributes of constant object values at query compile time, but only did so for
the very first attribute in each object
this fixes https://stackoverflow.com/questions/48648737/beginner-bug-in-for-loops-from-objects
* fix inconvenience: If we want to start server with a non-existing
--javascript.app-path it will now be created (if possible)
* fixed: REST API `POST _api/foxx` now returns HTTP code 201 on success, as documented.
returned 200 before.
* fixed: REST API `PATCH _api/foxx/dependencies` now updates the existing dependencies
instead of replacing them.
* fixed: Foxx upload of single javascript file. You now can upload via http-url pointing
to a javascript file.
* fixed issue #4395: If your Foxx app includes an `APP` folder it got accidentally removed by self-healing;
this is not the case anymore.
* fix internal issue 1770: collection creation using distributeShardsLike yields
errors and did not distribute shards correctly in the following cases:
1. If numberOfShards * replicationFactor % nrDBServers != 0
(shards * replication is not divisible by DBServers).
2. If there was failover / move shard case on the leading collection
and creating the follower collection afterwards.
* fix timeout issues in replication client expiration
+ fix some inconsistencies in replication for RocksDB engine that could have led
to some operations not being shipped from master to slave servers
* fix issue #4272: VERSION file keeps disappearing
* fix internal issue #81: quotation marks disappeared when switching table/json
editor in the query editor ui
* make the default value of `--rocksdb.block-cache-shard-bits` use the RocksDB
default value. This will mostly mean the default number block cache shard
bits is lower than before, allowing each shard to store more data and cause
less evictions from block cache
* fix issue #4393: broken handling of unix domain sockets in
JS_Download
* fix internal bug #1726: supervision failed to remove multiple
removed servers from health UI
* fixed internal issue #1969 - command apt-get purge/remove arangodb3e was failing
* fixed a bug where supervision tried to deal with shards of virtual collections
v3.2.11 (2018-01-17)
--------------------
* Fixed an issue with the index estimates in RocksDB in the case a transaction is aborted.
Formerly the index estimates were modified whether or not the transaction committed.
Now they will only be modified if the transaction committed successfully.
* Truncate in RocksDB will now do intermediate commits every 10,000 documents.
If truncate fails or the server crashes during this operation, all deletes
that have been committed so far are persisted.
* fixed issue #4308: Crash when getter for error.name throws an error (on Windows)
* UI: fixed a query editor caching and parsing issue for arrays and objects
* Fixed internal issue #1684: Web UI: saving arrays/objects as bind parameters faulty
* Fixed internal issue #1683: fixes an UI issue where a collection name gets wrongly cached
within the documents overview of a collection.
* issue #4222: Permission error preventing AQL query import / export on webui
* UI: optimized login view for very small screen sizes
* UI: Shard distribution view now has an accordion view instead of displaying
all shards of all collections at once.
* UI: optimized error messages for invalid query bind parameter
* fixed missing transaction events in RocksDB asynchronous replication
* fixed issue #4255: AQL SORT consuming too much memory
* fixed issue #4199: Internal failure: JavaScript exception in file 'arangosh.js'
at 98,7: ArangoError 4: Expecting type String
* fixed issue #3818: Foxx configuration keys cannot contain spaces (will not save)
* UI: displayed wrong "waitForSync" property for a collection when
using RocksDB as storage engine
* prevent binding to the same combination of IP and port on Windows
* fixed incorrect persistence of RAFT vote and term
v3.2.10 (2017-12-22)
--------------------
* replication: more robust initial sync
* fixed a bug in the RocksDB engine that would prevent recalculated
collection counts to be actually stored
* fixed issue #4095: Inconsistent query execution plan
* fixed issue #4056: Executing empty query causes crash
* fixed issue #4045: Out of memory in `arangorestore` when no access
rights to dump files
* fixed issue #3031: New Graph: Edge definitions with edges in
fromCollections and toCollections
* fixed issue #2668: UI: when following wrong link from edge to vertex in
nonexisting collection misleading error is printed
* UI: improved the behavior during collection creation in a cluster environment
* UI: the graph viewer backend now picks one random start vertex of the
first 1000 documents instead of calling any(). The implementation of
any() is known to scale badly on huge collections with RocksDB.
* fixed snapshots becoming potentially invalid after intermediate commits in
the RocksDB engine
* backport agency inquire API changes
* fixed issue #3822: Field validation error in ArangoDB UI - Minor
* UI: fixed disappearing of the navigation label in some cases
* UI: fixed broken foxx configuration keys. Some valid configuration values
could not be edited via the ui.
* fixed issue #3640: limit in subquery
* UI: edge collections were wrongly added to from and to vertices select
box during graph creation
* fixed issue #3741: fix terminal color output in Windows
* fixed issue #3917: traversals with high maximal depth take extremely long
in planning phase.
* fix equality comparison for MMFiles documents in AQL functions UNIQUE
and UNION_DISTINCT
v3.2.9 (2017-12-04)
-------------------
* under certain conditions, replication could stop. Now fixed by adding an
equality check for requireFromPresent tick value
* fixed locking for replication context info in RocksDB engine
this fixes undefined behavior when parallel requests are made to the
same replication context
* UI: added not found views for documents and collections
* fixed issue #3858: Foxx queues stuck in 'progress' status
* allow compilation of ArangoDB source code with g++ 7
* fixed issue #3224: Issue in the Foxx microservices examples
* fixed a deadlock in user privilege/permission change routine
* fixed a deadlock on server shutdown
* fixed some collection locking issues in MMFiles engine
* properly report commit errors in AQL write queries to the caller for the
RocksDB engine
* UI: optimized error messages for invalid graph definitions. Also fixed a
graph renderer cleanup error.
* UI: document/edge editor now remembering their modes (e.g. code or tree)
* UI: added a delay within the graph viewer while changing the colors of the
graph. Necessary due to different browser behaviour.
* fix removal of failed cluster nodes via web interface
* back port of ClusterComm::wait fix in devel
among other things this fixes too eager dropping of other followers in case
one of the followers does not respond in time
* transact interface in agency should not be inquired as of now
* inquiry tests and blocking of inquiry on AgencyGeneralTransaction
v3.2.8 (2017-11-18)
-------------------
* fixed a race condition occurring when upgrading via the Linux package manager
* fixed authentication issue during replication
v3.2.7 (2017-11-13)
-------------------

View File

@ -1,49 +0,0 @@
Deprecated Features
-------------------
This file lists all features that have been deprecated in ArangoDB
or are known to become deprecated in a future version of ArangoDB.
Deprecated features will likely be removed in upcoming versions of
ArangoDB and shouldn't be used if possible.
## 2.5
* Foxx: method `controller.collection()` has been removed entirely. Please use `appContext.collection()` instead.
* Foxx: method `FoxxRepository.modelPrototype` has been removed entirely. Please use `FoxxRepository.model` instead.
* Foxx: the signature of `Model.extend()` has changed. `Model.extend({}, {attributes: {}})` does not work anymore. Please use `Model.extend({schema: {}})` instead.
* Foxx: the signature of method `requestContext.bodyParam()` has changed. `requestContext.bodyParam(paramName, description, Model)` does not work anymore. Please use `requestContext.bodyParam(paramName, options)` instead.
* Foxx: the signature of method `requestContext.queryParam()` has changed. `requestContext.queryParam({type: "string"})` does not work anymore. Please use `requestContext.queryParam({type: joi.string()})` instead.
* Foxx: the signature of method `requestContext.pathParam()` has changed. `requestContext.pathParam({type: "string"})` does not work anymore. Please use `requestContext.pathParam({type: joi.string()})` instead.
* Foxx: method `Model#toJSONSchema(id)` is deprecated, it will raise a warning if you use it. Please use `Foxx.toJSONSchema(id, model)` instead.
* General-Graph: In the module `org/arangodb/general-graph` the functions `_undirectedRelation` and `_directedRelation` are no longer available. Both functions have been unified to `_relation`.
* Graphs: The modules `org/arangodb/graph` and `org/arangodb/graph-blueprint` are deprecated. Please use module `org/arangodb/general-graph` instead.
* HTTP API: The api `_api/graph` is deprecated. Please use the general graph api `_api/gharial` instead.
## 2.6
* Foxx: method `Model#toJSONSchema(id)` has been removed entirely. Please use `Foxx.toJSONSchema(id, model)` instead.
* Foxx: Function-based Foxx Queue job types are deprecated and known to cause issues, they will raise a warning if you use them. Please use the new script-based job types instead.
* Foxx: the Foxx sessions option `jwt` is deprecated, it will raise a warning if you use it. Please use the `sesssions-jwt` app from the Foxx app store or use the `crypto` module's JWT functions directly.
* Foxx: the Foxx sessions option `type` is deprecated, it will raise a warning if you use it. Please use the options `cookie` and `header` instead.
* Foxx: the Foxx sessions option `sessionStorageApp` is deprecated, it will raise a warning if you use it. Please use the option `sessionStorage` instead.
* AQL: the AQL function `SKIPLIST` is deprecated. It will be removed in a future version of ArangoDB. Please use regular AQL constructs instead (e.g. `FOR doc IN collection FILTER doc.value >= @value SORT doc.value DESC LIMIT 1 RETURN doc`).
* Simple queries: the following simple query functions are now deprecated: collection.near(), collection.within(), collection.geo(), collection.fulltext(), collection.range(), collection.closedRange(). It is recommended to replace calls to these functions with equivalent AQL queries, which are more flexible.
* Simple queries: using negative values for SimpleQuery.skip() is deprecated. This functionality will be removed in future versions of ArangoDB.
## 2.7
* Foxx: the property `assets` in manifests is deprecated, it will raise a warning if you use it. Please use the `files` property and an external build tool instead.
* Foxx: properties `setup` and `teardown` in manifests are deprecated, they will raise a warning if you use them. Please use the `scripts` property instead.
* Foxx: Function-based Foxx Queue job types have been removed entirely. Please use the new script-based job types instead.
* Foxx: the Foxx sessions option `jwt` has been removed entirely. Please use the `sesssions-jwt` app from the Foxx app store or use the `crypto` module's JWT functions directly.
* Foxx: the Foxx sessions option `type` has been removed entirely. Please use the options `cookie` and `header` instead.
* Foxx: the Foxx sessions option `sessionStorageApp` has been removed entirely. Please use the option `sessionStorage` instead.
* AQL: the AQL function `SKIPLIST` has been removed.
* Simple queries: the following simple query functions are now deprecated: collection.near(), collection.within(), collection.geo(), collection.fulltext(), collection.range(), collection.closedRange(). It is recommended to replace calls to these functions with equivalent AQL queries, which are more flexible.
* Simple queries: using negative values for SimpleQuery.skip() is not supported any longer.
## 2.8
* Simple queries: the following simple query functions will be removed: collection.near(), collection.within(), collection.geo(), collection.fulltext(), collection.range(), collection.closedRange(). It is recommended to replace calls to these functions with equivalent AQL queries, which are more flexible.

View File

@ -23,7 +23,7 @@ Solution
The EDGES can be simply replaced by a call to the AQL traversal.
#### No options
**No options**
The syntax is slightly different but mapping should be simple:
@ -35,14 +35,14 @@ The syntax is slightly different but mapping should be simple:
[..] FOR v, e IN OUTBOUND @startId @@edgeCollection RETURN e
```
#### Using EdgeExamples
**Using EdgeExamples**
Examples have to be transformed into AQL filter statements.
For details on how to do this, please read the GRAPH_VERTICES section
in [Migrating GRAPH&#95;&#42; Functions from 2.8 or earlier to 3.0](MigratingGraphFunctionsTo3.md).
Apply these filters on the edge variable `e`.
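As a small, hypothetical illustration (the attribute name is invented), an edge example such as `{type: 'friend'}` becomes a filter on `e`:
```
[..] FOR v, e IN OUTBOUND @startId @@edgeCollection
  FILTER e.type == 'friend'
  RETURN e
```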
#### Option incluceVertices
**Option includeVertices**
In order to include the vertices you just use the vertex variable v as well:
@ -62,7 +62,7 @@ The NEIGHBORS is a breadth-first-search on the graph with a global unique check
Due to syntax changes the vertex collection of the start vertex is no longer mandatory to be given.
You may have to adjust bindParameteres for this query.
#### No options
**No options**
The default options did just return the neighbors `_id` value.
@ -76,7 +76,7 @@ The default options did just return the neighbors `_id` value.
NOTE: The direction cannot be given as a bindParameter any more; it has to be hard-coded in the query.
#### Using edgeExamples
**Using edgeExamples**
Examples have to be transformed into AQL filter statements.
For details on how to do this, please read the GRAPH_VERTICES section
@ -109,7 +109,7 @@ FILTER e.label == 'friend'
RETURN DISTINCT n._id
```
#### Option includeData
**Option includeData**
If you want to include the data, simply return the complete document instead of only the `_id` value.
@ -126,7 +126,7 @@ If you want to include the data simply return the complete document instead of o
This function computes all paths of the entire edge collection (with a given minDepth and maxDepth). As you can imagine, this feature is extremely expensive and should never be used.
However paths can again be replaced by AQL traversal.
#### No options
**No options**
By default paths of length 0 to 10 are returned. And circles are not followed.
```
@ -138,7 +138,7 @@ FOR start IN @@vertexCollection
FOR v, e, p IN 0..10 OUTBOUND start @@edgeCollection RETURN {source: start, destination: v, edges: p.edges, vertices: p.vertices}
```
#### followCycles
**followCycles**
If this option is set we have to modify the options of the traversal by modifying the `uniqueEdges` property:
@ -151,7 +151,7 @@ FOR start IN @@vertexCollection
FOR v, e, p IN 0..10 OUTBOUND start @@edgeCollection OPTIONS {uniqueEdges: 'none'} RETURN {source: start, destination: v, edges: p.edges, vertices: p.vertices}
```
#### minDepth and maxDepth
**minDepth and maxDepth**
If this option is set we have to give these parameters directly before the direction.

View File

@ -24,16 +24,16 @@ Graph functions covered in this recipe:
Solution 1: Quick and Dirty (not recommended)
---------------------------------------------
### When to use this solution
**When to use this solution**
I am not willing to invest a lot if time into the upgrade process and i am
I am not willing to invest a lot of time into the upgrade process and I am
willing to surrender some performance in favor of less effort.
Some constellations may not work with this solution due to the nature of
user-defined functions.
Especially check for AQL queries that combine data modifications
and `GRAPH_*` functions.
### Registering user-defined functions
**Registering user-defined functions**
This step has to be executed once on ArangoDB for every database we are using.
@ -46,13 +46,13 @@ graphs._registerCompatibilityFunctions();
These have registered all old `GRAPH_*` functions as user-defined functions again, with the prefix `arangodb::`.
### Modify the application code
**Modify the application code**
Next we have to go through our application code and replace all calls to `GRAPH_*` by `arangodb::GRAPH_*`.
Now run a testrun of our application and check if it worked.
Perform a test run of the application and check if it worked.
If it worked we are ready to go.
### Important Information
**Important Information**
The user defined functions will call translated subqueries (as described in Solution 2).
The optimizer does not know anything about these subqueries beforehand and cannot optimize the whole plan.
@ -62,14 +62,14 @@ a "really" translated query may work while the user-defined function work around
Solution 2: Translating the queries (recommended)
-------------------------------------------------
### When to use this solution
**When to use this solution**
I am willing to invest some time on my queries in order to get
maximum performance, full query optimization and a better
control of my queries. No forcing into the old layout
any more.
### Before you start
**Before you start**
If you are using `vertexExamples` which are not only `_id` strings do not skip
the GRAPH_VERTICES section, because it will describe how to translate them to
@ -90,9 +90,9 @@ FOR start GRAPH_VERTICES(@graph, @myExample)
All non GRAPH_VERTICES functions will only explain the transformation for a single input document's `_id`.
### Options used everywhere
**Options used everywhere**
#### Option edgeCollectionRestriction
**Option edgeCollectionRestriction**
In order to use edge Collection restriction we just use the feature that the traverser
can walk over a list of edge collections directly. So the edgeCollectionRestrictions
@ -108,7 +108,7 @@ just form this list (exampleGraphEdges):
Note: The `@graphName` bindParameter is not used anymore and probably has to be removed from the query.
#### Option includeData
**Option includeData**
If we use the option includeData we simply return the object directly instead of only the _id
@ -122,7 +122,7 @@ Example GRAPH_EDGES:
[..] FOR v, e IN ANY @startId GRAPH @graphName RETURN DISTINCT e
```
#### Option direction
**Option direction**
The direction has to be placed before the start id.
Note here: The direction has to be given as a word; it cannot be handed in via a bindParameter
@ -136,7 +136,7 @@ anymore:
[..] FOR v, e IN INBOUND @startId GRAPH @graphName RETURN DISTINCT e._id
```
#### Options minDepth, maxDepth
**Options minDepth, maxDepth**
If we use the options minDepth and maxDepth (both default 1 if not set) we can simply
put them in front of the direction part in the Traversal statement.
@ -151,7 +151,7 @@ Example GRAPH_EDGES:
[..] FOR v, e IN 2..4 ANY @startId GRAPH @graphName RETURN DISTINCT e._id
```
#### Option maxIteration
**Option maxIteration**
The option `maxIterations` is removed without replacement.
Your queries are now bound by main memory not by an arbitrary number of iterations.
@ -165,7 +165,7 @@ There we have three possibilities:
2. The example is `null` or `{}`.
3. The example is a non empty object or an array.
#### Example is '_id' string
**Example is '_id' string**
This is the easiest replacement. In this case we simply replace the function with a call to `DOCUMENT`:
@ -181,7 +181,7 @@ NOTE: The `@graphName` is not required anymore, we may have to adjust bindParame
The AQL graph features can work with an id directly, no need to call `DOCUMENT` before if we just need this to find a starting point.
#### Example is `null` or the empty object
**Example is `null` or the empty object**
This case means we use all documents from the graph.
Here we first have to know the vertex collections of the graph.
@ -225,7 +225,7 @@ collections are actually relevant as this `UNION` is a rather expensive operatio
If we use the option `vertexCollectionRestriction` in the original query. The `UNION` has to be formed
by the collections in this restriction instead of ALL collections.
#### Example is a non-empty object
**Example is a non-empty object**
First we follow the instructions for the empty object above.
In this section we will just focus on a single collection `vertices`, the UNION for multiple collections
@ -248,7 +248,7 @@ Example:
[..]
```
#### Example is an array
**Example is an array**
The transformation idea is almost identical to that for a single non-empty object.
For each element in the array we create the filter conditions and then we
@ -270,7 +270,7 @@ For each element in the array we create the filter conditions and than we
The GRAPH_EDGES can be simply replaced by a call to the AQL traversal.
#### No options
**No options**
The default options used the direction `ANY` and returned a distinct result of the edges.
Also, only the edges' `_id` values were returned.
@ -283,7 +283,7 @@ Also it did just return the edges `_id` value.
[..] FOR v, e IN ANY @startId GRAPH @graphName RETURN DISTINCT e._id
```
#### Option edgeExamples.
**Option edgeExamples.**
See `GRAPH_VERTICES` on how to transform examples to AQL FILTER. Apply the filter on the edge variable `e`.
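As a sketch, assuming the hypothetical edge example `{label: 'friend'}`, the filter simply ends up on `e`:

```
[..] FOR v, e IN ANY @startId GRAPH @graphName FILTER e.label == 'friend' RETURN DISTINCT e._id
```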
@ -291,7 +291,7 @@ See `GRAPH_VERTICES` on how to transform examples to AQL FILTER. Apply the filte
GRAPH_NEIGHBORS is a breadth-first search on the graph with a global uniqueness check for vertices. So we can replace it by an AQL traversal with these options.
#### No options
**No options**
The default options used the direction `ANY` and returned a distinct result of the neighbors.
Also, only the neighbors' `_id` values were returned.
@ -304,16 +304,16 @@ Also it did just return the neighbors `_id` value.
[..] FOR n IN ANY @startId GRAPH @graphName OPTIONS {bfs: true, uniqueVertices: 'global'} RETURN n
```
#### Option neighborExamples
**Option neighborExamples**
See `GRAPH_VERTICES` on how to transform examples to AQL FILTER. Apply the filter on the neighbor variable `n`.
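A sketch of this, assuming the hypothetical neighbor example `{name: 'John'}`:

```
[..] FOR n IN ANY @startId GRAPH @graphName OPTIONS {bfs: true, uniqueVertices: 'global'} FILTER n.name == 'John' RETURN n
```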
#### Option edgeExamples
**Option edgeExamples**
See `GRAPH_VERTICES` on how to transform examples to AQL FILTER. Apply the filter on the edge variable `e`.
However this is a bit more complicated as it interferes with the global uniqueness check.
For edgeExamples it is sufficent when any edge pointing to the neighbor matches the filter. Using `{uniqueVertices: 'global'}` first picks any edge randomly. Than it checks against this edge only.
For edgeExamples it is sufficient when any edge pointing to the neighbor matches the filter. Using `{uniqueVertices: 'global'}` first picks any edge randomly. Then it checks against this edge only.
If we know there are no vertex pairs with multiple edges between them, we can use the simple variant which is safe:
```
@ -334,7 +334,7 @@ If there may be multiple edges between the same pair of vertices we have to make
[..] FOR n, e IN ANY @startId GRAPH @graphName OPTIONS {bfs: true} FILTER e.label == 'friend' RETURN DISTINCT n._id
```
#### Option vertexCollectionRestriction
**Option vertexCollectionRestriction**
If we use the vertexCollectionRestriction we have to post-filter the neighbors based on their collection. Therefore we can make use of the function `IS_SAME_COLLECTION`:
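A sketch of such a post-filter, assuming the hypothetical restriction to the collections `users` and `customers`:

```
[..] FOR n IN ANY @startId GRAPH @graphName OPTIONS {bfs: true, uniqueVertices: 'global'}
  FILTER IS_SAME_COLLECTION('users', n) OR IS_SAME_COLLECTION('customers', n)
  RETURN n._id
```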
@ -398,7 +398,7 @@ This function computes all paths of the entire graph (with a given minDepth and
However, paths can again be replaced by an AQL traversal.
Assume we only have one vertex collection `vertices` again.
#### No options
**No options**
By default, paths of length 0 to 10 are returned, and circles are not followed.
```
@ -410,7 +410,7 @@ FOR start IN vertices
FOR v, e, p IN 0..10 OUTBOUND start GRAPH 'graph' RETURN {source: start, destination: v, edges: p.edges, vertices: p.vertices}
```
#### followCycles
**followCycles**
If this option is set we have to adjust the `uniqueEdges` property in the traversal options:
@ -428,7 +428,7 @@ FOR v, e, p IN 0..10 OUTBOUND start GRAPH 'graph' OPTIONS {uniqueEdges: 'none'}
This feature involves several full-collection scans and therefore is extremely expensive.
If you really need it you can transform it with the help of `ATTRIBUTES`, `KEEP` and `ZIP`.
#### Start with single _id
**Start with single _id**
```
// OLD
@ -445,7 +445,7 @@ FILTER LENGTH(shared) > 1 // Return them only if they share an attribute
RETURN ZIP([left._id], [KEEP(right, shared)]) // Build the result
```
#### Start with vertexExamples
**Start with vertexExamples**
Again we assume we only have a single collection `vertices`.
We have to transform the examples into filters. Iterate
@ -480,7 +480,7 @@ FOR left IN vertices
A shortest path computation is now done via the new SHORTEST_PATH AQL statement.
#### No options
**No options**
```
// OLD
@ -500,7 +500,7 @@ RETURN { // We rebuild the old format
}
```
#### Options weight and defaultWeight
**Options weight and defaultWeight**
The new AQL SHORTEST_PATH offers the options `weightAttribute` and `defaultWeight`.
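A sketch of how these options map onto the new statement (the weight attribute `distance` is just an assumed example):

```
FOR v, e IN OUTBOUND SHORTEST_PATH @startId TO @targetId GRAPH @graphName
  OPTIONS {weightAttribute: 'distance', defaultWeight: 1}
  RETURN [v, e]
```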
@ -767,7 +767,7 @@ Path data (shortened):
The first and second vertex of the nth path are connected by the first edge
(`p[n].vertices[0]` ⟝ `p[n].edges[0]` ⟞ `p[n].vertices[1]`) and so on. This
structure might actually be more convenient to process compared to a tree-like
structure. Note that the edge documents are also included, in constrast to the
structure. Note that the edge documents are also included, in contrast to the
removed graph traversal function.
Contact us via our social channels if you need further help.

View File

@ -10,7 +10,7 @@ Solution
--------
ArangoDB, like many other open-source projects nowadays, is standing on the shoulders of giants.
This gives us a solid foundation to bring you a uniq feature set, but it introduces a lot of
This gives us a solid foundation to bring you a unique feature set, but it introduces a lot of
dependencies that need to be in place in order to compile arangodb.
Since build infrastructures are very different depending on the target OS, choose your target

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -11,11 +11,11 @@ If _config_ is a string, it will be interpreted as _config.url_.
**Arguments**
* **config**: `Object` (optional)
- **config**: `Object` (optional)
An object with the following properties:
* **url**: `string | Array<string>` (Default: `http://localhost:8529`)
- **url**: `string | Array<string>` (Default: `http://localhost:8529`)
Base URL of the ArangoDB server or list of server URLs.
@ -37,14 +37,14 @@ If _config_ is a string, it will be interpreted as _config.url_.
}
```
* **isAbsolute**: `boolean` (Default: `false`)
- **isAbsolute**: `boolean` (Default: `false`)
If this option is explicitly set to `true`, the _url_ will be treated as the
absolute database path. This is an escape hatch to allow using arangojs with
database APIs exposed with a reverse proxy and makes it impossible to switch
databases with _useDatabase_ or using _acquireHostList_.
* **arangoVersion**: `number` (Default: `30000`)
- **arangoVersion**: `number` (Default: `30000`)
Value of the `x-arango-version` header. This should match the lowest
version of ArangoDB you expect to be using. The format is defined as
@ -58,14 +58,14 @@ If _config_ is a string, it will be interpreted as _config.url_.
not available on every major version of ArangoDB as indicated in their
descriptions below (e.g. _collection.first_, _collection.bulkUpdate_).
* **headers**: `Object` (optional)
- **headers**: `Object` (optional)
An object with additional headers to send with every request.
Header names should always be lowercase. If an `"authorization"` header is
provided, it will be overridden when using _useBasicAuth_ or _useBearerAuth_.
* **agent**: `Agent` (optional)
- **agent**: `Agent` (optional)
An http Agent instance to use for connections.
@ -75,7 +75,7 @@ If _config_ is a string, it will be interpreted as _config.url_.
This option has no effect when using the browser version of arangojs.
* **agentOptions**: `Object` (Default: see below)
- **agentOptions**: `Object` (Default: see below)
An object with options for the agent. This will be ignored if _agent_ is
also provided.
@ -92,15 +92,41 @@ If _config_ is a string, it will be interpreted as _config.url_.
additional options to the underlying calls of the
[`xhr`](https://www.npmjs.com/package/xhr) module.
* **loadBalancingStrategy**: `string` (Default: `"NONE"`)
- **loadBalancingStrategy**: `string` (Default: `"NONE"`)
Determines the behaviour when multiple URLs are provided:
Determines the behavior when multiple URLs are provided:
* `NONE`: No load balancing. All requests will be handled by the first
- `NONE`: No load balancing. All requests will be handled by the first
URL in the list until a network error is encountered. On network error,
arangojs will advance to using the next URL in the list.
* `ONE_RANDOM`: Randomly picks one URL from the list initially, then
- `ONE_RANDOM`: Randomly picks one URL from the list initially, then
behaves like `NONE`.
* `ROUND_ROBIN`: Every sequential request uses the next URL in the list.
- `ROUND_ROBIN`: Every sequential request uses the next URL in the list.
## database.close
`database.close(): void`
Closes all active connections of the database instance.
Can be used to clean up idling connections during longer periods of inactivity.
**Note**: This method currently has no effect in the browser version of arangojs.
**Examples**
```js
const db = new Database();
const sessions = db.collection("sessions");
// Clean up expired sessions once per hour
setInterval(async () => {
await db.query(aql`
FOR session IN ${sessions}
FILTER session.expires < DATE_NOW()
REMOVE session IN ${sessions}
`);
// Make sure to close the connections because they're no longer used
db.close();
}, 1000 * 60 * 60);
```

View File

@ -7,7 +7,7 @@ _ArangoCursor_ instances provide an abstraction over the HTTP API's limitations.
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", null, null, Integer.class);
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
// query result list: [1, 2, 3, 4, 5]
Integer value = cursor.next();
assertThat(value, is(1));
@ -22,6 +22,16 @@ ArangoCursor.hasNext() : boolean
Returns _true_ if the cursor has more elements in its current batch of results or if the cursor on the server has more batches.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
boolean hasNext = cursor.hasNext();
```
## ArangoCursor.next
```
@ -30,6 +40,213 @@ ArangoCursor.next() : T
Returns the next element of the query result. If the current element is the last element of the batch and the cursor on the server provides more batches, the next batch is fetched from the server.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Integer value = cursor.next();
assertThat(value, is(1));
```
## ArangoCursor.first
```
ArangoCursor.first() : T
```
Returns the first element or `null` if no element exists.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("RETURN 1", Integer.class);
Integer value = cursor.first();
assertThat(value, is(1));
```
## ArangoCursor.foreach
```
ArangoCursor.foreach(Consumer<? super T> action) : void
```
Performs the given action for each element of the _ArangoIterable_.
**Arguments**
- **action**: `Consumer<? super T>`
An action to perform on the elements
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
cursor.foreach(e -> {
// remaining results: [1, 2, 3, 4, 5]
});
```
## ArangoCursor.map
```
ArangoCursor.map(Function<? super T, ? extends R> mapper) : ArangoIterable<R>
```
Returns an _ArangoIterable_ consisting of the results of applying the given function to the elements of this _ArangoIterable_.
**Arguments**
- **mapper**: `Function<? super T, ? extends R>`
A function to apply to each element
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
cursor.map(e -> e * 10).foreach(e -> {
// remaining results: [10, 20, 30, 40, 50]
});
```
## ArangoCursor.filter
```
ArangoCursor.filter(Predicate<? super T> predicate) : ArangoIterable<T>
```
**Arguments**
- **predicate**: `Predicate<? super T>`
A predicate to apply to each element to determine if it should be included
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
cursor.filter(e -> e < 4).foreach(e -> {
// remaining results: [1, 2, 3]
});
```
## ArangoCursor.anyMatch
```
ArangoCursor.anyMatch(Predicate<? super T> predicate) : boolean
```
Returns whether any elements of this _ArangoIterable_ match the provided predicate.
**Arguments**
- **predicate**: `Predicate<? super T>`
A predicate to apply to elements of this _ArangoIterable_
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
boolean match = cursor.anyMatch(e -> e == 3);
assertThat(match, is(true));
```
## ArangoCursor.allMatch
```
ArangoCursor.allMatch(Predicate<? super T> predicate) : boolean
```
Returns whether all elements of this _ArangoIterable_ match the provided predicate.
**Arguments**
- **predicate**: `Predicate<? super T>`
A predicate to apply to elements of this _ArangoIterable_
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
boolean match = cursor.allMatch(e -> e <= 5);
assertThat(match, is(true));
```
## ArangoCursor.noneMatch
```
ArangoCursor.noneMatch(Predicate<? super T> predicate) : boolean
```
Returns whether no elements of this _ArangoIterable_ match the provided predicate.
**Arguments**
- **predicate**: `Predicate<? super T>`
A predicate to apply to elements of this _ArangoIterable_
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
boolean match = cursor.noneMatch(e -> e > 5);
assertThat(match, is(true));
```
## ArangoCursor.collectInto
```
ArangoCursor.collectInto(R target) : R
```
Iterates over all elements of this _ArangoIterable_ and adds each to the given target.
**Arguments**
- **target**: `R <R extends Collection<? super T>>`
The collection to insert into
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Collection<Integer> list = cursor.collectInto(new ArrayList());
// -- or --
Collection<Integer> set = cursor.collectInto(new HashSet());
```
## ArangoCursor.iterator
```
@ -38,6 +255,16 @@ ArangoCursor.iterator() : Iterator<T>
Returns an iterator over elements of the query result.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Iterator<Integer> iterator = cursor.iterator();
```
## ArangoCursor.asListRemaining
```
@ -46,6 +273,16 @@ ArangoCursor.asListRemaining() : List<T>
Returns the remaining results as a _List_.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Collection<Integer> list = cursor.asListRemaining();
```
## ArangoCursor.getCount
```
@ -54,6 +291,37 @@ ArangoCursor.getCount() : Integer
Returns the total number of result documents available (only available if the query was executed with the _count_ attribute set).
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", new AqlQueryOptions().count(true), Integer.class);
Integer count = cursor.getCount();
assertThat(count, is(5));
```
## ArangoCursor.count
```
ArangoCursor.count() : long
```
Returns the count of elements of this _ArangoIterable_.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
long count = cursor.filter(e -> e < 4).count();
// remaining results: [1, 2, 3]
assertThat(count, is(3L));
```
## ArangoCursor.getStats
```
@ -62,6 +330,16 @@ ArangoCursor.getStats() : Stats
Returns extra information about the query result. For data-modification queries, the stats will contain the number of modified documents and the number of documents that could not be modified due to an error (if the `ignoreErrors` query option is specified).
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Stats stats = cursor.getStats();
```
## ArangoCursor.getWarnings
```
@ -70,6 +348,16 @@ ArangoCursor.getWarnings() : Collection<Warning>
Returns warnings which could have been produced by the query.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
Collection<Warning> warnings = cursor.getWarnings();
```
## ArangoCursor.isCached
```
@ -77,3 +365,13 @@ ArangoCursor.isCached() : boolean
```
Indicates whether the query result was served from the query cache or not.
**Examples**
```Java
ArangoDB arango = new ArangoDB.Builder().build();
ArangoDatabase db = arango.db("myDB");
ArangoCursor<Integer> cursor = db.query("FOR x IN 1..5 RETURN x", Integer.class);
boolean cached = cursor.isCached();
```

View File

@ -9,14 +9,14 @@ Name | Language | Repository | &nbsp;
<span title="Same API as synchronous driver, except that it returns a CompletableFuture&lt;T&gt; instead of the result T directly" style="cursor: help;">ArangoDB-Java-Driver-Async</span> | Java | https://github.com/arangodb/arangodb-java-driver-async | [Changelog](https://github.com/arangodb/arangodb-java-driver-async/blob/master/ChangeLog.md#readme)
[ArangoJS](JS/README.md) | JavaScript | https://github.com/arangodb/arangojs | [Changelog](https://github.com/arangodb/arangojs/blob/master/CHANGELOG.md#readme)
[ArangoDB-PHP](PHP/README.md) | PHP | https://github.com/arangodb/arangodb-php | [Changelog](https://github.com/arangodb/arangodb-php/blob/devel/CHANGELOG.md#readme)
[Go-Driver](GO/README.md) | Go | https://github.com/arangodb/go-driver |
[Go-Driver](GO/README.md) | Go | https://github.com/arangodb/go-driver | [Changelog](https://github.com/arangodb/go-driver/blob/master/CHANGELOG.md#readme)
**Integrations**
Name | Language | Repository | &nbsp;
-----|----------|------------|-------
[Spring Data](SpringData/README.md) | Java | https://github.com/arangodb/spring-data | [Changelog](https://github.com/arangodb/spring-data/blob/master/ChangeLog.md#readme)
ArangoDB-Spark-Connector | Scala, Java | https://github.com/arangodb/arangodb-spark-connector | [Changelog](https://github.com/arangodb/arangodb-spark-connector/blob/master/ChangeLog.md#readme)
[ArangoDB-Spark-Connector](SparkConnector/README.md) | Scala, Java | https://github.com/arangodb/arangodb-spark-connector | [Changelog](https://github.com/arangodb/arangodb-spark-connector/blob/master/ChangeLog.md#readme)
**Community drivers**

View File

@ -1,6 +1,10 @@
# Summary
* [Introduction](README.md)
## Official Drivers
# https://@github.com/arangodb/arangodb-java-driver.git;arangodb-java-driver;docs/Drivers;;/
* [Java Driver](Java/README.md)
* [Getting Started](Java/GettingStarted/README.md)
@ -55,10 +59,6 @@
* [VertexCollection](JS/Reference/Graph/VertexCollection.md)
* [EdgeCollection](JS/Reference/Graph/EdgeCollection.md)
* [Route](JS/Reference/Route.md)
# https://@github.com/arangodb/spring-data.git;spring-data;docs/Drivers;;/
* [Spring Data ArangoDB](SpringData/README.md)
* [Getting Started](SpringData/GettingStarted/README.md)
* [Reference](SpringData/Reference/README.md)
# https://@github.com/arangodb/arangodb-php.git;arangodb-php;docs/Drivers;;/
* [ArangoDB-PHP](PHP/README.md)
* [Getting Started](PHP/GettingStarted/README.md)
@ -68,3 +68,16 @@
* [Getting Started](GO/GettingStarted/README.md)
* [Example Requests](GO/ExampleRequests/README.md)
* [Connection Management](GO/ConnectionManagement/README.md)
## Integrations
# https://@github.com/arangodb/spring-data.git;spring-data;docs/Drivers;;/
* [Spring Data ArangoDB](SpringData/README.md)
* [Getting Started](SpringData/GettingStarted/README.md)
* [Reference](SpringData/Reference/README.md)
# https://@github.com/arangodb/arangodb-spark-connector.git;arangodb-spark-connector;docs/Drivers;;/
* [ArangoDB Spark Connector](SparkConnector/README.md)
* [Getting Started](SparkConnector/GettingStarted/README.md)
* [Reference](SparkConnector/Reference/README.md)
* [Java](SparkConnector/Reference/Java.md)
* [Scala](SparkConnector/Reference/Scala.md)

View File

@ -0,0 +1,59 @@
<!-- don't edit here, its from https://@github.com/arangodb/arangodb-spark-connector.git / docs/Drivers/ -->
# ArangoDB Spark Connector - Getting Started
## Maven
```XML
<dependencies>
<dependency>
<groupId>com.arangodb</groupId>
<artifactId>arangodb-spark-connector</artifactId>
<version>1.0.2</version>
</dependency>
....
</dependencies>
```
## SBT
```Scala
libraryDependencies += "com.arangodb" % "arangodb-spark-connector" % "1.0.2"
```
## Configuration
| property-key | description | default value |
| ------------------------- | -------------------------------------- | -------------- |
| arangodb.hosts | comma separated list of ArangoDB hosts | 127.0.0.1:8529 |
| arangodb.user | basic authentication user | root |
| arangodb.password | basic authentication password | |
| arangodb.useSsl | use SSL connection | false |
| arangodb.ssl.keyStoreFile | SSL certificate keystore file | |
| arangodb.ssl.passPhrase | SSL pass phrase | |
| arangodb.ssl.protocol | SSL protocol | TLS |
## Setup SparkContext
**Scala**
```Scala
val conf = new SparkConf()
.set("arangodb.hosts", "127.0.0.1:8529")
.set("arangodb.user", "myUser")
.set("arangodb.password", "myPassword")
...
val sc = new SparkContext(conf)
```
**Java**
```Java
SparkConf conf = new SparkConf()
.set("arangodb.hosts", "127.0.0.1:8529")
.set("arangodb.user", "myUser")
.set("arangodb.password", "myPassword");
...
JavaSparkContext sc = new JavaSparkContext(conf);
```

View File

@ -0,0 +1,6 @@
<!-- don't edit here, its from https://@github.com/arangodb/arangodb-spark-connector.git / docs/Drivers/ -->
# ArangoDB Spark Connector
- [Getting Started](GettingStarted/README.md)
- [Reference](Reference/README.md)
- [Changelog](https://github.com/arangodb/arangodb-spark-connector/blob/master/ChangeLog.md#readme)

View File

@ -0,0 +1,148 @@
<!-- don't edit here, its from https://@github.com/arangodb/arangodb-spark-connector.git / docs/Drivers/ -->
# ArangoDB Spark Connector - Java Reference
## ArangoSpark.save
```
ArangoSpark.save[T](rdd: JavaRDD[T], collection: String, options: WriteOptions)
```
Save data from rdd into ArangoDB
**Arguments**
- **rdd**: `JavaRDD[T]`
The rdd with the data to save
- **collection**: `String`
The collection to save in
- **options**: `WriteOptions`
- **database**: `String`
Database to write into
- **hosts**: `String`
Alternative hosts to context property `arangodb.hosts`
- **user**: `String`
Alternative user to context property `arangodb.user`
- **password**: `String`
Alternative password to context property `arangodb.password`
- **useSsl**: `Boolean`
Alternative useSsl to context property `arangodb.useSsl`
- **sslKeyStoreFile**: `String`
Alternative sslKeyStoreFile to context property `arangodb.ssl.keyStoreFile`
- **sslPassPhrase**: `String`
Alternative sslPassPhrase to context property `arangodb.ssl.passPhrase`
- **sslProtocol**: `String`
Alternative sslProtocol to context property `arangodb.ssl.protocol`
**Examples**
```Java
JavaSparkContext sc = ...
List<MyBean> docs = ...
JavaRDD<MyBean> documents = sc.parallelize(docs);
ArangoSpark.save(documents, "myCollection", new WriteOptions().database("myDB"));
```
## ArangoSpark.load
```
ArangoSpark.load[T](sparkContext: JavaSparkContext, collection: String, options: ReadOptions, clazz: Class[T]): ArangoJavaRDD[T]
```
Load data from ArangoDB into rdd
**Arguments**
- **sparkContext**: `JavaSparkContext`
The sparkContext containing the ArangoDB configuration
- **collection**: `String`
The collection to load data from
- **options**: `ReadOptions`
- **database**: `String`
Database to read from
- **hosts**: `String`
Alternative hosts to context property `arangodb.hosts`
- **user**: `String`
Alternative user to context property `arangodb.user`
- **password**: `String`
Alternative password to context property `arangodb.password`
- **useSsl**: `Boolean`
Alternative useSsl to context property `arangodb.useSsl`
- **sslKeyStoreFile**: `String`
Alternative sslKeyStoreFile to context property `arangodb.ssl.keyStoreFile`
- **sslPassPhrase**: `String`
Alternative sslPassPhrase to context property `arangodb.ssl.passPhrase`
- **sslProtocol**: `String`
Alternative sslProtocol to context property `arangodb.ssl.protocol`
- **clazz**: `Class[T]`
The type of the document
**Examples**
```Java
JavaSparkContext sc = ...
ArangoJavaRDD<MyBean> rdd = ArangoSpark.load(sc, "myCollection", new ReadOptions().database("myDB"), MyBean.class);
```
## ArangoRDD.filter
```
ArangoJavaRDD.filter(condition: String): ArangoJavaRDD[T]
```
Adds a filter condition. If used multiple times, the conditions will be combined with a logical AND.
**Arguments**
- **condition**: `String`
The condition for the filter statement. Use `doc` inside to reference the document. e.g. `"doc.name == 'John'"`
**Examples**
```Java
JavaSparkContext sc = ...
ArangoJavaRDD<MyBean> rdd = ArangoSpark.load(sc, "myCollection", new ReadOptions().database("myDB"), MyBean.class);
ArangoJavaRDD<MyBean> rddFiltered = rdd.filter("doc.test <= 50");
```

View File

@ -0,0 +1,5 @@
<!-- don't edit here, its from https://@github.com/arangodb/arangodb-spark-connector.git / docs/Drivers/ -->
# ArangoDB Spark Connector - Reference
- [Scala](Scala.md)
- [Java](Java.md)

View File

@ -0,0 +1,208 @@
<!-- don't edit here, its from https://@github.com/arangodb/arangodb-spark-connector.git / docs/Drivers/ -->
# ArangoDB Spark Connector - Scala Reference
## ArangoSpark.save
```
ArangoSpark.save[T](rdd: RDD[T], collection: String, options: WriteOptions)
```
```
ArangoSpark.save[T](dataset: Dataset[T], collection: String, options: WriteOptions)
```
Save data from rdd or dataset into ArangoDB
**Arguments**
- **rdd**/**dataset**: `RDD[T]` or `Dataset[T]`
The rdd or dataset with the data to save
- **collection**: `String`
The collection to save in
- **options**: `WriteOptions`
- **database**: `String`
Database to write into
- **hosts**: `String`
Alternative hosts to context property `arangodb.hosts`
- **user**: `String`
Alternative user to context property `arangodb.user`
- **password**: `String`
Alternative password to context property `arangodb.password`
- **useSsl**: `Boolean`
Alternative useSsl to context property `arangodb.useSsl`
- **sslKeyStoreFile**: `String`
Alternative sslKeyStoreFile to context property `arangodb.ssl.keyStoreFile`
- **sslPassPhrase**: `String`
Alternative sslPassPhrase to context property `arangodb.ssl.passPhrase`
- **sslProtocol**: `String`
Alternative sslProtocol to context property `arangodb.ssl.protocol`
**Examples**
```Scala
val sc: SparkContext = ...
val documents = sc.parallelize((1 to 100).map { i => MyBean(i) })
ArangoSpark.save(documents, "myCollection", WriteOptions("myDB"))
```
## ArangoSpark.saveDF
```
ArangoSpark.saveDF(dataframe: DataFrame, collection: String, options: WriteOptions)
```
Save data from dataframe into ArangoDB
**Arguments**
- **dataframe**: `DataFrame`
The dataFrame with the data to save
- **collection**: `String`
The collection to save in
- **options**: `WriteOptions`
- **database**: `String`
Database to write into
- **hosts**: `String`
Alternative hosts to context property `arangodb.hosts`
- **user**: `String`
Alternative user to context property `arangodb.user`
- **password**: `String`
Alternative password to context property `arangodb.password`
- **useSsl**: `Boolean`
Alternative useSsl to context property `arangodb.useSsl`
- **sslKeyStoreFile**: `String`
Alternative sslKeyStoreFile to context property `arangodb.ssl.keyStoreFile`
- **sslPassPhrase**: `String`
Alternative sslPassPhrase to context property `arangodb.ssl.passPhrase`
- **sslProtocol**: `String`
Alternative sslProtocol to context property `arangodb.ssl.protocol`
**Examples**
```Scala
val sc: SparkContext = ...
val documents = sc.parallelize((1 to 100).map { i => MyBean(i) })
val sql: SQLContext = SQLContext.getOrCreate(sc);
val df = sql.createDataFrame(documents, classOf[MyBean])
ArangoSpark.saveDF(df, "myCollection", WriteOptions("myDB"))
```
## ArangoSpark.load
```
ArangoSpark.load[T: ClassTag](sparkContext: SparkContext, collection: String, options: ReadOptions): ArangoRDD[T]
```
Load data from ArangoDB into rdd
**Arguments**
- **sparkContext**: `SparkContext`
The sparkContext containing the ArangoDB configuration
- **collection**: `String`
The collection to load data from
- **options**: `ReadOptions`
- **database**: `String`
Database to read from
- **hosts**: `String`
Alternative hosts to context property `arangodb.hosts`
- **user**: `String`
Alternative user to context property `arangodb.user`
- **password**: `String`
Alternative password to context property `arangodb.password`
- **useSsl**: `Boolean`
Alternative useSsl to context property `arangodb.useSsl`
- **sslKeyStoreFile**: `String`
Alternative sslKeyStoreFile to context property `arangodb.ssl.keyStoreFile`
- **sslPassPhrase**: `String`
Alternative sslPassPhrase to context property `arangodb.ssl.passPhrase`
- **sslProtocol**: `String`
Alternative sslProtocol to context property `arangodb.ssl.protocol`
**Examples**
```Scala
val sc: SparkContext = ...
val rdd = ArangoSpark.load[MyBean](sc, "myCollection", ReadOptions("myDB"))
```
## ArangoRDD.filter
```
ArangoRDD.filter(condition: String): ArangoRDD[T]
```
Adds a filter condition. If used multiple times, the conditions will be combined with a logical AND.
**Arguments**
- **condition**: `String`
The condition for the filter statement. Use `doc` inside to reference the document. e.g. `"doc.name == 'John'"`
**Examples**
```Scala
val sc: SparkContext = ...
val rdd = ArangoSpark.load[MyBean](sc, "myCollection").filter("doc.name == 'John'")
```

View File

@ -1,114 +1,114 @@
<!-- don't edit here, its from https://@github.com/arangodb/spring-data.git / docs/Drivers/ -->
# Spring Data ArangoDB - Getting Started
## Supported versions
| Spring Data ArangoDB | Spring Data | ArangoDB |
|----------------------|-------------|----------------|
| 1.0.0 | 1.13.x | 3.0*, 3.1, 3.2 |
| 2.0.0 | 2.0.x | 3.0*, 3.1, 3.2 |
Spring Data ArangoDB requires ArangoDB 3.0 or higher - which you can download [here](https://www.arangodb.com/download/) - and Java 8 or higher.
**Note**: ArangoDB 3.0 does not support the default transport protocol [VelocyStream](https://github.com/arangodb/velocystream). A manual switch to HTTP is required. See chapter [configuration](#configuration). Also ArangoDB 3.0 does not support geospatial queries.
## Maven
To use Spring Data ArangoDB in your project, your build automation tool needs to be configured to include and use the Spring Data ArangoDB dependency. Example with Maven:
```xml
<dependency>
<groupId>com.arangodb</groupId>
<artifactId>arangodb-spring-data</artifactId>
<version>{version}</version>
</dependency>
```
There is a [demonstration app](https://github.com/arangodb/spring-data-demo), which contains common use cases and examples of how to use Spring Data ArangoDB's functionality.
## Configuration
You can use Java to configure your Spring Data environment as shown below. Setting up the underlying driver (`ArangoDB.Builder`) with default configuration automatically loads a properties file `arangodb.properties`, if it exists in the classpath.
```java
@Configuration
@EnableArangoRepositories(basePackages = { "com.company.mypackage" })
public class MyConfiguration extends AbstractArangoConfiguration {
@Override
public ArangoDB.Builder arango() {
return new ArangoDB.Builder();
}
@Override
public String database() {
// Name of the database to be used
return "example-database";
}
}
```
The driver is configured with some default values:
property-key | description | default value
-------------|-------------|--------------
arangodb.host | ArangoDB host | 127.0.0.1
arangodb.port | ArangoDB port | 8529
arangodb.timeout | socket connect timeout(millisecond) | 0
arangodb.user | Basic Authentication User |
arangodb.password | Basic Authentication Password |
arangodb.useSsl | use SSL connection | false
To customize the configuration, the parameters can be changed in the Java code.
```java
@Override
public ArangoDB.Builder arango() {
ArangoDB.Builder arango = new ArangoDB.Builder()
.host("127.0.0.1")
.port(8429)
.user("root");
return arango;
}
```
In addition you can use the *arangodb.properties* or a custom properties file to supply credentials to the driver.
*Properties file*
```
arangodb.host=127.0.0.1
arangodb.port=8529
# arangodb.hosts=127.0.0.1:8529 could be used instead
arangodb.user=root
arangodb.password=
```
*Custom properties file*
```java
@Override
public ArangoDB.Builder arango() {
InputStream in = MyClass.class.getResourceAsStream("my.properties");
ArangoDB.Builder arango = new ArangoDB.Builder()
.loadProperties(in);
return arango;
}
```
**Note**: When using ArangoDB 3.0 it is required to set the transport protocol to HTTP and fetch the dependency `org.apache.httpcomponents:httpclient`.
```java
@Override
public ArangoDB.Builder arango() {
ArangoDB.Builder arango = new ArangoDB.Builder()
.useProtocol(Protocol.HTTP_JSON);
return arango;
}
```
```xml
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.1</version>
</dependency>
```

View File

@ -1,14 +1,14 @@
<!-- don't edit here, its from https://@github.com/arangodb/spring-data.git / docs/Drivers/ -->
# Spring Data ArangoDB
- [Getting Started](GettingStarted/README.md)
- [Reference](Reference/README.md)
## Learn more
- [ArangoDB](https://www.arangodb.com/)
- [Demo](https://github.com/arangodb/spring-data-demo)
- [JavaDoc 1.0.0](http://arangodb.github.io/spring-data/javadoc-1_0/index.html)
- [JavaDoc 2.0.0](http://arangodb.github.io/spring-data/javadoc-2_0/index.html)
- [JavaDoc Java driver](http://arangodb.github.io/arangodb-java-driver/javadoc-4_3/index.html)
- [Changelog](https://github.com/arangodb/spring-data/blob/master/ChangeLog.md#changelog)

File diff suppressed because it is too large.

View File

@ -371,7 +371,7 @@ in a specific state on startup. the options for this value are:
- any: any directory state allowed
### Journal size
### Journal size (MMFiles only)
@startDocuBlock databaseMaximalJournalSize
@ -613,4 +613,4 @@ an **highly experimental** feature and it is to be expected that certain functio
some AQL functions etc) will be missing or severely broken. Nevertheless you may wish to reduce the footprint of ArangoDB by disabling V8.
This option is expected to **only** work reliably on a _Single-Server_, _Agency_ or _Active-Failover_ setup. Do not try to use
this feature on a _Coordinator_, or _DBServer_
this feature on a _Coordinator_, or _DBServer_

View File

@ -215,3 +215,10 @@ is committed automatically and a new transaction is started.
If enabled, throttles the ingest rate of writes if necessary to reduce chances
of compactions getting too far behind and blocking incoming writes. This option
is `true` by default.
`--rocksdb.sync-interval`
The interval (in milliseconds) that ArangoDB will use to automatically
synchronize data in RocksDB's write-ahead logs to disk. Automatic syncs will
only be performed for not-yet synchronized data, and only for operations that
have been executed without the *waitForSync* attribute.

View File

@ -4,6 +4,8 @@ Durability Configuration
Global Configuration
--------------------
**Pre-setting on database creation**
There are global configuration values for durability, which can be adjusted by
specifying the following configuration options:
@ -15,34 +17,16 @@ specifying the following configuration options:
@startDocuBlock WalLogfileSyncInterval
`--rocksdb.sync-interval`
Per-collection configuration
----------------------------
The interval (in milliseconds) that ArangoDB will use to automatically
synchronize data in RocksDB's write-ahead logs to disk. Automatic syncs will
only be performed for not-yet synchronized data, and only for operations that
have been executed without the *waitForSync* attribute.
You can also configure the durability behavior on a per-collection basis.
Use the ArangoDB shell to change these properties.
**Adjusting at run-time**
@startDocuBlock collectionProperties
Per-operation configuration
---------------------------
Many data-modification operations and also ArangoDB's transactions allow specifying
a *waitForSync* attribute, which when set ensures the operation data has been
synchronized to disk when the operation returns.
Disk-Usage Configuration
------------------------
The amount of disk space used by ArangoDB is determined by a few configuration
options.
Global Configuration
--------------------
The total amount of disk storage required by ArangoDB is determined by the size of
The total amount of disk storage required by the MMFiles engine is determined by the size of
the write-ahead logfiles plus the sizes of the collection journals and datafiles.
There are the following options for configuring the number and sizes of the write-ahead
@ -72,8 +56,36 @@ are is determined by the following global configuration value:
@startDocuBlock databaseMaximalJournalSize
Per-collection configuration
----------------------------
**Pre-setting during collection creation**
You can also configure the durability behavior on a per-collection basis.
Use the ArangoDB shell to change these properties.
@startDocuBlock collectionProperties
**Adjusting at run-time**
The journal size can also be adjusted on a per-collection level using the collection's
*properties* method.
Per-operation configuration
---------------------------
Many data-modification operations and also ArangoDB's transactions allow specifying
a *waitForSync* attribute, which when set ensures the operation data has been
synchronized to disk when the operation returns.
Disk-Usage Configuration (MMFiles engine)
-----------------------------------------
The amount of disk space used by the MMFiles engine is determined by a few configuration
options.

View File

@ -163,8 +163,8 @@ console.timeEnd
Stops a timer created by a call to *time* and logs the time elapsed.
console.timeEnd
---------------
console.trace
-------------
`console.trace()`

View File

@ -20,7 +20,7 @@ The following methods exist on the collection object (returned by *db.name*):
*Indexes*
* [collection.dropIndex(index)](../../Indexing/WorkingWithIndexes.md#dropping-an-index)
* [collection.dropIndex(index)](../../Indexing/WorkingWithIndexes.md#dropping-an-index-via-a-collection-handle)
* [collection.ensureIndex(description)](../../Indexing/WorkingWithIndexes.md#creating-an-index)
* [collection.getIndexes(name)](../../Indexing/WorkingWithIndexes.md#listing-all-indexes-of-a-collection)
* [collection.index(index)](../../Indexing/WorkingWithIndexes.md#index-identifiers-and-handles)

View File

@ -18,7 +18,7 @@ The following methods exists on the *_db* object:
*Indexes*
* [db._index(index)](../../Indexing/WorkingWithIndexes.md#fetching-an-index-by-handle)
* [db._dropIndex(index)](../../Indexing/WorkingWithIndexes.md#dropping-an-index)
* [db._dropIndex(index)](../../Indexing/WorkingWithIndexes.md#dropping-an-index-via-a-database-handle)
*Properties*

View File

@ -22,7 +22,7 @@ with 3 _Agents_, and two single server instances.
We will assume that all processes run on the same machine (127.0.0.1). Such a scenario
should be used for testing only.
### Agency
### Local Test Agency
To start up an _Agency_ you first have to activate it. This is done by providing
the option `--agency.activate true`.
@ -67,7 +67,7 @@ arangod --server.endpoint tcp://0.0.0.0:5003 \
--database.directory agent3 &
```
### Single Server Instances
### Single Server Test Instances
To start the two single server instances, you can use the following commands:
@ -121,7 +121,7 @@ If we use:
then the commands you have to use are reported in the following subparagraphs.
### Agency
### Agency
On 192.168.1.1:

View File

@ -1,3 +1,4 @@
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Using the ArangoDB Starter
==========================
@ -67,4 +68,3 @@ The _Starter_ will decide on which 2 machines to run a single server instance.
To override this decision (only valid while bootstrapping), add a
`--cluster.start-single=false` to the machine where the single server
instance should _not_ be started.

View File

@ -1,3 +1,4 @@
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Deploying using the ArangoDB Starter
====================================

View File

@ -23,7 +23,7 @@ In this paragraph we will include commands to manually start a Cluster with 3 _A
We will assume that all processes run on the same machine (127.0.0.1). Such a scenario
should be used for testing only.
### Agency
### Local Test Agency
To start up an _Agency_ you first have to activate it. This is done by providing
the option `--agency.activate true`.
@ -68,7 +68,7 @@ arangod --server.endpoint tcp://0.0.0.0:5003 \
--database.directory agent3 &
```
### DBServers and Coordinators
### Local Test DBServers and Coordinators
These two roles share a common set of relevant options. First you should specify
the role using `--cluster.my-role`. This can either be `PRIMARY` (a database server)
@ -165,7 +165,7 @@ If we use:
then the commands you have to use are reported in the following subparagraphs.
### Agency
### Agency
On 192.168.1.1:

View File

@ -1,3 +1,4 @@
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Using the ArangoDB Starter
==========================

View File

@ -1,3 +1,4 @@
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Using the ArangoDB Starter
==========================

View File

@ -174,5 +174,5 @@ curl -L localhost:8529/_api/agency/write -d '[[{"foo":["bar","baz","qux"]}]]'
are equivalent for example and will create and fill an array at `/foo`. Here, again, the outermost array is the container for the transaction arrays.
A complete guide of the API can be found in the [API section](../../HTTP/Agency/index.html).
A complete guide of the API can be found in the [API section](../../../HTTP/Agency/index.html).

View File

@ -43,6 +43,7 @@ returns information about the indexes
`getIndexes()`
Returns an array of all indexes defined for the collection.
Since ArangoDB 3.4, `indexes()` is an alias for `getIndexes()`.
Note that `_key` implicitly has an index assigned to it.
@ -116,7 +117,7 @@ regardless of the value of this attribute.
### Dropping an index
### Dropping an index via a collection handle
<!-- arangod/V8Server/v8-vocindex.cpp -->
@ -206,7 +207,7 @@ Returns the index with *index-handle* or null if no such index exists.
@endDocuBlock IndexHandle
### Dropping an index
### Dropping an index via a database handle
<!-- js/server/modules/@arangodb/arango-database.js -->

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
# ArangoDB Starter Architecture
## What does the Starter do

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
# Option reference
The ArangoDB Starter provides a lot of options to control various aspects

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
# ArangoDB Starter
This chapter documents the _ArangoDB Starter_.

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
# Security
Securing an ArangoDB deployment involves encrypting its connections and

View File

@ -174,7 +174,7 @@ RETURN { found: OLD, updated: NEW }
A more detailed description of `UPSERT` can be found here:
http://jsteemann.github.io/blog/2015/03/27/preview-of-the-upsert-command/
### Miscellaneous changes
### Miscellaneous AQL changes
When errors occur inside AQL user functions, the error message will now contain a stacktrace,
indicating the line of code in which the error occurred. This should make debugging AQL user functions

View File

@ -403,7 +403,7 @@ If the query cache is operated in `demand` mode, it can be controlled per query
if the cache should be checked for a result.
### Miscellaneous changes
### Miscellaneous AQL changes
### Optimizer

View File

@ -361,8 +361,8 @@ Authorization
Read more in the [overview](../Administration/ManagingUsers/README.md).
Foxx
----
Foxx and authorization
----------------------
* the [cookie session transport](../Foxx/Reference/Sessions/Transports/Cookie.md) now supports all options supported by the [cookie method of the response object](../Foxx/Reference/Routers/Response.md#cookie).

View File

@ -15,36 +15,870 @@ view in ArangoDB.
[ArangoSearch](../Views/ArangoSearch/README.md)
New geo index implementation
----------------------------
The geo index in ArangoDB has been reimplemented based on S2 library functionality.
The new geo index allows indexing points, but also indexing of more complex geographical
objects. The new implementation is much faster than the previous one for the RocksDB engine.
Additionally, several AQL functions have been added to facilitate working with
geographical data: `GEO_POINT`, `GEO_MULTIPOINT`, `GEO_POLYGON`, `GEO_LINESTRING` and
`GEO_MULTILINESTRING`. These functions will produce GeoJSON objects.
Additionally there are new geo AQL functions `GEO_CONTAINS`, `GEO_INTERSECTS` and `GEO_EQUALS`
for querying and comparing GeoJSON objects.
RocksDB storage engine
----------------------
### RocksDB as default storage engine
The default storage engine in ArangoDB 3.4 is now the RocksDB engine.
Previous versions of ArangoDB used MMFiles as the default storage engine. This
change will have an effect for new ArangoDB installations only, and only if no
storage engine is selected explicitly or the storage engine selected is "auto".
In this case, a new installation will default to the RocksDB storage engine.
Existing ArangoDB installations upgraded to 3.4 from previous versions will
continue to use their previously selected storage engine.
### Optimized binary storage format
The RocksDB storage engine in ArangoDB 3.4 now also uses an optimized binary
format for storing documents. This format allows inserting new documents in
an order that RocksDB prefers. Using the new format will reduce the number
of compactions that RocksDB needs to do for the ArangoDB documents stored,
allowing for better long-term insertion performance.
The new binary format will only be used for new installations that start with
ArangoDB 3.4. Existing installations upgraded from previous versions will
continue to use the previous binary format.
Note that there is no need to use the new binary format for installations upgraded
from 3.3, as the old binary format will continue to work as before.
In order to use the new binary format with existing data, it is required to
create a logical dump of the database data, shut down the server, erase the
database directory and restore the data from the logical dump.
### Better control of WAL sync interval
ArangoDB 3.4 also provides a new configuration option `--rocksdb.sync-interval`
to control how frequently ArangoDB will automatically synchronize data in RocksDB's
write-ahead log (WAL) files to disk. Automatic syncs will only be performed for
not-yet synchronized data, and only for operations that have been executed without
the *waitForSync* attribute.
Automatic synchronization of RocksDB WAL file data is performed by a background
thread in ArangoDB. The default sync interval is 100 milliseconds. This can be
adjusted so syncs happen more or less frequently.
### Reduced replication catch-up time
The catch-up time for comparing the contents of two collections (or shards) on two
different hosts via the incremental replication protocol has been reduced when using
the RocksDB storage engine.
### Improved geo index performance
The rewritten geo index implementation in 3.4 speeds up the RocksDB-based geo index
functionality by a factor of 3 to 6 for many common cases when compared to the
RocksDB-based geo index in 3.3.
A notable implementation detail of previous versions of ArangoDB was that accessing
a RocksDB collection with a geo index acquired a collection-level lock. This severely
limited concurrent access to RocksDB collections with geo indexes in previous
versions. This requirement is now gone and no extra locks need to be acquired when
accessing a RocksDB collection with a geo index.
### Optional caching for documents and primary index values
The RocksDB engine now provides a new per-collection property `cacheEnabled` which
enables in-memory caching of documents and primary index entries. This can potentially
speed up point-lookups significantly, especially if collection have a subset of frequently
accessed documents.
The option can be enabled for a collection as follows:
```
db.<collection>.properties({ cacheEnabled: true });
```
If the cache is enabled, it will be consulted when reading documents and primary index
entries for the collection. If there is a cache miss and the document or primary index
entry has to be looked up from the RocksDB storage engine, the cache will be populated.
The per-collection cache utilization for primary index entries can be checked via the
command `db.<collection>.indexes(true)`, which will provide the attributes `cacheInUse`,
`cacheSize` and `cacheLifeTimeHitRate`.
Memory for the documents and primary index entries cache will be provided by ArangoDB's
central cache facility, whose maximal size can be configured by adjusting the value of
the startup option `--cache.size`.
Please note that caching may adversely affect the performance for collections that are
frequently updated. This is because cache entries need to be invalidated whenever documents
in the collection are updated, replaced or removed. Additionally, enabling caching will
subtract memory from the overall cache, so that less RAM may be available for other
items that use in-memory caching (e.g. edge index entries). It is therefore recommended
to turn on caching only for dedicated collections for which the caching effects have been
confirmed to be positive.
### Exclusive collection access option
In contrast to the MMFiles engine, the RocksDB engine does not require collection-level
locks. This is good in general because it allows concurrent access to a RocksDB
collection.
Reading documents does not require any locks with the RocksDB engine, and writing documents
will acquire per-document locks. This means that different documents can be modified
concurrently by different transactions.
When concurrent transactions modify the same documents in a RocksDB collection, there
will be a write-write conflict, and one of the transactions will be aborted. This is
incompatible with the MMFiles engine, in which write-write conflicts are impossible due
to its collection-level locks. In the MMFiles engine, a write transaction always has
exclusive access to a collection, and locks out all other writers.
While making access to a collection exclusive is almost always undesired from the
throughput perspective, it can greatly simplify client application development. Therefore
the RocksDB engine now provides optional exclusive access to collections on a
per-query/per-transaction basis.
For AQL queries, all data-modification operations now support the `exclusive` option, e.g.
FOR doc IN collection
UPDATE doc WITH { updated: true } OPTIONS { exclusive: true }
JavaScript-based transactions can specify which collections to lock exclusively in the
`exclusive` sub-attribute of their `collections` attribute:
```js
db._executeTransaction({
collections: {
exclusive: [ "collection" ]
},
...
});
```
Note that using exclusive access for RocksDB collections will serialize write operations
to RocksDB collections, so it should be used with extreme care.
### RocksDB library upgrade
The version of the bundled RocksDB library was upgraded from 5.9 to 5.14.2.
Collection and document operations
----------------------------------
### Repsert operation
The existing functionality for inserting documents got an extra option to turn
an insert into a replace, in case a document with the specified `_key` value
already exists. This type of operation is called a "Repsert" (Replace-insert).
Using the new option client applications do not need to check first whether a
given document exists, but can use a single atomic operation to conditionally insert
or replace it.
Here is an example of control flow that was previously necessary to conditionally
insert or replace a document:
```js
doc = { _key: "someKey", value1: 123, value2: "abc" };
// check if the document already exists...
if (!db.collection.exists(doc._key)) {
// ... document did not exist, so insert it
db.collection.insert(doc);
} else {
// ... document did exist, so replace it
db.collection.replace(doc._key, doc);
}
```
With ArangoDB 3.4 this can now be simplified to:
```js
doc = { _key: "someKey", value1: 123, value2: "abc" };
// insert the document if it does not exist yet, otherwise replace it
db.collection.insert(doc, { overwrite: true });
```
Client applications can also optionally retrieve the old revision of the document
in case the insert turned into a replace operation:
```js
doc = { _key: "someKey", value1: 123, value2: "abc" };
// insert the document if it does not exist yet, otherwise replace it
// in case of a replace, previous will be populated, in case of an
// insert, previous will be undefined
previous = db.collection.insert(doc, { overwrite: true, returnOld: true }).old;
```
The same functionality is available for the document insert method in the
HTTP REST API. The HTTP endpoint for `POST /_api/document` will now accept the
optional URL parameters `overwrite` and `returnOld`.
AQL also supports making an INSERT a conditional REPSERT, by setting the option
`overwrite` for it:
```
INSERT { _key: "someKey", value1: 123, value2: "abc" } INTO collection OPTIONS { overwrite: true }
```
Please note that in a cluster setup the Repsert operation requires the collection
to be sharded by `_key`.
### Graph API extensions
The REST APIs for modifying graphs at endpoint `/_api/gharial` now support returning
the old revision of vertices / edges after modifying them. The APIs also support
returning the just-inserted vertex / edge. This is in line with the already existing
single-document functionality provided at endpoint `/_api/document`.
The old/new revisions can be accessed by passing the URL parameters `returnOld` and
`returnNew` to the following endpoints:
* /_api/gharial/<graph>/vertex/<collection>
* /_api/gharial/<graph>/edge/<collection>
The exception from this is that the HTTP DELETE verb for these APIs does not
support `returnOld` because that would make the existing API incompatible.
### Additional key generators
In addition to the existing key generators `traditional` (which is still the
default key generator) and `autoincrement`, ArangoDB 3.4 adds the following key
generators:
* `padded`:
The `padded` key generator generates keys of a fixed length (16 bytes) in
ascending lexicographical sort order. This is ideal for usage with the RocksDB
engine, which will slightly benefit keys that are inserted in lexicographically
ascending order. The key generator can be used in a single-server or cluster.
* `uuid`: the `uuid` key generator generates universally unique 128 bit keys, which
are stored in hexadecimal human-readable format. This key generator can be used
in a single-server or cluster to generate "seemingly random" keys. The keys
produced by this key generator are not lexicographically sorted.
Example for the *padded* key generator:
```
db._create("padded", { keyOptions: { type: "padded" } });
db.padded.insert({});
{
"_id" : "padded/0000000009d0d1c0",
"_key" : "0000000009d0d1c0",
"_rev" : "_XI6VqNK--_"
}
db.padded.insert({});
{
"_id" : "padded/0000000009d0d1c4",
"_key" : "0000000009d0d1c4",
"_rev" : "_XI6VquC--_"
}
```
Example for the *uuid* key generator:
```js
db._create("uuid", { keyOptions: { type: "uuid" } });
db.uuid.insert({});
{
"_id" : "uuid/16d5dc96-79d6-4803-b547-5a34ce795099",
"_key" : "16d5dc96-79d6-4803-b547-5a34ce795099",
"_rev" : "_XI6VPc2--_"
}
db.uuid.insert({});
{
"_id" : "uuid/0af83d4a-56d4-4553-a97d-c7ed2644dc09",
"_key" : "0af83d4a-56d4-4553-a97d-c7ed2644dc09",
"_rev" : "_XI6VQgO--_"
}
```
### Miscellaneous improvements
The command `db.<collection>.indexes()` was added as an alias for the already existing
`db.<collection>.getIndexes()` method for retrieving all indexes of a collection. The
alias name is more consistent with the already existing method names for retrieving
all databases and collections.
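For example, with `mycollection` as a placeholder collection name, both calls return the same list of index definitions:

```js
db.mycollection.indexes();     // new alias
db.mycollection.getIndexes();  // existing method
```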
Cluster improvements
--------------------
### Load-balancer support
ArangoDB now supports running multiple coordinators behind a load balancer that
randomly routes client requests to the different coordinators. It is not required
anymore that load balancers implement session or connection stickiness on behalf
of ArangoDB.
In particular, the following ArangoDB APIs were extended to work well with load
balancing:
* the cursor API at endpoint `/_api/cursor`
* the jobs API at endpoint `/_api/job`
* the tasks API at endpoint `/_api/tasks`
* Pregel APIs at endpoint `/_api/pregel`
Some of these APIs build up coordinator-local state in memory when being first
accessed, and allow accessing further data using follow-up requests. This caused
problems in previous versions of ArangoDB, when load balancers routed the follow-up
requests to these APIs to different coordinators that did not have access to
the other coordinator's in-memory state.
With ArangoDB 3.4, if such an API is accessed by a follow-up request that refers
to state being created on a different coordinator, the actually accessed coordinator
will forward the client request to the correct coordinator. Client applications
and load balancers do not need to be aware of which coordinator they had used
for the previous requests, though from a performance point of view accessing the
same coordinator for a sequence of requests will still be beneficial.
If a coordinator forwards a request to a different coordinator, it will send the
client an extra HTTP header `x-arango-request-served-by` with the id of the
coordinator it forwarded the request to. Client applications or load balancers
can optionally use that information to make follow-up requests to the "correct"
coordinator to save the forwarding.
### Refusal to start mixed-engine clusters
Starting a cluster with coordinators and DB servers using different storage
engines is not supported. Doing it anyway will now log an error and abort a
coordinator's startup.
Previous versions of ArangoDB did not detect the usage of different storage
engines in a cluster, but the runtime behavior of the cluster was undefined.
### Startup safety checks
The new option `--cluster.require-persisted-id` can be used to prevent the startup
of a cluster node using the wrong data directory.
If the option is set to true, then the ArangoDB instance will only start if a
UUID file (containing the instance's cluster-wide ID) is found in the database
directory on startup. Setting this option will make sure the instance is started
using an already existing database directory and not a new one.
For the first start, the UUID file must either be created manually or the option
must be set to `false` for the initial startup and later be changed to `true`.
### Coordinator storage engine
In previous versions of ArangoDB, cluster coordinator nodes used the storage
engine selected by the database administrator (i.e. MMFiles or RocksDB).
Although all database and document data was forwarded from coordinators to be
stored on the database servers and not on the coordinator nodes, the storage
engine used on the coordinator was checking and initializing its on-disk state
on startup.
Especially because no "real" data was stored by the coordinator's storage engine,
using a storage engine here did not provide any value but only introduced
unnecessary potential points of failure.
As of ArangoDB 3.4, cluster coordinator nodes will now use an internal "cluster"
storage engine, which actually does not store any data. That prevents 3.4
coordinators from creating any files or directories inside the database directory
except the meta data files such as `ENGINE`, `LOCK`, `SERVER`, `UUID` and `VERSION`.
And as no files need to be read on coordinator startup except these mentioned
files, it also reduces the possibility of data corruption on coordinator nodes.
AQL
---
### AQL query profiling
AQL queries can now be executed with optional profiling, using ArangoDB 3.4's new
`db._queryProfile()` function.
This new function is a hybrid of the already existing `db._query()` and `db._explain()`
functions:
* `db._query()` will execute an AQL query, but not show the execution plan nor
runtime profile information
* `db._explain()` will show the query's execution plan, but not execute the query
* `db._queryProfile()` will run the query, collect the runtime costs of each component
of the query, and finally show the query's execution plan with actual runtime information.
This is very useful for debugging AQL query performance and optimizing queries.
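A minimal sketch of using it from arangosh, with the function name as given above and a placeholder collection and filter:

```js
// runs the query and prints its execution plan annotated with actual runtime
// figures (e.g. time spent and items processed per execution node)
db._queryProfile("FOR doc IN mycollection FILTER doc.value > 10 RETURN doc");
```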
### Revised cluster-internal AQL protocol
When running an AQL query in a cluster, the coordinator has to distribute the
individual parts of the AQL query to the relevant shards that will participate
in the execution of the query.
Up to and including ArangoDB 3.3, the coordinator deployed the query parts to the
individual shards one by one. The more shards were involved in a query, the more
cluster-internal requests this required, and the longer the setup took.
In ArangoDB 3.4 the coordinator will now only send a single request to each of
the involved database servers (in contrast to one request per shard involved).
This will speed up the setup phase of most AQL queries, which will be noticeable for
queries that affect a lot of shards.
The AQL setup has been changed from a two-step protocol to a single-step protocol,
which additionally reduces the total number of cluster-internal requests necessary
for running an AQL query.
The internal protocol and APIs have been adjusted so that AQL queries can now get
away with fewer cluster-internal requests than in 3.3, also after the setup phase.
Finally, there is now an extra optimization for trivial AQL queries that will only
access a single document by its primary key (see below).
### AQL functions added
The following AQL functions have been added in ArangoDB 3.4:
* `TO_BASE64`: creates the base64-encoded representation of a value
* `TO_HEX`: creates a hex-encoded string representation of a value
* `ENCODE_URI_COMPONENT`: URI-encodes a string value, for later usage in URLs
* `SOUNDEX`: calculates the soundex fingerprint of a string value
* `ASSERT`: aborts a query if a condition is not met
* `WARN`: makes a query produce a warning if a condition is not met
* `IS_KEY`: this function checks if the value passed to it can be used as a document
key, i.e. as the value of the `_key` attribute for a document
* `SORTED`: will return a sorted version of the input array using AQL's internal
comparison order
* `SORTED_UNIQUE`: same as `SORTED`, but additionally removes duplicates
* `COUNT_DISTINCT`: counts the number of distinct / unique items in an array
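A quick sketch exercising a few of the functions listed above from arangosh (input values are arbitrary examples):

```js
db._query(`
  RETURN {
    b64: TO_BASE64("foobar"),
    hex: TO_HEX("foobar"),
    uri: ENCODE_URI_COMPONENT("a/b c"),
    soundex: SOUNDEX("example"),
    sorted: SORTED_UNIQUE([ 3, 1, 2, 3 ]),
    distinct: COUNT_DISTINCT([ 3, 1, 2, 3 ])
  }
`).toArray();
```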
The following AQL functions have been added to make working with geographical
data easier:
* `GEO_POINT`
* `GEO_MULTIPOINT`
* `GEO_POLYGON`
* `GEO_LINESTRING`
* `GEO_MULTILINESTRING`
* `GEO_CONTAINS`
* `GEO_INTERSECTS`
* `GEO_EQUALS`.
The first five functions will produce GeoJSON objects from coordinate data. The
latter three functions can be used for querying and comparing GeoJSON objects.
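As a hedged sketch (coordinates are made up, and GEO_POINT is assumed to take longitude first, then latitude):

```js
db._query(`
  LET point   = GEO_POINT(6.537, 50.332)
  LET polygon = GEO_POLYGON([ [6, 50], [7, 50], [7, 51], [6, 51], [6, 50] ])
  RETURN GEO_CONTAINS(polygon, point)
`).toArray();  // true if the polygon contains the point
```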
The following AQL functions can now be used as aggregation functions in a
COLLECT statement:
* `UNIQUE`
* `SORTED_UNIQUE`
* `COUNT_DISTINCT`
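For illustration, a COLLECT using two of these as aggregation expressions (collection and attribute names are placeholders):

```js
db._query(`
  FOR doc IN mycollection
    COLLECT group = doc.category
    AGGREGATE values = UNIQUE(doc.value), distinctValues = COUNT_DISTINCT(doc.value)
    RETURN { group: group, values: values, distinctValues: distinctValues }
`).toArray();
```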
### Distributed COLLECT
In the general case, AQL COLLECT operations are expensive to execute in a cluster,
because the database servers need to send all shard-local data to the coordinator
for a centralized aggregation.
The AQL query optimizer can push some parts of certain COLLECT operations to the
database servers so they can do a per-shard aggregation. The database servers can
then send only the already aggregated results to the coordinator for a final aggregation.
For several queries this will reduce the amount of data that has to be transferred
between the database servers and the coordinator to a great extent, and thus
will speed up these queries. Work on this has started with ArangoDB 3.3.5, but
ArangoDB 3.4 allows more cases in which COLLECT operations can partially be pushed to
the database servers.
In ArangoDB 3.3, the following aggregation functions could make use of a distributed
COLLECT in addition to `COLLECT WITH COUNT INTO` and `RETURN DISTINCT`:
* `COUNT`
* `SUM`
* `MIN`
* `MAX`
ArangoDB 3.4 additionally enables distributed COLLECT queries that use the following
aggregation functions:
* `AVERAGE`
* `VARIANCE`
* `VARIANCE_SAMPLE`
* `STDDEV`
* `STDDEV_SAMPLE`
### Native AQL function implementations
All built-in AQL functions now have a native implementation in C++.
Previous versions of ArangoDB had AQL function implementations in both C++ and
in JavaScript.
The JavaScript implementations of AQL functions were powered by the V8 JavaScript
engine, which first required the conversion of all function input into V8's own
data structures, and a later conversion of the function result data into ArangoDB's
native format.
As all AQL functions are now exclusively implemented in native C++, no more
conversions have to be performed to invoke any of the built-in AQL functions.
This will considerably speed up the following AQL functions that had a
JavaScript implementation in previous versions of ArangoDB, and any AQL expression
that uses any of these functions:
* `APPLY`
* `CALL`
* `CURRENT_USER`
* `DATE_ADD`
* `DATE_COMPARE`
* `DATE_DAYOFWEEK`
* `DATE_DAYOFYEAR`
* `DATE_DAYS_IN_MONTH`
* `DATE_DAY`
* `DATE_DIFF`
* `DATE_FORMAT`
* `DATE_HOUR`
* `DATE_ISO8601`
* `DATE_ISOWEEK`
* `DATE_LEAPYEAR`
* `DATE_MILLISECOND`
* `DATE_MINUTE`
* `DATE_MONTH`
* `DATE_NOW`
* `DATE_QUARTER`
* `DATE_SECOND`
* `DATE_SUBTRACT`
* `DATE_TIMESTAMP`
* `DATE_YEAR`
* `IS_DATESTRING`
* `IS_IN_POLYGON`
* `LTRIM`
* `RTRIM`
* `FIND_FIRST`
* `FIND_LAST`
* `REVERSE`
* `SPLIT`
* `SUBSTITUTE`
* `SHA512`
* `TRANSLATE`
* `WITHIN_RECTANGLE`
Additionally, the AQL functions `FULLTEXT`, `NEAR` and `WITHIN` now use the native
implementations even when executed in a cluster. In previous versions of ArangoDB,
these functions had native implementations for single-server setups only, but fell
back to using the JavaScript variants in a cluster environment.
Apart from saving conversion overhead, another side effect of adding native
implementations for all built-in AQL functions is that AQL does not require the usage
of V8 anymore, except for user-defined functions.
If no user-defined functions are used in AQL, end users do not need to put aside
dedicated V8 contexts for executing AQL queries with ArangoDB 3.4, making server
configuration less complex and easier to understand.
### Single document optimizations
In a cluster, the cost of setting up a distributed query can be considerable for
trivial AQL queries that will only access a single document, e.g.
FOR doc IN collection FILTER doc._key == ... RETURN doc
FOR doc IN collection FILTER doc._key == ... REMOVE doc IN collection
FOR doc IN collection FILTER doc._key == ... REMOVE doc._key IN collection
REMOVE... IN collection
FOR doc IN collection FILTER doc._key == ... UPDATE doc WITH { ... } IN collection
FOR doc IN collection FILTER doc._key == ... UPDATE doc._key WITH { ... } IN collection
UPDATE ... WITH { ... } IN collection
FOR doc IN collection FILTER doc._key == ... REPLACE doc WITH { ... } IN collection
FOR doc IN collection FILTER doc._key == ... REPLACE doc._key WITH { ... } IN collection
REPLACE ... WITH { ... } IN collection
INSERT { ... } INTO collection
All of the above queries will affect at most a single document, identified by its
primary key. The AQL query optimizer can now detect this, and use a specialized
code path for directly carrying out the operation on the participating database
server(s). This special code path bypasses the general AQL query cluster setup and
shutdown, which would have prohibitive costs for these kinds of queries.
In case the optimizer makes use of the special code path, the explain output will
contain the rule `optimize-cluster-single-document-operations`.
The optimization will fire automatically only for queries with the above patterns.
It will only fire when using `_key`, and will be most effective if `_key` is also used
as the collection's shard key.
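To verify whether the optimization fires for a particular query, the explain output can be checked for the rule name mentioned above (collection name and key are placeholders):

```js
// in a cluster, the list of applied optimizer rules printed by the explain
// output should include "optimize-cluster-single-document-operations"
db._explain("FOR doc IN mycollection FILTER doc._key == 'test' RETURN doc");
```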
### Subquery optimizations
The AQL query optimizer can now optimize certain subqueries automatically so that
they perform less work.
The new optimizer rule `optimize-subqueries` will fire in the following situations:
* in case only a few results are used from a non-modifying subquery, the rule will
automatically add a LIMIT statement into the subquery.
For example, the unbounded subquery
LET docs = (
FOR doc IN collection
FILTER ...
RETURN doc
)
RETURN docs[0]
will be turned into a subquery that only produces a single result value:
LET docs = (
FOR doc IN collection
FILTER ...
LIMIT 1
RETURN doc
)
RETURN docs[0]
* in case the result returned by a subquery is not used later but only the number
of subquery results, the optimizer will modify the result value of the subquery
so that it will return constant values instead of potentially more expensive
data structures.
For example, the following subquery returning entire documents
RETURN LENGTH(
FOR doc IN collection
FILTER ...
RETURN doc
)
will be turned into a subquery that returns only simple boolean values:
RETURN LENGTH(
FOR doc IN collection
FILTER ...
RETURN true
)
This saves copying the document data from the subquery to the outer scope and
may also enable follow-up optimizations.
### Fullcount changes
The behavior of the `fullCount` option for AQL query cursors was adjusted to conform
to users' demands. The value returned in the `fullCount` result attribute will now
be produced only by the last `LIMIT` statement on the top level of the query.
`LIMIT` statements in subqueries will not have any effect on the `fullCount` results
any more. This is a change from previous versions of ArangoDB, in which the `fullCount`
value was produced by the last `LIMIT` statement in a query, regardless if the `LIMIT`
was on the top level of the query or in a subquery.
### Improved sparse index support
The AQL query optimizer can now use sparse indexes in more cases than it did in
ArangoDB 3.3. If a sparse index is not used in a query because the query optimizer
cannot prove by itself that the index attribute value can never be `null`, it is now often
useful to add an extra filter condition to the query that requires the sparse index's
attribute to be non-null.
For example, if for the following query there is a sparse index on `value` in any
of the collections, the optimizer cannot prove that `value` can never be `null`:
FOR doc1 IN collection1
FOR doc2 IN collection2
FILTER doc1.value == doc2.value
RETURN [doc1, doc2]
By adding an extra filter condition to the query that excludes `null` values explicitly,
the optimizer in 3.4 will now be able to use a sparse index on `value`:
FOR doc1 IN collection1
FOR doc2 IN collection2
FILTER doc1.value == doc2.value
FILTER doc2.value != null
RETURN [doc1, doc2]
The optimizer in 3.3 was not able to detect this, and refused to use sparse indexes
for such queries.
### Miscellaneous changes
The `NEAR` AQL function now does not default to a limit of 100 documents any more
when no limit value was specified. The previously used limit value of 100 was an
arbitrary limit that acted contrary to user expectations.
Streaming AQL Cursors
---------------------
It is now possible to create AQL query cursors with the new *stream* option.
Specify *true* and the query will be executed in a **streaming** fashion. The query result is
not stored on the server, but calculated on the fly. *Beware*: long-running queries will
need to hold the collection locks for as long as the query cursor exists.
When set to *false* the query will be executed right away in its entirety.
In that case query results are either returned right away (if the result set is small enough),
or stored on the arangod instance and accessible via the cursor API. It is advisable
to *only* use this option on short-running queries *or* without exclusive locks (write locks on MMFiles).
Please note that the query options `cache`, `count` and `fullCount` will not work on streaming
queries. Additionally query statistics, warnings and profiling data will only be available
after the query is finished.
The default value is *false*.
AQL query cursors created by client applications traditionally executed an AQL query,
and built up the entire query result in memory. Once the query completed, the results
were sent back to the client application in chunks of configurable size.
This approach was a good fit for the MMFiles engine with its collection-level locks,
and usually smaller-than-RAM query results. For the RocksDB engine with its document-level
locks and lock-free reads and potentially huge query results, this approach does not always
fit.
ArangoDB 3.4 optionally allows executing AQL queries initiated via the cursor API in a
streaming fashion. The query result will then be calculated on the fly, and results are
sent back to the client application as soon as they become available on the server, even
if the query has not yet completed.
This is especially useful for queries that produce big result sets (e.g.
`FOR doc IN collection RETURN doc` for big collections). Such queries will take very long
to complete without streaming, because the entire query result will be computed first and
stored in memory. Executing such queries in non-streaming fashion may lead to client
applications timing out before receiving the first chunk of data from the server. Additionally,
creating a huge query result set on the server may make it run out of memory, which is also
undesired. Creating a streaming cursor for such queries will solve both problems.
Please be aware that long-running AQL queries will need to hold the collection locks for
as long as the query cursor exists when streaming is used. This is in contrast to the traditional,
non-streaming variant that requires holding the collection locks only during the actual AQL query
execution, but not while results are sent back to the client application.
This is especially important for the MMFiles engine, which uses collection-level locks.
Please note that the query options `cache`, `count` and `fullCount` will not work with streaming
cursors. Additionally, the query statistics, warnings and profiling data will only be available
when the last result batch for the query is sent.
By default, query cursors created via the cursor API are non-streaming in ArangoDB 3.4,
but streaming can be enabled on a per-query basis by setting the `stream` attribute
in the request to the cursor API at endpoint `/_api/cursor`.
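A minimal sketch of a request body for the cursor API with streaming enabled (query string and batch size are placeholders):

```js
// body of a POST /_api/cursor request; "stream": true makes the server
// calculate results lazily instead of materializing them up front
var body = {
  query: "FOR doc IN mycollection RETURN doc",
  batchSize: 1000,
  stream: true
};
```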
However, streaming cursors are enabled for the following parts of ArangoDB in 3.4:
* when exporting data from collections using the arangoexport binary
* when using `db.<collection>.toArray()` from the Arango shell.
Native implementations
----------------------
The following internal and external functionality has been ported from JavaScript-based
implementations to C++-based implementations in ArangoDB 3.4:
* the statistics gathering background thread
* the REST APIs for graph, vertex, and edge management at `/_api/gharial`
* the implementations of all built-in AQL functions
* all other parts of AQL except user-defined functions
By making the listed functionality not use and depend on the V8 JavaScript engine,
the respective functionality can now be invoked more efficiently, without requiring
the conversion of data between ArangoDB's native format and V8's internal format.
As less functionality depends on the V8 JavaScript engine, an ArangoDB 3.4 server
will not require as many V8 contexts as previous versions.
This should reduce problems with servers running out of available V8 contexts or
using a lot of memory just for keeping V8 contexts around.
As a consequence, ArangoDB agency nodes in 3.4 will now turn off the V8 JavaScript
engine at startup automatically.
Single document operations
--------------------------
Foxx
----
Foxx CLI
Security
--------
### Ownership for cursors and jobs
Cursors for AQL query results and jobs created by the APIs at endpoints `/_api/cursor`
and `/_api/job` are now tied to the user that first created the cursor/job.
Follow-up requests to consume or remove data of an already created cursor or job will
now be denied if attempted by a different user.
### Dropped support for SSLv2
ArangoDB 3.4 will not start when attempting to bind the server to a Secure Sockets
Layer (SSL) v2 endpoint. Additionally, the client tools (arangosh, arangoimport,
arangodump, arangorestore etc.) will refuse to connect to an SSLv2-enabled server.
SSLv2 can be considered unsafe nowadays and as such has been disabled in the OpenSSL
library by default in recent versions. ArangoDB is following this step.
Clients that use SSLv2 with ArangoDB should change the protocol from SSLv2 to TLSv12
if possible, by adjusting the value of the `--ssl.protocol` startup option for the
`arangod` server and all client tools.
Client tools
------------
### Arangodump
Arangodump can now dump multiple collections in parallel. This can significantly
reduce the time required to take a backup.
By default, arangodump will use 2 threads for dumping collections. The number of
threads used by arangodump can be adjusted by using the `--threads` option when
invoking it.
### Arangorestore
Arangorestore can now restore multiple collections in parallel. This can significantly
reduce the time required to recover data from a backup.
By default, arangorestore will use 2 threads for restoring collections. The number of
threads used by arangorestore can be adjusted by using the `--threads` option when
invoking it.
### Arangoimport
Arangoimp was renamed to arangoimport for consistency.
The 3.4 release packages will still install `arangoimp` as a symlink so user scripts
invoking `arangoimp` do not need to be changed.
Arangoimport now can pace the data load rate automatically based on the actual rate of
data the server can handle. This is useful in contexts when the server has a limited
I/O bandwidth, which is often the case in cloud environments. Loading data too quickly
may lead to the server exceeding its provisioned I/O operations quickly, which will
make the cloud environment throttle the disk performance and slow it down drastically.
Using a controlled and adaptive import rate helps prevent this throttling.
The pacing algorithm is turned on by default, but can be disabled by manually specifying
any value for the `--batch-size` parameter.
Arangoimport also got an extra option `--create-database` so that it can automatically
create the target database should this be desired. Previous versions of arangoimp
provided options for creating the target collection only
(`--create-collection`, `--create-collection-type`).
Finally, arangoimport got an option `--latency` which can be used to print microsecond
latency statistics on 10 second intervals for import runs. This can be used to get
additional information about the import run performance and performance development.
When you now have AQL queries that `INSERT`, `UPDATE`, `REMOVE`, `REPLACE` or fetch a single document
in a cluster by e.g. using `FILTER _key == '123'`, the coordinator node will now directly
carry out the change on the DBServer instead of instantiating the respective AQL snippets
on the DBServer. This reduces the number of cluster roundtrips and thus improves performance.
Miscellaneous features
----------------------
- new optional collection property `cacheEnabled` which enables in-memory caching
for documents and primary index entries. This can potentially speed up point-lookups
significantly, especially if your collection has a subset of frequently accessed
keys. Please test this feature carefully to ensure that it does not adversely
affect the performance of your system.
### Logging without escaping non-printable characters
The new option `--log.escape` can be used to enable a slightly different log output
format.
If set to `true` (which is the default value), then the logging will work as in
previous versions of ArangoDB, and the following characters in the log output are
escaped:
* the carriage return character (hex 0d)
* the newline character (hex 0a)
* the tabstop character (hex 09)
* any other characters with an ordinal value less than hex 20
If the `--log.escape` option is set to `false` however, no characters are escaped
when logging them. Characters with an ordinal value less than hex 20 (including
carriage return, newline and tabstop) will not be printed in this mode, but will
be replaced with a space character (hex 20). This is because these characters are
often undesired in logs anyway.
Another positive side effect of turning off the escaping is that it will slightly
reduce the CPU overhead for logging. However, this will only be noticeable when the
logging is set to a very verbose level (e.g. log levels debug or trace).

View File

@ -401,7 +401,7 @@ The `@arangodb/request` response object now stores the parsed JSON response
body in a property `json` instead of `body` when the request was made using the
`json` option. The `body` instead contains the response body as a string.
### Edges API
### JavaScript Edges API
When completely replacing an edge via a collection's `replace()` function the replacing
edge data now needs to contain the `_from` and `_to` attributes for the new edge. Previous
@ -447,7 +447,7 @@ The collection function `byConditionSkiplist()` has been removed in 3.0. The sam
can be achieved by issuing an AQL query with the target condition, which will automatically use
a suitable index if present.
#### Revision id handling
#### Javascript Revision id handling
The `exists()` method of a collection now throws an exception when the specified document
exists but its revision id does not match the revision id specified. Previous versions of
@ -609,7 +609,7 @@ based on AQL internally in 3.0, the API now returns a JSON object with a `result
### Edges API
#### CRUD operations
#### CRUD operations on edges
The API for documents and edges have been unified in ArangoDB 3.0. The CRUD operations
for documents and edges are now handled by the same endpoint at `/_api/document`. For
@ -791,7 +791,7 @@ contain a JSON object with an attribute named `user`, containing the name of the
be created. Previous versions of ArangoDB also checked this attribute, but additionally
looked for an attribute `username` if the `user` attribute did not exist.
### Undocumented APIs
### Undocumented HTTP APIs
The following undocumented HTTP REST endpoints have been removed from ArangoDB's REST
API:

View File

@ -6,28 +6,82 @@ upgrading to ArangoDB 3.4, and adjust any client programs if necessary.
The following incompatible changes have been made in ArangoDB 3.4:
Geo-Index
Geo indexes
-----------
- The on-disk storage format for indexes of type `geo` has changed for the RocksDB
storage engine. This also affects `geo1` and `geo2` indexes.
This **requires** users to start the arangod process with the
`--database.auto-upgrade true` option to allow ArangoDB recreating these
indexes using the new on-disk format.
The on-disk format for geo indexes is incompatible with the on-disk format used
in 3.3 and 3.2, so downgrading from 3.4 to 3.3 is not supported.
- Geo indexes will no longer be reported as _geo1_ or _geo2_ but as type `geo`.
The two previously known geo index types (`geo1` and `geo2`) are **deprecated**.
APIs for creating indexes (`ArangoCollection.ensureIndex`) will continue to support
`geo1` and `geo2`.
RocksDB engine
--------------
- The RocksDB on-disk storage format for indexes with type `geo` has changed
(also affects `geo1` and `geo2` indexes).
This **requires** users to start the arangod process with the
`--database.auto-upgrade true` option to allow arangodb
to recreate these indexes with the new on-disk format.
Installations that start using ArangoDB 3.4 will use an optimized on-disk format
for storing documents using the RocksDB storage engine. This format cannot be used
with ArangoDB 3.3 or before, meaning it is not possible to downgrade from a fresh 3.4
install to 3.3 or earlier when using the RocksDB engine.
- Geo-Indexes will no longer report themselves as _geo1_ or _geo2_ but
with type `geo`. The two previously known geo-index types (`geo1` and `geo2`)
are **deprecated**. APIs to create indexes (`ArangoCollection.ensureIndex`)
will continue to support `geo1` and `geo2`.
Installations that were originally set up with older versions of ArangoDB (e.g. 3.2
or 3.3) will continue to use the existing on-disk format for the RocksDB engine
even with ArangoDB 3.4.
Client tools
------------
HTTP API
--------
Threading and request handling
------------------------------
The processing of incoming requests and the execution of requests by server threads
has changed in 3.4.
Previous ArangoDB versions had a hard-coded implicit lower bound of 64 running
threads, up to which they would increase the number of running server threads.
That value could be increased further by adjusting the option `--server.maximal-threads`.
The configuration option `--server.threads` existed, but did not effectively set
or limit the number of running threads.
In ArangoDB 3.4, the number of threads ArangoDB uses for request handling can now
be strictly bounded by configuration options.
The number of server threads is now configured by the following startup options:
- `--server.threads`: determines the maximum number of request processing threads
the server will start for request handling. If that number of threads is already
running, arangod will not start further threads for request handling
- `--server.minimal-threads`: determines the minimum number of request processing
threads the server will start and always keep around
The actual number of request processing threads is adjusted dynamically at runtime
and will float between `--server.minimal-threads` and `--server.threads`.
To avoid overloading servers, the value of `--server.threads` should not exceed the
server's number of hardware threads in ArangoDB 3.4.
HTTP REST API
-------------
The following incompatible changes were made in context of ArangoDB's HTTP REST
APIs:
- `GET /_api/index` will now return type `geo` for geo indexes, not type `geo1`
or `geo2`.
For geo indexes, the index API will not return the attributes `constraint` and
`ignoreNull` anymore. These attributes were initially deprecated in ArangoDB 2.5
- `GET /_api/aqlfunction` was migrated to match the general structure of
ArangoDB Replies. It now returns an object with a "result" attribute that
ArangoDB replies. It now returns an object with a "result" attribute that
contains the list of available AQL user functions:
```json
@ -69,34 +123,252 @@ HTTP API
"deletedCount": 10
}
```
- `POST /_admin/echo` - previously a `GET` route, now `POST` - expects an
arbitrary JSON object, which it sends back along with other information
- `GET /_admin/server/mode` can now be used to detect whether the remote is in read-only mode or not
- `GET /_admin/status` now returns the attribute `operationMode` instead of `mode`.
The previously existing attribute `writeOpsEnabled` is no longer returned and was
replaced with an attribute `readOnly` with the inverted meaning.
- if authentication is turned on, requests to databases by users with insufficient
access rights will be answered with HTTP 401 (forbidden) instead of HTTP 404 (not found).
The following APIs have been added or augmented:
- `POST /_api/document/{collection}` now supports repsert (replace-insert).
- `POST /_api/document/{collection}` now supports upsert,
this can be achieved by using the new parameter `overwrite=true`.
If you overwrite you can choose to get the old version of the document by specifying
`returnOld=true`
This can be achieved by using the URL parameter `overwrite=true`. When set to
`true`, insertion will not fail in case of a primary key conflict, but turn
into a replace operation.
- `GET /_api/replication/inventory` GET -
new parameters:
- global - if its a database wide replication
- batchId - rocksdb specific offset
When an insert turns into a replace, the previous version of the document can
be retrieved by passing the URL parameter `returnOld=true`
- `_api/view` - the new Views API, several new routes for manipulating views.
- APIs for view management have been added at endpoint `/_api/view`.
- `GET /_admin/test` was completely removed
- `GET /_admin/clusterCheckPort` was completely removed
- `GET /_admin/cluster-test` was completely removed
- The REST APIs for modifying graphs at endpoint `/_api/gharial` now support returning
the old revision of vertices / edges after modifying them. The APIs also supports
returning the just-inserted vertex / edge. This is in line with the already existing
single-document functionality provided at endpoint `/_api/document`.
The old/new revisions can be accessed by passing the URL parameters `returnOld` and
`returnNew` to the following endpoints:
* /_api/gharial/<graph>/vertex/<collection>
* /_api/gharial/<graph>/edge/<collection>
The exception from this is that the HTTP DELETE verb for these APIs does not
support `returnOld` because that would make the existing API incompatible.
The following, partly undocumented REST APIs have been removed in ArangoDB 3.4:
- `GET /_admin/test`
- `GET /_admin/clusterCheckPort`
- `GET /_admin/cluster-test`
- `GET /_admin/statistics/short`
- `GET /_admin/statistics/long`
Miscellaneous
-------------
AQL
---
AQL Functions
-------------
- CALL / APPLY
- may emmit `ERROR_QUERY_FUNCTION_NAME_UNKNOWN` or `ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH`
instead of `ERROR_QUERY_FUNCTION_NOT_FOUND` in some situations.
- are now able to be invoked recursive
- the AQL functions `CALL` and `APPLY` may now throw the errors 1540
(`ERROR_QUERY_FUNCTION_NAME_UNKNOWN`) and 1541 (`ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH`)
instead of error 1582 (`ERROR_QUERY_FUNCTION_NOT_FOUND`) in some situations.
- the `NEAR` AQL function now does not default to a limit of 100 documents
any more, but will return all documents if no limit is specified.
- the existing "fulltext-index-optimizer" optimizer rule has been removed
because its duty is now handled by the new "replace-function-with-index" rule.
- the behavior of the `fullCount` option for AQL query cursors has changed so that it
will only take into account `LIMIT` statements on the top level of the query.
`LIMIT` statements in subqueries will not have any effect on the `fullCount` results
any more.
- the `NEAR`, `WITHIN` and `FULLTEXT` AQL functions do not support accessing
collections dynamically anymore.
The name of the underlying collection and the name of the index attribute to be
used have to be specified using either collection name identifiers, string literals
or bind parameters, but must not be specified using query variables.
For example, the following AQL queries are ok:
FOR doc IN NEAR(myCollection, 2.5, 3) RETURN doc
FOR doc IN NEAR(@@collection, 2.5, 3) RETURN doc
FOR doc IN FULLTEXT("myCollection", "body", "foxx") RETURN doc
FOR doc IN FULLTEXT(@@collection, @attribute, "foxx") RETURN doc
Contrary, the following queries will fail to execute with 3.4 because of dynamic
collection/attribute names used in them:
FOR name IN ["col1", "col2"] FOR doc IN NEAR(name, 2.5, 3) RETURN doc
FOR doc IN collection
FOR match IN FULLTEXT(PARSE_IDENTIFIER(doc).collection, PARSE_IDENTIFIER(doc).key, "foxx") RETURN doc
- the AQL warning 1577 (collection used in expression) will not occur anymore
It was used in previous versions of ArangoDB when the name of a collection was
used in an expression in an AQL query, e.g.
RETURN c1 + c2
Due to internal changes in AQL this is not detected anymore in 3.4, so this
particular warning will not be raised.
- the undocumented built-in visitor functions for AQL traversals have been removed,
as they were based on JavaScript implementations:
- `HASATTRIBUTESVISITOR`
- `PROJECTINGVISITOR`
- `IDVISITOR`
- `KEYVISITOR`
- `COUNTINGVISITOR`
Using any of these functions from inside AQL will now produce an error.
Usage of V8
-----------
The internal usage of the V8 JavaScript for non-user actions has been reduced
in ArangoDB. Several APIs have been rewritten to not depend on V8 and thus do
not require using a V8 context for execution.
Compared to ArangoDB 3.3, the following parts of ArangoDB can now be used
without requiring V8 contexts:
- all of AQL (with the exception of user-defined functions)
- the graph modification APIs at endpoint `/_api/gharial`
- background server statistics gathering
Reduced usage of V8 by ArangoDB may allow end users to lower the configured
numbers of V8 contexts to start. In terms of configuration options, these
are:
- `--javascript.v8-contexts`: the maximum number of V8 contexts to create
- `--javascript.v8-contexts-minimum`: the minimum number of V8 contexts to
create at server start and to keep around
The default values for these startup options have not been changed in ArangoDB
3.4, but depending on the actual workload, 3.4 ArangoDB instances may need
less V8 contexts than 3.3.
Startup option changes
----------------------
For arangod, the following startup options have changed:
- the hidden option `--server.maximal-threads` is now obsolete.
Setting the option will have no effect.
The number of server threads is now configured by the following startup options:
- `--server.threads`: determines the maximum number of request processing threads
the server will start
- `--server.minimal-threads`: determines the minimum number of request processing
threads the server will start
The actual number of request processing threads is adjusted dynamically at runtime
and will float between `--server.minimal-threads` and `--server.threads`. Thus the
value configured for `--server.threads` should not greatly exceed the server's number
of hardware threads.
- the option `--server.maximal-queue-size` has been renamed to `--server.queue-size`.
- the default value for the existing startup option `--javascript.gc-interval`
has been increased from every 1000 to every 2000 requests, and the default value
for the option `--javascript.gc-frequency` has been increased from 30 to 60 seconds.
This will make the V8 garbage collection run less often by default than in previous
versions, reducing CPU load a bit and leaving more V8 contexts available on average.
- the startup option `--cluster.my-local-info` has been removed in favor of persisted
server UUIDs.
The option `--cluster.my-local-info` was deprecated since ArangoDB 3.3.
- the startup option `--database.check-30-revisions` was removed. It was used for
checking the revision ids of documents for having been created with ArangoDB 3.0,
which required a dump & restore migration of the data to 3.1.
As direct upgrades from ArangoDB 3.0 to 3.4 or from 3.1 to 3.4 are not supported,
this option has been removed in 3.4.
- the option `--replication.automatic-failover` was renamed to `--replication.active-failover`
Using the old option name will still work in ArangoDB 3.4, but support for the old
option name will be removed in future versions of ArangoDB.
Permissions
-----------
The behavior of permissions for databases and collections changed:
The new fallback rule for databases for which no access level is explicitly
specified is now:
* Choose the higher access level of:
* A wildcard database grant
* A database grant on the `_system` database
The new fallback rule for collections for which no access level is explicitly
specified is now:
* Choose the higher access level of:
* Any wildcard access grant in the same database, or on "*/*"
* The access level for the current database
* The access level for the `_system` database
SSLv2
-----
Support for SSLv2 has been removed from arangod and all client tools.
Startup will now be aborted when using SSLv2 for a server endpoint, or when connecting
with one of the client tools via an SSLv2 connection.
SSLv2 has been disabled in the OpenSSL library by default in recent versions
because of security vulnerabilities inherent in this protocol.
As it is not safe at all to use this protocol, the support for it has also
been stopped in ArangoDB. End users that use SSLv2 for connecting to ArangoDB
should change the protocol from SSLv2 to TLSv12 if possible, by adjusting
the value of the `--ssl.protocol` startup option.
Mixed-engine clusters
---------------------
Starting a cluster with coordinators and DB servers using different storage
engines is not supported. Doing it anyway will now log an error and abort a
coordinator's startup.
Previous versions of ArangoDB did not detect the usage of different storage
engines in a cluster, but the runtime behavior of the cluster was undefined.
Client tools
------------
The client tool _arangoimp_ has been renamed to _arangoimport_ for consistency.
Release packages will still install arangoimp as a symlink to arangoimport,
so user scripts invoking arangoimp do not need to be changed to work with
ArangoDB 3.4.
Miscellaneous changes
---------------------
For the MMFiles engine, the compactor thread(s) were renamed from "Compactor"
to "MMFilesCompactor".
This change will be visible only on systems which allow assigning names to
threads.

View File

@ -17,3 +17,4 @@ For further information about _datacenter to datacenter replication_, please ref
- [Troubleshooting](../../Troubleshooting/DC2DC/README.md)
- [Monitoring](../../Monitoring/DC2DC/README.md)
- [Security](../../Security/DC2DC/README.md)

View File

@ -16,13 +16,13 @@ Components
### Replication Logger
#### Purpose
**Purpose**
The _replication logger_ will write all data-modification operations into the
_write-ahead log_. This log may then be read by clients to replay any data
modification on a different server.
#### Checking the state
**Checking the state**
To query the current state of the _logger_, use the *state* command:
@ -73,7 +73,7 @@ and maximum tick values per logfile:
### Replication Applier
#### Purpose
**Purpose**
The purpose of the _replication applier_ is to read data from a master database's
event log, and apply them locally. The _applier_ will check the master database

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Securing Starter Deployments
============================

View File

@ -1,4 +1,4 @@
<!-- don't edit here, its from https://@github.com//arangodb-helper/arangodb.git / docs/Manual/ -->
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
# Starting an ArangoDB cluster or database the easy way
Starting an ArangoDB cluster is complex. It involves starting various servers with

View File

@ -1,3 +1,4 @@
<!-- don't edit here, its from https://@github.com/arangodb-helper/arangodb.git / docs/Manual/ -->
Upgrading _Starter_ Deployments
===============================

View File

@ -186,14 +186,6 @@ During view modification the following directives apply:
impact performance and waste disk space for each commit call without any
added benefits
* commitTimeoutMsec: (optional; default: `5000`; to disable use: `0`)
try to commit as much as possible before *count* milliseconds
for the case where there are a lot of inserts/updates, a lower value will
cause a delay in the view accounting for them, due skipping of some commits
for the case where there are a lot of inserts/updates, a higher value will
cause higher memory consumption between commits due to accumulation of
document modifications while a commit is in progress
* consolidate: (optional; default: `none`)
a per-policy mapping of thresholds in the range `[0.0, 1.0]` to determine data
store segment merge candidates, if specified then only the listed policies
@ -235,18 +227,6 @@ During view modification the following directives apply:
* locale: (optional; default: `C`)
the default locale used for ordering processed attribute names
### View properties (unmodifiable)
* collections:
an internally tracked list of collection identifiers which were explicitly
added to the current view by the user via view 'link' property modification
the list may have no-longer valid identifiers if the user did not explicitly
drop the link for the said collection identifier from the current view
invalid collection identifiers are removed during view property modification
among other things used for acquiring collection locks in transactions (i.e.
during a view query no documents will be returned for collections not in this
list) and generating view properties 'links' list
### Link properties
* analyzers: (optional; default: `[ 'identity' ]`)

View File

@ -281,6 +281,7 @@ function book-check-markdown-leftovers()
function check-dangling-anchors()
{
rm -rf /tmp/tags/
echo "${STD_COLOR}##### checking for dangling anchors${RESET}"
find books/ -name '*.html' | while IFS= read -r htmlf; do
fn=$(basename "${htmlf}")
@ -289,6 +290,30 @@ function check-dangling-anchors()
grep '<h. ' < "${htmlf}" | \
sed -e 's;.*id=";;' -e 's;".*;;' > "/tmp/tags/${dir}/${fn}"
done
fail=0
rm -f /tmp/failduplicatetags.txt
find /tmp/tags -type f | while IFS= read -r htmlf; do
sort "${htmlf}" |grep -v ^$ > /tmp/sorted.txt
sort -u "${htmlf}" |grep -v ^$ > /tmp/sortedunique.txt
if test "$(comm -3 /tmp/sorted.txt /tmp/sortedunique.txt|wc -l)" -ne 0; then
echo "${ERR_COLOR}"
echo "in ${htmlf}: "
comm -3 /tmp/sorted.txt /tmp/sortedunique.txt
echo "${RESET}"
touch /tmp/failduplicatetags.txt
fi
done
rm -f /tmp/sorted.txt /tmp/sortedunique.txt
if test -f /tmp/failduplicatetags.txt; then
echo "${ERR_COLOR}"
echo "duplicate anchors detected - see above"
echo "${RESET}"
rm -f /tmp/failduplicatetags.txt
exit 1
fi
rm -f /tmp/anchorlist.txt
echo "${STD_COLOR}##### fetching anchors from generated http files${RESET}"
@ -361,17 +386,21 @@ function check-dangling-anchors()
function book-check-images-referenced()
{
NAME="$1"
set +e
find "${NAME}" -name \*.png | while IFS= read -r image; do
baseimage=$(basename "$image")
if ! grep -Rq "${baseimage}" "${NAME}"; then
echo "${ERR_COLOR}"
echo "$image is not used!"
echo "${RESET}"
exit "1"
fi
done
set -e
echo "${STD_COLOR}##### checking for unused image files ${NAME}${RESET}"
ERRORS=$(find "${NAME}" -name '*.png' | while IFS= read -r image; do
baseimage=$(basename "$image")
if ! grep -Rq "${baseimage}" "${NAME}"; then
printf "\n${image}"
fi
done
)
if test "$(printf "${ERRORS}" | wc -l)" -gt 0; then
echo "${ERR_COLOR}";
echo "the following images are not referenced by any page: "
echo "${ERRORS}"
echo "${RESET}";
exit 1;
fi
}
function build-book-symlinks()

View File

@ -11,7 +11,7 @@ The name or id of the collection to dump.
@RESTQUERYPARAM{chunkSize,number,optional} Approximate maximum size of the returned result.
@RESTQUERYPARAM{batchId,integer,required,int64}
@RESTQUERYPARAM{batchId,number,required}
rocksdb only - The id of the snapshot to use
@RESTQUERYPARAM{from,number,optional}

View File

@ -12,7 +12,7 @@ Include system collections in the result. The default value is *true*.
@RESTQUERYPARAM{global,boolean,optional}
Include all databases in the response. Only works on `_system`. The default value is *false*.
@RESTQUERYPARAM{batchId,integer,required,int64}
@RESTQUERYPARAM{batchId,number,required}
The RocksDB engine requires a valid batchId for this API call
@RESTDESCRIPTION

View File

@ -5,8 +5,7 @@
The interval (in milliseconds) that ArangoDB will use to automatically
synchronize data in its write-ahead logs to disk. Automatic syncs will
only
be performed for not-yet synchronized data, and only for operations that
have been executed without the *waitForSync* attribute.
only be performed for not-yet synchronized data, and only for operations
that have been executed without the *waitForSync* attribute.
@endDocuBlock

View File

@ -17,3 +17,4 @@ EXTERNAL_DOC_kube-arangodb=master
EXTERNAL_DOC_foxx-cli=master
EXTERNAL_DOC_arangodb-php=devel
EXTERNAL_DOC_go-driver=master
EXTERNAL_DOC_arangodb-spark-connector=master

View File

@ -38,7 +38,7 @@ ActionFeature::ActionFeature(application_features::ApplicationServer* server)
: ApplicationFeature(server, "Action"),
_allowUseDatabase(false) {
setOptional(true);
startsAfter("Logger");
startsAfter("ClusterPhase");
}
void ActionFeature::collectOptions(std::shared_ptr<ProgramOptions> options) {

View File

@ -51,19 +51,13 @@ AgencyFeature::AgencyFeature(application_features::ApplicationServer* server)
_supervisionTouched(false),
_waitForSync(true),
_supervisionFrequency(1.0),
_compactionStepSize(20000),
_compactionKeepSize(10000),
_compactionStepSize(1000),
_compactionKeepSize(50000),
_maxAppendSize(250),
_supervisionGracePeriod(10.0),
_cmdLineTimings(false) {
setOptional(true);
startsAfter("Cluster");
startsAfter("Database");
startsAfter("Endpoint");
startsAfter("QueryRegistry");
startsAfter("Random");
startsAfter("Scheduler");
startsAfter("Server");
startsAfter("FoxxPhase");
}
AgencyFeature::~AgencyFeature() {}
@ -201,7 +195,7 @@ void AgencyFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {
if (_compactionKeepSize == 0) {
LOG_TOPIC(WARN, Logger::AGENCY)
<< "agency.compaction-keep-size must not be 0, set to 1000";
_compactionKeepSize = 1000;
_compactionKeepSize = 50000;
}
if (!_agencyMyAddress.empty()) {
@ -231,16 +225,26 @@ void AgencyFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {
if (result.touched("agency.supervision")) {
_supervisionTouched = true;
}
// turn off the following features, as they are not needed in an agency:
// - MMFilesPersistentIndex: not needed by agency even if MMFiles is
// the selected storage engine
// - ArangoSearch: not needed by agency even if MMFiles is the selected
// storage engine
// - Statistics: turn off statistics gathering for agency
// - Action/Script/FoxxQueues/Frontend: Foxx and JavaScript APIs
application_features::ApplicationServer::disableFeatures(
{"MMFilesPersistentIndex", "ArangoSearch", "Statistics", "V8Platform", "V8Dealer", "Action", "Script", "FoxxQueues", "Frontend"}
);
}
void AgencyFeature::prepare() {
}
void AgencyFeature::start() {
if (!isEnabled()) {
return;
}
// Available after validateOptions of ClusterFeature
// Find the agency prefix:
auto feature = ApplicationServer::getFeature<ClusterFeature>("Cluster");
if (!feature->agencyPrefix().empty()) {
@ -248,13 +252,13 @@ void AgencyFeature::start() {
std::string("/") + feature->agencyPrefix());
arangodb::consensus::Job::agencyPrefix = feature->agencyPrefix();;
}
// TODO: Port this to new options handling
std::string endpoint;
if (_agencyMyAddress.empty()) {
std::string port = "8529";
// Available after prepare of EndpointFeature
EndpointFeature* endpointFeature =
ApplicationServer::getFeature<EndpointFeature>("Endpoint");
auto endpoints = endpointFeature->httpEndpoints();
@ -286,9 +290,15 @@ void AgencyFeature::start() {
_waitForSync, _supervisionFrequency, _compactionStepSize,
_compactionKeepSize, _supervisionGracePeriod, _cmdLineTimings,
_maxAppendSize)));
AGENT = _agent.get();
}
void AgencyFeature::start() {
if (!isEnabled()) {
return;
}
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Starting agency personality";
_agent->start();

View File

@ -381,12 +381,14 @@ priv_rpc_ret_t Agent::recvAppendEntriesRPC(
// Else we want to indicate to the leader that we are behind and need data:
// a single false will go back and trigger _confirmed[thisfollower] = 0
if (nqs == 0) {
if (_state.lastIndex() > 0) {
auto lastIndex = _state.lastIndex();
if (lastIndex > 0) {
LOG_TOPIC(DEBUG, Logger::AGENCY)
<< "Finished empty AppendEntriesRPC from " << leaderId << " with term " << term;
{
WRITE_LOCKER(oLocker, _outputLock);
_commitIndex = leaderCommitIndex;
_commitIndex = std::max(
_commitIndex, std::min(leaderCommitIndex, lastIndex));
if (_commitIndex >= _state.nextCompactionAfter()) {
_compactor.wakeUp();
}
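The clamped update above boils down to simple index arithmetic: a follower may advance its commit index at most to what its own log actually holds, and never backwards. A minimal standalone sketch of that rule (illustrative names, not the agency's actual types) might look like this:

#include <algorithm>
#include <cassert>
#include <cstdint>

using index_t = uint64_t;

// Advance the commit index: never beyond the last entry we hold locally,
// and never backwards past what we have already committed.
index_t advanceCommitIndex(index_t current, index_t leaderCommit, index_t lastIndex) {
  return std::max(current, std::min(leaderCommit, lastIndex));
}

int main() {
  assert(advanceCommitIndex(5, 100, 7) == 7);   // leader ahead: clamp to our log
  assert(advanceCommitIndex(9, 8, 20) == 9);    // leader behind: keep our value
  return 0;
}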
@ -489,16 +491,13 @@ void Agent::sendAppendEntriesRPC() {
commitIndex = _commitIndex;
}
// If lastConfirmed is smaller than our first log entry's index, and
// given that our first log entry is either the 0-entry or a compacted
// state and that compactions are only performed up to a RAFT-wide
// committed index, and by that up to absolute truth we can correct
// lastConfirmed to one minus our first log index.
if (lastConfirmed < _state.firstIndex()) {
lastConfirmed = _state.firstIndex() - 1;
// Note that this can only ever happen if _state.firstIndex() is
// greater than 0, so there is no underflow.
}
// If the follower is behind our first log entry send last snapshot and
// following logs. Else try to have the follower catch up in regular order.
bool needSnapshot = lastConfirmed < _state.firstIndex();
if (needSnapshot) {
lastConfirmed = _state.lastCompactionAt() - 1;
}
LOG_TOPIC(TRACE, Logger::AGENCY)
<< "Getting unconfirmed from " << lastConfirmed << " to " << lastConfirmed+99;
// If lastConfirmed is one minus the first log entry, then this is
@ -539,16 +538,14 @@ void Agent::sendAppendEntriesRPC() {
}
index_t lowest = unconfirmed.front().index;
bool needSnapshot = false;
Store snapshot(this, "snapshot");
index_t snapshotIndex;
term_t snapshotTerm;
if (lowest > lastConfirmed) {
if (lowest > lastConfirmed || needSnapshot) {
// Ooops, compaction has thrown away so many log entries that
// we cannot actually update the follower. We need to send our
// latest snapshot instead:
needSnapshot = true;
bool success = false;
try {
success = _state.loadLastCompactedSnapshot(snapshot,
@ -898,16 +895,20 @@ bool Agent::challengeLeadership() {
/// Get last acknowledged responses on leader
query_t Agent::lastAckedAgo() const {
void Agent::lastAckedAgo(Builder& ret) const {
std::unordered_map<std::string, index_t> confirmed;
std::unordered_map<std::string, SteadyTimePoint> lastAcked;
std::unordered_map<std::string, SteadyTimePoint> lastSent;
index_t lastCompactionAt, nextCompactionAfter;
{
MUTEX_LOCKER(tiLocker, _tiLock);
lastAcked = _lastAcked;
confirmed = _confirmed;
lastSent = _lastSent;
lastCompactionAt = _state.lastCompactionAt();
nextCompactionAfter = _state.nextCompactionAfter();
}
std::function<double(std::pair<std::string,SteadyTimePoint> const&)> dur2str =
@ -917,22 +918,22 @@ query_t Agent::lastAckedAgo() const {
std::floor(duration<double>(steady_clock::now()-i.second).count()*1.0e3);
};
auto ret = std::make_shared<Builder>();
{ VPackObjectBuilder e(ret.get());
if (leading()) {
for (auto const& i : lastAcked) {
auto lsit = lastSent.find(i.first);
ret->add(VPackValue(i.first));
{ VPackObjectBuilder o(ret.get());
ret->add("lastAckedTime", VPackValue(dur2str(i)));
ret->add("lastAckedIndex", VPackValue(confirmed.at(i.first)));
if (i.first != id()) {
ret->add("lastAppend", VPackValue(dur2str(*lsit)));
}}
}
}}
return ret;
ret.add("lastCompactionAt", VPackValue(lastCompactionAt));
ret.add("nextCompactionAfter", VPackValue(nextCompactionAfter));
if (leading()) {
ret.add(VPackValue("lastAcked"));
VPackObjectBuilder b(&ret);
for (auto const& i : lastAcked) {
auto lsit = lastSent.find(i.first);
ret.add(VPackValue(i.first));
{ VPackObjectBuilder o(&ret);
ret.add("lastAckedTime", VPackValue(dur2str(i)));
ret.add("lastAckedIndex", VPackValue(confirmed.at(i.first)));
if (i.first != id()) {
ret.add("lastAppend", VPackValue(dur2str(*lsit)));
}}
}
}
}
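The interface change here (returning a freshly allocated builder versus filling one passed in by the caller) lets the REST handler append the data straight into the response object it is already building, instead of copying a slice. The pattern, reduced to a toy Builder type rather than the real velocypack API, is roughly:

#include <iostream>
#include <memory>
#include <string>

// Toy stand-in for a document builder (the real code uses velocypack).
struct Builder {
  std::string out;
  void add(std::string const& key, std::string const& value) {
    out += "\"" + key + "\":\"" + value + "\",";
  }
};

// Old style: allocate a builder and return it; the caller copies the result.
std::shared_ptr<Builder> reportOld() {
  auto b = std::make_shared<Builder>();
  b->add("lastAckedTime", "0.5");
  return b;
}

// New style: append directly into the caller's builder, so the data ends up
// inline in the surrounding document without an extra allocation or copy.
void reportNew(Builder& ret) {
  ret.add("lastAckedTime", "0.5");
}

int main() {
  Builder body;
  body.add("term", "3");
  reportNew(body);                 // fills the response in place
  body.out += reportOld()->out;    // old style needs a copy step
  std::cout << body.out << "\n";
  return 0;
}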
@ -1578,7 +1579,6 @@ void Agent::rebuildDBs() {
_commitIndex = lastCompactionIndex;
_waitForCV.broadcast();
// Apply logs from last applied index to leader's commit index
LOG_TOPIC(DEBUG, Logger::AGENCY)
<< "Rebuilding key-value stores from index "
@ -1610,16 +1610,13 @@ void Agent::compact() {
commitIndex = _commitIndex;
}
if (commitIndex > _config.compactionKeepSize()) {
// If the keep size is too large, we do not yet compact
// TODO: check if there is a problem that we call State::compact()
// now with a commit index that may have been slightly modified by other
// threads
// TODO: the question is if we have to lock out others while we
// call compact or while we grab _commitIndex and then call compact
if (!_state.compact(commitIndex - _config.compactionKeepSize())) {
if (commitIndex >= _state.nextCompactionAfter()) {
// This check needs to be here, because the compactor thread wakes us
// up every 5 seconds.
// Note that it is OK to compact anywhere before or at _commitIndex.
if (!_state.compact(commitIndex, _config.compactionKeepSize())) {
LOG_TOPIC(WARN, Logger::AGENCY) << "Compaction for index "
<< commitIndex - _config.compactionKeepSize()
<< commitIndex << " with keep size " << _config.compactionKeepSize()
<< " did not work.";
}
}

View File

@ -240,7 +240,7 @@ class Agent final : public arangodb::Thread,
query_t allLogs() const;
/// @brief Last contact with followers
query_t lastAckedAgo() const;
void lastAckedAgo(Builder&) const;
/// @brief Am I active agent
bool active() const;

View File

@ -39,8 +39,8 @@ config_t::config_t()
_supervisionTouched(false),
_waitForSync(true),
_supervisionFrequency(5.0),
_compactionStepSize(2000),
_compactionKeepSize(500),
_compactionStepSize(1000),
_compactionKeepSize(50000),
_supervisionGracePeriod(15.0),
_cmdLineTimings(false),
_version(0),
@ -620,7 +620,7 @@ bool config_t::merge(VPackSlice const& conf) {
_compactionStepSize = conf.get(compactionStepSizeStr).getUInt();
ss << _compactionStepSize << " (persisted)";
} else {
_compactionStepSize = 2000;
_compactionStepSize = 1000;
ss << _compactionStepSize << " (default)";
}
} else {
@ -636,7 +636,7 @@ bool config_t::merge(VPackSlice const& conf) {
_compactionKeepSize = conf.get(compactionKeepSizeStr).getUInt();
ss << _compactionKeepSize << " (persisted)";
} else {
_compactionStepSize = 500;
_compactionKeepSize = 50000;
ss << _compactionKeepSize << " (default)";
}
} else {

View File

@ -53,7 +53,7 @@ void Compactor::run() {
{
CONDITION_LOCKER(guard, _cv);
if (!_wakeupCompactor) {
_cv.wait();
_cv.wait(5000000); // just in case we miss a wakeup call!
}
_wakeupCompactor = false;
}
@ -63,10 +63,9 @@ void Compactor::run() {
}
try {
_agent->compact();
}
catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::AGENCY) << "Expection during compaction, details: "
_agent->compact(); // Note that this checks nextCompactionAfter again!
} catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::AGENCY) << "Exception during compaction, details: "
<< e.what();
}
}
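The change from an unconditional wait to a timed wait is a standard guard against lost wakeups: if a notification is ever missed, the compactor still re-evaluates its condition every few seconds instead of sleeping forever. With the standard library (the agency uses its own condition-variable wrapper, so this is only an analogy) the pattern looks roughly like:

#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex mtx;
std::condition_variable cv;
bool wakeupCompactor = false;

void waitForWork() {
  std::unique_lock<std::mutex> lock(mtx);
  // Wake up on notification, or after five seconds at the latest, so a
  // missed notify_one() only delays work instead of stalling it forever.
  cv.wait_for(lock, std::chrono::seconds(5), [] { return wakeupCompactor; });
  wakeupCompactor = false;
}

int main() {
  { std::lock_guard<std::mutex> g(mtx); wakeupCompactor = true; }
  waitForWork();  // returns immediately: the flag is already set
  return 0;
}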

View File

@ -441,20 +441,20 @@ bool Job::abortable(Node const& snapshot, std::string const& jobId) {
}
void Job::doForAllShards(Node const& snapshot,
std::string& database,
std::vector<shard_t>& shards,
std::string& database,
std::vector<shard_t>& shards,
std::function<void(Slice plan, Slice current, std::string& planPath)> worker) {
for (auto const& collShard : shards) {
std::string shard = collShard.shard;
std::string collection = collShard.collection;
for (auto const& collShard : shards) {
std::string shard = collShard.shard;
std::string collection = collShard.collection;
std::string planPath =
planColPrefix + database + "/" + collection + "/shards/" + shard;
std::string curPath = curColPrefix + database + "/" + collection
+ "/" + shard + "/servers";
+ "/" + shard + "/servers";
Slice plan = snapshot.hasAsSlice(planPath).first;
Slice current = snapshot.hasAsSlice(curPath).first;
Slice plan = snapshot.hasAsSlice(planPath).first;
Slice current = snapshot.hasAsSlice(curPath).first;
worker(plan, current, planPath);
}
@ -509,10 +509,10 @@ void Job::addPreconditionCollectionStillThere(Builder& pre,
}
void Job::addPreconditionServerNotBlocked(Builder& pre, std::string const& server) {
pre.add(VPackValue(blockedServersPrefix + server));
{ VPackObjectBuilder serverLockEmpty(&pre);
pre.add("oldEmpty", VPackValue(true));
}
pre.add(VPackValue(blockedServersPrefix + server));
{ VPackObjectBuilder serverLockEmpty(&pre);
pre.add("oldEmpty", VPackValue(true));
}
}
void Job::addPreconditionServerHealth(Builder& pre, std::string const& server,
@ -524,10 +524,10 @@ void Job::addPreconditionServerHealth(Builder& pre, std::string const& server,
}
void Job::addPreconditionShardNotBlocked(Builder& pre, std::string const& shard) {
pre.add(VPackValue(blockedShardsPrefix + shard));
{ VPackObjectBuilder shardLockEmpty(&pre);
pre.add("oldEmpty", VPackValue(true));
}
pre.add(VPackValue(blockedShardsPrefix + shard));
{ VPackObjectBuilder shardLockEmpty(&pre);
pre.add("oldEmpty", VPackValue(true));
}
}
void Job::addPreconditionUnchanged(Builder& pre,

View File

@ -532,7 +532,7 @@ RestStatus RestAgencyHandler::handleConfig() {
body.add("term", Value(_agent->term()));
body.add("leaderId", Value(_agent->leaderID()));
body.add("commitIndex", Value(last));
body.add("lastAcked", _agent->lastAckedAgo()->slice());
_agent->lastAckedAgo(body);
body.add("configuration", _agent->config().toBuilder()->slice());
}

View File

@ -65,6 +65,7 @@ State::State()
_collectionsChecked(false),
_collectionsLoaded(false),
_nextCompactionAfter(0),
_lastCompactionAt(0),
_queryRegistry(nullptr),
_cur(0) {}
@ -806,6 +807,7 @@ bool State::loadCompacted() {
_cur = basics::StringUtils::uint64(ii.get("_key").copyString());
_log.clear(); // will be filled in loadRemaining
// Schedule next compaction:
_lastCompactionAt = _cur;
_nextCompactionAfter = _cur + _agent->config().compactionStepSize();
} catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::AGENCY) << e.what() << " " << __FILE__
@ -1035,16 +1037,23 @@ bool State::find(index_t prevIndex, term_t prevTerm) {
return _log.at(prevIndex).term == prevTerm;
}
index_t State::lastCompactionAt() const {
return _lastCompactionAt;
}
/// Log compaction
bool State::compact(index_t cind) {
// We need to compute the state at index cind and
bool State::compact(index_t cind, index_t keep) {
// We need to compute the state at index cind and use:
// cind <= _commitIndex
// and usually it is < because compactionKeepSize > 0. We start at the
// latest compaction state and advance from there:
// We start at the latest compaction state and advance from there:
// We keep at least `keep` log entries before the compacted state,
// for forensic analysis and such that the log is never empty.
{
MUTEX_LOCKER(_logLocker, _logLock);
if (cind <= _cur) {
LOG_TOPIC(INFO, Logger::AGENCY)
LOG_TOPIC(DEBUG, Logger::AGENCY)
<< "Not compacting log at index " << cind
<< ", because we already have a later snapshot at index " << _cur;
return true;
@ -1053,7 +1062,9 @@ bool State::compact(index_t cind) {
// Move next compaction index forward to avoid a compaction wakeup
// whilst we are working:
_nextCompactionAfter += _agent->config().compactionStepSize();
_nextCompactionAfter
= (std::max)(_nextCompactionAfter.load(),
cind + _agent->config().compactionStepSize());
Store snapshot(_agent, "snapshot");
index_t index;
@ -1084,8 +1095,8 @@ bool State::compact(index_t cind) {
// Now clean up old stuff which is included in the latest compaction snapshot:
try {
compactVolatile(cind);
compactPersisted(cind);
compactVolatile(cind, keep);
compactPersisted(cind, keep);
removeObsolete(cind);
} catch (std::exception const& e) {
if (!_agent->isStopping()) {
@ -1100,31 +1111,46 @@ bool State::compact(index_t cind) {
}
/// Compact volatile state
bool State::compactVolatile(index_t cind) {
// Note that we intentionally keep the index cind although it is, strictly
// speaking, no longer necessary. This is to make sure that _log does not
// become empty! DO NOT CHANGE! This is used elsewhere in the code!
bool State::compactVolatile(index_t cind, index_t keep) {
// Note that we intentionally keep some log entries before cind
// although it is, strictly speaking, no longer necessary. This is to
// make sure that _log does not become empty! DO NOT CHANGE! This is
// used elsewhere in the code! Furthermore, it allows for forensic
// analysis in case of bad things having happened.
if (keep >= cind) { // simply keep everything
return true;
}
TRI_ASSERT(keep < cind);
index_t cut = cind - keep;
MUTEX_LOCKER(mutexLocker, _logLock);
if (!_log.empty() && cind > _cur && cind - _cur < _log.size()) {
_log.erase(_log.begin(), _log.begin() + (cind - _cur));
TRI_ASSERT(_log.begin()->index == cind);
if (!_log.empty() && cut > _cur && cut - _cur < _log.size()) {
_log.erase(_log.begin(), _log.begin() + (cut - _cur));
TRI_ASSERT(_log.begin()->index == cut);
_cur = _log.begin()->index;
}
return true;
}
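The cut computation is plain index arithmetic on a contiguous log. A small model of the volatile compaction (a std::vector with illustrative names, not the actual State class) shows which entries survive a compaction at index cind with keep entries retained:

#include <cassert>
#include <cstdint>
#include <vector>

using index_t = uint64_t;

struct LogEntry { index_t index; };

// Model of the volatile log: it holds entries [cur, cur + log.size()).
// Compacting at `cind` while keeping `keep` entries erases everything
// strictly below cut = cind - keep.
void compactVolatileModel(std::vector<LogEntry>& log, index_t& cur,
                          index_t cind, index_t keep) {
  if (keep >= cind) {
    return;  // simply keep everything
  }
  index_t cut = cind - keep;
  if (!log.empty() && cut > cur && cut - cur < log.size()) {
    log.erase(log.begin(), log.begin() + static_cast<std::ptrdiff_t>(cut - cur));
    cur = log.front().index;
  }
}

int main() {
  std::vector<LogEntry> log;
  index_t cur = 0;
  for (index_t i = 0; i < 120; ++i) {
    log.push_back({i});
  }
  // Compact at index 100, keep 30 entries: entries below 70 are dropped.
  compactVolatileModel(log, cur, /*cind*/ 100, /*keep*/ 30);
  assert(cur == 70);
  assert(log.front().index == 70);
  assert(log.size() == 50);
  return 0;
}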
/// Compact persisted state
bool State::compactPersisted(index_t cind) {
// Note that we intentionally keep the index cind although it is, strictly
// speaking, no longer necessary. This is to make sure that _log does not
// become empty! DO NOT CHANGE! This is used elsewhere in the code!
bool State::compactPersisted(index_t cind, index_t keep) {
// Note that we intentionally keep some log entries before cind
// although it is, strictly speaking, no longer necessary. This is to
// make sure that _log does not become empty! DO NOT CHANGE! This is
// used elsewhere in the code! Furthermore, it allows for forensic
// analysis in case of bad things having happened.
if (keep >= cind) { // simply keep everything
return true;
}
TRI_ASSERT(keep < cind);
index_t cut = cind - keep;
auto bindVars = std::make_shared<VPackBuilder>();
bindVars->openObject();
bindVars->close();
std::stringstream i_str;
i_str << std::setw(20) << std::setfill('0') << cind;
i_str << std::setw(20) << std::setfill('0') << cut;
std::string const aql(std::string("FOR l IN log FILTER l._key < \"") +
i_str.str() + "\" REMOVE l IN log");
@ -1150,14 +1176,14 @@ bool State::compactPersisted(index_t cind) {
/// Remove outdated compaction snapshots
bool State::removeObsolete(index_t cind) {
if (cind > 3 * _agent->config().compactionStepSize()) {
if (cind > 3 * _agent->config().compactionKeepSize()) {
auto bindVars = std::make_shared<VPackBuilder>();
bindVars->openObject();
bindVars->close();
std::stringstream i_str;
i_str << std::setw(20) << std::setfill('0')
<< -3 * _agent->config().compactionStepSize() + cind;
<< -3 * _agent->config().compactionKeepSize() + cind;
std::string const aql(std::string("FOR c IN compact FILTER c._key < \"") +
i_str.str() + "\" REMOVE c IN compact");
@ -1212,6 +1238,10 @@ bool State::persistCompactionSnapshot(index_t cind,
res = trx.finish(result.result);
if (res.ok()) {
_lastCompactionAt = cind;
}
return res.ok();
}

View File

@ -143,7 +143,8 @@ class State {
}
/// @brief compact state machine
bool compact(arangodb::consensus::index_t cind);
bool compact(arangodb::consensus::index_t cind,
arangodb::consensus::index_t keep);
private:
/// @brief Remove RAFT conflicts. i.e. All indices, where higher term version
@ -167,6 +168,9 @@ class State {
/// `index` to 0 if there is no compacted snapshot.
bool loadLastCompactedSnapshot(Store& store, index_t& index, term_t& term);
/// @brief lastCompactionAt
index_t lastCompactionAt() const;
/// @brief nextCompactionAfter
index_t nextCompactionAfter() const {
return _nextCompactionAfter;
@ -227,10 +231,12 @@ class State {
bool createCollection(std::string const& name);
/// @brief Compact persisted logs
bool compactPersisted(arangodb::consensus::index_t cind);
bool compactPersisted(arangodb::consensus::index_t cind,
arangodb::consensus::index_t keep);
/// @brief Compact RAM logs
bool compactVolatile(arangodb::consensus::index_t cind);
bool compactVolatile(arangodb::consensus::index_t cind,
arangodb::consensus::index_t keep);
/// @brief Remove obsolete logs
bool removeObsolete(arangodb::consensus::index_t cind);
@ -253,8 +259,9 @@ class State {
bool _collectionsLoaded;
std::multimap<std::string,arangodb::consensus::index_t> _clientIdLookupTable;
/// @brief Next compaction after
/// @brief compaction indexes
std::atomic<index_t> _nextCompactionAfter;
std::atomic<index_t> _lastCompactionAt;
/// @brief Our query registry
aql::QueryRegistry* _queryRegistry;

View File

@ -36,7 +36,8 @@ AqlFunctionFeature::AqlFunctionFeature(
application_features::ApplicationServer* server)
: application_features::ApplicationFeature(server, "AQLFunctions") {
setOptional(false);
startsAfter("EngineSelector");
startsAfter("V8Phase");
startsAfter("Aql");
}

View File

@ -198,7 +198,8 @@ class MinElementSorting final : public SortingStrategy, public OurLessThan {
arangodb::transaction::Methods* trx,
std::vector<std::deque<AqlItemBlock*>>& gatherBlockBuffer,
std::vector<SortRegister>& sortRegisters) noexcept
: OurLessThan(trx, gatherBlockBuffer, sortRegisters) {
: OurLessThan(trx, gatherBlockBuffer, sortRegisters),
_blockPos(nullptr) {
}
virtual ValueType nextValue() override {

View File

@ -581,11 +581,12 @@ void Condition::normalize() {
#endif
}
void Condition::CollectOverlappingMembers(ExecutionPlan const* plan,
void Condition::collectOverlappingMembers(ExecutionPlan const* plan,
Variable const* variable,
AstNode* andNode,
AstNode* otherAndNode,
std::unordered_set<size_t>& toRemove,
bool isSparse,
bool isFromTraverser) {
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
result;
@ -595,13 +596,33 @@ void Condition::CollectOverlappingMembers(ExecutionPlan const* plan,
for (size_t i = 0; i < n; ++i) {
auto operand = andNode->getMemberUnchecked(i);
bool allowOps = operand->isComparisonOperator();
if (isSparse && allowOps && !isFromTraverser &&
(operand->type == NODE_TYPE_OPERATOR_BINARY_NE || operand->type == NODE_TYPE_OPERATOR_BINARY_GT)) {
// look for != null and > null
// these can be removed if we are working with a sparse index!
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
clearAttributeAccess(result);
if (lhs->isAttributeAccessForVariable(result, isFromTraverser) &&
result.first == variable) {
if (rhs->isNullValue()) {
toRemove.emplace(i);
// removed, no need to go on below...
continue;
}
}
}
if (isFromTraverser) {
allowOps = allowOps || operand->isArrayComparisonOperator();
} else {
allowOps = allowOps && operand->type != NODE_TYPE_OPERATOR_BINARY_NE &&
operand->type != NODE_TYPE_OPERATOR_BINARY_NIN;
}
if (allowOps) {
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
@ -642,7 +663,8 @@ void Condition::CollectOverlappingMembers(ExecutionPlan const* plan,
/// @brief removes condition parts from another
AstNode* Condition::removeIndexCondition(ExecutionPlan const* plan,
Variable const* variable,
AstNode* other) {
AstNode const* other,
bool isSparse) {
if (_root == nullptr || other == nullptr) {
return _root;
}
@ -664,8 +686,7 @@ AstNode* Condition::removeIndexCondition(ExecutionPlan const* plan,
size_t const n = andNode->numMembers();
std::unordered_set<size_t> toRemove;
CollectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove,
false);
collectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove, isSparse, false);
if (toRemove.empty()) {
return _root;
@ -715,8 +736,7 @@ AstNode* Condition::removeTraversalCondition(ExecutionPlan const* plan,
size_t const n = andNode->numMembers();
std::unordered_set<size_t> toRemove;
CollectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove,
true);
collectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove, false, true);
if (toRemove.empty()) {
return _root;

View File

@ -183,9 +183,9 @@ class Condition {
~Condition();
public:
static void CollectOverlappingMembers(
static void collectOverlappingMembers(
ExecutionPlan const* plan, Variable const* variable, AstNode* andNode,
AstNode* otherAndNode, std::unordered_set<size_t>& toRemove,
AstNode* otherAndNode, std::unordered_set<size_t>& toRemove, bool isSparse,
bool isFromTraverser);
/// @brief return the condition root
@ -228,7 +228,7 @@ class Condition {
void normalize();
/// @brief removes condition parts from another
AstNode* removeIndexCondition(ExecutionPlan const*, Variable const*, AstNode*);
AstNode* removeIndexCondition(ExecutionPlan const*, Variable const*, AstNode const*, bool isSparse);
/// @brief removes condition parts from another
AstNode* removeTraversalCondition(ExecutionPlan const*, Variable const*, AstNode*);

View File

@ -106,7 +106,7 @@ class EngineInfoContainerDBServer {
) const;
/// @returns type of the "main node" if applicable,
/// 'ExecutionNode::MAX_NODE_TYPE_VALUE' otherwise
/// 'ExecutionNode::MAX_NODE_TYPE_VALUE' otherwise
ExecutionNode::NodeType type() const noexcept {
return _type;
}

View File

@ -702,7 +702,6 @@ std::string ExecutionBlock::typeToString(ExecutionBlock::Type type) {
// to please compiler in non-maintainer mode
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
std::string("when converting ExecutionBlock::Type to string: got invalid type"));
return "";
}
return got->second;
}

View File

@ -586,7 +586,7 @@ ExecutionEngine* ExecutionEngine::instantiateFromPlan(
// in short: this avoids copying the return values
engine->resultRegister(
dynamic_cast<ReturnBlock*>(root)->returnInheritedResults());
static_cast<ReturnBlock*>(root)->returnInheritedResults());
}
engine->_root = root;

View File

@ -1180,7 +1180,7 @@ void ExecutionNode::RegisterPlan::after(ExecutionNode* en) {
#ifdef USE_IRESEARCH
case ExecutionNode::ENUMERATE_IRESEARCH_VIEW: {
auto ep = static_cast<iresearch::IResearchViewNode const*>(en);
auto ep = ExecutionNode::castTo<iresearch::IResearchViewNode const*>(en);
TRI_ASSERT(ep);
ep->planNodeRegisters(nrRegsHere, nrRegs, varInfo, totalNrRegs, ++depth);
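Several hunks in this commit swap raw static_cast (or dynamic_cast) on ExecutionNode pointers for ExecutionNode::castTo<>. The helper's implementation is not part of this diff; a generic checked downcast of this kind, shown here purely as a sketch, typically combines a cheap static_cast with an assertion-time dynamic_cast:

#include <cassert>

struct Node { virtual ~Node() = default; };
struct CalculationNode : Node { int value = 42; };

// Hypothetical checked downcast: as cheap as static_cast in release builds,
// but verified via dynamic_cast while assertions are enabled.
template <typename To>
To castTo(Node* n) {
  assert(dynamic_cast<To>(n) != nullptr);
  return static_cast<To>(n);
}

int main() {
  CalculationNode calc;
  Node* base = &calc;
  assert(castTo<CalculationNode*>(base)->value == 42);
  return 0;
}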

View File

@ -595,7 +595,7 @@ SubqueryNode* ExecutionPlan::getSubqueryFromExpression(
/// @brief get the output variable from a node
Variable const* ExecutionPlan::getOutVariable(ExecutionNode const* node) const {
if (node->getType() == ExecutionNode::CALCULATION) {
// CalculationNode has an outVariale() method
// CalculationNode has an outVariable() method
return ExecutionNode::castTo<CalculationNode const*>(node)->outVariable();
}
@ -614,9 +614,13 @@ Variable const* ExecutionPlan::getOutVariable(ExecutionNode const* node) const {
TRI_ASSERT(v != nullptr);
return v;
}
if (node->getType() == ExecutionNode::SUBQUERY) {
return ExecutionNode::castTo<SubqueryNode const*>(node)->outVariable();
}
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
"invalid node type in getOutVariable");
std::string("invalid node type '") + node->getTypeString() + "' in getOutVariable");
}
/// @brief creates an anonymous COLLECT node (for a DISTINCT)

View File

@ -26,20 +26,35 @@
namespace arangodb {
namespace aql {
enum class ExecutionState {
enum class ExecutionState {
// done with this block, definitely no more results
DONE,
DONE,
// (potentially) more results available. this may "lie" and
// report that there are more results when in fact there are
// none (background: to accurately determine that there are
// more results we may need to execute expensive operations
// on the preceding blocks, which we want to avoid)
HASMORE,
// on the preceding blocks, which we want to avoid)
HASMORE,
// unclear if more results available or not. caller is asked
// to try again
WAITING
WAITING
};
inline std::ostream& operator<<(std::ostream& ostream, ExecutionState state) {
switch (state) {
case ExecutionState::DONE:
ostream << "DONE";
break;
case ExecutionState::HASMORE:
ostream << "HASMORE";
break;
case ExecutionState::WAITING:
ostream << "WAITING";
break;
}
return ostream;
}
} // aql
} // arangodb
#endif
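A brief usage note on the new streaming operator: it makes states readable in log statements and test assertions without a manual switch, e.g. (the include path below is an assumption):

#include <sstream>
#include "Aql/ExecutionState.h"  // include path assumed for illustration

int main() {
  std::ostringstream os;
  os << arangodb::aql::ExecutionState::HASMORE;  // streams the symbolic name
  return os.str() == "HASMORE" ? 0 : 1;
}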

View File

@ -281,6 +281,9 @@ std::vector<std::pair<std::string, format_func_t>> const sortedDateMap = {
wrk.append(std::to_string(abs(yearnum)));
return;
}
TRI_ASSERT(yearnum >= 0);
if (yearnum > 99999) {
// intentionally nothing
} else if (yearnum > 9999) {
@ -291,10 +294,8 @@ std::vector<std::pair<std::string, format_func_t>> const sortedDateMap = {
wrk.append("+000");
} else if (yearnum > 9) {
wrk.append("+0000");
} else if (yearnum >= 0) {
wrk.append("+00000");
} else {
wrk.append("+");
wrk.append("+00000");
}
wrk.append(std::to_string(yearnum));
}},
@ -316,12 +317,9 @@ std::vector<std::pair<std::string, format_func_t>> const sortedDateMap = {
wrk.append("-");
}
wrk.append(std::to_string(abs(yearnum)));
}
else {
if (yearnum < 0) {
wrk.append("0000");
wrk.append(std::to_string(yearnum));
} else if (yearnum < 9) {
} else {
TRI_ASSERT(yearnum >= 0);
if (yearnum < 9) {
wrk.append("000");
wrk.append(std::to_string(yearnum));
} else if (yearnum < 99) {

View File

@ -2452,7 +2452,7 @@ void arangodb::aql::removeFiltersCoveredByIndexRule(
if (indexesUsed.size() == 1) {
// single index. this is something that we can handle
auto newNode = condition.removeIndexCondition(
plan.get(), indexNode->outVariable(), indexCondition->root());
plan.get(), indexNode->outVariable(), indexCondition->root(), indexesUsed[0].getIndex()->sparse());
if (newNode == nullptr) {
// no condition left...
@ -3658,7 +3658,7 @@ void arangodb::aql::collectInClusterRule(Optimizer* opt,
if (setter == nullptr || setter->getType() != EN::CALCULATION) {
continue;
}
auto* expr = static_cast<CalculationNode const*>(setter)->expression();
auto* expr = ExecutionNode::castTo<CalculationNode const*>(setter)->expression();
if (expr == nullptr) {
continue;
}
@ -4164,6 +4164,17 @@ void arangodb::aql::restrictToSingleShardRule(
toRemove.clear();
break;
}
if (c->getType() == EN::CALCULATION) {
auto cn = ExecutionNode::castTo<CalculationNode const*>(c);
auto expr = cn->expression();
if (expr != nullptr && !expr->canRunOnDBServer()) {
// found something that must not run on a DB server,
// but that must run on a coordinator. stop optimization here!
toRemove.clear();
break;
}
}
}
for (auto const& it : toRemove) {

View File

@ -59,7 +59,7 @@ ExecutionNode* hasSingleDep(ExecutionNode const* in, EN::NodeType const type) {
Index* hasSingleIndexHandle(ExecutionNode const* node) {
TRI_ASSERT(node->getType() == EN::INDEX);
IndexNode const* indexNode = static_cast<IndexNode const*>(node);
IndexNode const* indexNode = ExecutionNode::castTo<IndexNode const*>(node);
auto indexHandleVec = indexNode->getIndexes();
if (indexHandleVec.size() == 1) {
return indexHandleVec.front().getIndex().get();
@ -79,7 +79,7 @@ std::vector<AstNode const*> hasBinaryCompare(ExecutionNode const* node) {
// returns any AstNode in the expression that is
// a binary comparison.
TRI_ASSERT(node->getType() == EN::INDEX);
IndexNode const* indexNode = static_cast<IndexNode const*>(node);
IndexNode const* indexNode = ExecutionNode::castTo<IndexNode const*>(node);
AstNode const* cond = indexNode->condition()->root();
std::vector<AstNode const*> result;
@ -196,8 +196,8 @@ bool substituteClusterSingleDocumentOperationsIndex(Optimizer* opt,
}
Index* index = ::hasSingleIndexHandle(node, Index::TRI_IDX_TYPE_PRIMARY_INDEX);
if (index){
IndexNode* indexNode = static_cast<IndexNode*>(node);
if (index) {
IndexNode* indexNode = ExecutionNode::castTo<IndexNode*>(node);
auto binaryCompares = ::hasBinaryCompare(node);
std::string key = ::getFirstKey(binaryCompares);
if (key.empty()) {
@ -207,7 +207,7 @@ bool substituteClusterSingleDocumentOperationsIndex(Optimizer* opt,
auto* parentModification = ::hasSingleParent(node, {EN::INSERT, EN::REMOVE, EN::UPDATE, EN::REPLACE});
if (parentModification){
auto mod = static_cast<ModificationNode*>(parentModification);
auto mod = ExecutionNode::castTo<ModificationNode*>(parentModification);
auto parentType = parentModification->getType();
auto const& vec = mod->getVariablesUsedHere();
@ -219,7 +219,7 @@ bool substituteClusterSingleDocumentOperationsIndex(Optimizer* opt,
TRI_ASSERT(vec.size() == 1);
} else {
update = vec.front();
if(vec.size() > 1){
if (vec.size() > 1) {
keyVar = vec.back();
}
}
@ -286,7 +286,7 @@ bool substituteClusterSingleDocumentOperationsNoIndex(Optimizer* opt,
}
for (auto* node : nodes) {
auto mod = static_cast<ModificationNode*>(node);
auto mod = ExecutionNode::castTo<ModificationNode*>(node);
if (!::depIsSingletonOrConstCalc(node)) {
continue;
@ -324,7 +324,7 @@ bool substituteClusterSingleDocumentOperationsNoIndex(Optimizer* opt,
while (cursor) {
cursor = ::hasSingleDep(cursor, EN::CALCULATION);
if (cursor) {
CalculationNode* c = static_cast<CalculationNode*>(cursor);
CalculationNode* c = ExecutionNode::castTo<CalculationNode*>(cursor);
if (c->setsVariable(keySet)) {
calc = c;
break;

View File

@ -51,9 +51,9 @@ OptimizerRulesFeature::OptimizerRulesFeature(
application_features::ApplicationServer* server)
: application_features::ApplicationFeature(server, "OptimizerRules") {
setOptional(false);
startsAfter("EngineSelector");
startsAfter("V8Phase");
startsAfter("Aql");
startsAfter("Cluster");
}
void OptimizerRulesFeature::prepare() {

View File

@ -572,9 +572,9 @@ void arangodb::aql::replaceNearWithinFulltext(Optimizer* opt
return astnode;
};
CalculationNode* calc = static_cast<CalculationNode*>(node);
CalculationNode* calc = ExecutionNode::castTo<CalculationNode*>(node);
auto* original = getAstNode(calc);
auto* replacement = Ast::traverseAndModify(original,visitor);
auto* replacement = Ast::traverseAndModify(original, visitor);
// replace root node if it was modified
// TraverseAndModify has no access to root's parent

Some files were not shown because too many files have changed in this diff.