From 44bc62531741b59b7a0cda13550ad5d21f5b57fe Mon Sep 17 00:00:00 2001 From: Jan Date: Wed, 10 Apr 2019 12:50:10 +0200 Subject: [PATCH] improve documentation a bit (#8722) --- Documentation/Books/Manual/Indexing/Ttl.md | 120 +++++++++++++----- .../Books/Manual/Indexing/WhichIndex.md | 19 +-- .../Manual/ReleaseNotes/NewFeatures35.md | 67 ++-------- arangod/RestServer/TtlFeature.cpp | 9 ++ arangod/RestServer/TtlFeature.h | 1 + 5 files changed, 120 insertions(+), 96 deletions(-) diff --git a/Documentation/Books/Manual/Indexing/Ttl.md b/Documentation/Books/Manual/Indexing/Ttl.md index e605eb9b88..4ff0bd57ae 100644 --- a/Documentation/Books/Manual/Indexing/Ttl.md +++ b/Documentation/Books/Manual/Indexing/Ttl.md @@ -7,39 +7,42 @@ Introduction to TTL (time-to-live) Indexes The TTL index provided by ArangoDB is used for removing expired documents from a collection. -The TTL index is set up by setting an `expireAfter` value and by picking a single -document attribute which contains the documents' creation date and time. Documents -are expired after `expireAfter` seconds after their creation time. The creation time -is specified as a numeric timestamp (Unix timestamp) or a date string in format -`YYYY-MM-DDTHH:MM:SS` with optional milliseconds. All date strings will be interpreted -as UTC dates. +The TTL index is set up by setting an `expireAfter` value and by selecting a single +document attribute which contains a reference timepoint. For each document, that +reference timepoint can then be specified as a numeric timestamp (Unix timestamp) or +a date string in format `YYYY-MM-DDTHH:MM:SS` with optional milliseconds. +All date strings will be interpreted as UTC dates. -For example, if `expireAfter` is set to 600 seconds (10 minutes) and the index -attribute is "creationDate" and there is the following document: +Documents will count as expired when wall clock time is beyond the per-document +reference timepoint value plus the index' `expireAfter` value added to it. 
+ +### Removing documents at a fixed period after creation / update + +One use case supported by TTL indexes is to remove documents at a fixed duration +after they have been created or last updated. This requires setting up the index +with an attribute that contains the documents' creation or last-updated time. + +Let's assume the index attribute is set to "creationDate", and the `expireAfter` +attribute of the index was set to 600 seconds (10 minutes). + + db.collection.ensureIndex({ type: "ttl", fields: ["creationDate"], expireAfter: 600 }); + +Let's further assume the following document now gets inserted into the collection: { "creationDate" : 1550165973 } -This document will be indexed with a creation date time value of `1550165973`, -which translates to the human-readable date `2019-02-14T17:39:33.000Z`. The document +This document will be indexed with a reference timepoint value of `1550165973`, +which translates to the human-readable date/time `2019-02-14T17:39:33.000Z`. The document will expire 600 seconds afterwards, which is at timestamp `1550166573` (or -`2019-02-14T17:49:33.000Z` in the human-readable version). +`2019-02-14T17:49:33.000Z` in the human-readable version). From that point on, the +document is a candidate for being removed. -The actual removal of expired documents will not necessarily happen immediately. -Expired documents will eventually removed by a background thread that is periodically -going through all TTL indexes and removing the expired documents. - -There is no guarantee when exactly the removal of expired documents will be carried -out, so queries may still find and return documents that have already expired. These -will eventually be removed when the background thread kicks in and has capacity to -remove the expired documents. It is guaranteed however that only documents which are -past their expiration time will actually be removed. 
- Please note that the numeric date time values for the index attribute should be -specified in milliseconds since January 1st 1970 (Unix timestamp). To calculate the current +specified in seconds since January 1st 1970 (Unix timestamp). To calculate the current timestamp from JavaScript in this format, there is `Date.now() / 1000`, to calculate it -from an arbitrary Date instance, there is `Date.getTime() / 1000`. +from an arbitrary `Date` instance, there is `Date.getTime() / 1000`. -Alternatively, the index attribute values can be specified as a date string in format +Alternatively, the reference timepoints can be specified as a date string in format `YYYY-MM-DDTHH:MM:SS` with optional milliseconds. All date strings will be interpreted as UTC dates. @@ -47,17 +50,61 @@ The above example document using a datestring attribute value would be { "creationDate" : "2019-02-14T17:39:33.000Z" } +Now any data-modification access to the document could update the value in the document's +`creationDate` attribute to the current date/time, which would prolong the existence +of the document and keep it from being expired and removed. + +Setting a document's reference timepoint on initial insertion or updating it on every +subsequent modification of the document will not be performed by ArangoDB. Instead, it +is the task of client applications to set and update the reference timepoints whenever +the use case requires it. + +### Removing documents at certain points in time + +Another use case is to specify a per-document expiration/removal timepoint, and to set +the `expireAfter` attribute to a low value (e.g. 0 seconds). + +Let's assume the index attribute is set to "expireDate", and the `expireAfter` +attribute of the index was set to 0 seconds (immediately when wall clock time reaches +the value specified in `expireDate`). 
+ + db.collection.ensureIndex({ type: "ttl", fields: ["expireDate"], expireAfter: 0 }); + +When storing the following document in the collection, it will expire at the timepoint +specified in the document itself: + + { "expireDate" : "2019-03-28T01:06:00Z" } + +As `expireAfter` was set to 0, the document will count as expired when wall clock time +has reached that timepoint. + +It should be intuitive to see that the `expireDate` can be different per document. +This allows mixing of documents with different expiration periods by calculating their +expiration dates differently in the client application. + +### Preventing documents from being removed + In case the index attribute does not contain a numeric value nor a proper date string, the document will not be stored in the TTL index and thus will not become a candidate for expiration and removal. Providing either a non-numeric value or even no value for -the index attribute is a supported way of keeping documents from being expired and removed. +the index attribute is a supported way to keep documents from being expired and removed. -There can at most be one TTL index per collection. It is not recommended to use -TTL indexes for user-land AQL queries, as TTL indexes may store a transformed, -always numerical version of the index attribute value. +### Limitations + +The actual removal of expired documents will not necessarily happen immediately when +they have reached their expiration time. +Expired documents will eventually be removed by a background thread that is periodically +going through all TTL indexes and removing the expired documents. + +There is no guarantee when exactly the removal of expired documents will be carried +out, so queries may still find and return documents that have already expired. These +will eventually be removed when the background thread kicks in and has spare capacity to +remove the expired documents. 
It is guaranteed however that only documents which are +past their expiration time will actually be removed. + +The frequency for invoking the background removal thread can be configured using +the `--ttl.frequency` startup option. The frequency is specified in milliseconds. -The frequency for invoking the background removal thread can be configured -using the `--ttl.frequency` startup option. In order to avoid "random" load spikes by the background thread suddenly kicking in and removing a lot of documents at once, the number of to-be-removed documents per thread invocation can be capped. @@ -66,6 +113,19 @@ controlled by the startup option `--ttl.max-total-removes`. The maximum number o documents in a single collection at once can be controlled by the startup option `--ttl.max-collection-removes`. +There can at most be one TTL index per collection. It is not recommended to rely on +TTL indexes for user-land AQL queries. This is because TTL indexes may store a transformed, +always numerical version of the index attribute value even if it was originally passed +in as a datestring. + +Please note that there is one background thread per ArangoDB database server instance +for performing the removal of expired documents of all collections in all databases. +If the number of databases and collections with TTL indexes is high and there are many +documents to remove from these, the background thread may at least temporarily lag +behind with its removal operations. It should eventually catch up in case the number +of to-be-removed documents per invocation is not higher than the background thread's +configured threshold values. 
+ Accessing TTL Indexes from the Shell ------------------------------------- diff --git a/Documentation/Books/Manual/Indexing/WhichIndex.md b/Documentation/Books/Manual/Indexing/WhichIndex.md index 8c4732e2f1..6a9e81894b 100644 --- a/Documentation/Books/Manual/Indexing/WhichIndex.md +++ b/Documentation/Books/Manual/Indexing/WhichIndex.md @@ -65,11 +65,11 @@ different usage scenarios: expired documents from a collection. The TTL index is set up by setting an `expireAfter` value and by picking a single - document attribute which contains the documents' creation date and time. Documents - are expired after `expireAfter` seconds after their creation time. The creation time - is specified as either a numeric timestamp (Unix timestamp) or a date string in format - `YYYY-MM-DDTHH:MM:SS` with optional milliseconds. All date strings will be interpreted - as UTC dates. + document attribute which contains the documents' reference timepoint. Documents + are expired `expireAfter` seconds after their reference timepoint has been reached. + The documents' reference timepoint is specified as either a numeric timestamp + (Unix timestamp) or a date string in format `YYYY-MM-DDTHH:MM:SS` with optional + milliseconds. All date strings will be interpreted as UTC dates. For example, if `expireAfter` is set to 600 seconds (10 minutes) and the index attribute is "creationDate" and there is the following document: @@ -94,7 +94,7 @@ different usage scenarios: past their expiration time will actually be removed. Please note that the numeric date time values for the index attribute should be - specified in milliseconds since January 1st 1970 (Unix timestamp). To calculate the current + specified in seconds since January 1st 1970 (Unix timestamp). To calculate the current timestamp from JavaScript in this format, there is `Date.now() / 1000`, to calculate it from an arbitrary Date instance, there is `Date.getTime() / 1000`. 
@@ -102,7 +102,7 @@ different usage scenarios: `YYYY-MM-DDTHH:MM:SS` with optional milliseconds. All date strings will be interpreted as UTC dates. - The above example document using a datestring attribute value would be + The above example document using a date string attribute value would be { "creationDate" : "2019-02-14T17:39:33.000Z" } @@ -111,8 +111,9 @@ different usage scenarios: for expiration and removal. Providing either a non-numeric value or even no value for the index attribute is a supported way of keeping documents from being expired and removed. - It is not recommended to use TTL indexes for user-land AQL queries, as TTL indexes may - store a transformed, always numerical version of the index attribute value. + It is not recommended to rely on TTL indexes for user-land AQL queries. This is because + TTL indexes may store a transformed, always numerical version of the index attribute value + even if it was originally passed in as a date string. - geo index: the geo index provided by ArangoDB allows searching for documents within a radius around a two-dimensional earth coordinate (point), or to diff --git a/Documentation/Books/Manual/ReleaseNotes/NewFeatures35.md b/Documentation/Books/Manual/ReleaseNotes/NewFeatures35.md index 8b3b7fbc5a..60d2dc6d2b 100644 --- a/Documentation/Books/Manual/ReleaseNotes/NewFeatures35.md +++ b/Documentation/Books/Manual/ReleaseNotes/NewFeatures35.md @@ -190,65 +190,18 @@ other operations on the collection. TTL (time-to-live) Indexes -------------------------- -The new TTL indexes provided by ArangoDB can be used for removing expired documents -from a collection. +The new TTL indexes feature provided by ArangoDB can be used for automatically +removing expired documents from a collection. -A TTL index can be set up by setting an `expireAfter` value and by picking a single -document attribute which contains the documents' creation date and time. Documents -expire `expireAfter` seconds after their creation time. 
The creation time -is specified as either a numeric timestamp or a UTC datestring. +TTL indexes support eventual removal of documents which are past a configured +expiration timepoint. The expiration timepoints can be based upon the documents' +original insertion or last-updated timepoints, plus a period during +which to retain the documents. +Alternatively, expiration timepoints can be specified as absolute values per +document. +It is also possible to exclude documents from automatic expiration and removal. -For example, if `expireAfter` is set to 600 seconds (10 minutes) and the index -attribute is "creationDate" and there is the following document: - - { "creationDate" : 1550165973 } - -This document will be indexed with a creation timestamp value of `1550165973`, -which translates to the human-readable date string `2019-02-14T17:39:33.000Z`. The -document will expire 600 seconds afterwards, which is at timestamp `1550166573` (or -`2019-02-14T17:49:33.000Z` in the human-readable version). - -The actual removal of expired documents will not necessarily happen immediately. -Expired documents will eventually removed by a background thread that is periodically -going through all TTL indexes and removing the expired documents. - -There is no guarantee when exactly the removal of expired documents will be carried -out, so queries may still find and return documents that have already expired. These -will eventually be removed when the background thread kicks in and has capacity to -remove the expired documents. It is guaranteed however that only documents which are -past their expiration time will actually be removed. - -Please note that the numeric timestamp values for the index attribute should be -specified in seconds since January 1st 1970 (Unix timestamp). To calculate the current -timestamp from JavaScript in this format, there is `Date.now() / 1000`, to calculate it -from an arbitrary Date instance, there is `Date.getTime() / 1000`. 
- -Alternatively, the index attribute values can be specified as a date string in format -`YYYY-MM-DDTHH:MM:SS` with optional milliseconds. All date strings will be interpreted -as UTC dates. - -The above example document using a datestring attribute value would be - - { "creationDate" : "2019-02-14T17:39:33.000Z" } - -In case the index attribute does not contain a numeric value nor a proper date string, -the document will not be stored in the TTL index and thus will not become a candidate -for expiration and removal. Providing either a non-numeric value or even no value for -the index attribute is a supported way of keeping documents from being expired and removed. - -There can at most be one TTL index per collection. It is not recommended to use -TTL indexes for user-land AQL queries, as TTL indexes may store a transformed, -always numerical version of the index attribute value. - -The frequency for invoking the background removal thread can be configured -using the `--ttl.frequency` startup option. -In order to avoid "random" load spikes by the background thread suddenly kicking -in and removing a lot of documents at once, the number of to-be-removed documents -per thread invocation can be capped. -The total maximum number of documents to be removed per thread invocation is -controlled by the startup option `--ttl.max-total-removes`. The maximum number of -documents in a single collection at once can be controlled by the startup option -`--ttl.max-collection-removes`. +Also see the [TTL Indexes](../Indexing/Ttl.md) page. 
HTTP API extensions diff --git a/arangod/RestServer/TtlFeature.cpp b/arangod/RestServer/TtlFeature.cpp index cc15475f1d..d755d51844 100644 --- a/arangod/RestServer/TtlFeature.cpp +++ b/arangod/RestServer/TtlFeature.cpp @@ -108,6 +108,9 @@ Result TtlProperties::fromVelocyPack(VPackSlice const& slice) { return Result(TRI_ERROR_BAD_PARAMETER, "expecting numeric value for frequency"); } frequency = slice.get("frequency").getNumericValue(); + if (frequency < TtlProperties::minFrequency) { + return Result(TRI_ERROR_BAD_PARAMETER, "too low value for frequency"); + } } if (slice.hasKey("maxTotalRemoves")) { if (!slice.get("maxTotalRemoves").isNumber()) { @@ -439,6 +442,12 @@ void TtlFeature::validateOptions(std::shared_ptr options) { << "invalid value for '--ttl.max-collection-removes'."; FATAL_ERROR_EXIT(); } + + if (_properties.frequency < TtlProperties::minFrequency) { + LOG_TOPIC("ea696", FATAL, arangodb::Logger::STARTUP) + << "too low value for '--ttl.frequency'."; + FATAL_ERROR_EXIT(); + } } void TtlFeature::start() { diff --git a/arangod/RestServer/TtlFeature.h b/arangod/RestServer/TtlFeature.h index 2db0cf1133..c88868c79c 100644 --- a/arangod/RestServer/TtlFeature.h +++ b/arangod/RestServer/TtlFeature.h @@ -55,6 +55,7 @@ struct TtlStatistics { }; struct TtlProperties { + static constexpr uint64_t minFrequency = 1 * 1000; // milliseconds uint64_t frequency = 30 * 1000; // milliseconds uint64_t maxTotalRemoves = 1000000; uint64_t maxCollectionRemoves = 1000000;