diff --git a/Documentation/Books/AQL/DataQueries.md b/Documentation/Books/AQL/DataQueries.md index 45150a0b60..21e2d7de66 100644 --- a/Documentation/Books/AQL/DataQueries.md +++ b/Documentation/Books/AQL/DataQueries.md @@ -191,9 +191,9 @@ FOR u IN users INSERT u IN backup ``` -As a final example, let's find some documents in collection *users* and -remove them from collection *backup*. The link between the documents in both -collections is established via the documents' keys: +Subsequently, let's find some documents in collection *users* and remove them +from collection *backup*. The link between the documents in both collections is +established via the documents' keys: ```js FOR u IN users @@ -201,6 +201,14 @@ FOR u IN users REMOVE u IN backup ``` +The following example will remove all documents from both *users* and *backup*: + +```js +LET r1 = (FOR u IN users REMOVE u IN users) +LET r2 = (FOR u IN backup REMOVE u IN backup) +RETURN true +``` + ### Returning documents Data-modification queries can optionally return documents. In order to reference @@ -286,13 +294,12 @@ must be known to the AQL executor at query-compile time and cannot change at runtime. Using a bind parameter to specify the [collection name](../Manual/Appendix/Glossary.html#collection-name) is allowed. -Data-modification operations are restricted to one collection at a time. It is -not possible to use multiple data-modification operations for the same collection -in the same query, or follow up a data-modification operation for a specific -collection with a read operation for the same collection. Neither is it possible -to follow up any data-modification operation with a traversal query (which may -read from arbitrary collections not necessarily known at the start of the -traversal). 
+It is not possible to use multiple data-modification operations for the same +collection in the same query, or follow up a data-modification operation for a +specific collection with a read operation for the same collection. Neither is +it possible to follow up any data-modification operation with a traversal query +(which may read from arbitrary collections not necessarily known at the start of +the traversal). That means you may not place several `REMOVE` or `UPDATE` statements for the same collection into the same query. It is however possible to modify different collections diff --git a/Documentation/Books/AQL/Fundamentals/Syntax.md b/Documentation/Books/AQL/Fundamentals/Syntax.md index 5564ff69a8..8e675c3d42 100644 --- a/Documentation/Books/AQL/Fundamentals/Syntax.md +++ b/Documentation/Books/AQL/Fundamentals/Syntax.md @@ -170,7 +170,7 @@ FOR f IN ´filter´ Collection names can be used in queries as they are. If a collection happens to have the same name as a keyword, the name must be enclosed in backticks. -Please refer to the [Naming Conventions in ArangoDB](../../Manual/DataModeling/NamingConventions/CollectionNames.html) +Please refer to the [Naming Conventions in ArangoDB](../../Manual/DataModeling/NamingConventions/CollectionAndViewNames.html) about collection naming conventions. AQL currently has a limit of up to 256 collections used in one AQL query. diff --git a/Documentation/Books/AQL/Operations/For.md b/Documentation/Books/AQL/Operations/For.md index b78e1768f1..a32ac8bd82 100644 --- a/Documentation/Books/AQL/Operations/For.md +++ b/Documentation/Books/AQL/Operations/For.md @@ -16,6 +16,16 @@ FOR vertexVariableName, edgeVariableName, pathVariableName IN traversalExpressio ``` For this special case see [the graph traversals chapter](../Graphs/Traversals.md). 
+ +For views, there is a special (optional) `SEARCH` keyword: + +```js +FOR variableName IN viewName SEARCH searchExpression +``` + +Details can be found in [the views chapter](../Views/README.md). + + For all other cases read on: Each array element returned by *expression* is visited exactly once. It is diff --git a/Documentation/Books/AQL/Views/ArangoSearch/README.md b/Documentation/Books/AQL/Views/ArangoSearch/README.md index 6715757c11..66c34e64f5 100644 --- a/Documentation/Books/AQL/Views/ArangoSearch/README.md +++ b/Documentation/Books/AQL/Views/ArangoSearch/README.md @@ -1,7 +1,7 @@ ArangoSearch Views in AQL ========================= -Views of type **arangosearch** are an integration layer meant to seamlessly +Views of type `arangosearch` are an integration layer meant to seamlessly integrate with and natively expose the full power of the [IResearch library](https://github.com/iresearch-toolkit/iresearch) to the ArangoDB user. @@ -9,7 +9,168 @@ to the ArangoDB user. They provide the capability to: * evaluate together documents located in different collections * search documents based on AQL boolean expressions and functions -* sort the result set based on how closely each document matched the search +* sort the result set based on how closely each document matched the search + +Overview and Significance +------------------------- + +Looking up documents in an ArangoSearch View is done via the `FOR` keyword: + +```js +FOR doc IN someView + ... +``` + +`FOR` operations over ArangoSearch Views have an additional, optional, `SEARCH` +keyword: + +```js +FOR doc IN someView + SEARCH searchExpression +``` + +### SEARCH + +`SEARCH` expressions look a lot like `FILTER` operations, but have some notable +differences. + +First of all, filters and functions in `SEARCH`, when applied to documents +_emitted from an ArangoSearch View_, work _only_ on attributes linked in the +view. 
+ +For example, given a collection `myCol` with the following documents: + +```js +[ + { someAttr: 'One', anotherAttr: 'One' }, + { someAttr: 'Two', anotherAttr: 'Two' } +] +``` + +with a view, where `someAttr` is indexed by the following view `myView`: + +```js +{ + "type": "arangosearch", + "links": { + "myCol": { + "fields": { + "someAttr": {} + } + } + } +} +``` + +Then, a search on `someAttr` yields the following result: + +```js +FOR doc IN myView + SEARCH doc.someAttr == 'One' + RETURN doc +``` +```js +[ { someAttr: 'One', anotherAttr: 'One' } ] +``` + +While a search on `anotherAttr` yields an empty result: + +```js +FOR doc IN myView + SEARCH doc.anotherAttr == 'One' + RETURN doc +``` +```js +[] +``` + +- This only applies to the expression after the `SEARCH` keyword. +- This only applies to tests regarding documents emitted from a view. Other + tests are not affected. +- In order to use `SEARCH` using all attributes of a linked source, the special +`includeAllFields` [link property](../../../Manual/Views/ArangoSearch/DetailedOverview.html#link-properties) was designed. + +### SORT + +The document search via the `SEARCH` keyword and the sorting via the +ArangoSearch functions, namely `BM25()` and `TFIDF()`, are closely intertwined. +The query given in the `SEARCH` expression is not only used to filter documents, +but also is used with the sorting functions to decide which document matches +the query best. Other documents in the view also affect this decision. + +Therefore the ArangoSearch sorting functions can work _only_ on documents +emitted from a view, as both the corresponding `SEARCH` expression and the view +itself are consulted in order to sort the results. + +The `BOOST()` function, described below, can be used to fine-tune the resulting +ranking by weighing sub-expressions in `SEARCH` differently. 
+ +### Arrays and trackListPositions + +Unless [**trackListPositions**](../../../Manual/Views/ArangoSearch/DetailedOverview.html#link-properties) +is set to `true`, which it is not by default, arrays behave differently. Namely +they behave like a disjunctive superposition of their values - this is best +shown with an example. + +With `trackListPositions: false`, which is the default, and given a document +`doc` containing + +```js +{ attr: [ 'valueX', 'valueY', 'valueZ' ] } +``` + +in a `SEARCH` clause, the expression + +```js +doc.attr == 'valueX' +``` + +will be true, as will be + +```js +doc.attr == 'valueY' +``` + +and `== valueZ`. With `trackListPositions: true`, + +```js +doc.attr[0] == 'valueX' +``` + +would work as usual. + +### Comparing analyzed fields + +As described in [value analysis](#arangosearch-value-analysis), when a field is +processed by a specific analyzer, comparison tests are done per word. For +example, given the field `text` is analyzed with `"text_en"` and contains the +string `"a quick brown fox jumps over the lazy dog"`, the following expression +will be true: + +```js +ANALYZER(d.text == 'fox', "text_en") +``` + +Note also, that the words analyzed in the text are stemmed, so this is also +true: + +```js +ANALYZER(d.text == 'jump', "text_en") +``` + +So a comparison will actually test if a word is contained in the text. With +`trackListPositions: false`, this means for arrays if the word is contained in +any element of the array. For example, given + +```js +d.text = [ "a quick", "brown fox", "jumps over the", "lazy dog"] +``` + +the following will be true: + +```js +ANALYZER(d.text == 'jump', "text_en") +``` ArangoSearch value analysis --------------------------- @@ -36,55 +197,58 @@ e.g. 
to match docs with 'word == quick' OR 'word == brown' OR 'word == fox' FOR doc IN someCollection FILTER doc.word IN TOKENS('a quick brown fox', 'text_en') - RETRUN doc + RETURN doc ArangoSearch filters -------------------- -The basic ArangoSearch functionality can be accessed via SEARCH statement with -common AQL filters and operators, e.g.: +The basic ArangoSearch functionality can be accessed via the `SEARCH` statement +with common AQL filters and operators, e.g.: -- *AND* -- *OR* -- *NOT* -- *==* -- *<=* -- *>=* -- *<* -- *>* -- *!=* -- *IN * -- *IN * +``` +- `AND` +- `OR` +- `NOT` +- `==` +- `<=` +- `>=` +- `<` +- `>` +- `!=` +- `IN ` +- `IN ` +``` However, the full power of ArangoSearch is harnessed and exposed via functions, during both the search and sort stages. -Note, that SEARCH statement is meant to be treated as a part of the -expression, but not as an individual statement in contrast to FILTER +Note, that `SEARCH` statement, in contrast to `FILTER`, is meant to be treated +as a part of the `FOR` operation, not as an individual statement. The supported AQL context functions are: ### ANALYZER() -`ANALYZER(search-expression, analyzer)` +`ANALYZER(searchExpression, analyzer)` -Override analyzer in a context of **search-expression** with another one, denoted -by a specified **analyzer** argument, making it available for search functions. +Override analyzer in a context of **searchExpression** with another one, +denoted by a specified **analyzer** argument, making it available for search +functions. -- *search-expression* - any valid search expression -- *analyzer* - string with the analyzer to imbue, i.e. *"text_en"* or one of the other - [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) +- *searchExpression* - any valid search expression +- *analyzer* - string with the analyzer to imbue, i.e. 
*"text_en"* or one of the + other [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) By default, context contains `Identity` analyzer. ### BOOST() -`BOOST(search-expression, boost)` +`BOOST(searchExpression, boost)` -Override boost in a context of **search-expression** with a specified value, +Override boost in a context of **searchExpression** with a specified value, making it available for scorer functions. -- *search-expression* - any valid search expression +- *searchExpression* - any valid search expression - *boost* - numeric boost value By default, context contains boost value equal to `1.0`. @@ -93,28 +257,36 @@ The supported search functions are: ### EXISTS() -Note: Will only match **attribute-name** values that have been processed with -the link property **storeValues** set to anything other than **none**. +Note: Will only match values when the specified attribute has been processed +with the link property **storeValues** set to **"id"** (by default it's +**"none"**). -`EXISTS(attribute-name)` +`EXISTS(doc.someAttr)` -Match documents where the attribute **attribute-name** exists in the document. +Match documents **doc** where the attribute **someAttr** exists in the +document. -`EXISTS(attribute-name, "analyzer" [, analyzer])` +This also works with sub-attributes, e.g. -Match documents where the **attribute-name** exists in the document and -was indexed by the specified **analyzer**. -In case if **analyzer** isn't specified, current context analyzer (e.g. specified by -`ANALYZER` function) will be used. +`EXISTS(doc.someAttr.anotherAttr)` -`EXISTS(attribute-name, type)` +as long as the field is processed by the view with **storeValues** not +**none**. -Match documents where the **attribute-name** exists in the document +`EXISTS(doc.someAttr, "analyzer", analyzer)` + +Match documents where **doc.someAttr** exists in the document _and_ was indexed +by the specified **analyzer**. 
**analyzer** is optional and defaults to the +current context analyzer (e.g. specified by `ANALYZER` function). + +`EXISTS(doc.someAttr, type)` + +Match documents where the **doc.someAttr** exists in the document and is of the specified type. -- *attribute-name* - the path of the attribute to exist in the document -- *analyzer* - string with the analyzer used, i.e. *"text_en"* or one of the other - [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) +- *doc.someAttr* - the path of the attribute to exist in the document +- *analyzer* - string with the analyzer used, i.e. *"text_en"* or one of the + other [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) - *type* - data type as string; one of: - **bool** - **boolean** @@ -122,57 +294,84 @@ Match documents where the **attribute-name** exists in the document - **null** - **string** -In case if **analyzer** isn't specified, current context analyzer (e.g. specified by -`ANALYZER` function) will be used. +In case if **analyzer** isn't specified, current context analyzer (e.g. +specified by `ANALYZER` function) will be used. ### PHRASE() ``` -PHRASE(attribute-name, - phrasePart [, skipTokens, phrasePart [, ... skipTokens, phrasePart]] +PHRASE(doc.someAttr, + phrasePart [, skipTokens] [, phrasePart | , phrasePart, skipTokens]* [, analyzer]) ``` Search for a phrase in the referenced attributes. -The phrase can be expressed as an arbitrary number of *phraseParts* separated by *skipToken* number of tokens. +The phrase can be expressed as an arbitrary number of *phraseParts* separated by +*skipToken* number of tokens. 
-- *attribute-name* - the path of the attribute to compare against in the document -- *phrasePart* - a string to search in the token stream; may consist of several words; will be split using the specified *analyzer* +- *doc.someAttr* - the path of the attribute to compare against in the document +- *phrasePart* - a string to search in the token stream; may consist of several + words; will be split using the specified *analyzer* - *skipTokens* number of words or tokens to treat as wildcards -- *analyzer* - string with the analyzer used, i.e. *"text_en"* or one of the other - [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) +- *analyzer* - string with the analyzer used, i.e. *"text_en"* or one of the + other [available string analyzers + ](../../../Manual/Views/ArangoSearch/Analyzers.html) + +For example, given a document `doc` containing the text `"Lorem ipsum dolor sit +amet, consectetur adipiscing elit"`, the following expression will be `true`: + +```js +PHRASE(doc.text, "ipsum", 1, "sit", 2, "adipiscing", "text_de") +``` + +Specifying deep attributes like `doc.some.deep.attr` is also allowed. The +attribute has to be processed by the view as specified in the link. ### STARTS_WITH() -`STARTS_WITH(attribute-name, prefix)` +`STARTS_WITH(doc.someAttr, prefix)` -Match the value of the **attribute-name** that starts with **prefix** +Match the value of the **doc.someAttr** that starts with **prefix** -- *attribute-name* - the path of the attribute to compare against in the document +- *doc.someAttr* - the path of the attribute to compare against in the document - *prefix* - a string to search at the start of the text +Specifying deep attributes like `doc.some.deep.attr` is also allowed. The +attribute has to be processed by the view as specified in the link. + ### TOKENS() `TOKENS(input, analyzer)` -Split the **input** string with the help of the specified **analyzer** into an Array. -The resulting Array can i.e. 
be used in subsequent `FILTER` or `SEARCH` statements with the **IN** operator. -This can be used to better understand how the specific analyzer is going to behave. +Split the **input** string with the help of the specified **analyzer** into an +Array. The resulting Array can e.g. be used in subsequent `FILTER` or `SEARCH` +statements with the **IN** operator. This can be used to better understand how +the specific analyzer is going to behave. - *input* string to tokenize -- *analyzer* one of the [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) +- *analyzer* one of the [available string analyzers](../../../Manual/Views/ArangoSearch/Analyzers.html) ### MIN_MATCH() -`MIN_MATCH(search-expression, [..., search-expression], min-match-count)` +`MIN_MATCH(searchExpression [, searchExpression]*, minMatchCount)` -Match documents where at least **min-match-count** of the specified **search-expression**s -are satisfied. +Match documents where at least **minMatchCount** of the specified +**searchExpression**s are satisfied. -- *search-expression* - any valid search expression -- *min-match-count* - minimum number of search-expressions that should be satisfied +- *searchExpression* - any valid search expression +- *minMatchCount* - minimum number of *searchExpression*s that should be + satisfied -#### Searching examples +For example, + +```js +MIN_MATCH(doc.text == 'quick', doc.text == 'brown', doc.text == 'fox', 2) +``` + +if `doc.text`, as analyzed by the current analyzer, contains at least 2 of 'quick', +'brown' and 'fox', the document will be included as a match. 
+ +### Searching examples to match documents which have a 'name' attribute @@ -275,47 +474,98 @@ to match documents where 'description' best matches 'a quick brown fox' FOR doc IN someView SEARCH ANALYZER(doc.description IN TOKENS('a quick brown fox', 'text_en'), 'text_en') RETURN doc -ArangoSearch sort ------------------ +ArangoSearch sorting +-------------------- -A major feature of ArangoSearch views is their capability of sorting results +A major feature of ArangoSearch Views is their capability of sorting results based on the creation-time search conditions and zero or more sorting functions. -The sorting functions are meant to be user-defined. +The ArangoSearch sorting functions available are `TFIDF()` and `BM25()`. -Note: Similar to other sorting functions on regular collections the first - argument to any sorting function is _always_ either the document emitted by - the `FOR` statement, or some sub-attribute of it. +Note: The first argument to any ArangoSearch sorting function is _always_ the +document emitted by a `FOR` operation over an ArangoSearch View. -The sorting functions are meant to be user-defined. The following functions are already built in: +Note: An ArangoSearch sorting function is _only_ allowed as an argument to a +`SORT` operation. But they can be mixed with other arguments to `SORT`. + +So the following examples are valid: + +```js +FOR doc IN someView + SORT TFIDF(doc) +``` + +```js +FOR a IN viewA + FOR b IN viewB + SORT BM25(a), TFIDF(b) +``` + +```js +FOR a IN viewA + FOR c IN someCollection + FOR b IN viewB + SORT TFIDF(b), c.name, BM25(a) +``` + +while these will _not_ work: + +```js +FOR doc IN someCollection + SORT TFIDF(doc) // !!! Error +``` +```js +FOR doc IN someCollection + RETURN BM25(doc) // !!! Error +``` +```js +FOR doc IN someCollection + SORT BM25(doc.someAttr) // !!! Error +``` +```js +FOR doc IN someView + SORT TFIDF("someString") // !!! Error +``` +```js +FOR doc IN someView + SORT BM25({some: obj}) // !!! 
Error +``` + +The following sorting methods are available: ### Literal sorting -You can sort documents by simply specifying the *attribute-name* directly, as you do using indices in other places. +You can sort documents by simply specifying arbitrary values or expressions, as +you do in other places. -### Best Matching 25 Algorithm +### BM25() -`BM25(attribute-name, [k, [b]])` +`BM25(doc, k, b)` + +- *k* (number, _optional_): calibrates the text term frequency scaling, the default is +_1.2_. A *k* value of _0_ corresponds to a binary model (no term frequency), and a large +value corresponds to using raw term frequency +- *b* (number, _optional_): determines the scaling by the total text length, the default +is _0.75_. At the extreme values of the coefficient *b*, BM25 turns into ranking +functions known as BM11 (for *b* = `1`, corresponds to fully scaling the term weight by +the total text length) and BM15 (for *b* = `0`, corresponds to no length normalization) Sorts documents using the [**Best Matching 25** algorithm](https://en.wikipedia.org/wiki/Okapi_BM25). +See the [`BM25()` section in ArangoSearch Scorers](../../../Manual/Views/ArangoSearch/Scorers.html) +for details. -Optionally the term frequency **k** and coefficient **b** of the algorithm can be specified as floating point numbers: +### TFIDF() -- *k* defaults to `1.2`; *k* calibrates the text term frequency scaling. - A *k* value of *0* corresponds to a binary model (no term frequency), - and a large value corresponds to using raw term frequency. +`TFIDF(doc, withNorms)` -- *b* defaults to `0.75`; *b* determines the scaling by the total text length. - - b = 1 corresponds to fully scaling the term weight by the total text length - - b = 0 corresponds to no length normalization. - -At the extreme values of the coefficient *b*, BM25 turns into ranking functions known as BM11 (for b = 1) and BM15 (for b = 0). 
+- *doc* (document): must be emitted by `FOR doc IN someView` +- *withNorms* (bool, _optional_): specifying whether scores should be + normalized, the default is _false_ -### Term Frequency – Inverse Document Frequency Algorithm +Sorts documents using the +[**term frequency–inverse document frequency** algorithm](https://en.wikipedia.org/wiki/TF-IDF). +See the +[`TFIDF()` section in ArangoSearch Scorers](../../../Manual/Views/ArangoSearch/Scorers.html) +for details. -`TFIDF(attribute-name, [with-norms])` - -Sorts documents using the [**term frequency–inverse document frequency** algorithm](https://en.wikipedia.org/wiki/TF-IDF). - - optionally specifying that norms should be used via **with-norms** ### Sorting examples diff --git a/Documentation/Books/AQL/Views/README.md b/Documentation/Books/AQL/Views/README.md index 40673a6dbd..54fedd8433 100644 --- a/Documentation/Books/AQL/Views/README.md +++ b/Documentation/Books/AQL/Views/README.md @@ -4,10 +4,22 @@ Views in AQL Conceptually a **view** is just another document data source, similar to an array or a document/edge collection, e.g.: -FOR doc IN exampleView +```js +FOR doc IN exampleView SEARCH ... FILTER ... SORT ... RETURN ... +``` + +Other than collections, views have an additional but optional `SEARCH` keyword: + +```js +FOR doc IN exampleView + SEARCH ... + FILTER ... + SORT ... + RETURN ... +``` A view is meant to be an abstraction over a transformation applied to documents of zero or more collections. The transformation is view-implementation specific @@ -17,6 +29,5 @@ represent all documents available in the specified set of collections. Views can be defined and administered on a per view-type basis via the [web interface](../../Manual/Programs/WebInterface/index.html). 
-The currently supported view implementations are: - -- **arangosearch** as described in [ArangoSearch View](ArangoSearch/README.md) +Currently there is a single supported view implementation, namely +`arangosearch` as described in [ArangoSearch View](ArangoSearch/README.md). diff --git a/Documentation/Books/Drivers/JS/Reference/Database/ViewAccess.md b/Documentation/Books/Drivers/JS/Reference/Database/ViewAccess.md index acb073e8b3..094770012b 100644 --- a/Documentation/Books/Drivers/JS/Reference/Database/ViewAccess.md +++ b/Documentation/Books/Drivers/JS/Reference/Database/ViewAccess.md @@ -14,7 +14,7 @@ Returns a _ArangoSearchView_ instance for the given view name. - **viewName**: `string` - Name of the arangosearch view. + Name of the `arangosearch` view. **Examples** diff --git a/Documentation/Books/Drivers/Java/Reference/Collection/CollectionManipulation.md b/Documentation/Books/Drivers/Java/Reference/Collection/CollectionManipulation.md index eb5b6bcddf..8870d7ea09 100644 --- a/Documentation/Books/Drivers/Java/Reference/Collection/CollectionManipulation.md +++ b/Documentation/Books/Drivers/Java/Reference/Collection/CollectionManipulation.md @@ -73,8 +73,8 @@ Creates a collection with the given _options_ for this collection's name, then r ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -db.createCollection("potatos", new CollectionCreateOptions()); -// the document collection "potatos" now exists +db.createCollection("potatoes", new CollectionCreateOptions()); +// the document collection "potatoes" now exists ``` ## ArangoCollection.create @@ -144,9 +144,9 @@ Alternative for [ArangoDatabase.createCollection](#arangodatabasecreatecollectio ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); collection.create(new CollectionCreateOptions()); -// the 
document collection "potatos" now exists +// the document collection "potatoes" now exists ``` ## ArangoCollection.load diff --git a/Documentation/Books/Drivers/Java/Reference/Collection/README.md b/Documentation/Books/Drivers/Java/Reference/Collection/README.md index 1b8f36ecb1..8400e8a744 100644 --- a/Documentation/Books/Drivers/Java/Reference/Collection/README.md +++ b/Documentation/Books/Drivers/Java/Reference/Collection/README.md @@ -26,7 +26,7 @@ Checks whether the collection exists ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); boolean exists = collection.exists(); ``` @@ -44,7 +44,7 @@ Returns information about the collection. ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); CollectionEntity info = collection.getInfo(); ``` @@ -62,7 +62,7 @@ Reads the properties of the specified collection. ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); CollectionPropertiesEntity properties = collection.getProperties(); ``` @@ -80,7 +80,7 @@ Retrieve the collections revision. ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); CollectionRevisionEntity revision = collection.getRevision(); ``` @@ -98,7 +98,7 @@ Fetches a list of all indexes on this collection. 
```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoCollection collection = db.collection("potatos"); +ArangoCollection collection = db.collection("potatoes"); Collection indexes = collection.getIndexes(); ``` diff --git a/Documentation/Books/Drivers/Java/Reference/Database/ViewAccess.md b/Documentation/Books/Drivers/Java/Reference/Database/ViewAccess.md index 6a6a9159f0..3d146e0f37 100644 --- a/Documentation/Books/Drivers/Java/Reference/Database/ViewAccess.md +++ b/Documentation/Books/Drivers/Java/Reference/Database/ViewAccess.md @@ -32,7 +32,7 @@ ArangoView view = db.view("myView"); ArangoDatabase.arangoSearch(String name) : ArangoSearch ``` -Returns a _ArangoSearch_ instance for the given ArangoSearch view name. +Returns a _ArangoSearch_ instance for the given ArangoSearch View name. **Arguments** diff --git a/Documentation/Books/Drivers/Java/Reference/View/ArangoSearch.md b/Documentation/Books/Drivers/Java/Reference/View/ArangoSearch.md index f0879ef406..7f52b92f71 100644 --- a/Documentation/Books/Drivers/Java/Reference/View/ArangoSearch.md +++ b/Documentation/Books/Drivers/Java/Reference/View/ArangoSearch.md @@ -2,7 +2,7 @@ # ArangoSearch API These functions implement the -[HTTP API for ArangoSearch views](../../../..//HTTP/Views/ArangoSearch.html). +[HTTP API for ArangoSearch Views](../../../..//HTTP/Views/ArangoSearch.html). ## ArangoDatabase.createArangoSearch @@ -10,7 +10,7 @@ These functions implement the ArangoDatabase.createArangoSearch(String name, ArangoSearchCreateOptions options) : ViewEntity ``` -Creates a ArangoSearch view with the given _options_, then returns view information from the server. +Creates a ArangoSearch View with the given _options_, then returns view information from the server. 
**Arguments** @@ -41,8 +41,8 @@ Creates a ArangoSearch view with the given _options_, then returns view informat ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -db.createArangoSearch("potatos", new ArangoSearchPropertiesOptions()); -// the ArangoSearch view "potatos" now exists +db.createArangoSearch("potatoes", new ArangoSearchPropertiesOptions()); +// the ArangoSearch View "potatoes" now exists ``` ## ArangoSearch.create @@ -51,7 +51,7 @@ db.createArangoSearch("potatos", new ArangoSearchPropertiesOptions()); ArangoSearch.create(ArangoSearchCreateOptions options) : ViewEntity ``` -Creates a ArangoSearch view with the given _options_, then returns view information from the server. +Creates a ArangoSearch View with the given _options_, then returns view information from the server. Alternative for [ArangoDatabase.createArangoSearch](#arangodatabasecreatearangosearch). @@ -80,10 +80,10 @@ Alternative for [ArangoDatabase.createArangoSearch](#arangodatabasecreatearangos ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoSearch view = db.arangoSearch("potatos"); +ArangoSearch view = db.arangoSearch("potatoes"); view.create(new ArangoSearchPropertiesOptions()); -// the ArangoSearch view "potatos" now exists +// the ArangoSearch View "potatoes" now exists ``` ## ArangoSearch.getProperties @@ -99,7 +99,7 @@ Reads the properties of the specified view. 
```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoSearch view = db.arangoSearch("potatos"); +ArangoSearch view = db.arangoSearch("potatoes"); ArangoSearchPropertiesEntity properties = view.getProperties(); ``` diff --git a/Documentation/Books/Drivers/Java/Reference/View/README.md b/Documentation/Books/Drivers/Java/Reference/View/README.md index 2adb04fa40..4ff8316d08 100644 --- a/Documentation/Books/Drivers/Java/Reference/View/README.md +++ b/Documentation/Books/Drivers/Java/Reference/View/README.md @@ -23,7 +23,7 @@ Checks whether the view exists ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoView view = db.view("potatos"); +ArangoView view = db.view("potatoes"); boolean exists = view.exists(); ``` @@ -41,7 +41,7 @@ Returns information about the view. ```Java ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); -ArangoView view = db.view("potatos"); +ArangoView view = db.view("potatoes"); ViewEntity info = view.getInfo(); ``` diff --git a/Documentation/Books/Drivers/Java/Reference/View/ViewManipulation.md b/Documentation/Books/Drivers/Java/Reference/View/ViewManipulation.md index eaffb3af20..627ed7a1b7 100644 --- a/Documentation/Books/Drivers/Java/Reference/View/ViewManipulation.md +++ b/Documentation/Books/Drivers/Java/Reference/View/ViewManipulation.md @@ -28,7 +28,7 @@ Creates a view of the given _type_, then returns view information from the serve ArangoDB arango = new ArangoDB.Builder().build(); ArangoDatabase db = arango.db("myDB"); db.createView("myView", ViewType.ARANGO_SEARCH); -// the view "potatos" now exists +// the view "potatoes" now exists ``` ## ArangoView.rename diff --git a/Documentation/Books/Manual/Appendix/Glossary.md b/Documentation/Books/Manual/Appendix/Glossary.md index 0990a9aacd..4a2c16bf0c 100644 --- a/Documentation/Books/Manual/Appendix/Glossary.md +++ 
b/Documentation/Books/Manual/Appendix/Glossary.md @@ -20,7 +20,7 @@ opaque strings when they store or use it locally. Collection Name --------------- -A collection name identifies a collection in a database. It is a string and is unique within the database. Unlike the collection identifier it is supplied by the creator of the collection. The collection name must consist of letters, digits, and the _ (underscore) and - (dash) characters only. Please refer to [NamingConventions](../DataModeling/NamingConventions/CollectionNames.md) for more information on valid collection names. +A collection name identifies a collection in a database. It is a string and is unique within the database. Unlike the collection identifier it is supplied by the creator of the collection. The collection name must consist of letters, digits, and the _ (underscore) and - (dash) characters only. Please refer to [NamingConventions](../DataModeling/NamingConventions/CollectionAndViewNames.md) for more information on valid collection names. Database -------- @@ -222,7 +222,7 @@ renamed. Renaming a view will change the view name, but not the view identifier. The conceptual transformation function employed by a view type is implementation specific. The type is specified by the user when the view is created, and cannot be changed later. The following view types are currently supported: -* [arangosearch](../Views/ArangoSearch/README.md) +* [`arangosearch`](../Views/ArangoSearch/README.md) View Identifier --------------- @@ -244,7 +244,7 @@ A view name identifies a view in a database. It is a string and is unique within the database. Unlike the view identifier it is supplied by the creator of the view. The view name must consist of letters, digits, and the _ (underscore) and - (dash) characters only. 
Please refer to -[NamingConventions](../DataModeling/NamingConventions/CollectionNames.md) for +[Naming Conventions](../DataModeling/NamingConventions/CollectionAndViewNames.md) for more information on valid view names, which follow the same guidelines as collection names. diff --git a/Documentation/Books/Manual/DataModeling/Collections/README.md b/Documentation/Books/Manual/DataModeling/Collections/README.md index cd994525d7..f2821272fa 100644 --- a/Documentation/Books/Manual/DataModeling/Collections/README.md +++ b/Documentation/Books/Manual/DataModeling/Collections/README.md @@ -11,11 +11,14 @@ Address of a Collection ----------------------- All collections in ArangoDB have a unique identifier and a unique -name. ArangoDB internally uses the collection's unique identifier to look up -collections. This identifier, however, is managed by ArangoDB and the user has -no control over it. In order to allow users to use their own names, each collection -also has a unique name which is specified by the user. To access a collection -from the user perspective, the [collection name](../../Appendix/Glossary.md#collection-name) should be used, i.e.: +name. The namespace for collections is shared with views, so there cannot exist +a collection and a view with the same name in the same database. ArangoDB +internally uses the collection's unique identifier to look up collections. This +identifier, however, is managed by ArangoDB and the user has no control over it. +In order to allow users to use their own names, each collection also has a +unique name which is specified by the user. 
To access a collection from the user +perspective, the [collection name](../../Appendix/Glossary.md#collection-name) +should be used, i.e.: ### Collection `db._collection(collection-name)` diff --git a/Documentation/Books/Manual/DataModeling/NamingConventions/CollectionNames.md b/Documentation/Books/Manual/DataModeling/NamingConventions/CollectionAndViewNames.md similarity index 71% rename from Documentation/Books/Manual/DataModeling/NamingConventions/CollectionNames.md rename to Documentation/Books/Manual/DataModeling/NamingConventions/CollectionAndViewNames.md index 552bb429ea..5d9599076f 100644 --- a/Documentation/Books/Manual/DataModeling/NamingConventions/CollectionNames.md +++ b/Documentation/Books/Manual/DataModeling/NamingConventions/CollectionAndViewNames.md @@ -1,17 +1,16 @@ -Collection Names -================ +Collection and View Names +========================= -Users can pick names for their collections as desired, provided the following -naming constraints are not violated: +Users can pick names for their collections (or views) as desired, provided the +following naming constraints are not violated: * Collection names must only consist of the letters *a* to *z* (both in lower and upper case), the numbers *0* to *9*, and the underscore (*_*) or dash (*-*) symbols. This also means that any non-ASCII collection names are not allowed * User-defined collection names must always start with a letter. System collection - names must start with an underscore. - All collection names starting with an underscore are considered to be system - collections that are for ArangoDB's internal use only. System collection names + names must start with an underscore. + All collection names starting with an underscore are considered to be system + collections that are for ArangoDB's internal use only. 
System collection names should not be used by end users for their own collections * The maximum allowed length of a collection name is 64 bytes * Collection names are case-sensitive - diff --git a/Documentation/Books/Manual/DataModeling/Views/DatabaseMethods.md b/Documentation/Books/Manual/DataModeling/Views/DatabaseMethods.md index a91be79856..54bd49ead8 100644 --- a/Documentation/Books/Manual/DataModeling/Views/DatabaseMethods.md +++ b/Documentation/Books/Manual/DataModeling/Views/DatabaseMethods.md @@ -10,6 +10,16 @@ View Returns the view with the given name or null if no such view exists. + @startDocuBlockInline viewDatabaseGet + @EXAMPLE_ARANGOSH_OUTPUT{viewDatabaseGet} + ~ db._createView("example", "arangosearch", {}); + | view = db._view("example"); + // or, alternatively + view = db["example"] + ~ db._dropView("example"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewDatabaseGet + `db._view(view-identifier)` Returns the view with the given identifier or null if no such view exists. @@ -23,17 +33,10 @@ Get a view by name: @startDocuBlockInline viewDatabaseNameKnown @EXAMPLE_ARANGOSH_OUTPUT{viewDatabaseNameKnown} - db._view("demo"); + db._view("demoView"); @END_EXAMPLE_ARANGOSH_OUTPUT @endDocuBlock viewDatabaseNameKnown -Get a view by id: - -``` -arangosh> db._view(123456); -[ArangoView 123456, "demo"] -``` - Unknown view: @startDocuBlockInline viewDatabaseNameUnknown @@ -50,24 +53,33 @@ Create `db._createView(view-name, view-type, view-properties)` -*view-type* must be one of the supported [View Types](README.md) - -*view-properties* view configuration specific to each view-type - Creates a new view named *view-name* of type *view-type* with properties -*view-properties*. If the view name already exists or if the name format is -invalid, an error is thrown. For more information on valid view names please -refer to the [naming conventions](../NamingConventions/README.md). +*view-properties*. + +*view-name* is a string and the name of the view. 
No view or collection with the +same name may already exist in the current database. For more information on +valid view names please refer to the [naming conventions +](../NamingConventions/README.md). + +*view-type* must be the string `"arangosearch"`, as it is currently the only +supported view type. + +*view-properties* is an optional object containing view configuration specific +to each view-type. Currently, only ArangoSearch Views are supported. See +[ArangoSearch View definition +](../../Views/ArangoSearch/DetailedOverview.md#view-definitionmodification) for +details. **Examples** -Create a view: + @startDocuBlockInline viewDatabaseCreate + @EXAMPLE_ARANGOSH_OUTPUT{viewDatabaseCreate} + v = db._createView("example", "arangosearch"); + v.properties() + db._dropView("example") + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewDatabaseCreate -``` -arangosh> v = db._createView("example", \, \); -arangosh> v.properties(); -arangosh> db._dropView("example"); -``` All Views --------- @@ -81,13 +93,15 @@ Returns all views of the given database. **Examples** -Query views: +List all views: -``` -arangosh> db._createView("example", \, \); -arangosh> db._views(); -arangosh> db._dropView("example"); -``` + @startDocuBlockInline viewDatabaseList + @EXAMPLE_ARANGOSH_OUTPUT{viewDatabaseList} + ~ db._createView("exampleView", "arangosearch"); + db._views(); + ~ db._dropView("exampleView"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewDatabaseList Drop ---- @@ -108,9 +122,10 @@ thrown if there is no such view. 
Drop a view: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> db._dropView("example"); -arangosh> v; -``` + @startDocuBlockInline viewDatabaseDrop + @EXAMPLE_ARANGOSH_OUTPUT{viewDatabaseDrop} + db._createView("exampleView", "arangosearch"); + db._dropView("exampleView"); + db._view("exampleView"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewDatabaseDrop diff --git a/Documentation/Books/Manual/DataModeling/Views/README.md b/Documentation/Books/Manual/DataModeling/Views/README.md index 2fc5bb4bcb..21c49a0a7a 100644 --- a/Documentation/Books/Manual/DataModeling/Views/README.md +++ b/Documentation/Books/Manual/DataModeling/Views/README.md @@ -2,42 +2,97 @@ JavaScript Interface to Views ============================= This is an introduction to ArangoDB's interface for views and how to handle -views fron the JavaScript shell _arangosh_. For other languages see the +views from the JavaScript shell _arangosh_. For other languages see the corresponding language API. Address of a View ----------------- -All views in ArangoDB have a unique identifier and a unique name. -ArangoDB internally uses -the view's unique identifier to look up views. This identifier, however, is -managed by ArangoDB and the user has no control over it. In order to allow users -to use their own names, each view also has a unique name which is specified by -the user. To access a view from the user perspective, the -[view name](../../Appendix/Glossary.md#view-name) should be used, i.e.: +Like [collections](../Collections/README.md), views are accessed by the user via +their unique name and internally via their identifier. Using the identifier for +accessing views is discouraged. Views share their namespace with collections, +so there cannot exist a view and a collection with the same name in the same +database. -### View -`db._view(view-name)` +Usage +----- -A view is created by a ["db._createView"](DatabaseMethods.md) call. 
The returned -object may then be used via the [exposed methods](ViewMethods.md). +Here follow some basic usage examples. More details can be found in the +following chapters: +- [Database Methods for Views](DatabaseMethods.md) +- [View Methods](ViewMethods.md) +- [ArangoSearch Views](../../Views/ArangoSearch/README.md) -For example: Assume that the -[view identifier](../../Appendix/Glossary.md#view-identifier) is *7254820* and -the name is *demo*, then the view can be accessed as: +Create a view with default properties: - db._view("demo") + @startDocuBlockInline viewUsage_01 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_01} + ~ db._create("colA"); + ~ db._create("colB"); + view = db._createView("myView", "arangosearch", {}); + ~ addIgnoreCollection("colA"); + ~ addIgnoreCollection("colB"); + ~ addIgnoreView("myView"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_01 -If no view with such a name exists, then *null* is returned. +Get this view again later by name: -### Create -`db._createView(view-name, view-type, view-properties)` + @startDocuBlockInline viewUsage_02 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_02} + view = db._view("myView"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_02 -This call will create a new view called *view-name*. This method is a database -method and is documented in detail in -[Database Methods](DatabaseMethods.md#create) +Get the view properties: -### View Types + @startDocuBlockInline viewUsage_03 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_03} + view.properties(); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_03 -The currently supported view implementation is: **arangosearch** as described in -[ArangoSearch View](../../Views/ArangoSearch/README.md).
+Set a view property: + + @startDocuBlockInline viewUsage_04 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_04} + view.properties({cleanupIntervalStep: 12}); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_04 + +Add a link: + + @startDocuBlockInline viewUsage_05 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_05} + view.properties({links: {colA: {includeAllFields: true}}}); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_05 + +Add another link: + + @startDocuBlockInline viewUsage_06 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_06} + view.properties({links: {colB: {fields: {text: {}}}}}); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_06 + +Remove the first link again: + + @startDocuBlockInline viewUsage_07 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_07} + view.properties({links: {colA: null}}); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_07 + +Drop the view: + + @startDocuBlockInline viewUsage_08 + @EXAMPLE_ARANGOSH_OUTPUT{viewUsage_08} + ~ removeIgnoreCollection("colA"); + ~ removeIgnoreCollection("colB"); + ~ removeIgnoreView("myView"); + db._dropView("myView"); + ~ db._drop("colA"); + ~ db._drop("colB"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewUsage_08 diff --git a/Documentation/Books/Manual/DataModeling/Views/ViewMethods.md b/Documentation/Books/Manual/DataModeling/Views/ViewMethods.md index 0561efc2d2..9b2935451a 100644 --- a/Documentation/Books/Manual/DataModeling/Views/ViewMethods.md +++ b/Documentation/Books/Manual/DataModeling/Views/ViewMethods.md @@ -12,15 +12,17 @@ Drops a *view* and all its data. 
**Examples** - Drop a view: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.drop(); -arangosh> v; -``` + @startDocuBlockInline viewDrop + @EXAMPLE_ARANGOSH_OUTPUT{viewDrop} + | v = db._createView("example", "arangosearch"); + // or + v = db._view("example"); + v.drop(); + db._view("example"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewDrop Query Name ---------- @@ -35,37 +37,40 @@ Returns the name of the *view*. Get view name: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.name(); -arangosh> db._dropView("example"); -``` + @startDocuBlockInline viewName + @EXAMPLE_ARANGOSH_OUTPUT{viewName} + v = db._view("demoView"); + v.name(); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewName -Modify Name ------------ +Rename +------ `view.rename(new-name)` Renames a view using the *new-name*. The *new-name* must not already be used by -a different view. *new-name* must also be a valid view name. For -more information on valid view names please refer to the +a different view or collection in the same database. *new-name* must also be a +valid view name. For more information on valid view names please refer to the [naming conventions](../NamingConventions/README.md). If renaming fails for any reason, an error is thrown. +**Note**: this method is not available in a cluster. + **Examples** -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.name(); -arangosh> v.rename("example-renamed"); -arangosh> v.name(); -arangosh> db._dropView("example-renamed"); -``` + @startDocuBlockInline viewRename + @EXAMPLE_ARANGOSH_OUTPUT{viewRename} + v = db._createView("example", "arangosearch"); + v.name(); + v.rename("exampleRenamed"); + v.name(); + ~ db._dropView("exampleRenamed"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewRename Query Type ---------- @@ -80,12 +85,12 @@ Returns the type of the *view*. 
Get view type: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.type(); -arangosh> db._dropView("example"); -``` + @startDocuBlockInline viewType + @EXAMPLE_ARANGOSH_OUTPUT{viewType} + v = db._view("demoView"); + v.type(); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewType Query Properties ---------------- @@ -101,30 +106,46 @@ each of the supported [View Types](README.md). Get view properties: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.properties(); -arangosh> db._dropView("example"); -``` + @startDocuBlockInline viewGetProperties + @EXAMPLE_ARANGOSH_OUTPUT{viewGetProperties} + v = db._view("demoView"); + v.properties(); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewGetProperties Modify Properties ----------------- -`view.properties(view-property-modification)` +`view.properties(view-property-modification, partialUpdate)` Modifies the properties of the *view*. The format of the result is specific to -each of the supported [View Types](README.md). +each of the supported [View Types](README.md). *partialUpdate* is an optional +boolean parameter (`true` by default) that determines how +*view-property-modification* is merged with current view *properties* (adds or +updates *view-property-modification* properties to current if `true` and, +additionally, removes all other properties if `false`). + +Currently, the only supported view type is `arangosearch`, and its properties +can be found in +[View properties](../../Views/ArangoSearch/DetailedOverview.md#view-properties).
**Examples** Modify view properties: -``` -arangosh> db._createView("example", \, \); -arangosh> v = db._view("example"); -arangosh> v.properties(\); -arangosh> db._dropView("example"); -``` + @startDocuBlockInline viewModifyProperties + @EXAMPLE_ARANGOSH_OUTPUT{viewModifyProperties} + ~ db._createView("example", "arangosearch"); + v = db._view("example"); + | v.properties(); + // set cleanupIntervalStep to 12 + | v.properties({cleanupIntervalStep: 12}); + // add a link + | v.properties({links: {demo: {}}}) + // remove a link + v.properties({links: {demo: null}}) + ~ db._dropView("example"); + @END_EXAMPLE_ARANGOSH_OUTPUT + @endDocuBlock viewModifyProperties diff --git a/Documentation/Books/Manual/SUMMARY.md b/Documentation/Books/Manual/SUMMARY.md index fd79d7fa9b..94c0b8cd6c 100644 --- a/Documentation/Books/Manual/SUMMARY.md +++ b/Documentation/Books/Manual/SUMMARY.md @@ -91,7 +91,7 @@ * [View Methods](DataModeling/Views/ViewMethods.md) * [Naming Conventions](DataModeling/NamingConventions/README.md) * [Database Names](DataModeling/NamingConventions/DatabaseNames.md) - * [Collection Names](DataModeling/NamingConventions/CollectionNames.md) + * [Collection and View Names](DataModeling/NamingConventions/CollectionAndViewNames.md) * [Document Keys](DataModeling/NamingConventions/DocumentKeys.md) * [Attribute Names](DataModeling/NamingConventions/AttributeNames.md) * [Indexing](Indexing/README.md) diff --git a/Documentation/Books/Manual/Views/ArangoSearch/Analyzers.md b/Documentation/Books/Manual/Views/ArangoSearch/Analyzers.md index 8985646979..bde101b8e9 100644 --- a/Documentation/Books/Manual/Views/ArangoSearch/Analyzers.md +++ b/Documentation/Books/Manual/Views/ArangoSearch/Analyzers.md @@ -1,111 +1,57 @@ ArangoSearch Analyzers ====================== -To simplify query syntax ArangoSearch provides a concept of named analyzers which -are merely aliases for type+configuration of IResearch analyzers. 
Management of -named analyzers will be exposed in upcoming ArangoDB versions via REST, GUI -and JavaScript APIs, e.g. - -`db._globalSettings("iresearch.analyzers")` - -A user then merely uses these analyzer names in ArangoSearch view configurations -and AQL queries. - -ArangoSearch provides a 'text' analyzer to analyze human readable text. A required -configuration parameter for this type of analyzer is 'locale' used to specify -the language used for analysis. - -The ArangoDB administrator may then set up a named analyzer 'text_des': - -```json -{ - "name": "text_des", - "type": "text", - "properties": { - "locale": "de-ch" - } -} -``` - -The user is then immediately able to run queries with the said analyzer, e.g. - -`SEARCH doc.description IN TOKENS('Ein brauner Fuchs springt', 'text_des')` - -Similarly an administrator may choose to deploy a custom DNA analyzer 'DnaSeq': - -```json -{ - "name": "dna", - "type": "DnaSeq", - "properties": "use-human-config" -} -``` - -The user is then immediately able to run queries with the said analyzer, e.g. - -`SEARCH doc.dna IN TOKENS('ACGTCGTATGCACTGA', 'DnaSeq')` - -To a limited degree the concept of 'analysis' is even available in non-IResearch -AQL, e.g. the `TOKENS(...)` function will utilize the power of IResearch to break -up a value into an AQL array that can be used anywhere in the AQL query. - -In plain terms this means a user can match a document attribute when its -value matches at least one value form a set, (yes this is independent of doc), -e.g. 
to match docs with 'word == quick' OR 'word == brown' OR 'word == fox' - - FOR doc IN someCollection - FILTER doc.word IN TOKENS('a quick brown fox', 'text_en') - RETURN doc - -Runtime-plugging functionality for analyzers is not available in ArangoDB at -this point in time, so ArangoDB comes with a few default-initialized analyzers: +To simplify query syntax ArangoSearch provides a concept of named analyzers +which are merely aliases for type+configuration of IResearch analyzers. In the +future, users will be able to specify their own named analyzers. For now, +ArangoDB comes with the following analyzers: * `identity` treat the value as an atom * `text_de` tokenize the value into case-insensitive word stems as per the German locale, - do not discard any any stopwords + do not discard any stopwords * `text_en` tokenize the value into case-insensitive word stems as per the English locale, - do not discard any any stopwords + do not discard any stopwords * `text_es` tokenize the value into case-insensitive word stems as per the Spanish locale, - do not discard any any stopwords + do not discard any stopwords * `text_fi` tokenize the value into case-insensitive word stems as per the Finnish locale, - do not discard any any stopwords + do not discard any stopwords * `text_fr` tokenize the value into case-insensitive word stems as per the French locale, - do not discard any any stopwords + do not discard any stopwords * `text_it` tokenize the value into case-insensitive word stems as per the Italian locale, - do not discard any any stopwords + do not discard any stopwords * `text_nl` tokenize the value into case-insensitive word stems as per the Dutch locale, - do not discard any any stopwords + do not discard any stopwords * `text_no` tokenize the value into case-insensitive word stems as per the Norwegian - locale, do not discard any any stopwords + locale, do not discard any stopwords * `text_pt` tokenize the value into case-insensitive word stems as per the 
Portuguese - locale, do not discard any any stopwords + locale, do not discard any stopwords * `text_ru` tokenize the value into case-insensitive word stems as per the Russian locale, - do not discard any any stopwords + do not discard any stopwords * `text_sv` tokenize the value into case-insensitive word stems as per the Swedish locale, - do not discard any any stopwords + do not discard any stopwords * `text_zh` tokenize the value into word stems as per the Chinese locale diff --git a/Documentation/Books/Manual/Views/ArangoSearch/DetailedOverview.md b/Documentation/Books/Manual/Views/ArangoSearch/DetailedOverview.md index c0e3677f19..64df2221d8 100644 --- a/Documentation/Books/Manual/Views/ArangoSearch/DetailedOverview.md +++ b/Documentation/Books/Manual/Views/ArangoSearch/DetailedOverview.md @@ -1,18 +1,18 @@ -# Detailed overview of ArangoSearch views +# Detailed overview of ArangoSearch Views -ArangoSearch is a powerful fulltext search component with additional functionality, -supported via the 'text' analyzer and 'tfidf'/'bm25' [scorers](Scorers.md), -without impact on performance when specifying documents from different collections or -filtering on multiple document attributes. +ArangoSearch is a powerful fulltext search component with additional +functionality, supported via the 'text' analyzer and 'tfidf'/'bm25' +[scorers](Scorers.md), without impact on performance when specifying documents +from different collections or filtering on multiple document attributes. ## View datasource -Search functionality is exposed to ArangoDB via the view API for views -of type *arangosearch*. The ArangoSearch view is merely an identity -transformation applied onto documents stored in linked collections of the same ArangoDB database. -In plain terms an ArangoSearch view only allows filtering and sorting of documents -located in collections of the same database. The matching documents themselves -are returned as-is from their corresponding collections. 
+Search functionality is exposed to ArangoDB via the view API for views of type +`arangosearch`. The ArangoSearch View is merely an identity transformation +applied onto documents stored in linked collections of the same ArangoDB +database. In plain terms an ArangoSearch View only allows filtering and sorting +of documents located in collections of the same database. The matching documents +themselves are returned as-is from their corresponding collections. ## Links to ArangoDB collections @@ -21,48 +21,45 @@ which ArangoDB collections a given ArangoSearch View should query for documents and how these documents should be queried. An ArangoSearch Link is a uni-directional connection from an ArangoDB collection -to an ArangoSearch view describing how data coming from the said collection should -be made available in the given view. Each ArangoSearch Link in an ArangoSearch -view is uniquely identified by the name of the ArangoDB collection it links to. -An ArangoSearch view may have zero or more links, each to a distinct ArangoDB -collection. Similarly an ArangoDB collection may be referenced via links by zero -or more distinct ArangoSearch views. In plain terms any given ArangoSearch view -may be linked to any given ArangoDB collection of the same database with zero or -at most one link. However, any ArangoSearch view may be linked to multiple -distinct ArangoDB collections and similarly any ArangoDB collection may be -referenced by multiple ArangoSearch views. +to an ArangoSearch View describing how data coming from the said collection +should be made available in the given view. Each ArangoSearch Link in an +ArangoSearch View is uniquely identified by the name of the ArangoDB collection +it links to. An ArangoSearch View may have zero or more links, each to a +distinct ArangoDB collection. Similarly an ArangoDB collection may be referenced +via links by zero or more distinct ArangoSearch Views.
In other words, any given +ArangoSearch View may be linked to any given ArangoDB collection of the same +database with zero or one link. However, any ArangoSearch View may be linked to +multiple distinct ArangoDB collections and similarly any ArangoDB collection may +be referenced by multiple ArangoSearch Views. -To configure an ArangoSearch view for consideration of documents from a given +To configure an ArangoSearch View for consideration of documents from a given ArangoDB collection a link definition must be added to the properties of the -said ArangoSearch view defining the link parameters as per the section +said ArangoSearch View defining the link parameters as per the section [View definition/modification](#view-definitionmodification). ## Index -Inverted Index is the heart of ArangoSearch. The index consists of several -independent segments and the index segment itself is meant to be treated as -a standalone index. - - +Inverted Index is the heart of ArangoSearch. The index consists of several +independent segments and the index segment itself is meant to be treated as a +standalone index. ## Analyzers -To simplify query syntax ArangoSearch provides a concept of -[named analyzers](Analyzers.md) which are merely aliases for -type+configuration of IResearch analyzers. Management of named analyzers -is exposed via REST, GUI and JavaScript APIs. +To simplify query syntax ArangoSearch provides a concept of [named +analyzers](Analyzers.md) which are merely aliases for type+configuration of +IResearch analyzers. ## View definition/modification -An ArangoSearch view is configured via an object containing a set of +An ArangoSearch View is configured via an object containing a set of view-specific configuration directives and a map of link-specific configuration directives. 
During view creation the following directives apply: -* **id** (_optional_; type: `string`): the desired view identifier * **name** (_required_; type: `string`): the view name -* **type** (_required_; type: `string`): the value "arangosearch"
+* **type** (_required_; type: `string`): the value `"arangosearch"` * any of the directives from the section [View properties](#view-properties) During view modification the following directives apply: @@ -75,118 +72,139 @@ During view modification the following directives apply: ## View properties The following terminology from ArangoSearch architecture is used to understand -view properties assignment of its type:
-The index consists of several independent segments and the index **segment** itself -is meant to be treated as a standalone index. **Commit** is meant to be treated -as the procedure of accumulating processed data -creating new index segments. **Consolidation** is meant to be treated as the procedure -of joining multiple index segments into a bigger one and removing garbage documents -(e.g. deleted from a collection). **Cleanup** is meant to be treated as the -procedure of removing unused segments after release of internal resources. +view properties assignment of its type: + +The index consists of several independent segments and the index **segment** +itself is meant to be treated as a standalone index. **Commit** is meant to be +treated as the procedure of accumulating processed data and creating new index +segments. **Consolidation** is meant to be treated as the procedure of joining +multiple index segments into a bigger one and removing garbage documents (e.g. +deleted from a collection). **Cleanup** is meant to be treated as the procedure +of removing unused segments after release of internal resources. + +* **cleanupIntervalStep** (_optional_; type: `integer`; default: `10`; to + disable use: `0`) + -* **cleanupIntervalStep** (_optional_; type: `integer`; default: `10`; to disable use: `0`)
ArangoSearch waits at least this many commits between removing unused files in - its data directory - for the case where the consolidation policies merge segments often (i.e. a - lot of commit+consolidate). A lower value will cause a lot of disk space to - be wasted - for the case where the consolidation policies rarely merge segments (i.e. - few inserts/deletes). A higher value will impact performance without any - added benefits. - >With every "commit" or "consolidate" operation a new state of the view - internal data-structures is created on disk old states/snapshots are released once there are no longer any users - remaining however, the files for the released states/snapshots are left on disk, and - only removed by "cleanup" operation. + its data directory. For the case where the consolidation policies merge + segments often (i.e. a lot of commit+consolidate), a lower value will cause a + lot of disk space to be wasted. For the case where the consolidation policies + rarely merge segments (i.e. few inserts/deletes), a higher value will impact + performance without any added benefits. -* **consoloidationIntervalMsec** (_optional_; type: `integer`; default: `60000`; to disable use: `0`)
- ArangoSearch waits at least this many milliseconds between committing view data store - changes and making documents visible to queries - for the case where there are a lot of inserts/updates. A lower value will - cause the view not to account for them, (until commit), and memory usage - would continue to grow - for the case where there are a few inserts/updates. A higher value will - impact performance and waste disk space for each commit call without any - added benefits. - >For data retrieval ArangoSearch views follow the concept of - "eventually-consistent", i.e. eventually all the data in ArangoDB will be - matched by corresponding query expressions - the concept of ArangoSearch view "commit" operation is introduced to - control the upper-bound on the time until document addition/removals are - actually reflected by corresponding query expressions - once a "commit" operation is complete all documents added/removed prior to - the start of the "commit" operation will be reflected by queries invoked in - subsequent ArangoDB transactions, in-progress ArangoDB transactions will - still continue to return a repeatable-read state. + > With every **commit** or **consolidate** operation a new state of the view + > internal data-structures is created on disk. Old states/snapshots are + > released once there are no longer any users remaining. However, the files + > for the released states/snapshots are left on disk, and only removed by + > "cleanup" operation. -* **consolidationPolicy** (_optional_; type: `object`; default: `{}`)
- the consolidation policy to apply for selecting data store segment merge +* **consolidationIntervalMsec** (_optional_; type: `integer`; default: `60000`; + to disable use: `0`) + + ArangoSearch waits _at least_ this many milliseconds between committing view + data store changes and making documents visible to queries. A lower value + will cause the view not to account for them, (until commit), and memory usage + would continue to grow for the case where there are a few inserts/updates. A + higher value will impact performance and waste disk space for each commit call + without any added benefits. + + > For data retrieval ArangoSearch Views follow the concept of + > "eventually-consistent", i.e. eventually all the data in ArangoDB will be + > matched by corresponding query expressions. The concept of an ArangoSearch + > View "commit" operation is introduced to control the upper-bound on the time + > until document addition/removals are actually reflected by corresponding + > query expressions. Once a **commit** operation is complete, all documents + > added/removed prior to the start of the **commit** operation will be + > reflected by queries invoked in subsequent ArangoDB transactions, while + > in-progress ArangoDB transactions will still continue to return a + > repeatable-read state. + +* **consolidationPolicy** (_optional_; type: `object`; default: `{}`) + + The consolidation policy to apply for selecting data store segment merge candidates. 
- >With each ArangoDB transaction that inserts documents one or more - ArangoSearch internal segments gets created - similarly for removed documents the segments that contain such documents - will have these documents marked as 'deleted' - over time this approach causes a lot of small and sparse segments to be - created - a "consolidation" operation selects one or more segments and copies all of - their valid documents into a single new segment, thereby allowing the - search algorithm to perform more optimally and for extra file handles to be - released once old segments are no longer used. - * **type** (_optional_; type: `string`; default: `"bytes_accum"`)
- the segment candidates for the "consolidation" operation are selected based - upon several possible configurable formulas as defined by their types - the currently supported types are: - - **bytes**: consolidate if and only if - `{threshold} > segment_bytes / (all_segment_bytes / number_of_segments)` - i.e. the candidate segment byte size is less that the average segment byte size multiplied by the `{threshold}` - - **bytes_accum**: consolidate if and only if - `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` - i.e. the sum of all candidate segment byte size is less than the total segment byte size multiplied by the `{threshold}` - - **count**: consolidate if and only if - `{threshold} > segment_docs{valid} / (all_segment_docs{valid} / number_of_segments)` - i.e. the candidate segment non-deleted document count is less that the average segment non-deleted document count size multiplied by the `{threshold}` - - **fill**: consolidate if and only if: - `{threshold} > #segment_docs{valid} / (#segment_docs{valid} + number_of_segment_docs{removed})` - i.e. the candidate segment valid document count is less that the average segment total document count multiplied by the `{threshold}` + > With each ArangoDB transaction that inserts documents, one or more + > ArangoSearch internal segments get created. Similarly, for removed + > documents the segments containing such documents will have these documents + > marked as "deleted". Over time this approach causes a lot of small and + > sparse segments to be created. A **consolidation** operation selects one or + > more segments and copies all of their valid documents into a single new + > segment, thereby allowing the search algorithm to perform more optimally and + > for extra file handles to be released once old segments are no longer used. - * **segmentThreshold** (_optional_; type: `integer`; default: `300`)
- apply the "consolidation" operation if and only if `{segmentThreshold} < number_of_segments` + * **type** (_optional_; type: `string`; default: `"bytes_accum"`) - * **threshold** (_optional_; type: `float`; default: `0.85`)
- select a given segment for "consolidation" if and only if the formula based - on *type* (as defined above) evaluates to true, valid value range - `[0.0, 1.0]` + The segment candidates for the "consolidation" operation are selected based + upon several possible configurable formulas as defined by their types. + The currently supported types are: + + - **bytes**: Consolidate if and only if `{threshold} > segment_bytes / + (all_segment_bytes / number_of_segments)`, i.e. the candidate segment's + byte size is less than the average segment's byte size multiplied by the + `{threshold}`. + - **bytes_accum**: Consolidate if and only if `{threshold} > (segment_bytes + + sum_of_merge_candidate_segment_bytes) / all_segment_bytes`, i.e. the sum + of all candidate segments' byte sizes is less than the total segment byte + size multiplied by the `{threshold}`. + - **count**: Consolidate if and only if `{threshold} > segment_docs{valid} / + (all_segment_docs{valid} / number_of_segments)`, i.e. the candidate + segment's non-deleted document count is less than the average segment's + non-deleted document count multiplied by the `{threshold}`. + - **fill**: Consolidate if and only if `{threshold} > #segment_docs{valid} / + (#segment_docs{valid} + number_of_segment_docs{removed})`, i.e. the + candidate segment's valid document count is less than the same + segment's total document count multiplied by the `{threshold}`. + + * **segmentThreshold** (_optional_; type: `integer`; default: `300`) + + Apply the "consolidation" operation if and only if `{segmentThreshold} < + number_of_segments`. + + * **threshold** (_optional_; type: `float`; default: `0.85`) + + Select a given segment for **consolidation** if and only if the formula + based on *type* (as defined above) evaluates to true. Valid values are in + the range `[0.0, 1.0]`. ## Link properties -* **analyzers** (_optional_; type: `array`; subtype: `string`; default: `[ 'identity' ]`)
- a list of analyzers, by name as defined via the [Analyzers](Analyzers.md), that - should be applied to values of processed document attributes +* **analyzers** (_optional_; type: `array`; subtype: `string`; default: `[ + 'identity' ]`) -* **fields** (_optional_; type: `object`; default: `{}`)
- an object `{attribute-name: [Link properties]}` of fields that should be - processed at each level of the document - each key specifies the document attribute to be processed, the value of - *includeAllFields* is also consulted when selecting fields to be processed - each value specifies the [Link properties](#link-properties) directives to be used when - processing the specified field, a Link properties value of `{}` denotes - inheritance of all (except *fields*) directives from the current level + A list of analyzers, by name as defined via the [Analyzers](Analyzers.md), + that should be applied to values of processed document attributes. -* **includeAllFields** (_optional_; type: `boolean`; default: `false`)
- if true then process all document attributes (if not explicitly specified - then process the fields with default Link properties directives, i.e. `{}`), - otherwise only consider attributes mentioned in *fields* +* **fields** (_optional_; type: `object`; default: `{}`) -* **trackListPositions** (_optional_; type: `boolean`; default: `false`)
- if true then for array values track the value position in the array, e.g. when - querying for the input: `{ attr: [ 'valueX', 'valueY', 'valueZ' ] }` - the user must specify: `doc.attr[1] == 'valueY'` - otherwise all values in an array are treated as equal alternatives, e.g. when - querying for the input: `{ attr: [ 'valueX', 'valueY', 'valueZ' ] }` - the user must specify: `doc.attr == 'valueY'` + An object `{attribute-name: [Link properties]}` of fields that should be + processed at each level of the document. Each key specifies the document + attribute to be processed. Note that the value of `includeAllFields` is also + consulted when selecting fields to be processed. Each value specifies the + [Link properties](#link-properties) directives to be used when processing the + specified field, a Link properties value of `{}` denotes inheritance of all + (except `fields`) directives from the current level. -* **storeValues** (_optional_; type: `string`; default: `"none"`)
- how should the view track the attribute values, this setting allows for - additional value retrieval optimizations, one of: - * **none**: Do not store values by the view - * **id**: Store only information about value presence, to allow use of the `EXISTS()` function +* **includeAllFields** (_optional_; type: `boolean`; default: `false`) + + If set to `true`, then process all document attributes. Otherwise, only + consider attributes mentioned in `fields`. Attributes not explicitly + specified in `fields` will be processed with default link properties, i.e. + `{}`. + +* **trackListPositions** (_optional_; type: `boolean`; default: `false`) + + If set to `true`, then for array values track the value position in arrays. + E.g., when querying for the input `{ attr: [ 'valueX', 'valueY', 'valueZ' ] + }`, the user must specify: `doc.attr[1] == 'valueY'`. Otherwise, all values in + an array are treated as equal alternatives. E.g., when querying for the input + `{ attr: [ 'valueX', 'valueY', 'valueZ' ] }`, the user must specify: `doc.attr + == 'valueY'`. + +* **storeValues** (_optional_; type: `string`; default: `"none"`) + + This property controls how the view should keep track of the attribute values. + Valid values are: + * **none**: Do not store values with the view. + * **id**: Store information about value presence to allow use of the + `EXISTS()` function. diff --git a/Documentation/Books/Manual/Views/ArangoSearch/GettingStarted.md b/Documentation/Books/Manual/Views/ArangoSearch/GettingStarted.md index 9b6d03c00f..e116138a34 100644 --- a/Documentation/Books/Manual/Views/ArangoSearch/GettingStarted.md +++ b/Documentation/Books/Manual/Views/ArangoSearch/GettingStarted.md @@ -1,4 +1,4 @@ -# Getting started with ArangoSearch views +# Getting started with ArangoSearch Views ## The DDL configuration @@ -8,7 +8,9 @@ especially database schemas. All DDL operations on Views can be done via JavaScript or REST calls. 
The DDL syntax follows the well established ArangoDB guidelines and thus is very -similar between JavaScript and REST. This article uses the JavaScript syntax. +similar between the [JavaScript interface for views](../../DataModeling/Views/README.md) +and the [HTTP interface for views](../../../HTTP/Views/index.html). This article +uses the JavaScript syntax. Assume the following collections were initially defined in a database using the following commands: @@ -40,29 +42,40 @@ v0 = db._createView("ExampleView", "arangosearch", {}); v0 = db._view("ExampleView"); v0.properties({ links: { - 'ExampleCollection0': /* collection Link 0 with additional custom configuration */ + /* collection Link 0 with additional custom configuration: */ + 'ExampleCollection0': { - includeAllFields: true, /* examine fields of all linked collections using default configuration */ + /* examine fields of all linked collections, + using default configuration: */ + includeAllFields: true, fields: { - name: /* a field to apply custom configuration that will index English text */ + /* a field to apply custom configuration + that will index English text: */ + name: { analyzers: ["text_en"] }, - text: /* another field to apply custom that will index Chineese text */ + /* another field to apply custom configuration + that will index Chinese text: */ + text: { analyzers: ["text_zh"] } } }, - 'ExampleCollection1': /* collection Link 1 with custom configuration */ + /* collection Link 1 with custom configuration: */ + 'ExampleCollection1': { - includeAllFields: true, /* examine all fields using default configuration */ + /* examine all fields using default configuration: */ + includeAllFields: true, fields: { a: { - analyzers: ["text_en"] /* a field to apply custom configuration that will index English text */ + /* a field to apply custom configuration + that will index English text: */ + analyzers: ["text_en"] } } } diff --git a/Documentation/Books/Manual/Views/ArangoSearch/README.md
b/Documentation/Books/Manual/Views/ArangoSearch/README.md index 325f690cee..431ca1661a 100644 --- a/Documentation/Books/Manual/Views/ArangoSearch/README.md +++ b/Documentation/Books/Manual/Views/ArangoSearch/README.md @@ -1,4 +1,4 @@ -# ArangoSearch views powered by IResearch +# ArangoSearch Views powered by IResearch ArangoSearch is a natively integrated AQL extension making use of the IResearch library. @@ -9,16 +9,16 @@ ArangoSearch allows one to: * filter documents based on AQL boolean expressions and functions * sort the result set based on how closely each document matched the filter -A concept of value 'analysis' that is meant to break up a given value into +A concept of value "analysis" that is meant to break up a given value into a set of sub-values internally tied together by metadata which influences both the filter and sort stages to provide the most appropriate match for the specified conditions, similar to queries to web search engines. In plain terms this means a user can for example: -* request documents where the 'body' attribute best matches 'a quick brown fox' -* request documents where the 'dna' attribute best matches a DNA sub sequence -* request documents where the 'name' attribute best matches gender +* request documents where the `body` attribute best matches `a quick brown fox` +* request documents where the `dna` attribute best matches a DNA sub sequence +* request documents where the `name` attribute best matches gender * etc. (via custom analyzers) ## The IResearch Library @@ -28,7 +28,7 @@ in modern C++, optimized for speed and memory footprint, with source available from https://github.com/iresearch-toolkit/iresearch IResearch is the framework for indexing, filtering and sorting of data. 
-The indexing stage can treat each data item as an atom or use custom 'analyzers' +The indexing stage can treat each data item as an atom or use custom "analyzers" to break the data item into sub-atomic pieces tied together with internally tracked metadata. @@ -37,9 +37,9 @@ custom implementations of analyzers (used during the indexing and filtering stages) and scorers (used during the sorting stage) allowing full control over the behavior of the engine. -## Using ArangoSearch views +## Using ArangoSearch Views To get more familiar with ArangoSearch usage, you may start with [Getting Started](GettingStarted.md) simple guide and then explore details of ArangoSearch in [Detailed Overview](DetailedOverview.md), [Analyzers](Analyzers.md) - and [Scorers](Scorers.md) topics. \ No newline at end of file + and [Scorers](Scorers.md) topics. diff --git a/Documentation/Books/Manual/Views/ArangoSearch/Scorers.md b/Documentation/Books/Manual/Views/ArangoSearch/Scorers.md index 12bfc51794..f66483c4d5 100644 --- a/Documentation/Books/Manual/Views/ArangoSearch/Scorers.md +++ b/Documentation/Books/Manual/Views/ArangoSearch/Scorers.md @@ -1,58 +1,47 @@ ArangoSearch Scorers ==================== -ArangoSearch accesses scorers directly by their internal names. The -name (in upper-case) of the scorer is the function name to be used in the -['SORT' section](../../../AQL/Views/ArangoSearch/index.html#arangosearch-sort). -Function arguments, (excluding the first argument), are serialized as a -string representation of a JSON array and passed directly to the corresponding -scorer. The first argument to any scorer function is the reference to the -current document emitted by the `FOR` statement, i.e. it would be 'doc' for this -statement: +ArangoSearch Scorers are special functions that allow sorting documents from a +view by their score regarding the analyzed fields.
-```js -FOR doc IN someView -``` - -IResearch provides a 'bm25' scorer implementing the -[BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25). This scorer -optionally takes 'k' and 'b' positional parameters. - -The user is able to run queries with the said scorer, e.g. - -```js -SORT BM25(doc, 1.2, 0.75) -``` - -The function arguments will then be serialized into a JSON representation: - -```json -[ 1.2, 0.75 ] -``` - -and passed to the scorer implementation. - -Similarly an administrator may choose to deploy a custom DNA analyzer 'DnaRank'. - -The user is then immediately able to run queries with the said scorer, e.g. - -```js -SORT DNARANK(doc, 123, 456, "abc", { "def": "ghi" }) -``` - -The function arguments will then be serialized into a JSON representation: - -```json -[ 123, 456, "abc", { "def": "ghi" } ] -``` - -and passed to the scorer implementation. - -Runtime-plugging functionality for scores is not available in ArangoDB at this -point in time, so ArangoDB comes with a few default-initialized scores: - -- *attribute-name*: order results based on the value of **attribute-name** +Details about their usage in AQL can be found in the +[ArangoSearch `SORT` section](../../../AQL/Views/ArangoSearch/index.html#arangosearch-sorting). - BM25: order results based on the [BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) - TFIDF: order results based on the [TFIDF algorithm](https://en.wikipedia.org/wiki/TF-IDF) + +### `BM25()` - Best Matching 25 Algorithm + +IResearch provides a 'bm25' scorer implementing the +[BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25). Optionally, free +parameters **k** and **b** of the algorithm typically used for advanced +optimization can be specified as floating point numbers. + +`BM25(doc, k, b)` + +- *doc* (document): must be emitted by `FOR doc IN someView` + +- *k* (number, _optional_): term frequency, the default is _1.2_. *k* + calibrates the text term frequency scaling.
A *k* value of *0* corresponds to + a binary model (no term frequency), and a large value corresponds to using raw + term frequency. + +- *b* (number, _optional_): determines the scaling by the total text length, the + default is _0.75_. + - b = 1 corresponds to fully scaling the term weight by the total text length. + - b = 0 corresponds to no length normalization. + +At the extreme values of the coefficient *b*, BM25 turns into the ranking +functions known as BM11 (for b = 1) and BM15 (for b = 0). + +### `TFIDF()` - Term Frequency – Inverse Document Frequency Algorithm + +Sorts documents using the +[**term frequency–inverse document frequency** algorithm](https://en.wikipedia.org/wiki/TF-IDF). + +`TFIDF(doc, withNorms)` + +- *doc* (document): must be emitted by `FOR doc IN someView` +- *withNorms* (bool, _optional_): specifying whether norms should be used via + **with-norms**, the default is _false_ diff --git a/Documentation/Examples/viewDatabaseCreate.generated b/Documentation/Examples/viewDatabaseCreate.generated new file mode 100644 index 0000000000..ac6d187b3f --- /dev/null +++ b/Documentation/Examples/viewDatabaseCreate.generated @@ -0,0 +1,15 @@ +arangosh> v = db._createView("example", "arangosearch"); +[ArangoView 119, "example" (type arangosearch)] +arangosh> v.properties() +{ + "links" : { + }, + "cleanupIntervalStep" : 10, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "consolidationIntervalMsec" : 60000 +} +arangosh> db._dropView("example") diff --git a/Documentation/Examples/viewDatabaseDrop.generated b/Documentation/Examples/viewDatabaseDrop.generated new file mode 100644 index 0000000000..01f521d9a8 --- /dev/null +++ b/Documentation/Examples/viewDatabaseDrop.generated @@ -0,0 +1,5 @@ +arangosh> db._createView("exampleView", "arangosearch"); +[ArangoView 119, "exampleView" (type arangosearch)] +arangosh>
db._dropView("exampleView"); +arangosh> db._view("exampleView"); +null diff --git a/Documentation/Examples/viewDatabaseGet.generated b/Documentation/Examples/viewDatabaseGet.generated new file mode 100644 index 0000000000..1f61f77592 --- /dev/null +++ b/Documentation/Examples/viewDatabaseGet.generated @@ -0,0 +1,5 @@ +arangosh> view = db._view("example"); +........> // or, alternatively +[ArangoView 115, "example" (type arangosearch)] +arangosh> view = db["example"] +[ArangoView 115, "example" (type arangosearch)] diff --git a/Documentation/Examples/viewDatabaseList.generated b/Documentation/Examples/viewDatabaseList.generated new file mode 100644 index 0000000000..e5648ba7c2 --- /dev/null +++ b/Documentation/Examples/viewDatabaseList.generated @@ -0,0 +1,5 @@ +arangosh> db._views(); +[ + [ArangoView 115, "demoView" (type arangosearch)], + [ArangoView 119, "exampleView" (type arangosearch)] +] diff --git a/Documentation/Examples/viewDatabaseNameKnown.generated b/Documentation/Examples/viewDatabaseNameKnown.generated index bbed6e5ec0..191777432a 100644 --- a/Documentation/Examples/viewDatabaseNameKnown.generated +++ b/Documentation/Examples/viewDatabaseNameKnown.generated @@ -1,2 +1,2 @@ -arangosh> db._view("demo"); -null +arangosh> db._view("demoView"); +[ArangoView 115, "demoView" (type arangosearch)] diff --git a/Documentation/Examples/viewDrop.generated b/Documentation/Examples/viewDrop.generated new file mode 100644 index 0000000000..0379819437 --- /dev/null +++ b/Documentation/Examples/viewDrop.generated @@ -0,0 +1,8 @@ +arangosh> v = db._createView("example", "arangosearch"); +........> // or +[ArangoView 149, "example" (type arangosearch)] +arangosh> v = db._view("example"); +[ArangoView 149, "example" (type arangosearch)] +arangosh> v.drop(); +arangosh> db._view("example"); +null diff --git a/Documentation/Examples/viewGetProperties.generated b/Documentation/Examples/viewGetProperties.generated new file mode 100644 index 0000000000..41318af849 --- /dev/null 
+++ b/Documentation/Examples/viewGetProperties.generated @@ -0,0 +1,14 @@ +arangosh> v = db._view("demoView"); +[ArangoView 115, "demoView" (type arangosearch)] +arangosh> v.properties(); +{ + "links" : { + }, + "cleanupIntervalStep" : 10, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "consolidationIntervalMsec" : 60000 +} diff --git a/Documentation/Examples/viewModifyProperties.generated b/Documentation/Examples/viewModifyProperties.generated new file mode 100644 index 0000000000..b6cb62c0f7 --- /dev/null +++ b/Documentation/Examples/viewModifyProperties.generated @@ -0,0 +1,66 @@ +arangosh> v = db._view("example"); +[ArangoView 122, "example" (type arangosearch)] +arangosh> v.properties(); +........> // set cleanupIntervalStep to 12 +{ + "links" : { + }, + "cleanupIntervalStep" : 10, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "consolidationIntervalMsec" : 60000 +} +arangosh> v.properties({cleanupIntervalStep: 12}); +........> // add a link +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + } +} +arangosh> v.properties({links: {demo: {}}}) +........> // remove a link +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + "demo" : { + "analyzers" : [ + "identity" + ], + "fields" : { + }, + "includeAllFields" : false, + "trackListPositions" : false, + "storeValues" : "none", + "id" : "133", + "type" : "arangosearch", + "view" : "hFFDA13719B2C/122" + } + } +} +arangosh> v.properties({links: {demo: null}}) +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { 
+ "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + } +} diff --git a/Documentation/Examples/viewName.generated b/Documentation/Examples/viewName.generated new file mode 100644 index 0000000000..322ad2185a --- /dev/null +++ b/Documentation/Examples/viewName.generated @@ -0,0 +1,4 @@ +arangosh> v = db._view("demoView"); +[ArangoView 115, "demoView" (type arangosearch)] +arangosh> v.name(); +demoView diff --git a/Documentation/Examples/viewRename.generated b/Documentation/Examples/viewRename.generated new file mode 100644 index 0000000000..d813acf5d2 --- /dev/null +++ b/Documentation/Examples/viewRename.generated @@ -0,0 +1,7 @@ +arangosh> v = db._createView("example", "arangosearch"); +[ArangoView 180, "example" (type arangosearch)] +arangosh> v.name(); +example +arangosh> v.rename("exampleRenamed"); +arangosh> v.name(); +exampleRenamed diff --git a/Documentation/Examples/viewType.generated b/Documentation/Examples/viewType.generated new file mode 100644 index 0000000000..bc39e3cfc1 --- /dev/null +++ b/Documentation/Examples/viewType.generated @@ -0,0 +1,4 @@ +arangosh> v = db._view("demoView"); +[ArangoView 115, "demoView" (type arangosearch)] +arangosh> v.type(); +arangosearch diff --git a/Documentation/Examples/viewUsage_01.generated b/Documentation/Examples/viewUsage_01.generated new file mode 100644 index 0000000000..e13f7e26c9 --- /dev/null +++ b/Documentation/Examples/viewUsage_01.generated @@ -0,0 +1,2 @@ +arangosh> view = db._createView("myView", "arangosearch", {}); +[ArangoView 131393, "myView" (type arangosearch)] diff --git a/Documentation/Examples/viewUsage_02.generated b/Documentation/Examples/viewUsage_02.generated new file mode 100644 index 0000000000..1b87d3f2b2 --- /dev/null +++ b/Documentation/Examples/viewUsage_02.generated @@ -0,0 +1,2 @@ +arangosh> view = db._view("myView"); +[ArangoView 131393, "myView" (type arangosearch)] diff --git a/Documentation/Examples/viewUsage_03.generated 
b/Documentation/Examples/viewUsage_03.generated new file mode 100644 index 0000000000..d96d7f97ec --- /dev/null +++ b/Documentation/Examples/viewUsage_03.generated @@ -0,0 +1,12 @@ +arangosh> view.properties(); +{ + "links" : { + }, + "cleanupIntervalStep" : 10, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "consolidationIntervalMsec" : 60000 +} diff --git a/Documentation/Examples/viewUsage_04.generated b/Documentation/Examples/viewUsage_04.generated new file mode 100644 index 0000000000..509aa68cee --- /dev/null +++ b/Documentation/Examples/viewUsage_04.generated @@ -0,0 +1,12 @@ +arangosh> view.properties({cleanupIntervalStep: 12}); +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + } +} diff --git a/Documentation/Examples/viewUsage_05.generated b/Documentation/Examples/viewUsage_05.generated new file mode 100644 index 0000000000..3afb158054 --- /dev/null +++ b/Documentation/Examples/viewUsage_05.generated @@ -0,0 +1,25 @@ +arangosh> view.properties({links: {colA: {includeAllFields: true}}}); +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + "colA" : { + "analyzers" : [ + "identity" + ], + "fields" : { + }, + "includeAllFields" : true, + "trackListPositions" : false, + "storeValues" : "none", + "id" : "131408", + "type" : "arangosearch", + "view" : "hC4048A4AEAB4/131393" + } + } +} diff --git a/Documentation/Examples/viewUsage_06.generated b/Documentation/Examples/viewUsage_06.generated new file mode 100644 index 0000000000..c77cb75266 --- /dev/null +++ b/Documentation/Examples/viewUsage_06.generated @@ -0,0 +1,40 @@ +arangosh> view.properties({links: {colB: {fields: {text: 
{}}}}}); +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + "colA" : { + "analyzers" : [ + "identity" + ], + "fields" : { + }, + "includeAllFields" : true, + "trackListPositions" : false, + "storeValues" : "none", + "id" : "131408", + "type" : "arangosearch", + "view" : "hC4048A4AEAB4/131393" + }, + "colB" : { + "analyzers" : [ + "identity" + ], + "fields" : { + "text" : { + } + }, + "includeAllFields" : false, + "trackListPositions" : false, + "storeValues" : "none", + "id" : "131416", + "type" : "arangosearch", + "view" : "hC4048A4AEAB4/131393" + } + } +} diff --git a/Documentation/Examples/viewUsage_07.generated b/Documentation/Examples/viewUsage_07.generated new file mode 100644 index 0000000000..f8d7bf1c50 --- /dev/null +++ b/Documentation/Examples/viewUsage_07.generated @@ -0,0 +1,27 @@ +arangosh> view.properties({links: {colA: null}}); +{ + "cleanupIntervalStep" : 12, + "consolidationIntervalMsec" : 60000, + "consolidationPolicy" : { + "segmentThreshold" : 300, + "threshold" : 0.8500000238418579, + "type" : "bytes_accum" + }, + "links" : { + "colB" : { + "analyzers" : [ + "identity" + ], + "fields" : { + "text" : { + } + }, + "includeAllFields" : false, + "trackListPositions" : false, + "storeValues" : "none", + "id" : "131416", + "type" : "arangosearch", + "view" : "hC4048A4AEAB4/131393" + } + } +} diff --git a/Documentation/Examples/viewUsage_08.generated b/Documentation/Examples/viewUsage_08.generated new file mode 100644 index 0000000000..bce6359bd6 --- /dev/null +++ b/Documentation/Examples/viewUsage_08.generated @@ -0,0 +1 @@ +arangosh> db._dropView("myView"); diff --git a/Documentation/Scripts/exampleHeader.js b/Documentation/Scripts/exampleHeader.js index d2306912f3..e79e934543 100644 --- a/Documentation/Scripts/exampleHeader.js +++ b/Documentation/Scripts/exampleHeader.js @@ -221,7 +221,8 @@ 
var runTestFuncCatch = function (execFunction, testName, expectError) { }; var checkForOrphanTestCollections = function(msg) { - var cols = db._collections().map(function(c){ + const colsAndViews = db._collections().concat(db._views()); + var cols = colsAndViews.map(function(c){ return c.name(); }); var orphanColls = []; @@ -253,6 +254,10 @@ var addIgnoreCollection = function(collectionName) { ignoreCollectionAlreadyThere.push(collectionName); }; +var addIgnoreView = function(viewName) { + addIgnoreCollection(viewName); +}; + var removeIgnoreCollection = function(collectionName) { // print("from now on checking again whether this collection dropped: " + collectionName); for (j = 0; j < collectionAlreadyThere.length; j++) { @@ -268,6 +273,10 @@ var removeIgnoreCollection = function(collectionName) { }; +var removeIgnoreView = function (viewName) { + removeIgnoreCollection(viewName); +}; + var checkIgnoreCollectionAlreadyThere = function () { if (ignoreCollectionAlreadyThere.length > 0) { allErrors += "some temporarily ignored collections haven't been cleaned up: " + @@ -278,6 +287,6 @@ var checkIgnoreCollectionAlreadyThere = function () { // Set the first available list of already there collections: var err = allErrors; -checkForOrphanTestCollections('Collections already there which we will ignore from now on:'); +checkForOrphanTestCollections('Collections or views already there which we will ignore from now on:'); print(allErrors + '\n'); allErrors = err; diff --git a/Documentation/Scripts/setup-arangosh.js b/Documentation/Scripts/setup-arangosh.js index 0326e7b8a5..3f5950354e 100644 --- a/Documentation/Scripts/setup-arangosh.js +++ b/Documentation/Scripts/setup-arangosh.js @@ -20,3 +20,7 @@ db.demo.save({ db._drop("animals"); db._create("animals"); collectionAlreadyThere.push("animals"); + +db._dropView("demoView"); +db._createView("demoView", "arangosearch"); +collectionAlreadyThere.push("demoView");