From e9c680ec08c40806c79cc4e70e8e36726a1dc5fc Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 5 May 2017 13:00:32 +0200 Subject: [PATCH 01/18] add rocksdb geo index implementation --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 13 + arangod/RocksDBEngine/CMakeLists.txt | 5 +- arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 2383 +++++++++++++++++ arangod/RocksDBEngine/RocksDBGeoIndexImpl.h | 115 + 4 files changed, 2514 insertions(+), 2 deletions(-) create mode 100644 GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK create mode 100644 arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp create mode 100644 arangod/RocksDBEngine/RocksDBGeoIndexImpl.h diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK new file mode 100644 index 0000000000..a68a755185 --- /dev/null +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -0,0 +1,13 @@ +1. Create new GeoIndex. +2. Attach to existing GeoIndex +3. Drop GeoIndex +4. Disconnect from GeoIndex +5. PotRead +6. PotWrite +7. SlotRead +8. SlotWrite +9. CreatePot (take the next number) +10. CreateSlot (take the next number) +11. Also needs start and stop transaction routine and to use them Das sind die + Punkte, die Richard eingeplant hatte zum Anschluss. Über die obere Ebene + weiß er praktisch nichts. 
diff --git a/arangod/RocksDBEngine/CMakeLists.txt b/arangod/RocksDBEngine/CMakeLists.txt index be5aab4d8f..0454c22abe 100644 --- a/arangod/RocksDBEngine/CMakeLists.txt +++ b/arangod/RocksDBEngine/CMakeLists.txt @@ -11,9 +11,10 @@ set(ROCKSDB_SOURCES RocksDBEngine/RocksDBEdgeIndex.cpp RocksDBEngine/RocksDBEngine.cpp RocksDBEngine/RocksDBExportCursor.cpp + RocksDBEngine/RocksDBGeoIndexImpl.cpp + RocksDBEngine/RocksDBHashIndex.cpp RocksDBEngine/RocksDBIndex.cpp RocksDBEngine/RocksDBIndexFactory.cpp - RocksDBEngine/RocksDBHashIndex.cpp RocksDBEngine/RocksDBKey.cpp RocksDBEngine/RocksDBKeyBounds.cpp RocksDBEngine/RocksDBLogValue.cpp @@ -30,8 +31,8 @@ set(ROCKSDB_SOURCES RocksDBEngine/RocksDBTransactionState.cpp RocksDBEngine/RocksDBTypes.cpp RocksDBEngine/RocksDBV8Functions.cpp + RocksDBEngine/RocksDBVPackIndex.cpp RocksDBEngine/RocksDBValue.cpp RocksDBEngine/RocksDBView.cpp - RocksDBEngine/RocksDBVPackIndex.cpp ) set(ROCKSDB_SOURCES ${ROCKSDB_SOURCES} PARENT_SCOPE) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp new file mode 100644 index 0000000000..17c458330a --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -0,0 +1,2383 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+///
+/// Copyright holder is ArangoDB GmbH, Cologne, Germany
+///
+/// @author R. A. Parker
+////////////////////////////////////////////////////////////////////////////////
+
+/* GeoIndex.c - GeoIndex algorithms */
+/* Version 2.1 8.1.2012 R. A. Parker */
+#define _USE_MATH_DEFINES /* ask math.h for M_PI, which the routines below use */
+#include /* NOTE(review): header name is missing here - confirm against the original patch */
+#include /* NOTE(review): header name is missing here - confirm against the original patch */
+
+#include "RocksDBGeoIndexImpl.h"
+
+/* Radius of the earth in meters, used to turn angles into distances */
+#define EARTHRADIAN 6371000.0
+
+#define GEOSLOTSTART 50 /* number of slots (points) GeoIndex_new allocates */
+#define GEOPOTSTART 100 /* number of pots GeoIndex_new allocates */
+
+#if GeoIndexFIXEDSET == 2 /* GeoIndexFIXEDSET is not defined in this file; it picks the fixed-point set */
+#define GeoIndexFIXEDPOINTS 2
+#endif
+#if GeoIndexFIXEDSET == 3
+#define GeoIndexFIXEDPOINTS 3
+#endif
+#if GeoIndexFIXEDSET == 4
+#define GeoIndexFIXEDPOINTS 4
+#endif
+#if GeoIndexFIXEDSET == 5
+#define GeoIndexFIXEDPOINTS 5
+#endif
+#if GeoIndexFIXEDSET == 6
+#define GeoIndexFIXEDPOINTS 6
+#endif
+#if GeoIndexFIXEDSET == 8
+#define GeoIndexFIXEDPOINTS 8
+#endif
+#ifndef GeoIndexFIXEDPOINTS /* any other GeoIndexFIXEDSET value: one fixed point */
+#define GeoIndexFIXEDPOINTS 1
+#endif
+
+namespace arangodb { namespace rocks {
+
+/* =================================================== */
+/* GeoIndexFixed structure. */
+/* Only occurs once, and that is in the GeoIx struct. */
+/* Holds the x, y and z coordinates (between -1 and +1) */
+/* of the fixed points used for pot rejection purposes. */
+/* They are computed at GeoIndex_new time and not */
+/* changed after that. */
+/* =================================================== */
+typedef struct {
+  double x[GeoIndexFIXEDPOINTS]; /* cos(lat) * cos(lon) of each fixed point */
+  double y[GeoIndexFIXEDPOINTS]; /* cos(lat) * sin(lon) of each fixed point */
+  double z[GeoIndexFIXEDPOINTS]; /* sin(lat) of each fixed point */
+} GeoIndexFixed;
+/* =================================================== */
+/* GeoPot structure */
+/* These only occur in the main index itself, and the */
+/* GeoIx structure has an array of them. The data */
+/* items are arranged so that the access during a */
+/* search is approximately sequential, which should be */
+/* a little faster on most machines.
*/
+/* The first two data items are used for several */
+/* different purposes. LorLeaf is zero for a leaf pot */
+/* and the left child for a non-leaf pot. RorPoints */
+/* is the right child for a non-leaf pot, and the */
+/* number of points in the pot for a leaf pot. */
+/* The three GeoString values give the bounds (weak) */
+/* for the Hilbert values in this pot. middle is not */
+/* used for a leaf pot. */
+/* maxdist is the maximum, over all points descended */
+/* from this pot, of the distances to the fixed points. */
+/* level is the AVL-level. It is 1 for a leaf pot, */
+/* and always at least 1 more and at most 2 more than */
+/* each of its children, and exactly 1 more than at */
+/* least one of its children - the AVL spec. */
+/* "points" lists the slotid of the points. This is */
+/* only used for a leaf pot. */
+/* =================================================== */
+typedef struct {
+  int LorLeaf; /* 0 = leaf, else left child; also the link field of the pot free list */
+  int RorPoints; /* right child (non-leaf) or number of points held (leaf) */
+  GeoString middle; /* GeoStackSet descends left when middle > target GeoString */
+  GeoFix maxdist[GeoIndexFIXEDPOINTS]; /* one maximum per fixed point */
+  GeoString start; /* weak lower Hilbert bound */
+  GeoString end; /* weak upper Hilbert bound */
+  int level; /* AVL level, 1 for a leaf */
+  int points[GeoIndexPOTSIZE]; /* slot ids, leaf pots only */
+} GeoPot;
+/* =================================================== */
+/* GeoIx structure */
+/* This is the REAL GeoIndex structure - the one in */
+/* the GeoIndex.h file is just a sham (it says it is */
+/* a char!) to keep the structure private so that the */
+/* GeoIndex.h is short and contains only data of */
+/* interest to the user.
*/
+/* The GeoIx structure basically consists of two */
+/* arrays - the slots (the points) and the pots (the */
+/* balanced (AVL) search tree for finding near points). */
+/* The fixed-point data is held here also, giving the */
+/* x, y and z coordinates of the fixed points, this */
+/* data being the fastest to use. */
+/* potct and slotct are used when the index needs to */
+/* grow (because it has run out of slots or pots). */
+/* There is no provision at present for the index to */
+/* get smaller when the majority of points are deleted. */
+/* =================================================== */
+typedef struct {
+  GeoIndexFixed fixed; /* fixed point data, filled in once by GeoIndex_new */
+  int potct; /* pots allocated (entries in ypots) */
+  int slotct; /* slots allocated (entries in gxc) */
+  GeoPot* ypots; /* the pots themselves; ypots[0].LorLeaf heads the pot free list */
+  GeoCoordinate* gxc; /* the slots themselves; free slots are chained through latitude */
+  size_t _memoryUsed; /* bytes currently accounted for the pot and slot arrays */
+} GeoIx;
+/* =================================================== */
+/* GeoDetailedPoint structure */
+/* The routine GeoMkDetail is given a point - really */
+/* just a latitude and longitude, and computes all the */
+/* values in this GeoDetailedPoint structure. */
+/* This is intended to include everything that will be */
+/* needed about the point, and is called both for the */
+/* searches (count and distance) and the updates */
+/* (insert and remove). It is only ever useful */
+/* locally - it is created, populated, used and */
+/* forgotten all within a single user's call */
+/* the GeoIx is noted there to simplify some calls */
+/* The GeoCoordinate (a pointer to the user's one) */
+/* is included. The x, y and z coordinates (between */
+/* 1 and -1) are computed, as is the GeoString - the */
+/* Hilbert curve value used to decide where in the */
+/* index a point belongs. The fixdist array is the */
+/* distance to the fixed points.
*/
+/* The other two entries (snmd and distrej) are not */
+/* computed by GeoMkDetail, but are put in place */
+/* later, for the searches only, by GeoSetDistance. */
+/* They basically hold the radius of the circle around */
+/* the target point outside which indexed points will */
+/* be too far to be of interest. This is set once and */
+/* for all in the case of a search-by-distance, but */
+/* for a search-by-count the interesting distance */
+/* decreases as further points are found. */
+/* Anyway, snmd holds the radius in SNMD form (squared */
+/* normalized mole distance) being the distance in */
+/* three-dimensional space between two points passing */
+/* through the earth (as a mole digs!) - this being */
+/* the fastest to compute on the fly, and is used for */
+/* looking at individual points to decide whether to */
+/* include them. The distrej array, on the other hand */
+/* is the array of distances to the fixed points, and */
+/* is used to reject pots (leaf or non-leaf). */
+/* The routine GeoPotJunk is used to test this, */
+/* by comparing the distances in the pot to this array */
+/* =================================================== */
+typedef struct {
+  GeoIx* gix; /* index this point is being used with */
+  GeoCoordinate* gc; /* the caller's coordinate (pointer is stored, not a copy) */
+  double x; /* cos(lat) * cos(lon) */
+  double y; /* cos(lat) * sin(lon) */
+  double z; /* sin(lat) */
+  GeoString gs; /* Hilbert curve value from GeoMkHilbert */
+  GeoFix fixdist[GeoIndexFIXEDPOINTS]; /* distance to each fixed point, scaled by ARCSINFIX */
+  double snmd; /* search radius in SNMD form, set by GeoSetDistance */
+  GeoFix distrej[GeoIndexFIXEDPOINTS]; /* fixdist minus the radius (floored at 0), set by GeoSetDistance */
+} GeoDetailedPoint;
+/* =================================================== */
+/* GeoResults structure */
+/* During the searches, this structure is used to */
+/* accumulate the points that will be returned. */
+/* In the case of a search-by-distance, the results are */
+/* simply a list, which is grown by about 50% if the */
+/* initial allocation (100) is inadequate. In the case */
+/* of a search-by-count, the exact number needed is */
+/* known from the start, but the structure is not just */
+/* a simple list in this case.
Instead it is organized*/
+/* as a "priority queue" to enable large values of the */
+/* parameter to be rapidly processed. In the */
+/* case of count, each value is kept to be larger than */
+/* both of its "children" - at 2n+1 and 2n+2. Hence */
+/* the largest distance is always at position 0 and can */
+/* be readily found, but if it is to be replaced, there */
+/* is some processing (no more than log(count) work) */
+/* to do to find the correct place to insert the new */
+/* one in the priority queue. This work is done in the */
+/* GeoResultsInsertPoint routine (not used by distance) */
+/* =================================================== */
+typedef struct {
+  int pointsct; /* number of real points currently held */
+  int allocpoints; /* capacity of the slot and snmd arrays */
+  int* slot; /* slot id of each result; 0 marks a placeholder entry */
+  double* snmd; /* SNMD distance of each result to the target */
+} GeoResults;
+/* =================================================== */
+/* GeoStack structure */
+/* During searches of both kinds, at any time there is */
+/* this "stack" (first-in-last-out) of pots still to be */
+/* processed. At the start of a search of either type, */
+/* this structure is populated (by GeoStackSet) by */
+/* starting at the root pot, and selecting a child that */
+/* could contain the target point. The other pot is */
+/* put on the stack and processing continues. The */
+/* stack is then processed by taking a pot off, */
+/* discarding it if the maximum distance to a fixed */
+/* point is too low, and otherwise putting both the */
+/* children onto the stack (since it is faster to do */
+/* this than suffer the cache miss to determine whether */
+/* either or both of the children can be rejected) */
+/* =================================================== */
+typedef struct {
+  GeoResults* gr; /* results being accumulated */
+  GeoDetailedPoint* gd; /* the detailed target point */
+  int stacksize; /* index of the top entry in potid (GeoStackSet leaves it on the last pot stored) */
+  int potid[50]; /* pot ids still to be processed; fixed capacity of 50 */
+} GeoStack;
+/* =================================================== */
+/* GeoPath structure */
+/* Similar in many ways to the GeoStack, above, this */
+/* structure is used during insertion and deletion.
*/
+/* Notice that the pots of the index do not contain */
+/* pointers to their parent, since this is not needed */
+/* during a search. During insertion and removal, */
+/* however, it is necessary to move upwards to */
+/* propagate the maximum distances and to balance the */
+/* tree. Hence the GeoFind procedure, called at the */
+/* beginning of insertion and deletion, populates this */
+/* structure so that the full path from the root node */
+/* to the current pot being considered is known, and */
+/* its parent found when needed. */
+/* =================================================== */
+typedef struct {
+  GeoIx* gix; /* index the path belongs to */
+  int pathlength; /* number of entries of path in use */
+  int path[50]; /* pot ids from the root downwards; fixed capacity of 50 */
+} GeoPath;
+
+/* =================================================== */
+/* GeoIndex_distance routine */
+/* This is the user-facing routine to compute the */
+/* distance in meters between any two points, given */
+/* by latitude and longitude in a pair of GeoCoordinate */
+/* structures. It operates by first converting the */
+/* two points into x, y and z coordinates in 3-space, */
+/* then computing the distance between them (again in */
+/* three space) using Pythagoras, computing the angle */
+/* subtended at the earth's centre, between the two */
+/* points, and finally multiplying this angle (in radians) */
+/* by the earth's radius to convert it into meters.
*/
+/* =================================================== */
+double GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2) {
+  /* local names steer clear of y1 and j1, which math.h under MacOS */
+  /* defines as global variables */
+  double const latA = c1->latitude * M_PI / 180.0;
+  double const lonA = c1->longitude * M_PI / 180.0;
+  double const latB = c2->latitude * M_PI / 180.0;
+  double const lonB = c2->longitude * M_PI / 180.0;
+  /* both points as x, y, z coordinates on the unit sphere */
+  double const az = sin(latA);
+  double const ax = cos(latA) * cos(lonA);
+  double const ay = cos(latA) * sin(lonA);
+  double const bz = sin(latB);
+  double const bx = cos(latB) * cos(lonB);
+  double const by = cos(latB) * sin(lonB);
+  /* straight-line distance between them, through the sphere */
+  double chord = sqrt((ax - bx) * (ax - bx) + (ay - by) * (ay - by) +
+                      (az - bz) * (az - bz));
+  if (chord > 2.0) {
+    chord = 2.0; /* make sure arcsin succeeds! */
+  }
+  /* angle at the centre is 2 * asin(chord / 2); scale it to meters */
+  return 2.0 * EARTHRADIAN * asin(chord / 2.0);
+}
+/* =================================================== */
+/* GeoIndexFreePot */
+/* Pushes the supplied pot onto the front of the free */
+/* list. The list head lives in ypots[0].LorLeaf and */
+/* each free pot links to the next through LorLeaf. */
+/* =================================================== */
+void GeoIndexFreePot(GeoIx* gix, int pot) {
+  GeoPot* const pots = gix->ypots;
+  pots[pot].LorLeaf = pots[0].LorLeaf;
+  pots[0].LorLeaf = pot;
+}
+/* =================================================== */
+/* GeoIndexNewPot */
+/* During insertion, it may happen that a leaf pot */
+/* becomes full. In this case this routine is called */
+/* (always twice, as it happens) to allocate a new */
+/* leaf pot, and a new pot to become the parent of both */
+/* the old and the new leaf pots. Usually this will */
+/* be a simple matter of taking a pot off the free */
+/* list, but occasionally the free list will be empty, */
+/* in which case the pot array must be realloced. */
+/* NOTICE that in this case, the pots may have moved, */
+/* so it is critically important to ensure that any */
+/* pointers to pots are re-computed after this routine */
+/* has been called!
The GeoIndex_insert routine is */ +/* therefore careful to get the new pots (if any are */ +/* needed) before it gets too far into things. */ +/* =================================================== */ +int GeoIndexNewPot(GeoIx* gix) { + int j; + GeoPot* gp; + if (gix->ypots[0].LorLeaf == 0) { + /* do the growth calculation in long long to make sure it doesn't */ + /* overflow when the size gets to be near 2^31 */ + long long x = gix->potct; + long long y = 100 + GeoIndexGROW; + x = x * y + 99; + y = 100; + x = x / y; + if (x > 1000000000L) return -2; + int newpotct = (int)x; + gp = static_cast(TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, gix->ypots, + newpotct * sizeof(GeoPot))); + + if (gp == nullptr) { + return -2; + } + gix->ypots = gp; + + // update memory usage + gix->_memoryUsed -= gix->potct * sizeof(GeoPot); + gix->_memoryUsed += newpotct * sizeof(GeoPot); + + for (j = gix->potct; j < newpotct; j++) { + GeoIndexFreePot(gix, j); + } + gix->potct = newpotct; + } + j = gix->ypots[0].LorLeaf; + gix->ypots[0].LorLeaf = gix->ypots[j].LorLeaf; + return j; +} +/* =================================================== */ +/* GeoIndex_new routine */ +/* User-facing routine to create a whole new GeoIndex. */ +/* Much of the bulk of the code in this routine is */ +/* populating the fixed points, depending on which */ +/* set of fixed points are in used. */ +/* The first job is to allocate the initial arrays for */ +/* holding the points, and the pots that index them. */ +/* If this fails, no harm is done and the nullptr */ +/* is returned. Otherwise all the point and pots are */ +/* put onto their respective free lists. */ +/* The fixed point structure is then set up. 
*/ +/* Finally the root pot (pot 1) is set up to be a leaf */ +/* pot containing no points, but with the start and end*/ +/* GeoString values (points on the Hilbert Curve) set */ +/* to be "low values" and "high values" respectively, */ +/* being slightly outside the range of possible */ +/* GeoString values of real (latitude, longitude) */ +/* points */ +/* =================================================== */ +GeoIdx* GeoIndex_new(void) { + GeoIx* gix; + int i, j; + + gix = static_cast( + TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(GeoIx), false)); + + if (gix == nullptr) { + return (GeoIdx*)gix; + } + + /* try to allocate all the things we need */ + gix->ypots = static_cast( + TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, GEOPOTSTART * sizeof(GeoPot), false)); + gix->gxc = static_cast(TRI_Allocate( + TRI_UNKNOWN_MEM_ZONE, GEOSLOTSTART * sizeof(GeoCoordinate), false)); + + /* if any of them fail, free the ones that succeeded */ + /* and then return the nullptr for our user */ + if ((gix->ypots == nullptr) || (gix->gxc == nullptr)) { + if (gix->ypots != nullptr) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); + } + + if (gix->gxc != nullptr) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->gxc); + } + + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix); + + return nullptr; + } + + // set initial memory usage + gix->_memoryUsed = + GEOPOTSTART * sizeof(GeoPot) + GEOSLOTSTART * sizeof(GeoCoordinate); + + /* initialize chain of empty slots */ + for (i = 0; i < GEOSLOTSTART; i++) { + if (i < GEOSLOTSTART - 1) + (gix->gxc[i]).latitude = i + 1; + else + (gix->gxc[i]).latitude = 0; + } + + /* similarly set up free chain of empty pots */ + for (i = 0; i < GEOPOTSTART; i++) { + if (i < GEOPOTSTART - 1) + gix->ypots[i].LorLeaf = i + 1; + else + gix->ypots[i].LorLeaf = 0; + } + + gix->potct = GEOPOTSTART; + gix->slotct = GEOSLOTSTART; + + /* set up the fixed points structure */ + + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) { + double lat = 90.0; + double lon = 0.0; +#if GeoIndexFIXEDSET == 2 + if (i == 1) { + 
lat = -90.0; + lon = 0.0; + } +#endif +#if GeoIndexFIXEDSET == 3 + if (i == 1) { + lat = -30.0; + lon = 0.0; + } + if (i == 2) { + lat = -30; + lon = 180.0; + } +#endif +#if GeoIndexFIXEDSET == 4 + if (i == 1) { + lat = -19.471220634490691369246; + lon = 180.0; + } + if (i == 2) { + lat = -19.471220634490691369246; + lon = -60.0; + } + if (i == 3) { + lat = -19.471220634490691369246; + lon = 60.0; + } +#endif +#if GeoIndexFIXEDSET == 5 + if (i == 1) { + lat = -90.0; + lon = 0.0; + } + if (i == 2) { + lat = 0.0; + lon = 0.0; + } + if (i == 3) { + lat = 0.0; + lon = 120.0; + } + if (i == 4) { + lat = 0.0; + lon = -120.0; + } +#endif +#if GeoIndexFIXEDSET == 6 + if (i == 1) { + lat = -90.0; + lon = 0.0; + } + if (i == 2) { + lat = 0.0; + lon = 0.0; + } + if (i == 3) { + lat = 0.0; + lon = 180.0; + } + if (i == 4) { + lat = 0.0; + lon = 90.0; + } + if (i == 5) { + lat = 0.0; + lon = -90.0; + } +#endif +#if GeoIndexFIXEDSET == 8 + if (i == 1) { + lat = -90.0; + lon = 0.0; + } + if (i == 2) { + lat = 19.471220634490691369246; + lon = 0.0; + } + if (i == 3) { + lat = -19.471220634490691369246; + lon = 180.0; + } + if (i == 4) { + lat = 19.471220634490691369246; + lon = 120.0; + } + if (i == 5) { + lat = -19.471220634490691369246; + lon = -60.0; + } + if (i == 6) { + lat = 19.471220634490691369246; + lon = -120.0; + } + if (i == 7) { + lat = -19.471220634490691369246; + lon = 60.0; + } +#endif + + double z = sin(lat * M_PI / 180.0); + double x = cos(lat * M_PI / 180.0) * cos(lon * M_PI / 180.0); + double y = cos(lat * M_PI / 180.0) * sin(lon * M_PI / 180.0); + (gix->fixed.x)[i] = x; + (gix->fixed.y)[i] = y; + (gix->fixed.z)[i] = z; + } + /* set up the root pot */ + + j = GeoIndexNewPot(gix); + gix->ypots[j].LorLeaf = 0; /* leaf pot */ + gix->ypots[j].RorPoints = 0; /* with no points in it! 
*/ + gix->ypots[j].middle = 0ll; + gix->ypots[j].start = 0ll; + gix->ypots[j].end = 0x1FFFFFFFFFFFFFll; + gix->ypots[j].level = 1; + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gix->ypots[j].maxdist[i] = 0; + return (GeoIdx*)gix; +} +/* =================================================== */ +/* GeoIndex_free routine */ +/* Destroys the GeoIndex, and frees all the memory that*/ +/* this GeoIndex system allocated. Note that any */ +/* objects that may have been pointed to by the user's */ +/* data pointers are (of course) not freed by this call*/ +/* =================================================== */ +void GeoIndex_free(GeoIdx* gi) { + GeoIx* gix; + + if (gi == nullptr) { + return; + } + + gix = (GeoIx*)gi; + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->gxc); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix); +} +/* =================================================== */ +/* GeoMkHilbert routine */ +/* Points in this system are indexed by the "GeoString */ +/* value, which is the distance to the point along the */ +/* Hilbert Curve. This space-filling curve is best */ +/* understood in a square, where the curve joins the */ +/* bottom left to the bottom right. It consists of */ +/* four copies of the Hilbert curve, one in each of the*/ +/* four squares, going via the points half-way up the */ +/* left side, the middle of the (large) square and half*/ +/* way up the right side. Notice that the first and */ +/* last of these are flipped on a diagonal, whereas the*/ +/* middle two, going along the top half, are in the */ +/* original orientation, but at half the size. This */ +/* description matches the code below, except that the */ +/* two hemispheres are imagined to be squares where the*/ +/* poles are the top line and the bottom line of the */ +/* square. */ +/* =================================================== */ + +/* 2^25 / 90 rounded down. 
Used to convert */ +/* degrees of longitude and latitude into */ +/* integers for use making a GeoString */ +#define STRINGPERDEGREE 372827.01 +/* 2^26 - 1 = 0x3ffffff */ +#define HILBERTMAX 67108863 +GeoString GeoMkHilbert(GeoCoordinate* c) { + /* math.h under MacOS defines y1 and j1 as global variable */ + double xx1, yy1; + GeoString z; + int x, y; + int i, temp; + yy1 = c->latitude + 90.0; + z = 0; + xx1 = c->longitude; + if (c->longitude < 0.0) { + xx1 = c->longitude + 180.0; + z = 1; + } + x = (int)(xx1 * STRINGPERDEGREE); + y = (int)(yy1 * STRINGPERDEGREE); + for (i = 0; i < 26; i++) { + z <<= 2; + int nz = ((y >> 24) & 2) + (x >> 25); + x = (x << 1) & (HILBERTMAX); + y = (y << 1) & (HILBERTMAX); + if (nz == 0) { + temp = x; + x = y; + y = temp; + } + if (nz == 1) { + temp = HILBERTMAX - x; + x = HILBERTMAX - y; + y = temp; + z += 3; + } + if (nz == 2) { + z += 1; + } + if (nz == 3) { + z += 2; + } + } + return z + 1ll; +} +/* =================================================== */ +/* GeoMkDetail routine */ +/* At the beginning of both searches, and also at the */ +/* start of an insert or remove, this routine is called*/ +/* to compute all the detail that can usefully be found*/ +/* once and for all. The timings below were on done on*/ +/* a 2011 ordinary desktop pentium */ +/* 0.94 microseconds is - very approximately - 20% of */ +/* the execution time of searches and/or updates, so */ +/* is an obvious target for future speedups should they*/ +/* be required (possibly by using less-accurate trig. */ +/* it consists of three essentially separate tasks */ +/* 1. Find the GeoString (Hilbert) value. */ +/* 2. compute the x, y and z coordinates */ +/* 3. 
find the distances to the fixed points */ +/* all of these are needed for all of the operations */ +/* =================================================== */ +#if GEOFIXLEN == 2 +#define ARCSINFIX 41720.0 +/* resolution about 300 meters */ +#endif +#if GEOFIXLEN == 4 +#define ARCSINFIX 1520000000.0 +/* resolution about 3 cm */ +#endif +void GeoMkDetail(GeoIx* gix, GeoDetailedPoint* gd, GeoCoordinate* c) { + /* entire routine takes about 0.94 microseconds */ + /* math.h under MacOS defines y1 and j1 as global variable */ + gd->gix = gix; + gd->gc = c; // XQXQ copies the pointer! + /* The GeoString computation takes about 0.17 microseconds */ + gd->gs = GeoMkHilbert(c); + double const lat = c->latitude * M_PI / 180.0; + double const lon = c->longitude * M_PI / 180.0; + double latCos = cos(lat); + /* This part takes about 0.32 microseconds */ + gd->z = sin(lat); + gd->x = latCos * cos(lon); + gd->y = latCos * sin(lon); + /* And this bit takes about 0.45 microseconds */ + for (int i = 0; i < GeoIndexFIXEDPOINTS; i++) { + double xx1 = (gix->fixed.x)[i]; + double yy1 = (gix->fixed.y)[i]; + double z1 = (gix->fixed.z)[i]; + double snmd = (xx1 - gd->x) * (xx1 - gd->x) + (yy1 - gd->y) * (yy1 - gd->y) + + (z1 - gd->z) * (z1 - gd->z); + (gd->fixdist)[i] = (GeoFix)(asin(sqrt(snmd) / 2.0) * ARCSINFIX); + } +} +/* =================================================== */ +/* GeoMetersToSNMD */ +/* When searching for a point "by distance" rather than*/ +/* by count, this routine is used to reverse-engineer */ +/* the distance in meters into a Squared Normalized */ +/* Mole Distance (SNMD), since this is faster to */ +/* compute for each individual point. Hence, rather */ +/* than convert all the distances to meters and compare*/ +/* the system works backwards a bit so that, for each */ +/* point considered, only half of the distance */ +/* calculation needs to be done. This is, of course */ +/* considerably faster. 
*/
+/* =================================================== */
+double GeoMetersToSNMD(double meters) {
+  /* half of the angle that the distance subtends at the earth's centre */
+  double const halfAngle = 0.5 * meters / EARTHRADIAN;
+  double const halfMole = sin(halfAngle); /* half normalized mole distance */
+  /* from half the circumference upwards the result stays at 4.0 */
+  return (halfAngle >= M_PI / 2.0) ? 4.0 : halfMole * halfMole * 4.0;
+}
+
+/* Converts a GeoFix distance (an angle multiplied by ARCSINFIX) */
+/* into SNMD form: 4 * sin(gf / ARCSINFIX)^2 */
+double GeoFixtoSNMD(GeoFix gf) {
+  double const halfMole = sin((double)gf / ARCSINFIX);
+  return halfMole * halfMole * 4.0;
+}
+/* =================================================== */
+/* GeoSetDistance */
+/* During a search (of either type), the target point */
+/* is first "detailed". When the distance of interest */
+/* to the target point is known (either at the start */
+/* of a search-by-distance or each time a new good */
+/* point is found during a search-by-count) this */
+/* routine is called to set the snmd and distrej values */
+/* so that as much as possible is known to speed up */
+/* consideration of any new points */
+/* =================================================== */
+void GeoSetDistance(GeoDetailedPoint* gd, double snmd) {
+  gd->snmd = snmd;
+  /* the radius in GeoFix units; the cast truncates, so add one unit */
+  GeoFix radius = (GeoFix)(asin(sqrt(snmd) / 2.0) * ARCSINFIX);
+  radius++;
+  for (int k = 0; k < GeoIndexFIXEDPOINTS; ++k) {
+    GeoFix const toFixed = (gd->fixdist)[k];
+    if (toFixed <= radius) {
+      (gd->distrej)[k] = 0;
+    } else {
+      (gd->distrej)[k] = toFixed - radius;
+    }
+  }
+}
+
+/* CRUD interface */
+/* Each routine copies one whole record between the index arrays */
+/* and the caller's buffer. */
+
+/* copy slot number "slot" of the index into *gc; always returns 0 */
+int SlotRead(GeoIx* gix, int slot, GeoCoordinate* gc) {
+  memcpy(gc, &gix->gxc[slot], sizeof(GeoCoordinate));
+  return 0;
+}
+/* copy pot number "pot" of the index into *gp; always returns 0 */
+int PotRead(GeoIx* gix, int pot, GeoPot* gp) {
+  memcpy(gp, &gix->ypots[pot], sizeof(GeoPot));
+  return 0;
+}
+/* overwrite slot number "slot" of the index with *gc */
+void SlotWrite(GeoIx* gix, int slot, GeoCoordinate* gc) {
+  memcpy(&gix->gxc[slot], gc, sizeof(GeoCoordinate));
+}
+/* overwrite pot number "pot" of the index with *gp */
+void PotWrite(GeoIx* gix, int pot, GeoPot* gp) {
+  memcpy(&gix->ypots[pot], gp, sizeof(GeoPot));
+}
+
+/* =================================================== */
+/* GeoStackSet routine */
+/* The searches (by count and by distance) both start */
+/* by detailing the point and then calling
GeoStackSet */
+/* Starting from the root pot (pot 1) the tree is */
+/* descended towards the (actually the earliest) pot */
+/* that could contain the target point. As the */
+/* descent proceeds, the other child of each parent pot */
+/* is put onto the stack, so that after the routine */
+/* completes, the pots on the stack are a division of */
+/* the index into a set of (disjoint) intervals with */
+/* a strong tendency for the ones containing near */
+/* points (on the Hilbert curve, anyway) to be on the */
+/* top of the stack and to contain few points */
+/* =================================================== */
+void GeoStackSet(GeoStack* gk, GeoDetailedPoint* gd, GeoResults* gr) {
+  GeoIx* const index = gd->gix;
+  GeoPot node;
+  int current = 1; /* the root pot */
+  gk->gr = gr;
+  gk->gd = gd;
+  gk->stacksize = 0;
+  for (;;) {
+    PotRead(index, current, &node);
+    if (node.LorLeaf == 0) {
+      break; /* reached a leaf pot */
+    }
+    /* follow the child that could hold the target, stack the other one */
+    bool const goLeft = node.middle > gd->gs;
+    gk->potid[gk->stacksize] = goLeft ? node.RorPoints : node.LorLeaf;
+    current = goLeft ? node.LorLeaf : node.RorPoints;
+    gk->stacksize++;
+  }
+  /* the leaf goes on top; stacksize is left as its index */
+  gk->potid[gk->stacksize] = current;
+}
+/* =================================================== */
+/* GeoResultsCons routine */
+/* Constructs (allocates) a new structure suitable for */
+/* holding the results of a search.
The GeoResults */
+/* structure just holds the slotid of each point chosen */
+/* and the (SNMD) distance to the target point. */
+/* Returns the nullptr if alloc is not positive or if */
+/* any of the three allocations fails; nothing is */
+/* leaked in that case. */
+/* =================================================== */
+GeoResults* GeoResultsCons(int alloc) {
+  GeoResults* gres;
+  int* sa;
+  double* dd;
+
+  if (alloc <= 0) {
+    return nullptr;
+  }
+
+  /* the target types of the three casts below were missing; they are */
+  /* taken from the declarations of gres, sa and dd */
+  gres = static_cast<GeoResults*>(
+      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(GeoResults), false));
+  sa = static_cast<int*>(
+      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, alloc * sizeof(int), false));
+  dd = static_cast<double*>(
+      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, alloc * sizeof(double), false));
+  if ((gres == nullptr) || (sa == nullptr) || (dd == nullptr)) {
+    /* free whichever of the allocations did succeed */
+    if (gres != nullptr) {
+      TRI_Free(TRI_UNKNOWN_MEM_ZONE, gres);
+    }
+
+    if (sa != nullptr) {
+      TRI_Free(TRI_UNKNOWN_MEM_ZONE, sa);
+    }
+
+    if (dd != nullptr) {
+      TRI_Free(TRI_UNKNOWN_MEM_ZONE, dd);
+    }
+
+    return nullptr;
+  }
+  gres->pointsct = 0;
+  gres->allocpoints = alloc;
+  gres->slot = sa;
+  gres->snmd = dd;
+  /* the array contents are left uninitialized here */
+  return gres;
+}
+/* =================================================== */
+/* GeoResultsStartCount */
+/* The GeoResultsCons routine allocates the memory */
+/* but if the search is by count, it is also necessary */
+/* to initialize the results list with "fake" points */
+/* (slot 0) at the impossible SNMD distance of 10, so */
+/* that any real point will be closer than that and be */
+/* taken. The GeoResultsStartCount routine does just */
+/* that. */
+/* =================================================== */
+void GeoResultsStartCount(GeoResults* gr) {
+  int i;
+  for (i = 0; i < gr->allocpoints; i++) {
+    gr->slot[i] = 0;
+    gr->snmd[i] = 10.0;
+  }
+}
+/* =================================================== */
+/* GeoResultsInsertPoint */
+/* when a point is to be considered as a candidate for */
+/* being returned in a search-by-count process, the */
+/* slot and snmd are presented to this routine. If the */
+/* point is too distant, it is ignored.
Otherwise the */ +/* most distant "old" point (which is always at zero */ +/* as the results are maintained as a priority queue */ +/* in this case) is discarded, and the new point must */ +/* be put into its proper place to re-establish the */ +/* priority queue - that every entry n is greater than */ +/* or equal, in SNMD distance, than both its children */ +/* which are at 2n+1 and 2n+2 */ +/* =================================================== */ +void GeoResultsInsertPoint(GeoResults* gr, int slot, double snmd) { + /* math.h under MacOS defines y1 and j1 as global variable */ + int i, temp; + if (snmd >= gr->snmd[0]) return; + if (gr->slot[0] == 0) gr->pointsct++; + i = 0; /* i is now considered empty */ + while (1) { + int jj1 = 2 * i + 1; + int jj2 = 2 * i + 2; + if (jj1 < gr->allocpoints) { + if (jj2 < gr->allocpoints) { + if (gr->snmd[jj1] > gr->snmd[jj2]) { + temp = jj1; + // jj1=jj2; + jj2 = temp; + } + /* so now jj2 is >= jj1 */ + if (gr->snmd[jj2] <= snmd) { + gr->snmd[i] = snmd; + gr->slot[i] = slot; + return; + } + gr->snmd[i] = gr->snmd[jj2]; + gr->slot[i] = gr->slot[jj2]; + i = jj2; + continue; + } + if (gr->snmd[jj1] <= snmd) { + gr->snmd[i] = snmd; + gr->slot[i] = slot; + return; + } + gr->snmd[i] = gr->snmd[jj1]; + gr->slot[i] = gr->slot[jj1]; + i = jj1; + continue; + } + gr->snmd[i] = snmd; + gr->slot[i] = slot; + return; + } +} +/* =================================================== */ +/* GeoResultsGrow */ +/* During a search-by distance (the search-by-count */ +/* allocates the correct size at the outset) it may be */ +/* necessary to return an unbounded amount of data. */ +/* initially 100 entries are allocted, but this routine*/ +/* ensures that another one is available. If the */ +/* allocation fails, -1 is returned. 
*/
/* =================================================== */
/* Returns 0 if at least one free entry is available afterwards, -1 if the
 * arrays would exceed 1000000000 entries or a reallocation failed. */
int GeoResultsGrow(GeoResults* gr) {
  if (gr->pointsct < gr->allocpoints) return 0;

  /* otherwise grow by about 50% */
  int const newsiz = gr->pointsct + (gr->pointsct / 2) + 1;
  if (newsiz > 1000000000) return -1;

  int* sa = static_cast<int*>(
      TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, gr->slot, newsiz * sizeof(int)));
  double* dd = static_cast<double*>(
      TRI_Reallocate(TRI_UNKNOWN_MEM_ZONE, gr->snmd, newsiz * sizeof(double)));

  if ((sa == nullptr) || (dd == nullptr)) {
    /* adopt whichever array was reallocated (presumably the old pointer is
     * no longer valid then); allocpoints stays at the old, smaller size */
    if (sa != nullptr) gr->slot = sa;
    if (dd != nullptr) gr->snmd = dd;
    return -1;
  }

  gr->slot = sa;
  gr->snmd = dd;
  gr->allocpoints = newsiz;
  return 0;
}

/* =================================================== */
/*                    GeoAnswers                       */
/* At the end of any search (of either type) the       */
/* GeoResults structure holds the slotid and snmd      */
/* distance of the points to be returned.  This routine*/
/* constructs and populates the GeoCoordinates         */
/* structure with the required data by fetching the    */
/* coordinates from the index, and by converting the   */
/* snmd distance into meters.  It should be noticed    */
/* that the latitude and longitude are copied into the */
/* new data, so that the GeoCoordinates structure      */
/* remains valid even if the index is subsequently     */
/* updated or even freed.
NOTICE also that the */ +/* distances returned may not agree precisely with the */ +/* distances that could be calculated by a separate */ +/* call to GeoIndex_distance because of rounding errors*/ +/* =================================================== */ +GeoCoordinates* GeoAnswers(GeoIx* gix, GeoResults* gr, bool returnDistances) { + GeoCoordinates* ans; + GeoCoordinate* gc; + int i, j; + double mole; + + if (gr->pointsct == 0) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->snmd); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr); + return nullptr; + } + + ans = static_cast( + TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(GeoCoordinates), false)); + gc = static_cast(TRI_Allocate( + TRI_UNKNOWN_MEM_ZONE, gr->pointsct * sizeof(GeoCoordinate), false)); + + if ((ans == nullptr) || (gc == nullptr)) { + if (ans != nullptr) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, ans); + } + if (gc != nullptr) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gc); + } + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->snmd); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr); + return nullptr; + } + ans->length = gr->pointsct; + ans->coordinates = gc; + j = 0; + for (i = 0; i < gr->allocpoints; i++) { + if (j >= gr->pointsct) break; + int slot = gr->slot[i]; + //GeoCoordinate * Rslot; + if (slot == 0) continue; + SlotRead(gix,slot,ans->coordinates+j); + if (returnDistances) { + mole = sqrt(gr->snmd[i]); + if (mole > 2.0) mole = 2.0; /* make sure arcsin succeeds! */ + gr->snmd[j] = 2.0 * EARTHRADIAN * asin(mole / 2.0); + } + j++; + } + // note that these are uncalculated if returnDistances is false! 
+ ans->distances = gr->snmd; + + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr); + + return ans; +} +/* =================================================== */ +/* GeoPotJunk */ +/* A detailed point containing the target point set */ +/* with the current distance is compared to a pot */ +/* If any of the fixed points are too close to all the */ +/* descendents of a pot, 1 is returned to indicate that*/ +/* the pot is "junk" = it may be ignored in its */ +/* entirety because it contains no points close enough */ +/* to the target. Otherwise 0 is returned. */ +/* =================================================== */ +int GeoPotJunk(GeoDetailedPoint* gd, int pot) { + int i; + GeoPot gp; + PotRead(gd->gix,pot,&gp); + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) + if (gp.maxdist[i] < gd->distrej[i]) return 1; + return 0; +} +/* =================================================== */ +/* GeoSNMD */ +/* Finds the SNMD (Squared NormalizedMole Distance) */ +/* from the point (which must be "detailed" gd, and the*/ +/* ordinary point (just given by lat/longitude) */ +/* The cartesian coordinates of the ordinary point are */ +/* found, and then the differences squared returned. */ +/* =================================================== */ +double GeoSNMD(GeoDetailedPoint* gd, GeoCoordinate* c) { + double const lat = c->latitude * M_PI / 180.0; + double const lon = c->longitude * M_PI / 180.0; + double const latCos = cos(lat); + double x, y, z; + z = sin(lat); + x = latCos * cos(lon); + y = latCos * sin(lon); + return (x - gd->x) * (x - gd->x) + (y - gd->y) * (y - gd->y) + + (z - gd->z) * (z - gd->z); +} +/* =================================================== */ +/* GeoIndex_PointsWithinRadius */ +/* This is the basic user-visible call to find all the */ +/* the points in the index that are within the */ +/* specified distance of the target point */ +/* First the GeoIndex must be cast to the correct */ +/* (GeoIx) structure so that it can be used! 
*/
/* the result structure is then set up initially to    */
/* hold up to 100 results points, and the point is then*/
/* detailed (GeoString, x,y,z and distances to fixed   */
/* points).  The stack is then populated with the      */
/* initial descending set of pots ending with the one  */
/* nearest the target point, and the distance set on   */
/* the detailed point by converting the meters into an */
/* SNMD.  The pots on the stack are then considered.   */
/* If the call to GeoPotJunk indicates that there are  */
/* no points in that pot within the required circle,   */
/* the pot is discarded.  Otherwise, if the pot is a   */
/* leaf pot, the points are considered individually,   */
/* and notice the recovery to free everything if there */
/* is a need to grow the results structure and there   */
/* is not enough memory.  If the pot is not a leaf pot */
/* it is replaced on the stack by both its children    */
/* Processing continues until the stack is empty       */
/* At the end, the GeoAnswers routine is used to       */
/* convert the pot/snmd collection of the GeoResults   */
/* structure, into the distance (in meters) and the    */
/* GeoCoordinate data (lat/longitude and data pointer) */
/* needed for the return to the caller.                */
/* =================================================== */
GeoCoordinates* GeoIndex_PointsWithinRadius(GeoIdx* gi, GeoCoordinate* c,
                                            double d) {
  /* reject coordinates outside the valid lat/long range */
  if (c->longitude < -180.0) return nullptr;
  if (c->longitude > 180.0) return nullptr;
  if (c->latitude < -90.0) return nullptr;
  if (c->latitude > 90.0) return nullptr;

  GeoIx* gix = (GeoIx*)gi;
  GeoResults* found = GeoResultsCons(100);
  if (found == nullptr) return nullptr;

  GeoDetailedPoint target;
  GeoStack pending;
  GeoMkDetail(gix, &target, c);
  GeoStackSet(&pending, &target, found);
  double const maxsnmd = GeoMetersToSNMD(d);
  GeoSetDistance(&target, maxsnmd);

  GeoPot gp;
  GeoCoordinate candidate;
  pending.stacksize++;
  while (pending.stacksize >= 1) {
    pending.stacksize--;
    int const pot = pending.potid[pending.stacksize];
    if (GeoPotJunk(&target, pot)) continue;
    PotRead(gix, pot, &gp);

    if (gp.LorLeaf != 0) {
      /* inner pot: look at both children later */
      pending.potid[pending.stacksize++] = gp.LorLeaf;
      pending.potid[pending.stacksize++] = gp.RorPoints;
      continue;
    }

    /* leaf pot: test every point it holds */
    for (int i = 0; i < gp.RorPoints; i++) {
      int const slot = gp.points[i];
      SlotRead(gix, slot, &candidate);
      double const snmd = GeoSNMD(&target, &candidate);
      /* tiny tolerance so points exactly on the circle are kept */
      if (snmd > (maxsnmd * 1.00000000000001)) continue;
      if (GeoResultsGrow(found) == -1) {
        /* out of memory: give everything back and report failure */
        TRI_Free(TRI_UNKNOWN_MEM_ZONE, found->snmd);
        TRI_Free(TRI_UNKNOWN_MEM_ZONE, found->slot);
        TRI_Free(TRI_UNKNOWN_MEM_ZONE, found);
        return nullptr;
      }
      found->slot[found->pointsct] = slot;
      found->snmd[found->pointsct] = snmd;
      found->pointsct++;
    }
  }
  return GeoAnswers(gix, found, true); /* note - this may be nullptr */
}
/* =================================================== */
/*            GeoIndex_NearestCountPoints              */
/* The other user-visible search call, which finds the */
/* nearest points for a user-specified count           */
/* processing is not dissimilar to the previous routine*/
/* but here the results structure is allocated at the  */
/* correct size and used as a priority queue.
Since */ +/* it always helps if more points are found (the */ +/* distance of interest drops, so that pots are more */ +/* readily rejected) some care is taken when a pot is */ +/* not rejected to put the one most likely to contain */ +/* useful points onto the top of the stack for early */ +/* processing. */ +/* =================================================== */ +GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c, + int count) { + GeoResults* gr; + GeoDetailedPoint gd; + GeoCoordinates* answer; + GeoStack gk; + GeoPot gp; + int slot, i, left; + GeoCoordinate Xslot; + double snmd; + GeoIx* gix; + if (c->longitude < -180.0) return nullptr; + if (c->longitude > 180.0) return nullptr; + if (c->latitude < -90.0) return nullptr; + if (c->latitude > 90.0) return nullptr; + + gix = (GeoIx*)gi; + gr = GeoResultsCons(count); + if (gr == nullptr) return nullptr; + GeoMkDetail(gix, &gd, c); + GeoStackSet(&gk, &gd, gr); + GeoResultsStartCount(gr); + left = count; + + while (gk.stacksize >= 0) { + int pot = gk.potid[gk.stacksize--]; + PotRead(gix,pot,&gp); + if (left <= 0) { + GeoSetDistance(&gd, gr->snmd[0]); + if (GeoPotJunk(&gd, pot)) continue; + } + if (gp.LorLeaf == 0) { + for (i = 0; i < gp.RorPoints; i++) { + slot = gp.points[i]; + SlotRead(gix,slot,&Xslot); + snmd = GeoSNMD(&gd, &Xslot); + GeoResultsInsertPoint(gr, slot, snmd); + left--; + if (left < -1) left = -1; + } + } else { + if (gd.gs > gp.middle) { + gk.potid[++gk.stacksize] = gp.LorLeaf; + gk.potid[++gk.stacksize] = gp.RorPoints; + } else { + gk.potid[++gk.stacksize] = gp.RorPoints; + gk.potid[++gk.stacksize] = gp.LorLeaf; + } + } + } + answer = GeoAnswers(gix, gr, true); + return answer; /* note - this may be nullptr */ +} +/* =================================================== */ +/* GeoIndexFreeSlot */ +/* return the specified slot to the free list */ +/* =================================================== */ +void GeoIndexFreeSlot(GeoIx* gix, int slot) { + gix->gxc[slot].latitude = 
gix->gxc[0].latitude; + gix->gxc[0].latitude = slot; +} +/* =================================================== */ +/* GeoIndexNewSlot */ +/* If there is a fre slot already on the free list, */ +/* just return its slot number. Otherwise the entire */ +/* slot list is realloc'd. Although this might change */ +/* the physical memory location of all the indexed */ +/* points, this is not a problem since the slotid */ +/* values are not changed. */ +/* The GeoIndexGROW, which specifies the percentage */ +/* of growth to be used, is in GeoIndex.h. Notice also*/ +/* that some care is take to ensure that, in the case */ +/* of memory allocation failure, the index is still */ +/* kept unchanged even though the new point cannot be */ +/* added to the index. */ +/* =================================================== */ +int GeoIndexNewSlot(GeoIx* gix) { + int j; + GeoCoordinate* gc; + if (gix->gxc[0].latitude == 0.0) { + /* do the growth calculation in long long to make sure it doesn't */ + /* overflow when the size gets to be near 2^31 */ + long long x = gix->slotct; + long long y = 100 + GeoIndexGROW; + x = x * y + 99; + y = 100; + x = x / y; + if (x > 2000000000L) return -2; + int newslotct = (int)x; + gc = static_cast(TRI_Reallocate( + TRI_UNKNOWN_MEM_ZONE, gix->gxc, newslotct * sizeof(GeoCoordinate))); + + if (gc == nullptr) { + return -2; + } + gix->gxc = gc; + + // update memory usage + gix->_memoryUsed -= gix->slotct * sizeof(GeoCoordinate); + gix->_memoryUsed += newslotct * sizeof(GeoCoordinate); + + for (j = gix->slotct; j < newslotct; j++) GeoIndexFreeSlot(gix, j); + gix->slotct = newslotct; + } + j = (int)(gix->gxc[0].latitude); + gix->gxc[0].latitude = gix->gxc[j].latitude; + return j; +} +/* =================================================== */ +/* GeoFind */ +/* This routine is used during insertion and removal, */ +/* but is not used during the searches. 
*/ +/* Find the given point if it is in the index, and set */ +/* the GeoPath data structure to give the path from the*/ +/* root pot (pot 1) to the leaf pot, if any, containing*/ +/* the sepecified (detailed) point, or - if the point */ +/* is not present, to the first leaf pot into which the*/ +/* specified point may be inserted. */ +/* To start with, the index tree is descended, starting*/ +/* with the root (which, rather bizzarly, is at the */ +/* top of this tree!) always taking the right branch if*/ +/* both would do, to reach the rightmost leaf pot that */ +/* could contain the specified point. */ +/* We then proceed leftwards through the points until */ +/* either the specified point is found in the index, or*/ +/* the first leaf pot is found that could contain the */ +/* specified point. It is worth noting that the first */ +/* pot of all has "low-values" as its "start" GeoString*/ +/* so that this process cannot go off the front of the */ +/* index. Notice also that it is not expected to be */ +/* very common that a large number of points with the */ +/* same GeoString (so within 30 centimeters!) 
will be */ +/* inserted into the index, and that even if there are */ +/* the inefficiency of this code is only moderate, and */ +/* manifests itself only during maintenance */ +/* the return value is 1 if the point is found and 2 */ +/* if it is not found */ +/* =================================================== */ +int GeoFind(GeoPath* gt, GeoDetailedPoint* gd) { + int pot, pot1; + int i; + int slot; + GeoIx* gix; + GeoCoordinate gc; + GeoPot gp; + gix = gd->gix; + gt->gix = gix; + pot = 1; + gt->pathlength = 0; + while (1) { + PotRead(gix,pot,&gp); + gt->path[gt->pathlength] = pot; + gt->pathlength++; + if (gp.LorLeaf == 0) break; + if (gp.middle > gd->gs) + pot = gp.LorLeaf; + else + pot = gp.RorPoints; + } + /* so we have a pot such that top is bigger but bottom isn't */ + while (1) /* so look for an exact match */ + { + for (i = 0; i < gp.RorPoints; i++) { + slot = gp.points[i]; + SlotRead(gix,slot,&gc); + if (((gd->gc)->latitude == gc.latitude) && + ((gd->gc)->longitude == gc.longitude) && + ((gd->gc)->data == gc.data)) { + gt->path[gt->pathlength] = i; + return 1; + } + } + if (gp.start < gd->gs) break; + /* need to find the predecessor of this pot */ + /* this is expected to be a rare event, so */ + /* no time is wasted to simplify this! 
*/ + while (1) { + gt->pathlength--; + pot1 = gt->path[gt->pathlength - 1]; + PotRead(gix,pot1,&gp); + if (pot == gp.RorPoints) break; /* cannot go off the front */ + pot = pot1; + } + PotRead(gix,pot1,&gp); + pot = gp.LorLeaf; + /* now we have a pot whose iterated right child we want */ + while (1) { + PotRead(gix,pot,&gp); + gt->path[gt->pathlength] = pot; + gt->pathlength++; + if (gp.LorLeaf == 0) break; + pot = gp.RorPoints; + } + } + return 2; +} +/* =================================================== */ +/* GeoPopulateMaxdist */ +/* During maintencance, when the points in a leaf pot */ +/* have been changed, this routine merely looks at all */ +/* the points in the pot, details them, and rebuilds */ +/* the list of maximum distances. */ +/* =================================================== */ +void GeoPopulateMaxdist(GeoIx* gix, GeoPot* gp, GeoString* gsa) { + int i, j; + GeoDetailedPoint gd; + GeoCoordinate Xslot; + gsa[0] = 0x1FFFFFFFFFFFFFll; + gsa[1] = 0ll; + for (j = 0; j < GeoIndexFIXEDPOINTS; j++) gp->maxdist[j] = 0; + for (i = 0; i < gp->RorPoints; i++) { + SlotRead(gix,gp->points[i],&Xslot); + GeoMkDetail(gix, &gd, &Xslot); + for (j = 0; j < GeoIndexFIXEDPOINTS; j++) + if (gd.fixdist[j] > gp->maxdist[j]) gp->maxdist[j] = gd.fixdist[j]; + if (gd.gs < gsa[0]) gsa[0] = gd.gs; + if (gd.gs > gsa[1]) gsa[1] = gd.gs; + } + gp->level = 1; +} +/* =================================================== */ +/* GeoGetPot */ +/* This routine simply converts a path and a height */ +/* into a pot id. */ +/* =================================================== */ +int GeoGetPot(GeoPath* gt, int height) { + return gt->path[gt->pathlength - height]; +} +/* =================================================== */ +/* GeoAdjust */ +/* During insertion and deletion, this routine is used */ +/* to populate the data correctly for the parent pot */ +/* specified (which may not be a leaf pot) by taking */ +/* the data from the child pots. 
It populates the */
/* start, middle and end GeoStrings, the level, and    */
/* the maximum distances to the fixed points.          */
/* =================================================== */
void GeoAdjust(GeoIx* gix, int potx) /* the kids are alright */
{
  GeoPot parent;
  GeoPot left;
  GeoPot right;

  /* x = (yz): load the parent and both of its children */
  PotRead(gix, potx, &parent);
  int const poty = parent.LorLeaf;
  PotRead(gix, poty, &left);
  int const potz = parent.RorPoints;
  PotRead(gix, potz, &right);

  /* the parent spans both children; the split is where the right starts */
  parent.start = left.start;
  parent.end = right.end;
  parent.middle = right.start;

  /* one level above the taller child */
  parent.level = left.level;
  if (right.level > parent.level) {
    parent.level = right.level;
  }
  parent.level++;

  /* per fixed point, the larger of the two children's maxima */
  for (int f = 0; f < GeoIndexFIXEDPOINTS; f++) {
    parent.maxdist[f] = left.maxdist[f];
    if (parent.maxdist[f] < right.maxdist[f]) {
      parent.maxdist[f] = right.maxdist[f];
    }
  }
  PotWrite(gix, potx, &parent);
}
/* =================================================== */
/*                    RotateLeft                       */
/* The operation used during tree balancing to convert */
/* A(BC) into (AB)C.  To start with, E is A(BC) and    */
/* D is BC.  D is then changed to be (AB) and          */
/* GeoAdjust is used to re-populate its data.
E is */ +/* then set to be DC = (AB)C, and again GeoAdjust is */ +/* used to set the GeoStrings, level and distances to */ +/* the fixed points, taking the data from the children */ +/* in both cases */ +/* =================================================== */ +void RotateLeft(GeoIx* gix, int pote) { + int pota, potb, potc, potd; + GeoPot gpd; + GeoPot gpe; + PotRead(gix,pote,&gpe); + potd = gpe.RorPoints; + PotRead(gix,potd,&gpd); + pota = gpe.LorLeaf; + potb = gpd.LorLeaf; + potc = gpd.RorPoints; + gpd.LorLeaf = pota; + gpd.RorPoints = potb; + PotWrite(gix,potd,&gpd); // inefficiency - consider changing GeoAdjust + GeoAdjust(gix, potd); // inefficiency - re-reading potd + gpe.LorLeaf = potd; + gpe.RorPoints = potc; + PotWrite(gix,pote,&gpe); // same again + GeoAdjust(gix, pote); +} +/* =================================================== */ +/* RotateRight */ +/* The mirror-image or inverse of RotateLeft. */ +/* Changes (AB)C into A(BC). The given parent pot is */ +/* E = (AB)C and D is AB. D is then reused to be BC */ +/* and GeoAdjusted, and then E set to be AD = A(BC) and*/ +/* also GeoAdjusted */ +/* =================================================== */ +void RotateRight(GeoIx* gix, int pote) { + int pota, potb, potc, potd; + GeoPot gpd; + GeoPot gpe; + PotRead(gix,pote,&gpe); + potd = gpe.LorLeaf; + PotRead(gix,potd,&gpd); + pota = gpd.LorLeaf; + potb = gpd.RorPoints; + potc = gpe.RorPoints; + gpd.LorLeaf = potb; + gpd.RorPoints = potc; + PotWrite(gix,potd,&gpd); // same inefficiency as RotateLeft + GeoAdjust(gix, potd); + gpe.LorLeaf = pota; + gpe.RorPoints = potd; + PotWrite(gix,pote,&gpe); + GeoAdjust(gix, pote); +} +/* =================================================== */ +/* GeoIndex_insert */ +/* The user-facing routine to insert a new point into */ +/* the index. First the index is cast into a GeoIx */ +/* so that it can be used, and then the point is */ +/* sanity checked. The point is then detailed and the */ +/* GeoFind routine called. 
If the point is found, this*/
/* is an error.  Otherwise a new slot is populated with*/
/* the data from the point, and then the point is put  */
/* into the first leaf pot into which it may go based  */
/* on its GeoString value.  If there is no room in that*/
/* pot, the pot is split into two (necessitating a tree*/
/* balancing operation) which starts by obtaining the  */
/* two new pots. . . continued below                   */
/* Return values:  0 inserted, -1 point already in the */
/* index, -2 no slot or pot could be allocated, -3     */
/* latitude/longitude out of range.                    */
/* =================================================== */
int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c) {
  int i, j, slot, pot, pot1;
  int pota, poty, potz;
  int lva, lvy, lvz;
  int height, rebalance;
  GeoDetailedPoint gd;
  GeoCoordinate Xslot;
  GeoPath gt;
  GeoPot gp;
  GeoPot gp1;
  GeoPot gp2;
  GeoPot gpx;
  GeoPot gpy;
  GeoPot gpz;
  GeoPot gpa;
  GeoString gsa[2];
  GeoIx* gix;
  gix = (GeoIx*)gi;
  rebalance = 0;
  if (c->longitude < -180.0) return -3;
  if (c->longitude > 180.0) return -3;
  if (c->latitude < -90.0) return -3;
  if (c->latitude > 90.0) return -3;
  GeoMkDetail(gix, &gd, c);
  i = GeoFind(&gt, &gd);
  if (i == 1) return -1; /* already present */
  pot = gt.path[gt.pathlength - 1];
  PotRead(gix, pot, &gp);
  /* new point, so we try to put it in */
  slot = GeoIndexNewSlot(gix);
  if (slot == -2) return -2; /* no room :( */
  SlotRead(gix, slot, &Xslot);
  Xslot.latitude = c->latitude;
  Xslot.longitude = c->longitude;
  Xslot.data = c->data;
  SlotWrite(gix, slot, &Xslot);
  // XQXQ need to insert this
  /* check first if we are going to need two new pots, and */
  /* if we are, go get them now before we get too tangled  */
  if (gp.RorPoints == GeoIndexPOTSIZE) {
    rebalance = 1;
    pot1 = GeoIndexNewPot(gix);
    int pot2 = GeoIndexNewPot(gix);
    PotRead(gix, pot, &gp); /* XQXQ won't have to do this on Rocks */
    if ((pot1 == -2) || (pot2 == -2)) {
      /* undo everything obtained so far; the index stays unchanged */
      GeoIndexFreeSlot(gix, slot);
      if (pot1 != -2) GeoIndexFreePot(gix, pot1);
      if (pot2 != -2) GeoIndexFreePot(gix, pot2);
      return -2;
    }
    /* =================================================== */
    /*           GeoIndex_insert continued                 */
    /* New pots are pot1 and pot2 which will be the new    */
    /* leaf pots with half the points each, and the old    */
    /* pot will become the parent of both of them          */
    /* After moving all the points to pot2, the half with  */
    /* the lowest GeoString are moved into pot1.  The two  */
    /* pots are then inspected with GeoPopulateMaxdist     */
    /* to ascertain what the actual distances and GeoString*/
    /* values are.  The GeoString boundary between the two */
    /* pots is set at the midpoint between the current     */
    /* actual boundaries and finally the current pot is    */
    /* set to be either pot1 or pot2 depending on where the*/
    /* new point (which has still not been inserted) should*/
    /* go.  Continued below . . . .                        */
    /* =================================================== */
    PotRead(gix, pot1, &gp1);  // XQXQ don't think this is even needed now!
    PotRead(gix, pot2,
            &gp2);  // XQXQ on Rocks it is reading a pot that is not there.
    /* pot is old one, pot1 and pot2 are the new ones */
    gp1.LorLeaf = 0;   /* leaf pot */
    gp1.RorPoints = 0; /* no points in it yet */
    /* first move the points from pot to pot2 */
    gp2.LorLeaf = 0; /* leaf pot */
    gp2.RorPoints = gp.RorPoints;
    for (i = 0; i < gp.RorPoints; i++) gp2.points[i] = gp.points[i];
    /* move the first half of the points from pot2 to pot1 */
    GeoString mings;
    for (i = 0; i < (GeoIndexPOTSIZE / 2); i++) {
      /* selection step: find the point in pot2 with the smallest GeoString */
      mings = 0x1FFFFFFFFFFFFFll;
      int js = 0;
      for (j = 0; j < gp2.RorPoints; j++) {
        GeoString gs;
        SlotRead(gix, gp2.points[j], &Xslot);
        gs = GeoMkHilbert(&Xslot);
        if (gs < mings) {
          mings = gs;
          js = j;
        }
      }
      /* append it to pot1 and close the gap in pot2 with pot2's last point */
      gp1.points[gp1.RorPoints] = gp2.points[js];
      gp2.points[js] = gp2.points[gp2.RorPoints - 1];
      gp2.RorPoints--;
      gp1.RorPoints++;
    }
    GeoPopulateMaxdist(gix, &gp2, gsa);
    mings = gsa[0]; /* lowest GeoString in pot2 */
    GeoPopulateMaxdist(gix, &gp1, gsa);
    mings = (mings + gsa[1]) / 2ll; /* midpoint with highest in pot1 */
    gp1.start = gp.start;
    gp1.end = mings;
    gp2.start = mings;
    gp2.end = gp.end;
    gp.LorLeaf = pot1;
    gp.RorPoints = pot2;
    PotWrite(gix, pot, &gp);
    PotWrite(gix, pot1, &gp1);
    PotWrite(gix, pot2, &gp2);
    GeoAdjust(gix, pot);  // XQXQ this is doing needless reads and writes
    /* extend the path by the new leaf the point will go into */
    gt.pathlength++;
    if (gd.gs < mings) {
      gp = gp1;
      pot = pot1;
      gt.path[gt.pathlength - 1] = pot1;
    } else {
      gp = gp2;
      pot = pot2;
      gt.path[gt.pathlength - 1] = pot2;
    }
  }
  /* =================================================== */
  /*           GeoIndex_insert continued                 */
  /* finally the new point is inserted into the pot, and */
  /* the maximum distances to the fixed points propagated*/
  /* up as far as necessary.  The rebalancing of the tree*/
  /* is then done, but only if the pot splitting happened*/
  /* to rebalance, the sequence of pots going back up is */
  /* traversed using the path structure, and the standard*/
  /* AVL balancing is used by doing the necessary        */
  /* rotations and level changes necessary to ensure that*/
  /* every parent has at least one child one level lower */
  /* and the other child is either also one level lower, */
  /* or two levels lower.  The details are also given in */
  /* the accompanying documentation                      */
  /* =================================================== */
  /* so we have a pot and a path we can use */
  /* gp is the pot, gt set correctly        */
  gp.points[gp.RorPoints] = slot;
  gp.RorPoints++;
  PotWrite(gix, pot, &gp);
  /* now propagate the maxdistances */  // XQXQ should reverse the loop i/j
                                        // order
  for (i = 0; i < GeoIndexFIXEDPOINTS; i++) {
    /* walk from the leaf towards the root and stop at the first pot whose
     * maximum already covers the new point */
    j = gt.pathlength - 1;
    while (j >= 0) {
      PotRead(gix, gt.path[j], &gpa);
      if (gd.fixdist[i] > gpa.maxdist[i]) {
        gpa.maxdist[i] = gd.fixdist[i];
        PotWrite(gix, gt.path[j], &gpa);
      } else
        break;
      j--;
    }
  }
  /* just need to balance the tree */
  if (rebalance == 0) return 0;
  height = 2;
  while (true) {
    int potx = GeoGetPot(&gt, height);
    PotRead(gix, potx, &gpx);
    int lvx = gpx.level;
    if (potx == 1) break; /* root pot ? */
    pot1 = GeoGetPot(&gt, height + 1); /* pot1=parent(x) */
    PotRead(gix, pot1, &gp1);
    int lv1 = gp1.level;
    if (lv1 > lvx) break; /* parent already sits above x */
    if (gp1.LorLeaf == potx) /* gpx is the left child? */
    {
      pota = gp1.RorPoints; /* 1 = (xa) */
      PotRead(gix, pota, &gpa);
      lva = gpa.level;
      if ((lva + 1) == lv1) /* so it is legal to up lev(1) */
      {
        gp1.level++;
        PotWrite(gix, pot1, &gp1);
        height++;
        continue;
      }
      poty = gpx.RorPoints;
      PotRead(gix, poty, &gpy);
      lvy = gpy.level;
      potz = gpx.LorLeaf;
      PotRead(gix, potz, &gpz);
      lvz = gpz.level;
      if (lvy <= lvz) {
        RotateRight(gix, pot1);
        height++;
        continue;
      }
      /* inner grandchild is the taller one: double rotation */
      RotateLeft(gix, potx);
      RotateRight(gix, pot1);
    } else /* gpx is the right child */
    {
      pota = gp1.LorLeaf; /* 1 = (ax) */
      PotRead(gix, pota, &gpa);
      lva = gpa.level;
      if ((lva + 1) == lv1) /* so it is legal to up lev(1) */
      {
        gp1.level++;
        PotWrite(gix, pot1, &gp1);
        height++;
        continue;
      }
      poty = gpx.LorLeaf;
      PotRead(gix, poty, &gpy);
      lvy = gpy.level;
      potz = gpx.RorPoints;
      PotRead(gix, potz, &gpz);
      lvz = gpz.level;
      if (lvy <= lvz) {
        RotateLeft(gix, pot1);
        height++;
        continue;
      }
      /* inner grandchild is the taller one: double rotation */
      RotateRight(gix, potx);
      RotateLeft(gix, pot1);
    }
  }
  return 0;
}
/* =================================================== */
/*                  GeoIndex_remove                    */
/* As a user-facing routine, this starts by casting the*/
/* GeoIndex structure to a GeoIx, so that its members  */
/* can be accessed.  The point is then detailed, and   */
/* GeoFind is used to check whether it is there.  If   */
/* not, this is an error.  Otherwise the point is      */
/* removed from the pot and the distances recalculated */
/* using the GeoPopulateMaxdist routine.  It is then   */
/* checked whether there are now too few points in the */
/* pot that used to contain the point, and if so there */
/* are eight cases as to what is to be done.
In four */ +/* of them, a point is moved from the adjacent leaf pot*/ +/* which may be at the same level or one lower, and may*/ +/* be either side of the current one. This is done if */ +/* there are too many points in the two leaf pots to */ +/* amalgamate them. In the other four cases the two */ +/* leaf pots are amalgamated, which results in the */ +/* releasing of two pots (which are put back into the */ +/* free chain using GeoIndexFreePot) Continued . . . . */ +/* =================================================== */ +int GeoIndex_remove(GeoIdx* gi, GeoCoordinate* c) { + GeoDetailedPoint gd; + GeoCoordinate Xslot; + int rebalance; + int levn, levc; + GeoPot gp; + int potp; + GeoPot gpp; + int potb; + GeoPot gpb; + int potn; + GeoPot gpn; + int potc; + GeoPot gpc; + GeoPath gt; + GeoString gsa[2]; + int i, pot, potix, slot, pathix; + GeoIx* gix; + if (c->longitude < -180.0) return -3; + if (c->longitude > 180.0) return -3; + if (c->latitude < -90.0) return -3; + if (c->latitude > 90.0) return -3; + gix = (GeoIx*)gi; + GeoMkDetail(gix, &gd, c); + i = GeoFind(>, &gd); + if (i != 1) return -1; + pot = gt.path[gt.pathlength - 1]; + PotRead(gix,pot,&gp); + potix = gt.path[gt.pathlength]; + slot = gp.points[potix]; + GeoIndexFreeSlot(gix, slot); // XQXQ Need to delete slot + gp.points[potix] = gp.points[gp.RorPoints - 1]; + gp.RorPoints--; + GeoPopulateMaxdist(gix, &gp, gsa); + PotWrite(gix,pot,&gp); + if (pot == 1) return 0; /* just allow root pot to have fewer points */ + rebalance = 0; + if ((2 * gp.RorPoints) < GeoIndexPOTSIZE) { + int j, js; + GeoString mings, gs; + potp = gt.path[gt.pathlength - 2]; + PotRead(gix,potp,&gpp); + if (gpp.LorLeaf == pot) { + /* Left */ + potb = gpp.RorPoints; + PotRead(gix,potb,&gpb); + if (gpb.LorLeaf == 0) { + /* Left Brother */ + if ((gpb.RorPoints + gp.RorPoints) > GeoIndexPOTSIZE) { + /* Left Brother Lots */ + mings = 0x1FFFFFFFFFFFFFll; + js = 0; + for (j = 0; j < gpb.RorPoints; j++) { + SlotRead(gix,gpb.points[j],&Xslot); 
+ gs = GeoMkHilbert(&Xslot); + if (gs < mings) { + mings = gs; + js = j; + } + } + gp.points[gp.RorPoints] = gpb.points[js]; + gpb.points[js] = gpb.points[gpb.RorPoints - 1]; + gpb.RorPoints--; + gp.RorPoints++; + GeoPopulateMaxdist(gix, &gp, gsa); + mings = gsa[1]; + GeoPopulateMaxdist(gix, &gpb, gsa); + mings = (mings + gsa[0]) / 2ll; + gp.end = mings; + gpb.start = mings; + gpp.middle = mings; + PotWrite(gix,pot,&gp); + PotWrite(gix,potb,&gpb); + GeoAdjust(gix, potp); + } else { + /* Left Brother Few */ + gpp.LorLeaf = 0; + i = 0; + for (j = 0; j < gpb.RorPoints; j++) + gpp.points[i++] = gpb.points[j]; + for (j = 0; j < gp.RorPoints; j++) gpp.points[i++] = gp.points[j]; + gpp.RorPoints = i; + GeoIndexFreePot(gix, pot); + GeoIndexFreePot(gix, potb); + GeoPopulateMaxdist(gix, &gpp, gsa); + gt.pathlength--; + rebalance = 1; + PotWrite(gix,potp,&gpp); + } + } else { + /* Left Nephew */ + potn = gpb.LorLeaf; + PotRead(gix,potn,&gpn); + if ((gpn.RorPoints + gp.RorPoints) > GeoIndexPOTSIZE) { + /* Left Nephew Lots */ + mings = 0x1FFFFFFFFFFFFFll; + js = 0; + for (j = 0; j < gpn.RorPoints; j++) { + SlotRead(gix,gpn.points[j],&Xslot); + gs = GeoMkHilbert(&Xslot); + if (gs < mings) { + mings = gs; + js = j; + } + } + gp.points[gp.RorPoints] = gpn.points[js]; + gpn.points[js] = gpn.points[gpn.RorPoints - 1]; + gpn.RorPoints--; + gp.RorPoints++; + GeoPopulateMaxdist(gix, &gp, gsa); + mings = gsa[1]; + GeoPopulateMaxdist(gix, &gpn, gsa); + mings = (mings + gsa[0]) / 2ll; + gp.end = mings; + gpn.start = mings; + gpb.start = mings; + gpp.middle = mings; + PotWrite(gix,pot,&gp); + PotWrite(gix,potn,&gpn); + GeoAdjust(gix, potb); + GeoAdjust(gix, potp); + } else { + /* Left Nephew Few */ + potc = gpb.RorPoints; + i = gp.RorPoints; + for (j = 0; j < gpn.RorPoints; j++) gp.points[i++] = gpn.points[j]; + gp.RorPoints = i; + gpp.RorPoints = potc; + gpp.middle = gpb.middle; + gp.end = gpp.middle; + GeoIndexFreePot(gix, potn); + GeoIndexFreePot(gix, potb); + GeoPopulateMaxdist(gix, 
&gp, gsa); + PotWrite(gix,pot,&gp); + PotWrite(gix,potp,&gpp); + GeoAdjust(gix, potp); + gt.pathlength--; + rebalance = 1; + } + } + } else { + /* Right */ + potb = gpp.LorLeaf; + PotRead(gix,potb,&gpb); + if (gpb.LorLeaf == 0) { + /* Right Brother */ + if ((gpb.RorPoints + gp.RorPoints) > GeoIndexPOTSIZE) { + /* Right Brother Lots */ + mings = 0ll; + js = 0; + for (j = 0; j < gpb.RorPoints; j++) { + SlotRead(gix,gpb.points[j],&Xslot); + gs = GeoMkHilbert(&Xslot); + if (gs > mings) { + mings = gs; + js = j; + } + } + gp.points[gp.RorPoints] = gpb.points[js]; + gpb.points[js] = gpb.points[gpb.RorPoints - 1]; + gpb.RorPoints--; + gp.RorPoints++; + GeoPopulateMaxdist(gix, &gp, gsa); + mings = gsa[0]; + GeoPopulateMaxdist(gix, &gpb, gsa); + mings = (mings + gsa[1]) / 2ll; + gp.start = mings; + gpb.end = mings; + gpp.middle = mings; + PotWrite(gix,pot,&gp); + PotWrite(gix,potb,&gpb); + GeoAdjust(gix, potp); + } else { + /* Right Brother Few */ + /* observe this is identical to Left Brother Few */ + gpp.LorLeaf = 0; + i = 0; + for (j = 0; j < gpb.RorPoints; j++) + gpp.points[i++] = gpb.points[j]; + for (j = 0; j < gp.RorPoints; j++) gpp.points[i++] = gp.points[j]; + gpp.RorPoints = i; + GeoIndexFreePot(gix, pot); + GeoIndexFreePot(gix, potb); + GeoPopulateMaxdist(gix, &gpp, gsa); + gt.pathlength--; + rebalance = 1; + //PotWrite(gix,pot,&gp); + PotWrite(gix,potp,&gpp); + } + } else { + /* Right Nephew */ + potn = gpb.RorPoints; + PotRead(gix,potn,&gpn); + if ((gpn.RorPoints + gp.RorPoints) > GeoIndexPOTSIZE) { + /* Right Nephew Lots */ + mings = 0ll; + js = 0; + for (j = 0; j < gpn.RorPoints; j++) { + SlotRead(gix,gpn.points[j],&Xslot); + gs = GeoMkHilbert(&Xslot); + if (gs > mings) { + mings = gs; + js = j; + } + } + gp.points[gp.RorPoints] = gpn.points[js]; + gpn.points[js] = gpn.points[gpn.RorPoints - 1]; + gpn.RorPoints--; + gp.RorPoints++; + GeoPopulateMaxdist(gix, &gp, gsa); + mings = gsa[0]; + GeoPopulateMaxdist(gix, &gpn, gsa); + mings = (mings + gsa[1]) / 2ll; + 
gp.start = mings; + gpn.end = mings; + gpb.end = mings; + gpp.middle = mings; + PotWrite(gix,pot,&gp); + PotWrite(gix,potn,&gpn); + GeoAdjust(gix, potb); + GeoAdjust(gix, potp); + } else { + /* Right Nephew Few */ + potc = gpb.LorLeaf; + i = gp.RorPoints; + for (j = 0; j < gpn.RorPoints; j++) gp.points[i++] = gpn.points[j]; + gp.RorPoints = i; + gpp.LorLeaf = potc; + gpp.middle = gpb.middle; + gp.start = gpb.middle; + GeoIndexFreePot(gix, potn); + GeoIndexFreePot(gix, potb); + GeoPopulateMaxdist(gix, &gp, gsa); + PotWrite(gix,pot,&gp); + PotWrite(gix,potp,&gpp); + GeoAdjust(gix, potp); + gt.pathlength--; + rebalance = 1; + } + } + } + } + /* =================================================== */ + /* GeoIndex_remove continued */ + /* Again the balancing of the tree is fairly standard */ + /* and documented in the associated documentation to */ + /* this routine. At every stage in this process the */ + /* parent potp of the current pot may not be balanced */ + /* as pot has just had its level reduced. To tell what*/ + /* to do, the product i of the level differences is */ + /* calculated. This should be 1 or 2, but may be 3 or */ + /* 4, and in each case some further investigation soon */ + /* shows what rotations and further upward balancing */ + /* may be needed. continued . . . 
*/ + /* =================================================== */ + pathix = gt.pathlength - 1; + while ((pathix > 0) && (rebalance == 1)) { + /* Deletion rebalancing */ + rebalance = 0; + pathix--; + potp = gt.path[pathix]; + PotRead(gix,potp,&gpp); + int levp = gpp.level; + pot = gpp.LorLeaf; + potb = gpp.RorPoints; + PotRead(gix,pot,&gp); + PotRead(gix,potb,&gpb); + int lev = gp.level; + int levb = gpb.level; + i = (levp - lev) * (levp - levb); + if (i == 4) { + gpp.level--; + PotWrite(gix,potp,&gpp); + rebalance = 1; + } + if (i == 3) { + if ((levp - lev) == 3) { + potn = gpb.LorLeaf; + PotRead(gix,potn,&gpn); + potc = gpb.RorPoints; + PotRead(gix,potc,&gpc); + levn = gpn.level; + levc = gpc.level; + if (levn <= levc) { + RotateLeft(gix, potp); + if (levn < levc) rebalance = 1; + } else { + RotateRight(gix, potb); + RotateLeft(gix, potp); + rebalance = 1; + } + } else { + potn = gp.LorLeaf; + PotRead(gix,potn,&gpn); + potc = gp.RorPoints; + PotRead(gix,potc,&gpc); + levn = gpn.level; + levc = gpc.level; + if (levn >= levc) { + RotateRight(gix, potp); + if (levn > levc) rebalance = 1; + } else { + RotateLeft(gix, pot); + RotateRight(gix, potp); + rebalance = 1; + } + } + } + GeoAdjust(gix, potp); + } + /* =================================================== */ + /* GeoIndex_remove continued */ + /* In the case of deletion, it is not so easy to see */ + /* what the new maximum distances are given the point */ + /* deleted, so the GeoAdjust routine is used all the */ + /* way up. 
*/ + /* =================================================== */ + while (pathix > 0) { + pathix--; + pot = gt.path[pathix]; + GeoAdjust(gix, pot); + } + return 0; +} +/* =================================================== */ +/* GeoIndex_CoordinatesFree */ +/* The user-facing routine that must be called by the */ +/* user when the results of a search are finished with */ +/* =================================================== */ +void GeoIndex_CoordinatesFree(GeoCoordinates* clist) { + if (clist == nullptr) { + return; + } + TRI_Free(TRI_UNKNOWN_MEM_ZONE, clist->coordinates); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, clist->distances); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, clist); +} +/* =================================================== */ +/* GeoIndex_hint does nothing! */ +/* it is here for possible future compatibilty */ +/* =================================================== */ +int GeoIndex_hint(GeoIdx* gi, int hint) { return 0; } + +/* =================================================== */ +/* GeoCr structure */ +/* This is the REAL GeoCursor structure - the one in */ +/* the GeoIndex.h file is just a sham (it says it is */ +/* a char!) to keep the structure private so that the */ +/* GeoIndex.h is short and contains only data of */ +/* interest to the user. 
// Entry of the slot heap: a slot number together with the snmd value that
// was computed for the point stored in that slot.
struct hslot {
  int slot;
  double snmd;
};  // slot for putting on the heap

// Ordering predicate for the slot heap: true when a's snmd is strictly
// greater than b's. Used with std::push_heap / std::pop_heap, this keeps the
// entry with the smallest snmd at the front of the heap.
bool hslotcompare(hslot a, hslot b) { return a.snmd > b.snmd; }
+#endif + GeoMkDetail(gix, &(gcr->gd), c); + hpot hp; + hp.pot = 1; + PotRead(gix,1,&root); + hp.dist = makedist(&root, &(gcr->gd)); + gcr->potsnmd = GeoFixtoSNMD(hp.dist); + gcr->slotsnmd = 20.0; + gcr->potheap.push_back(hp); + // not necessary here because potheap only contains one element + //std::push_heap(gcr->potheap.begin(), gcr->potheap.end(), hpotcompare); + TRI_ASSERT(gcr->potheap.size() == 1); + // cppcheck-suppress * + return (GeoCursor*)gcr; +} + +GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count, bool returnDistances, double maxDistance) { + int i, j, r; + GeoCoordinate ct; + GeoCoordinates* gcts; + GeoCr* gcr; + GeoIx* gix; + GeoPot pot; + GeoPot pot1; + double tsnmd; + hslot hs; + hpot hp; + gcr = (GeoCr*)gc; + gix=gcr->Ix; + GeoResults* gr = GeoResultsCons(count); + if (gr == nullptr) return nullptr; + while (gr->pointsct < count) { + if (gcr->potsnmd < gcr->slotsnmd * 1.000001) { + // smash top pot - if there is one + if (gcr->potheap.size() == 0) break; // that's all there is + PotRead(gix,gcr->potheap.front().pot,&pot); + // anyway remove top from heap + std::pop_heap(gcr->potheap.begin(), gcr->potheap.end(), hpotcompare); + gcr->potheap.pop_back(); + if (pot.LorLeaf == 0) { + // leaf pot - put all the points into the points heap + for (i = 0; i < pot.RorPoints; i++) { + j = pot.points[i]; + SlotRead(gix,j,&ct); + hs.snmd = GeoSNMD(&(gcr->gd), &ct); + hs.slot = j; + gcr->slotheap.push_back(hs); + std::push_heap(gcr->slotheap.begin(), gcr->slotheap.end(), + hslotcompare); + } + if (!gcr->slotheap.empty()) { + gcr->slotsnmd = gcr->slotheap.front().snmd; + } + } else { + hp.pot = pot.LorLeaf; + PotRead(gix,hp.pot,&pot1); + hp.dist = makedist(&pot1, &(gcr->gd)); + gcr->potheap.push_back(hp); + std::push_heap(gcr->potheap.begin(), gcr->potheap.end(), hpotcompare); + hp.pot = pot.RorPoints; + PotRead(gix,hp.pot,&pot1); + hp.dist = makedist(&pot1, &(gcr->gd)); + gcr->potheap.push_back(hp); + std::push_heap(gcr->potheap.begin(), 
gcr->potheap.end(), hpotcompare); + } + gcr->potsnmd = 10.0; + if (!gcr->potheap.empty()) { + PotRead(gix,gcr->potheap.front().pot,&pot); + gcr->potsnmd = GeoFixtoSNMD(makedist(&pot, &(gcr->gd))); + } + } else { + if (gcr->slotheap.empty()) break; // that's all there is + int slox = gcr->slotheap.front().slot; + tsnmd = gcr->slotheap.front().snmd; + r = GeoResultsGrow(gr); + if (r == -1) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->snmd); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr); + return nullptr; + } + gr->slot[gr->pointsct] = slox; + gr->snmd[gr->pointsct] = tsnmd; + gr->pointsct++; + gcr->slotsnmd = 5.0; + std::pop_heap(gcr->slotheap.begin(), gcr->slotheap.end(), hslotcompare); + gcr->slotheap.pop_back(); + if (!gcr->slotheap.empty()) { + gcr->slotsnmd = gcr->slotheap.front().snmd; + } + } + } + gcts = GeoAnswers(gix, gr, returnDistances); + return gcts; +} + +void GeoIndex_CursorFree(GeoCursor* gc) { + delete reinterpret_cast(gc); +} + +/* =================================================== */ +/* The remaining routines are usually */ +/* only compiled in for debugging purposes. They allow*/ +/* the dumping of the index (to a specified file) and */ +/* a self-check to see whether the index itself seems */ +/* to be correct. */ +/* =================================================== */ +#ifdef TRI_GEO_DEBUG + +void RecursivePotDump(GeoIx* gix, FILE* f, int pot) { + int i; + GeoPot gp; + GeoCoordinate gc; + PotRead(gix,pot,&gp); + fprintf(f, "GP. pot %d level %d Kids %d %d\n", pot, gp.level, gp.LorLeaf, + gp.RorPoints); + fprintf(f, "strings %llx %llx %llx\n", gp.start, gp.middle, gp.end); + fprintf(f, "maxdists "); + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) fprintf(f, " %x", gp.maxdist[i]); + fprintf(f, "\n"); + if (gp.LorLeaf == 0) { + fprintf(f, "Leaf pot containing %d points . . 
.\n", gp.RorPoints); + for (i = 0; i < gp.RorPoints; i++) { + fprintf(f, "Child %d Point %d ", i, gp.points[i]); + SlotRead(gix,gp.points[i],&gc); + fprintf(f, "Lat. %9.4f, Long. %9.4f", gc.latitude, gc.longitude); +#if TRI_GEO_DEBUG == 2 + fprintf(f, " %llu", (unsigned long long)gc.data); +#endif + fprintf(f, "\n"); + } + } else { + fprintf(f, "\nPot %d - Left Child of pot %d\n", gp.LorLeaf, pot); + RecursivePotDump(gix, f, gp.LorLeaf); + fprintf(f, "\nPot %d - Right Child of pot %d\n", gp.RorPoints, pot); + RecursivePotDump(gix, f, gp.RorPoints); + } +} + +void GeoIndex_INDEXDUMP(GeoIdx* gi, FILE* f) { + GeoIx* gix; + gix = (GeoIx*)gi; + fprintf(f, "Dump of entire index. %d pots and %d slots allocated\n", + gix->potct, gix->slotct); + RecursivePotDump(gix, f, 1); +} + +int RecursivePotValidate(GeoIx* gix, int pot, int* usage) { + int i, j; + GeoPot gp; + GeoDetailedPoint gd; + GeoFix maxdist[GeoIndexFIXEDPOINTS]; + GeoPot gpa, gpb; + GeoCoordinate gc; + PotRead(gix,pot,&gp); + usage[0]++; + if (gp.LorLeaf == 0) { + if ((pot != 1) && (2 * gp.RorPoints < GeoIndexPOTSIZE)) return 1; + for (j = 0; j < GeoIndexFIXEDPOINTS; j++) maxdist[j] = 0; + if (gp.level != 1) return 10; + for (i = 0; i < gp.RorPoints; i++) { + SlotRead(gix,gp.points[i],&gc); + GeoMkDetail(gix, &gd, &gc); + for (j = 0; j < GeoIndexFIXEDPOINTS; j++) + if (maxdist[j] < gd.fixdist[j]) maxdist[j] = gd.fixdist[j]; + if (gd.gs < gp.start) return 8; + if (gd.gs > gp.end) return 9; + } + for (j = 0; j < GeoIndexFIXEDPOINTS; j++) + if (maxdist[j] != gp.maxdist[j]) return 7; + usage[1] += gp.RorPoints; + return 0; + } else { + int pota = gp.LorLeaf; + int potb = gp.RorPoints; + PotRead(gix,pota,&gpa); + PotRead(gix,potb,&gpb); + int lev = gp.level; + int leva = gpa.level; + int levb = gpb.level; + if (leva >= lev) return 2; + if (levb >= lev) return 3; + i = (lev - leva) * (lev - levb); + if (i > 2) return 4; + if (gp.middle != gpa.end) return 5; + if (gp.middle != gpb.start) return 6; + if (gp.start != 
// Debug self-check of the whole index (only compiled when TRI_GEO_DEBUG is
// defined). Returns 0 when every check passes, otherwise a non-zero code:
//   - any non-zero code from RecursivePotValidate for the tree under pot 1
//   - 14: pots counted in the tree plus pots on the chain starting at pot 0
//         differ from gix->potct
//   - 15: pot 1 does not start at 0
//   - 16: pot 1 does not end at 0x1FFFFFFFFFFFFF
//   - 17: slots counted in the tree plus slots on the chain starting at
//         slot 0 differ from gix->slotct
int GeoIndex_INDEXVALID(GeoIdx* gi) {
  int usage[2];  // pots and slots
  int j, pot, slot;
  GeoIx* gix;
  GeoPot gp;
  gix = (GeoIx*)gi;
  usage[0] = 0;
  usage[1] = 0;
  // validate the tree; this also counts the pots and slots reachable from it
  j = RecursivePotValidate(gix, 1, usage);
  if (j != 0) return j;
  // follow the chain of pots linked through LorLeaf, starting at pot 0, and
  // count every pot on it (presumably the list of unused pots - TODO confirm)
  pot = 0;
  PotRead(gix,pot,&gp);
  pot = gp.LorLeaf;
  usage[0]++;
  while (pot != 0) {
    PotRead(gix,pot,&gp);
    pot = gp.LorLeaf;
    usage[0]++;
  }
  if (usage[0] != gix->potct) return 14;
  // pot 1 must span the full range from 0 to 0x1FFFFFFFFFFFFF
  PotRead(gix,1,&gp);
  if (gp.start != 0) return 15;
  if (gp.end != 0x1FFFFFFFFFFFFFll) return 16;
  // follow the chain of slots starting at slot 0; the next slot number is
  // read from the latitude field of the current slot
  // NOTE(review): this still reads gix->gxc directly instead of going
  // through SlotRead - the XQXQ markers below flag it as unfinished
  slot = 0;
  usage[1]++;
  slot = (int)((gix->gxc[slot]).latitude); //XQXQ needs changing when testable
  while (slot != 0) {
    usage[1]++;
    slot = (int)((gix->gxc[slot]).latitude); //XQXQ so does this
  }
  if (usage[1] != gix->slotct) return 17;
  return 0;
}
License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author R. A. Parker +//////////////////////////////////////////////////////////////////////////////// + +/* GeoIdx.h - header file for GeoIdx algorithms */ +/* Version 2.2 25.11.2015 R. A. Parker */ + +#ifndef ARANGOD_ROCKSDB_GEO_INDEX_IMPL_H +#define ARANGOD_ROCKSDB_GEO_INDEX_IMPL_H 1 + +#include "Basics/Common.h" + +namespace arangodb { namespace rocks { + +/* first the things that a user might want to change */ + +/* a GeoString - a signed type of at least 64 bits */ +typedef long long GeoString; + +/* percentage growth of slot or slotslot tables */ +#define GeoIndexGROW 50 + +/* maximum number of points in a pot */ +/* *** note - must be even! */ +/* smaller takes more space but is a little faster */ +#define GeoIndexPOTSIZE 6 + +/* choses the set of fixed points */ +#define GeoIndexFIXEDSET 6 +/* 1 is just the N pole (doesn't really work) */ +/* 2 is N and S pole - slow but OK */ +/* 3 is equilateral triangle on 0/180 long */ +/* 4 is four corners of a tetrahedron */ +/* 5 is trigonal bipyramid */ +/* 6 is the corners of octahedron (default) */ +/* 8 is eight corners of a cube */ + +/* size of max-dist integer. */ +/* 2 is 16-bit - smaller but slow when lots of points */ +/* within a few hundred meters of target */ +/* 4 is 32-bit - larger and fast even when points are */ +/* only centimeters apart. 
Default */ +#define GEOFIXLEN 4 +#if GEOFIXLEN == 2 +typedef unsigned short GeoFix; +#endif +#if GEOFIXLEN == 4 +typedef unsigned int GeoFix; +#endif + +/* If this #define is there, then the INDEXDUMP and */ +/* INDEXVALID functions are also available. These */ +/* are not needed for normal production versions */ +/* the INDEXDUMP function also prints the data, */ +/* assumed to be a character string, if DEBUG is */ +/* set to 2. */ +#define TRI_GEO_DEBUG 1 + +typedef struct { + double latitude; + double longitude; + uint64_t data; +} GeoCoordinate; + +typedef struct { + size_t length; + GeoCoordinate* coordinates; + double* distances; +} GeoCoordinates; + +typedef void GeoIdx; /* to keep the structure private */ +typedef void GeoCursor; /* to keep the structure private */ + +size_t GeoIndex_MemoryUsage(void*); + +GeoIdx* GeoIndex_new(void); +void GeoIndex_free(GeoIdx* gi); +double GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2); +int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c); +int GeoIndex_remove(GeoIdx* gi, GeoCoordinate* c); +int GeoIndex_hint(GeoIdx* gi, int hint); +GeoCoordinates* GeoIndex_PointsWithinRadius(GeoIdx* gi, GeoCoordinate* c, + double d); +GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c, + int count); +GeoCursor* GeoIndex_NewCursor(GeoIdx* gi, GeoCoordinate* c); +GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count, bool returnDistances = true, double maxDistance = -1.0); +void GeoIndex_CursorFree(GeoCursor* gc); +void GeoIndex_CoordinatesFree(GeoCoordinates* clist); +#ifdef TRI_GEO_DEBUG +void GeoIndex_INDEXDUMP(GeoIdx* gi, FILE* f); +int GeoIndex_INDEXVALID(GeoIdx* gi); +#endif +}} +#endif +/* end of GeoIdx.h */ From b42c6c0e450d5c07a5bb6159ed2f20aa68143097 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 5 May 2017 13:38:57 +0200 Subject: [PATCH 02/18] add rocksdbgeoindex --- arangod/RocksDBEngine/CMakeLists.txt | 1 + arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 554 ++++++++++++++++++ 
arangod/RocksDBEngine/RocksDBGeoIndex.h | 202 +++++++ arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 2 +- arangod/RocksDBEngine/RocksDBGeoIndexImpl.h | 2 +- 5 files changed, 759 insertions(+), 2 deletions(-) create mode 100644 arangod/RocksDBEngine/RocksDBGeoIndex.cpp create mode 100644 arangod/RocksDBEngine/RocksDBGeoIndex.h diff --git a/arangod/RocksDBEngine/CMakeLists.txt b/arangod/RocksDBEngine/CMakeLists.txt index 0454c22abe..76d2565766 100644 --- a/arangod/RocksDBEngine/CMakeLists.txt +++ b/arangod/RocksDBEngine/CMakeLists.txt @@ -11,6 +11,7 @@ set(ROCKSDB_SOURCES RocksDBEngine/RocksDBEdgeIndex.cpp RocksDBEngine/RocksDBEngine.cpp RocksDBEngine/RocksDBExportCursor.cpp + RocksDBEngine/RocksDBGeoIndex.cpp RocksDBEngine/RocksDBGeoIndexImpl.cpp RocksDBEngine/RocksDBHashIndex.cpp RocksDBEngine/RocksDBIndex.cpp diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp new file mode 100644 index 0000000000..2e362abb84 --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -0,0 +1,554 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Dr. 
Frank Celler +//////////////////////////////////////////////////////////////////////////////// + +#include "RocksDBGeoIndex.h" + +#include "Aql/Ast.h" +#include "Aql/AstNode.h" +#include "Aql/SortCondition.h" +#include "Basics/StringRef.h" +#include "Basics/VelocyPackHelper.h" +#include "Logger/Logger.h" +#include "StorageEngine/TransactionState.h" +#include "RocksDBEngine/RocksDBToken.h" + +using namespace arangodb; + +RocksDBGeoIndexIterator::RocksDBGeoIndexIterator( + LogicalCollection* collection, transaction::Methods* trx, + ManagedDocumentResult* mmdr, RocksDBGeoIndex const* index, + arangodb::aql::AstNode const* cond, arangodb::aql::Variable const* var) + : IndexIterator(collection, trx, mmdr, index), + _index(index), + _cursor(nullptr), + _coor(), + _condition(cond), + _lat(0.0), + _lon(0.0), + _near(true), + _inclusive(false), + _done(false), + _radius(0.0) { + evaluateCondition(); +} + +void RocksDBGeoIndexIterator::evaluateCondition() { + if (_condition) { + auto numMembers = _condition->numMembers(); + + TRI_ASSERT(numMembers == 1); // should only be an FCALL + auto fcall = _condition->getMember(0); + TRI_ASSERT(fcall->type == arangodb::aql::NODE_TYPE_FCALL); + TRI_ASSERT(fcall->numMembers() == 1); + auto args = fcall->getMember(0); + + numMembers = args->numMembers(); + TRI_ASSERT(numMembers >= 3); + + _lat = args->getMember(1)->getDoubleValue(); + _lon = args->getMember(2)->getDoubleValue(); + + if (numMembers == 3) { + // NEAR + _near = true; + } else { + // WITHIN + TRI_ASSERT(numMembers == 5); + _near = false; + _radius = args->getMember(3)->getDoubleValue(); + _inclusive = args->getMember(4)->getBoolValue(); + } + } else { + LOG_TOPIC(ERR, arangodb::Logger::FIXME) + << "No condition passed to RocksDBGeoIndexIterator constructor"; + } +} + +size_t RocksDBGeoIndexIterator::findLastIndex(GeoCoordinates* coords) const { + TRI_ASSERT(coords != nullptr); + + // determine which documents to return... 
+ size_t numDocs = coords->length; + + if (!_near) { + // WITHIN + // only return those documents that are within the specified radius + TRI_ASSERT(numDocs > 0); + + // linear scan for the first document outside the specified radius + // scan backwards because documents with higher distances are more + // interesting + int iterations = 0; + while ((_inclusive && coords->distances[numDocs - 1] > _radius) || + (!_inclusive && coords->distances[numDocs - 1] >= _radius)) { + // document is outside the specified radius! + --numDocs; + + if (numDocs == 0) { + break; + } + + if (++iterations == 8 && numDocs >= 10) { + // switch to a binary search for documents inside/outside the specified + // radius + size_t l = 0; + size_t r = numDocs - 1; + + while (true) { + // determine midpoint + size_t m = l + ((r - l) / 2); + if ((_inclusive && coords->distances[m] > _radius) || + (!_inclusive && coords->distances[m] >= _radius)) { + // document is outside the specified radius! + if (m == 0) { + numDocs = 0; + break; + } + r = m - 1; + } else { + // still inside the radius + numDocs = m + 1; + l = m + 1; + } + + if (r < l) { + break; + } + } + break; + } + } + } + return numDocs; +} + +bool RocksDBGeoIndexIterator::next(TokenCallback const& cb, size_t limit) { + if (!_cursor) { + createCursor(_lat, _lon); + + if (!_cursor) { + // actually validate that we got a valid cursor + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + } + + TRI_ASSERT(_cursor != nullptr); + + if (_done) { + // we already know that no further results will be returned by the index + return false; + } + + TRI_ASSERT(limit > 0); + if (limit > 0) { + // only need to calculate distances for WITHIN queries, but not for NEAR + // queries + bool withDistances; + double maxDistance; + if (_near) { + withDistances = false; + maxDistance = -1.0; + } else { + withDistances = true; + maxDistance = _radius; + } + auto coords = std::unique_ptr(::GeoIndex_ReadCursor( + _cursor, static_cast(limit), withDistances, 
maxDistance)); + + size_t const length = coords ? coords->length : 0; + + if (length == 0) { + // Nothing Found + // TODO validate + _done = true; + return false; + } + + size_t numDocs = findLastIndex(coords.get()); + if (numDocs == 0) { + // we are done + _done = true; + return false; + } + + for (size_t i = 0; i < numDocs; ++i) { + cb(::RocksDBGeoIndex::toDocumentIdentifierToken( + coords->coordinates[i].data)); + } + // If we return less then limit many docs we are done. + _done = numDocs < limit; + } + return true; +} + +void RocksDBGeoIndexIterator::replaceCursor(::GeoCursor* c) { + if (_cursor) { + ::GeoIndex_CursorFree(_cursor); + } + _cursor = c; + _done = false; +} + +void RocksDBGeoIndexIterator::createCursor(double lat, double lon) { + _coor = GeoCoordinate{lat, lon, 0}; + replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &_coor)); +} + +uint64_t RocksDBGeoIndex::fromDocumentIdentifierToken( + DocumentIdentifierToken const& token) { + auto tkn = static_cast(&token); + return static_cast(tkn->revisionId()); +} + +DocumentIdentifierToken RocksDBGeoIndex::toDocumentIdentifierToken( + uint64_t internal) { + return RocksDBToken{internal}; +} + +/// @brief creates an IndexIterator for the given Condition +IndexIterator* RocksDBGeoIndex::iteratorForCondition( + transaction::Methods* trx, ManagedDocumentResult* mmdr, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, bool) { + TRI_IF_FAILURE("GeoIndex::noIterator") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + return new RocksDBGeoIndexIterator(_collection, trx, mmdr, this, node, + reference); +} + +void RocksDBGeoIndexIterator::reset() { replaceCursor(nullptr); } + +RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, + arangodb::LogicalCollection* collection, + VPackSlice const& info) + : RocksDBIndex(iid, collection, info), + _variant(INDEX_GEO_INDIVIDUAL_LAT_LON), + _geoJson(false), + _geoIndex(nullptr) { + TRI_ASSERT(iid != 0); + _unique = false; + _sparse = true; + + 
if (_fields.size() == 1) { + _geoJson = arangodb::basics::VelocyPackHelper::getBooleanValue( + info, "geoJson", false); + auto& loc = _fields[0]; + _location.reserve(loc.size()); + for (auto const& it : loc) { + _location.emplace_back(it.name); + } + _variant = + _geoJson ? INDEX_GEO_COMBINED_LAT_LON : INDEX_GEO_COMBINED_LON_LAT; + } else if (_fields.size() == 2) { + _variant = INDEX_GEO_INDIVIDUAL_LAT_LON; + auto& lat = _fields[0]; + _latitude.reserve(lat.size()); + for (auto const& it : lat) { + _latitude.emplace_back(it.name); + } + auto& lon = _fields[1]; + _longitude.reserve(lon.size()); + for (auto const& it : lon) { + _longitude.emplace_back(it.name); + } + } else { + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_BAD_PARAMETER, + "RocksDBGeoIndex can only be created with one or two fields."); + } + + _geoIndex = GeoIndex_new(); + + if (_geoIndex == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } +} + +RocksDBGeoIndex::~RocksDBGeoIndex() { + if (_geoIndex != nullptr) { + GeoIndex_free(_geoIndex); + } +} + +size_t RocksDBGeoIndex::memory() const { + return GeoIndex_MemoryUsage(_geoIndex); +} + +/// @brief return a JSON representation of the index +void RocksDBGeoIndex::toVelocyPack(VPackBuilder& builder, bool withFigures, + bool forPersistence) const { + builder.openObject(); + // Basic index + Index::toVelocyPack(builder, withFigures, forPersistence); + + if (_variant == INDEX_GEO_COMBINED_LAT_LON || + _variant == INDEX_GEO_COMBINED_LON_LAT) { + builder.add("geoJson", VPackValue(_geoJson)); + } + + // geo indexes are always non-unique + // geo indexes are always sparse. 
+ // "ignoreNull" has the same meaning as "sparse" and is only returned for + // backwards compatibility + // the "constraint" attribute has no meaning since ArangoDB 2.5 and is only + // returned for backwards compatibility + builder.add("constraint", VPackValue(false)); + builder.add("unique", VPackValue(false)); + builder.add("ignoreNull", VPackValue(true)); + builder.add("sparse", VPackValue(true)); + builder.close(); +} + +/// @brief Test if this index matches the definition +bool RocksDBGeoIndex::matchesDefinition(VPackSlice const& info) const { + TRI_ASSERT(info.isObject()); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + VPackSlice typeSlice = info.get("type"); + TRI_ASSERT(typeSlice.isString()); + StringRef typeStr(typeSlice); + TRI_ASSERT(typeStr == oldtypeName()); +#endif + auto value = info.get("id"); + if (!value.isNone()) { + // We already have an id. + if (!value.isString()) { + // Invalid ID + return false; + } + // Short circuit. If id is correct the index is identical. + StringRef idRef(value); + return idRef == std::to_string(_iid); + } + value = info.get("fields"); + if (!value.isArray()) { + return false; + } + + size_t const n = static_cast(value.length()); + if (n != _fields.size()) { + return false; + } + if (_unique != arangodb::basics::VelocyPackHelper::getBooleanValue( + info, "unique", false)) { + return false; + } + if (_sparse != arangodb::basics::VelocyPackHelper::getBooleanValue( + info, "sparse", true)) { + return false; + } + + if (n == 1) { + if (_geoJson != arangodb::basics::VelocyPackHelper::getBooleanValue( + info, "geoJson", false)) { + return false; + } + } + + // This check takes ordering of attributes into account. + std::vector translate; + for (size_t i = 0; i < n; ++i) { + translate.clear(); + VPackSlice f = value.at(i); + if (!f.isString()) { + // Invalid field definition! 
+ return false; + } + arangodb::StringRef in(f); + TRI_ParseAttributeString(in, translate, true); + if (!arangodb::basics::AttributeName::isIdentical(_fields[i], translate, + false)) { + return false; + } + } + return true; +} + +int RocksDBGeoIndex::insert(transaction::Methods*, TRI_voc_rid_t revisionId, + VPackSlice const& doc, bool isRollback) { + double latitude; + double longitude; + + if (_variant == INDEX_GEO_INDIVIDUAL_LAT_LON) { + VPackSlice lat = doc.get(_latitude); + if (!lat.isNumber()) { + // Invalid, no insert. Index is sparse + return TRI_ERROR_NO_ERROR; + } + + VPackSlice lon = doc.get(_longitude); + if (!lon.isNumber()) { + // Invalid, no insert. Index is sparse + return TRI_ERROR_NO_ERROR; + } + latitude = lat.getNumericValue(); + longitude = lon.getNumericValue(); + } else { + VPackSlice loc = doc.get(_location); + if (!loc.isArray() || loc.length() < 2) { + // Invalid, no insert. Index is sparse + return TRI_ERROR_NO_ERROR; + } + VPackSlice first = loc.at(0); + if (!first.isNumber()) { + // Invalid, no insert. Index is sparse + return TRI_ERROR_NO_ERROR; + } + VPackSlice second = loc.at(1); + if (!second.isNumber()) { + // Invalid, no insert. 
Index is sparse + return TRI_ERROR_NO_ERROR; + } + if (_geoJson) { + longitude = first.getNumericValue(); + latitude = second.getNumericValue(); + } else { + latitude = first.getNumericValue(); + longitude = second.getNumericValue(); + } + } + + // and insert into index + GeoCoordinate gc; + gc.latitude = latitude; + gc.longitude = longitude; + gc.data = static_cast(revisionId); + + int res = GeoIndex_insert(_geoIndex, &gc); + + if (res == -1) { + LOG_TOPIC(WARN, arangodb::Logger::FIXME) + << "found duplicate entry in geo-index, should not happen"; + return TRI_set_errno(TRI_ERROR_INTERNAL); + } else if (res == -2) { + return TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); + } else if (res == -3) { + LOG_TOPIC(DEBUG, arangodb::Logger::FIXME) + << "illegal geo-coordinates, ignoring entry"; + return TRI_ERROR_NO_ERROR; + } else if (res < 0) { + return TRI_set_errno(TRI_ERROR_INTERNAL); + } + + return TRI_ERROR_NO_ERROR; +} + +int RocksDBGeoIndex::remove(transaction::Methods*, TRI_voc_rid_t revisionId, + VPackSlice const& doc, bool isRollback) { + double latitude = 0.0; + double longitude = 0.0; + bool ok = true; + + if (_variant == INDEX_GEO_INDIVIDUAL_LAT_LON) { + VPackSlice lat = doc.get(_latitude); + VPackSlice lon = doc.get(_longitude); + if (!lat.isNumber()) { + ok = false; + } else { + latitude = lat.getNumericValue(); + } + if (!lon.isNumber()) { + ok = false; + } else { + longitude = lon.getNumericValue(); + } + } else { + VPackSlice loc = doc.get(_location); + if (!loc.isArray() || loc.length() < 2) { + ok = false; + } else { + VPackSlice first = loc.at(0); + if (!first.isNumber()) { + ok = false; + } + VPackSlice second = loc.at(1); + if (!second.isNumber()) { + ok = false; + } + if (ok) { + if (_geoJson) { + longitude = first.getNumericValue(); + latitude = second.getNumericValue(); + } else { + latitude = first.getNumericValue(); + longitude = second.getNumericValue(); + } + } + } + } + + if (!ok) { + return TRI_ERROR_NO_ERROR; + } + + GeoCoordinate gc; + 
gc.latitude = latitude; + gc.longitude = longitude; + gc.data = static_cast(revisionId); + + // ignore non-existing elements in geo-index + GeoIndex_remove(_geoIndex, &gc); + + return TRI_ERROR_NO_ERROR; +} + +int RocksDBGeoIndex::unload() { + // create a new, empty index + auto empty = GeoIndex_new(); + + if (empty == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + // free the old one + if (_geoIndex != nullptr) { + GeoIndex_free(_geoIndex); + } + + // and assign it + _geoIndex = empty; + + return TRI_ERROR_NO_ERROR; +} + +/// @brief looks up all points within a given radius +GeoCoordinates* RocksDBGeoIndex::withinQuery(transaction::Methods* trx, + double lat, double lon, + double radius) const { + GeoCoordinate gc; + gc.latitude = lat; + gc.longitude = lon; + + return GeoIndex_PointsWithinRadius(_geoIndex, &gc, radius); +} + +/// @brief looks up the nearest points +GeoCoordinates* RocksDBGeoIndex::nearQuery(transaction::Methods* trx, + double lat, double lon, + size_t count) const { + GeoCoordinate gc; + gc.latitude = lat; + gc.longitude = lon; + + return GeoIndex_NearestCountPoints(_geoIndex, &gc, static_cast(count)); +} diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.h b/arangod/RocksDBEngine/RocksDBGeoIndex.h new file mode 100644 index 0000000000..c27bd17fbc --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.h @@ -0,0 +1,202 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Dr. Frank Celler +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_MMFILES_GEO_INDEX_H +#define ARANGOD_MMFILES_GEO_INDEX_H 1 + +#include "Basics/Common.h" +#include "Indexes/IndexIterator.h" +#include "RocksDBEngine/RocksDBGeoIndexImpl.h" +#include "RocksDBEngine/RocksDBIndex.h" +#include "VocBase/voc-types.h" +#include "VocBase/vocbase.h" + +#include +#include + +using namespace ::arangodb::rocksdbengine; + +// GeoCoordinate.data must be capable of storing revision ids +static_assert(sizeof(GeoCoordinate::data) >= sizeof(TRI_voc_rid_t), + "invalid size of GeoCoordinate.data"); + +namespace arangodb { +class RocksDBGeoIndex; + +class RocksDBGeoIndexIterator final : public IndexIterator { + public: + /// @brief Construct an RocksDBGeoIndexIterator based on Ast Conditions + RocksDBGeoIndexIterator(LogicalCollection* collection, + transaction::Methods* trx, + ManagedDocumentResult* mmdr, + RocksDBGeoIndex const* index, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*); + + ~RocksDBGeoIndexIterator() { replaceCursor(nullptr); } + + char const* typeName() const override { return "geo-index-iterator"; } + + bool next(TokenCallback const& cb, size_t limit) override; + + void reset() override; + + private: + size_t findLastIndex(GeoCoordinates* coords) const; + void replaceCursor(::GeoCursor* c); + void createCursor(double lat, double lon); + void evaluateCondition(); // called in constructor + + 
RocksDBGeoIndex const* _index; + ::GeoCursor* _cursor; + ::GeoCoordinate _coor; + arangodb::aql::AstNode const* _condition; + double _lat; + double _lon; + bool _near; + bool _inclusive; + bool _done; + double _radius; +}; + +class RocksDBGeoIndex final : public RocksDBIndex { + friend class RocksDBGeoIndexIterator; + + public: + RocksDBGeoIndex() = delete; + + RocksDBGeoIndex(TRI_idx_iid_t, LogicalCollection*, + arangodb::velocypack::Slice const&); + + ~RocksDBGeoIndex(); + + public: + /// @brief geo index variants + enum IndexVariant { + INDEX_GEO_NONE = 0, + INDEX_GEO_INDIVIDUAL_LAT_LON, + INDEX_GEO_COMBINED_LAT_LON, + INDEX_GEO_COMBINED_LON_LAT + }; + + public: + IndexType type() const override { + if (_variant == INDEX_GEO_COMBINED_LAT_LON || + _variant == INDEX_GEO_COMBINED_LON_LAT) { + return TRI_IDX_TYPE_GEO1_INDEX; + } + + return TRI_IDX_TYPE_GEO2_INDEX; + } + + char const* typeName() const override { + if (_variant == INDEX_GEO_COMBINED_LAT_LON || + _variant == INDEX_GEO_COMBINED_LON_LAT) { + return "geo1"; + } + return "geo2"; + } + + IndexIterator* iteratorForCondition(transaction::Methods*, + ManagedDocumentResult*, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*, + bool) override; + + bool allowExpansion() const override { return false; } + + bool canBeDropped() const override { return true; } + + bool isSorted() const override { return true; } + + bool hasSelectivityEstimate() const override { return false; } + + size_t memory() const override; + + void toVelocyPack(VPackBuilder&, bool, bool) const override; + // Uses default toVelocyPackFigures + + bool matchesDefinition(VPackSlice const& info) const override; + + int insert(transaction::Methods*, TRI_voc_rid_t, + arangodb::velocypack::Slice const&, bool isRollback) override; + + int remove(transaction::Methods*, TRI_voc_rid_t, + arangodb::velocypack::Slice const&, bool isRollback) override; + + int unload() override; + + /// @brief looks up all points within a given radius + 
GeoCoordinates* withinQuery(transaction::Methods*, double, double, + double) const; + + /// @brief looks up the nearest points + GeoCoordinates* nearQuery(transaction::Methods*, double, double, + size_t) const; + + bool isSame(std::vector const& location, bool geoJson) const { + return (!_location.empty() && _location == location && _geoJson == geoJson); + } + + bool isSame(std::vector const& latitude, + std::vector const& longitude) const { + return (!_latitude.empty() && !_longitude.empty() && + _latitude == latitude && _longitude == longitude); + } + + static uint64_t fromDocumentIdentifierToken( + DocumentIdentifierToken const& token); + + static DocumentIdentifierToken toDocumentIdentifierToken(uint64_t internal); + + private: + /// @brief attribute paths + std::vector _location; + std::vector _latitude; + std::vector _longitude; + + /// @brief the geo index variant (geo1 or geo2) + IndexVariant _variant; + + /// @brief whether the index is a geoJson index (latitude / longitude + /// reversed) + bool _geoJson; + + /// @brief the actual geo index + GeoIdx* _geoIndex; +}; +} + +namespace std { +template <> +class default_delete { + public: + void operator()(GeoCoordinates* result) { + if (result != nullptr) { + GeoIndex_CoordinatesFree(result); + } + } +}; +} + +#endif diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index 17c458330a..fce7e1f583 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -57,7 +57,7 @@ #define GeoIndexFIXEDPOINTS 1 #endif -namespace arangodb { namespace rocks { +namespace arangodb { namespace rocksdbengine { /* =================================================== */ /* GeoIndexFixed structure. 
*/ diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h index 5db5a98a08..4e236c4b31 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h @@ -29,7 +29,7 @@ #include "Basics/Common.h" -namespace arangodb { namespace rocks { +namespace arangodb { namespace rocksdbengine { /* first the things that a user might want to change */ From 577e702e20f6ff0205c7be6daec2e49a2f16251d Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 5 May 2017 18:27:08 +0200 Subject: [PATCH 03/18] geo index - (de)serialize RocksDBValues --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 12 +- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 8 +- arangod/RocksDBEngine/RocksDBGeoIndex.h | 4 +- arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 39 ++++-- arangod/RocksDBEngine/RocksDBGeoIndexImpl.h | 11 +- .../RocksDBEngine/RocksDBGeoIndexImplHelper.h | 119 ++++++++++++++++++ 6 files changed, 163 insertions(+), 30 deletions(-) create mode 100644 arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK index a68a755185..73077bd257 100644 --- a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -1,11 +1,11 @@ -1. Create new GeoIndex. -2. Attach to existing GeoIndex +1. Create new GeoIndex. - partial +2. Attach to existing GeoIndex - parital 3. Drop GeoIndex 4. Disconnect from GeoIndex -5. PotRead -6. PotWrite -7. SlotRead -8. SlotWrite +5. PotRead - partial +6. PotWrite - partial +7. SlotRead - partial +8. SlotWrite - partial 9. CreatePot (take the next number) 10. CreateSlot (take the next number) 11. 
Also needs start and stop transaction routine and to use them Das sind die diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 2e362abb84..8ea354319f 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// -/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// -/// @author Dr. Frank Celler +/// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// #include "RocksDBGeoIndex.h" @@ -278,7 +278,7 @@ RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, "RocksDBGeoIndex can only be created with one or two fields."); } - _geoIndex = GeoIndex_new(); + _geoIndex = GeoIndex_new(_objectId); if (_geoIndex == nullptr) { THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); @@ -514,7 +514,7 @@ int RocksDBGeoIndex::remove(transaction::Methods*, TRI_voc_rid_t revisionId, int RocksDBGeoIndex::unload() { // create a new, empty index - auto empty = GeoIndex_new(); + auto empty = GeoIndex_new(_objectId); if (empty == nullptr) { THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.h b/arangod/RocksDBEngine/RocksDBGeoIndex.h index c27bd17fbc..2c62edb25c 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.h @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// -/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, 
Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +18,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// -/// @author Dr. Frank Celler +/// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// #ifndef ARANGOD_MMFILES_GEO_INDEX_H diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index fce7e1f583..089daf5b0e 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -24,10 +24,11 @@ /* GeoIndex.c - GeoIndex algorithms */ /* Version 2.1 8.1.2012 R. A. Parker */ #define _USE_MATH_DEFINES -#include +#include #include +#include -#include "RocksDBGeoIndexImpl.h" +#include /* Radius of the earth used for distances */ #define EARTHRADIAN 6371000.0 @@ -125,13 +126,14 @@ typedef struct { /* get smaller when the majority of points are deleted */ /* =================================================== */ typedef struct { + uint64_t objectId; /* Rocksdb Index objectId */ GeoIndexFixed fixed; /* fixed point data */ int potct; /* pots allocated */ int slotct; /* slots allocated */ - GeoPot* ypots; /* the pots themselves */ - GeoCoordinate* gxc; /* the slots themselves */ + GeoPot* ypots; /* the pots themselves */ + GeoCoordinate* gxc; /* the slots themselves */ size_t _memoryUsed; /* the amount of memory currently used */ -} GeoIx; +} GeoIx; /* =================================================== */ /* GeoDetailedPoint structure */ /* The routine GeoMkDetail is given a point - really */ @@ -251,6 +253,12 @@ typedef struct { int pathlength; int path[50]; } GeoPath; +}} + +// must be included here after struct definition +#include + +namespace arangodb { namespace rocksdbengine { /* =================================================== */ /* GeoIndex_Distance routine */ @@ -359,7 +367,7 @@ int GeoIndexNewPot(GeoIx* gix) { /* GeoString values of real 
(latitude, longitude) */ /* points */ /* =================================================== */ -GeoIdx* GeoIndex_new(void) { +GeoIdx* GeoIndex_new(uint64_t objectId) { GeoIx* gix; int i, j; @@ -370,6 +378,8 @@ GeoIdx* GeoIndex_new(void) { return (GeoIdx*)gix; } + gix->objectId = objectId; + /* try to allocate all the things we need */ gix->ypots = static_cast( TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, GEOPOTSTART * sizeof(GeoPot), false)); @@ -732,20 +742,22 @@ void GeoSetDistance(GeoDetailedPoint* gd, double snmd) { } /* CRUD interface */ -int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc) +int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/) { + //gc GeoCoordinate, element in point array of real geo index memcpy(gc,gix->gxc+slot,sizeof(GeoCoordinate)); return 0; } -int PotRead(GeoIx * gix, int pot, GeoPot * gp) -{ - memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); - return 0; -} void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc) { memcpy(gix->gxc+slot,gc,sizeof(GeoCoordinate)); } + +int PotRead(GeoIx * gix, int pot, GeoPot * gp) +{ + memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); + return 0; +} void PotWrite(GeoIx * gix,int pot, GeoPot * gp) { memcpy(gix->ypots+pot,gp,sizeof(GeoPot)); @@ -2251,7 +2263,8 @@ void RecursivePotDump(GeoIx* gix, FILE* f, int pot) { PotRead(gix,pot,&gp); fprintf(f, "GP. 
pot %d level %d Kids %d %d\n", pot, gp.level, gp.LorLeaf, gp.RorPoints); - fprintf(f, "strings %llx %llx %llx\n", gp.start, gp.middle, gp.end); + fprintf(f, "strings %llx %llx %llx\n", gp.start, gp.middle, gp.end); // fixme - usage of printf is broken + // fast_uint64_t is not necessarily a long long fprintf(f, "maxdists "); for (i = 0; i < GeoIndexFIXEDPOINTS; i++) fprintf(f, " %x", gp.maxdist[i]); fprintf(f, "\n"); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h index 4e236c4b31..4365202820 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h @@ -28,13 +28,14 @@ #define ARANGOD_ROCKSDB_GEO_INDEX_IMPL_H 1 #include "Basics/Common.h" +#include namespace arangodb { namespace rocksdbengine { /* first the things that a user might want to change */ /* a GeoString - a signed type of at least 64 bits */ -typedef long long GeoString; +typedef std::uint_fast64_t GeoString; /* percentage growth of slot or slotslot tables */ #define GeoIndexGROW 50 @@ -61,10 +62,10 @@ typedef long long GeoString; /* only centimeters apart. Default */ #define GEOFIXLEN 4 #if GEOFIXLEN == 2 -typedef unsigned short GeoFix; +typedef std::uint16_t GeoFix; #endif #if GEOFIXLEN == 4 -typedef unsigned int GeoFix; +typedef std::uint32_t GeoFix; #endif /* If this #define is there, then the INDEXDUMP and */ @@ -73,7 +74,7 @@ typedef unsigned int GeoFix; /* the INDEXDUMP function also prints the data, */ /* assumed to be a character string, if DEBUG is */ /* set to 2. 
*/ -#define TRI_GEO_DEBUG 1 +//#define TRI_GEO_DEBUG 1 typedef struct { double latitude; @@ -92,7 +93,7 @@ typedef void GeoCursor; /* to keep the structure private */ size_t GeoIndex_MemoryUsage(void*); -GeoIdx* GeoIndex_new(void); +GeoIdx* GeoIndex_new(uint64_t objectId); void GeoIndex_free(GeoIdx* gi); double GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2); int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h new file mode 100644 index 0000000000..64c7cb46a7 --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h @@ -0,0 +1,119 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Christoph Uhde +//////////////////////////////////////////////////////////////////////////////// + + +// MUST BE ONLY INCLUDED IN RocksDBGeoIndexImpl.cpp after struct definitions! +// IT CAN NOT BE USED IN OTHER +// This file has only been added to keep Richards code clean. So it is easier +// for him to spot relevant changes. 
+ +#ifndef ARANGOD_ROCKSDB_GEO_INDEX_IMPL_HELPER_H +#define ARANGOD_ROCKSDB_GEO_INDEX_IMPL_HELPER_H 1 + +#include + +#include +#include + +#include +#include +#include + +namespace arangodb { namespace rocksdbengine { + +VPackBuilder CoordToVpack(GeoCoordinate* coord){ + VPackBuilder rv{}; + rv.openArray(); + rv.add(VPackValue(coord->latitude)); //double + rv.add(VPackValue(coord->longitude)); //double + rv.add(VPackValue(coord->data)); //uint64_t + rv.close(); + return rv; +} + +GeoCoordinate VpackToCoord(VPackSlice const& slice){ + TRI_ASSERT(slice.isArray() && slice.length() == 3); + return GeoCoordinate{slice.at(0).getDouble() + ,slice.at(1).getDouble() + ,slice.at(2).getUInt() + }; +} + +VPackBuilder PotToVpack(GeoPot* pot){ + VPackBuilder rv{}; + rv.openArray(); // open + rv.add(VPackValue(pot->LorLeaf)); // int + rv.add(VPackValue(pot->RorPoints)); // int + rv.add(VPackValue(pot->middle)); // GeoString + { + rv.openArray(); // array GeoFix //uint 16/32 + for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){ + rv.add(VPackValue(pot->maxdist[i])); //unit 16/32 + } + rv.close(); // close array + } + rv.add(VPackValue(pot->start)); // GeoString + rv.add(VPackValue(pot->end)); // GeoString + rv.add(VPackValue(pot->level)); // int + { + rv.openArray(); // arrray of int + for(std::size_t i = 0; i < GeoIndexPOTSIZE; i++){ + rv.add(VPackValue(pot->points[i])); // int + } + rv.close(); // close array + } + rv.close(); // close + return rv; +} + +GeoPot VpackToPot(VPackSlice const& slice){ + GeoPot rv{}; + TRI_ASSERT(slice.isArray()); + rv.LorLeaf = slice.at(0).getInt(); // int + rv.RorPoints = slice.at(1).getInt(); // int + rv.middle = slice.at(2).getUInt(); // GeoString + { + auto maxdistSlice = slice.at(3); + TRI_ASSERT(maxdistSlice.isArray()); + TRI_ASSERT(maxdistSlice.length() == GeoIndexFIXEDPOINTS); + for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){ + rv.maxdist[i] = maxdistSlice.at(i).getUInt(); //unit 16/33 + } + } + rv.start = slice.at(4).getUInt(); // 
GeoString + rv.end = slice.at(5).getUInt(); // GeoString + rv.level = slice.at(6).getInt(); // int + { + auto pointsSlice = slice.at(7); + TRI_ASSERT(pointsSlice.isArray()); + TRI_ASSERT(pointsSlice.length() == GeoIndexFIXEDPOINTS); + for(std::size_t i = 0; i < GeoIndexPOTSIZE; i++){ + rv.points[i] = pointsSlice.at(i).getInt(); //int + } + } + return rv; +} + + +}} +#endif From 8a1c954b09d10a5bdd234d17f62008baa09d2f47 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 8 May 2017 11:10:49 +0200 Subject: [PATCH 04/18] add GeoIndexValue to RocksDBTypes and Keys --- .../RocksDBEngine/RocksDBGeoIndexImplHelper.h | 1 + arangod/RocksDBEngine/RocksDBKey.cpp | 26 ++++++++++++++++++- arangod/RocksDBEngine/RocksDBKey.h | 6 +++++ arangod/RocksDBEngine/RocksDBTypes.h | 3 ++- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h index 64c7cb46a7..225634009a 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h @@ -34,6 +34,7 @@ #include #include +#include #include #include diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index 9c83c38db2..94429236f1 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -77,6 +77,10 @@ RocksDBKey RocksDBKey::UniqueIndexValue(uint64_t indexId, return RocksDBKey(RocksDBEntryType::UniqueIndexValue, indexId, indexValues); } +RocksDBKey GeoIndexValue(uint64_t indexId, bool isSlot, uint64_t offset){ + RocksDBKey(RocksDBEntryType::GeoIndexValue, isSlot, offset); +} + RocksDBKey RocksDBKey::View(TRI_voc_tick_t databaseId, TRI_voc_cid_t viewId) { return RocksDBKey(RocksDBEntryType::View, databaseId, viewId); } @@ -294,6 +298,24 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, } } +RocksDBKey::RocksDBKey(RocksDBEntryType type, bool isSlot, uint64_t offset) + : _type(type), 
_buffer() { + switch (_type) { + case RocksDBEntryType::GeoIndexValue: { + + size_t length = sizeof(char) + sizeof(isSlot) + sizeof(offset); + _buffer.reserve(length); + _buffer.push_back(static_cast(_type)); + _buffer.append(isSlot, sizeof(isSlot)); + uint64ToPersistent(_buffer, offset); + break; + } + + default: + THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER); + } +} + RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, std::string const& second, std::string const& third) : _type(type), _buffer() { @@ -365,7 +387,9 @@ TRI_voc_cid_t RocksDBKey::objectId(char const* data, size_t size) { case RocksDBEntryType::PrimaryIndexValue: case RocksDBEntryType::EdgeIndexValue: case RocksDBEntryType::IndexValue: - case RocksDBEntryType::UniqueIndexValue: { + case RocksDBEntryType::UniqueIndexValue: + case RocksDBEntryType::GeoIndexValue: + { TRI_ASSERT(size >= (sizeof(char) + (2 * sizeof(uint64_t)))); return uint64FromPersistent(data + sizeof(char)); } diff --git a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index 29efa5008e..e92222330f 100644 --- a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -103,6 +103,11 @@ class RocksDBKey { static RocksDBKey UniqueIndexValue(uint64_t indexId, VPackSlice const& indexValues); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Create a fully-specified key for a geoIndexValue + ////////////////////////////////////////////////////////////////////////////// + static RocksDBKey GeoIndexValue(uint64_t indexId, bool isSlot, uint64_t offset); + ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a view ////////////////////////////////////////////////////////////////////////////// @@ -226,6 +231,7 @@ class RocksDBKey { arangodb::StringRef const& second); RocksDBKey(RocksDBEntryType type, uint64_t first, std::string const& second, std::string const& third); + 
RocksDBKey(RocksDBEntryType type, bool, uint64_t); private: static RocksDBEntryType type(char const* data, size_t size); diff --git a/arangod/RocksDBEngine/RocksDBTypes.h b/arangod/RocksDBEngine/RocksDBTypes.h index 85edc8c0f1..18f3e3a9bf 100644 --- a/arangod/RocksDBEngine/RocksDBTypes.h +++ b/arangod/RocksDBEngine/RocksDBTypes.h @@ -42,7 +42,8 @@ enum class RocksDBEntryType : char { UniqueIndexValue = '7', View = '8', SettingsValue = '9', - ReplicationApplierConfig = ':' + ReplicationApplierConfig = ':', + GeoIndexValue = ';' }; enum class RocksDBLogType : char { From 13f2cd4cb9c36b90e428265cc648566ecb2f64ba Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 8 May 2017 12:34:53 +0200 Subject: [PATCH 05/18] change rocksdbkey for geoindex --- arangod/RocksDBEngine/RocksDBKey.cpp | 7 ++++--- arangod/RocksDBEngine/RocksDBKey.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index e3d5078afc..3e626171e0 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -306,15 +306,16 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, } } -RocksDBKey::RocksDBKey(RocksDBEntryType type, bool isSlot, uint64_t offset) +RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint64_t offset, bool isSlot) : _type(type), _buffer() { switch (_type) { case RocksDBEntryType::GeoIndexValue: { - size_t length = sizeof(char) + sizeof(isSlot) + sizeof(offset); + size_t length = sizeof(char) + sizeof(objectId) + sizeof(offset); _buffer.reserve(length); _buffer.push_back(static_cast(_type)); - _buffer.append(isSlot, sizeof(isSlot)); + offset |= std::uint64_t{isSlot} << 63; //encode slot|pot in highest bit + uint64ToPersistent(_buffer, objectId); uint64ToPersistent(_buffer, offset); break; } diff --git a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index bfe6fe8e90..960922c2c0 100644 --- 
a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -240,7 +240,7 @@ class RocksDBKey { std::string const& third); RocksDBKey(RocksDBEntryType type, uint64_t first, arangodb::StringRef const& second, arangodb::StringRef const& third); - RocksDBKey(RocksDBEntryType type, bool, uint64_t); + RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint64_t index, bool isSlot); private: static RocksDBEntryType type(char const* data, size_t size); From 7142bf2cfc66980513690455166ab2da3df8272b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 13:41:00 +0200 Subject: [PATCH 06/18] Intermediate commit --- arangod/RocksDBEngine/RocksDBKey.cpp | 37 ++++++++++++++++------ arangod/RocksDBEngine/RocksDBKey.h | 27 ++++++++++------ arangod/RocksDBEngine/RocksDBKeyBounds.cpp | 4 +++ arangod/RocksDBEngine/RocksDBKeyBounds.h | 5 +++ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index 3e626171e0..2d9b30c336 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -77,6 +77,12 @@ RocksDBKey RocksDBKey::UniqueIndexValue(uint64_t indexId, return RocksDBKey(RocksDBEntryType::UniqueIndexValue, indexId, indexValues); } +RocksDBKey RocksDBKey::FulltextIndexValue(uint64_t indexId, + arangodb::StringRef const& word, + arangodb::StringRef const& primaryKey) { + return RocksDBKey(RocksDBEntryType::FulltextIndexValue, indexId, word, primaryKey); +} + RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, bool isSlot, uint64_t offset){ return RocksDBKey(RocksDBEntryType::GeoIndexValue, isSlot, offset); } @@ -97,12 +103,6 @@ RocksDBKey RocksDBKey::ReplicationApplierConfig(TRI_voc_tick_t databaseId) { return RocksDBKey(RocksDBEntryType::ReplicationApplierConfig, databaseId); } -RocksDBKey RocksDBKey::FulltextIndexValue(uint64_t indexId, - arangodb::StringRef const& word, - arangodb::StringRef const& 
primaryKey) { - return RocksDBKey(RocksDBEntryType::FulltextIndexValue, indexId, word, primaryKey); -} - // ========================= Member methods =========================== RocksDBEntryType RocksDBKey::type(RocksDBKey const& key) { @@ -181,6 +181,22 @@ VPackSlice RocksDBKey::indexedVPack(rocksdb::Slice const& slice) { return indexedVPack(slice.data(), slice.size()); } +std::pair RocksDBKey::geoValues(rocksdb::Slice const& slice) { + TRI_ASSERT(size >= sizeof(char) + sizeof(uint64_t) * 2); + RocksDBEntryType type = static_cast(data[0]); + TRI_ASSERT(type == RocksDBEntryType::GeoIndexValue); + uint64_t val = uint64FromPersistent(data + sizeof(char) + sizeof(uint64_t)); + bool isSlot = (val >> 63) & 0x1; + return std::pair(isSlot, (val & ); + + size_t length = sizeof(char) + sizeof(objectId) + sizeof(offset); + _buffer.reserve(length); + _buffer.push_back(static_cast(_type)); + offset |= std::uint64_t{isSlot} << 63; //encode slot|pot in highest bit + uint64ToPersistent(_buffer, objectId); + uint64ToPersistent(_buffer, offset); +} + std::string const& RocksDBKey::string() const { return _buffer; } RocksDBKey::RocksDBKey(RocksDBEntryType type) : _type(type), _buffer() { @@ -306,7 +322,7 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, } } -RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint64_t offset, bool isSlot) +RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint32_t offset, bool isSlot) : _type(type), _buffer() { switch (_type) { case RocksDBEntryType::GeoIndexValue: { @@ -314,9 +330,10 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint64_t offset size_t length = sizeof(char) + sizeof(objectId) + sizeof(offset); _buffer.reserve(length); _buffer.push_back(static_cast(_type)); - offset |= std::uint64_t{isSlot} << 63; //encode slot|pot in highest bit uint64ToPersistent(_buffer, objectId); - uint64ToPersistent(_buffer, offset); + uint64_t norm = offset; + if (isSlot) norm |= 1 << 63;//encode 
slot|pot in highest bit + uint64ToPersistent(_buffer, norm); break; } @@ -350,6 +367,8 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, } } +// ====================== Private Methods ========================== + RocksDBEntryType RocksDBKey::type(char const* data, size_t size) { TRI_ASSERT(data != nullptr); TRI_ASSERT(size >= sizeof(char)); diff --git a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index 960922c2c0..4cfc8e1256 100644 --- a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -103,6 +103,13 @@ class RocksDBKey { static RocksDBKey UniqueIndexValue(uint64_t indexId, VPackSlice const& indexValues); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Create a fully-specified key for the fulltext index + ////////////////////////////////////////////////////////////////////////////// + static RocksDBKey FulltextIndexValue(uint64_t indexId, + arangodb::StringRef const& word, + arangodb::StringRef const& primaryKey); + ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a geoIndexValue ////////////////////////////////////////////////////////////////////////////// @@ -127,13 +134,6 @@ class RocksDBKey { /// @brief Create a fully-specified key for a replication applier config ////////////////////////////////////////////////////////////////////////////// static RocksDBKey ReplicationApplierConfig(TRI_voc_tick_t databaseId); - - ////////////////////////////////////////////////////////////////////////////// - /// @brief Create a fully-specified key for the fulltext index - ////////////////////////////////////////////////////////////////////////////// - static RocksDBKey FulltextIndexValue(uint64_t indexId, - arangodb::StringRef const& word, - arangodb::StringRef const& primaryKey); public: ////////////////////////////////////////////////////////////////////////////// @@ -172,8 +172,8 @@ 
class RocksDBKey { ////////////////////////////////////////////////////////////////////////////// /// @brief Extracts the objectId from a key /// - /// May be called only on the the following key types: Document. - /// Other types will throw. + /// May be called only on the the following key types: Document, + /// all kinds of index entries. Other types will throw. ////////////////////////////////////////////////////////////////////////////// static uint64_t objectId(RocksDBKey const&); static uint64_t objectId(rocksdb::Slice const&); @@ -221,6 +221,13 @@ class RocksDBKey { static VPackSlice indexedVPack(RocksDBKey const&); static VPackSlice indexedVPack(rocksdb::Slice const&); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Extracts the geo pot offset + /// + /// May be called only on GeoIndexValues + ////////////////////////////////////////////////////////////////////////////// + std::pair geoValues(rocksdb::Slice const&); + public: ////////////////////////////////////////////////////////////////////////////// /// @brief Returns a reference to the full, constructed key @@ -240,7 +247,7 @@ class RocksDBKey { std::string const& third); RocksDBKey(RocksDBEntryType type, uint64_t first, arangodb::StringRef const& second, arangodb::StringRef const& third); - RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint64_t index, bool isSlot); + RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint32_t index, bool isSlot); private: static RocksDBEntryType type(char const* data, size_t size); diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp index cb11739779..f12aa7a7df 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp @@ -73,6 +73,10 @@ RocksDBKeyBounds RocksDBKeyBounds::UniqueIndex(uint64_t indexId) { return RocksDBKeyBounds(RocksDBEntryType::UniqueIndexValue, indexId); } +RocksDBKeyBounds 
RocksDBKeyBounds::GeoIndex(uint64_t indexId) { + return RocksDBKeyBounds(RocksDBEntryType::GeoIndexValue, indexId); +} + RocksDBKeyBounds RocksDBKeyBounds::IndexRange(uint64_t indexId, VPackSlice const& left, VPackSlice const& right) { diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.h b/arangod/RocksDBEngine/RocksDBKeyBounds.h index dd752395bb..6731b16e48 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.h +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.h @@ -85,6 +85,11 @@ class RocksDBKeyBounds { ////////////////////////////////////////////////////////////////////////////// static RocksDBKeyBounds UniqueIndex(uint64_t indexId); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Bounds for all entries belonging to a specified unique index + ////////////////////////////////////////////////////////////////////////////// + static RocksDBKeyBounds GeoIndex(uint64_t indexId); + ////////////////////////////////////////////////////////////////////////////// /// @brief Bounds for all index-entries within a value range belonging to a /// specified non-unique index From a610d81e3d077e2bf47d8b9dd5125cadf72237d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 14:14:23 +0200 Subject: [PATCH 07/18] fixing compiler warnings --- arangod/RocksDBEngine/RocksDBEngine.cpp | 4 +-- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 14 +++++++++- arangod/RocksDBEngine/RocksDBGeoIndex.h | 5 +++- .../RocksDBEngine/RocksDBGeoIndexImplHelper.h | 12 ++++----- arangod/RocksDBEngine/RocksDBKey.cpp | 17 ++++-------- arangod/RocksDBEngine/RocksDBKey.h | 2 +- arangod/RocksDBEngine/RocksDBKeyBounds.cpp | 14 +++++----- arangod/RocksDBEngine/RocksDBKeyBounds.h | 12 ++++----- arangod/RocksDBEngine/RocksDBTypes.cpp | 27 ++++++++++++------- 9 files changed, 62 insertions(+), 45 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBEngine.cpp b/arangod/RocksDBEngine/RocksDBEngine.cpp index 4d0edc31bf..f2c3b589d5 100644 --- 
a/arangod/RocksDBEngine/RocksDBEngine.cpp +++ b/arangod/RocksDBEngine/RocksDBEngine.cpp @@ -211,9 +211,9 @@ void RocksDBEngine::start() { _options.compaction_readahead_size = static_cast(opts->_compactionReadaheadSize); - _options.env->SetBackgroundThreads(opts->_numThreadsHigh, + _options.env->SetBackgroundThreads((int)opts->_numThreadsHigh, rocksdb::Env::Priority::HIGH); - _options.env->SetBackgroundThreads(opts->_numThreadsLow, + _options.env->SetBackgroundThreads((int)opts->_numThreadsLow, rocksdb::Env::Priority::LOW); _options.create_if_missing = true; diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 8ea354319f..cc25903cfa 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -384,7 +384,7 @@ bool RocksDBGeoIndex::matchesDefinition(VPackSlice const& info) const { return true; } -int RocksDBGeoIndex::insert(transaction::Methods*, TRI_voc_rid_t revisionId, +int RocksDBGeoIndex::insert(transaction::Methods *, TRI_voc_rid_t revisionId, VPackSlice const& doc, bool isRollback) { double latitude; double longitude; @@ -453,6 +453,12 @@ int RocksDBGeoIndex::insert(transaction::Methods*, TRI_voc_rid_t revisionId, return TRI_ERROR_NO_ERROR; } +int RocksDBGeoIndex::insertRaw(rocksdb::WriteBatchWithIndex* batch, + TRI_voc_rid_t revisionId, + arangodb::velocypack::Slice const& doc) { + return this->insert(nullptr, revisionId, doc, false); +} + int RocksDBGeoIndex::remove(transaction::Methods*, TRI_voc_rid_t revisionId, VPackSlice const& doc, bool isRollback) { double latitude = 0.0; @@ -512,6 +518,12 @@ int RocksDBGeoIndex::remove(transaction::Methods*, TRI_voc_rid_t revisionId, return TRI_ERROR_NO_ERROR; } +int RocksDBGeoIndex::removeRaw(rocksdb::WriteBatch*, TRI_voc_rid_t revisionId, + arangodb::velocypack::Slice const& doc) { + return this->remove(nullptr, revisionId, doc, false); +} + + int RocksDBGeoIndex::unload() { // create a new, empty index auto empty = 
GeoIndex_new(_objectId); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.h b/arangod/RocksDBEngine/RocksDBGeoIndex.h index 2c62edb25c..1f03c142eb 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.h @@ -140,9 +140,12 @@ class RocksDBGeoIndex final : public RocksDBIndex { int insert(transaction::Methods*, TRI_voc_rid_t, arangodb::velocypack::Slice const&, bool isRollback) override; - + int insertRaw(rocksdb::WriteBatchWithIndex*, TRI_voc_rid_t, + arangodb::velocypack::Slice const&) override; int remove(transaction::Methods*, TRI_voc_rid_t, arangodb::velocypack::Slice const&, bool isRollback) override; + int removeRaw(rocksdb::WriteBatch*, TRI_voc_rid_t, + arangodb::velocypack::Slice const&) override; int unload() override; diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h index 225634009a..0cde7ac7cd 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h @@ -90,26 +90,26 @@ VPackBuilder PotToVpack(GeoPot* pot){ GeoPot VpackToPot(VPackSlice const& slice){ GeoPot rv{}; TRI_ASSERT(slice.isArray()); - rv.LorLeaf = slice.at(0).getInt(); // int - rv.RorPoints = slice.at(1).getInt(); // int + rv.LorLeaf = (int) slice.at(0).getInt(); // int + rv.RorPoints = (int) slice.at(1).getInt(); // int rv.middle = slice.at(2).getUInt(); // GeoString { auto maxdistSlice = slice.at(3); TRI_ASSERT(maxdistSlice.isArray()); TRI_ASSERT(maxdistSlice.length() == GeoIndexFIXEDPOINTS); for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){ - rv.maxdist[i] = maxdistSlice.at(i).getUInt(); //unit 16/33 + rv.maxdist[i] = (int) maxdistSlice.at(i).getUInt(); //unit 16/33 } } - rv.start = slice.at(4).getUInt(); // GeoString + rv.start = (int) slice.at(4).getUInt(); // GeoString rv.end = slice.at(5).getUInt(); // GeoString - rv.level = slice.at(6).getInt(); // int + rv.level = (int) slice.at(6).getInt(); // int { auto 
pointsSlice = slice.at(7); TRI_ASSERT(pointsSlice.isArray()); TRI_ASSERT(pointsSlice.length() == GeoIndexFIXEDPOINTS); for(std::size_t i = 0; i < GeoIndexPOTSIZE; i++){ - rv.points[i] = pointsSlice.at(i).getInt(); //int + rv.points[i] = (int) pointsSlice.at(i).getInt(); //int } } return rv; diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index 2d9b30c336..c5fd730d81 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -182,19 +182,12 @@ VPackSlice RocksDBKey::indexedVPack(rocksdb::Slice const& slice) { } std::pair RocksDBKey::geoValues(rocksdb::Slice const& slice) { - TRI_ASSERT(size >= sizeof(char) + sizeof(uint64_t) * 2); - RocksDBEntryType type = static_cast(data[0]); + TRI_ASSERT(slice.size() >= sizeof(char) + sizeof(uint64_t) * 2); + RocksDBEntryType type = static_cast(*slice.data()); TRI_ASSERT(type == RocksDBEntryType::GeoIndexValue); - uint64_t val = uint64FromPersistent(data + sizeof(char) + sizeof(uint64_t)); + uint64_t val = uint64FromPersistent(slice.data() + sizeof(char) + sizeof(uint64_t)); bool isSlot = (val >> 63) & 0x1; - return std::pair(isSlot, (val & ); - - size_t length = sizeof(char) + sizeof(objectId) + sizeof(offset); - _buffer.reserve(length); - _buffer.push_back(static_cast(_type)); - offset |= std::uint64_t{isSlot} << 63; //encode slot|pot in highest bit - uint64ToPersistent(_buffer, objectId); - uint64ToPersistent(_buffer, offset); + return std::pair(isSlot, (val & 0xffffffff)); } std::string const& RocksDBKey::string() const { return _buffer; } @@ -332,7 +325,7 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint32_t offset _buffer.push_back(static_cast(_type)); uint64ToPersistent(_buffer, objectId); uint64_t norm = offset; - if (isSlot) norm |= 1 << 63;//encode slot|pot in highest bit + if (isSlot) norm |= uint64_t(1) << 63;//encode slot|pot in highest bit uint64ToPersistent(_buffer, norm); break; } diff --git 
a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index 4cfc8e1256..4e2c4ba1f5 100644 --- a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -226,7 +226,7 @@ class RocksDBKey { /// /// May be called only on GeoIndexValues ////////////////////////////////////////////////////////////////////////////// - std::pair geoValues(rocksdb::Slice const&); + std::pair geoValues(rocksdb::Slice const& slice); public: ////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp index 4df1015413..8ea793f163 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp @@ -73,6 +73,10 @@ RocksDBKeyBounds RocksDBKeyBounds::UniqueIndex(uint64_t indexId) { return RocksDBKeyBounds(RocksDBEntryType::UniqueIndexValue, indexId); } +RocksDBKeyBounds RocksDBKeyBounds::FulltextIndex(uint64_t indexId) { + return RocksDBKeyBounds(RocksDBEntryType::FulltextIndexValue, indexId); +} + RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId) { return RocksDBKeyBounds(RocksDBEntryType::GeoIndexValue, indexId); } @@ -98,11 +102,6 @@ RocksDBKeyBounds RocksDBKeyBounds::CounterValues() { return RocksDBKeyBounds(RocksDBEntryType::CounterValue); } -RocksDBKeyBounds RocksDBKeyBounds::FulltextIndex(uint64_t indexId) { - return RocksDBKeyBounds(RocksDBEntryType::FulltextIndexValue, indexId); -} - - RocksDBKeyBounds RocksDBKeyBounds::FulltextIndexPrefix(uint64_t indexId, arangodb::StringRef const& word) { // I did not want to pass a bool to the constructor for this @@ -236,8 +235,9 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first) case RocksDBEntryType::PrimaryIndexValue: case RocksDBEntryType::EdgeIndexValue: - case RocksDBEntryType::View: - case RocksDBEntryType::FulltextIndexValue: { + case RocksDBEntryType::FulltextIndexValue: + case 
RocksDBEntryType::GeoIndexValue: + case RocksDBEntryType::View: { size_t length = sizeof(char) + sizeof(uint64_t); _startBuffer.reserve(length); _startBuffer.push_back(static_cast(_type)); diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.h b/arangod/RocksDBEngine/RocksDBKeyBounds.h index 9badb2a718..f402f0acbf 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.h +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.h @@ -85,6 +85,11 @@ class RocksDBKeyBounds { ////////////////////////////////////////////////////////////////////////////// static RocksDBKeyBounds UniqueIndex(uint64_t indexId); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Bounds for all entries of a fulltext index + ////////////////////////////////////////////////////////////////////////////// + static RocksDBKeyBounds FulltextIndex(uint64_t indexId); + ////////////////////////////////////////////////////////////////////////////// /// @brief Bounds for all entries belonging to a specified unique index ////////////////////////////////////////////////////////////////////////////// @@ -114,12 +119,7 @@ class RocksDBKeyBounds { /// @brief Bounds for all counter values ////////////////////////////////////////////////////////////////////////////// static RocksDBKeyBounds CounterValues(); - - ////////////////////////////////////////////////////////////////////////////// - /// @brief Bounds for all entries of a fulltext index - ////////////////////////////////////////////////////////////////////////////// - static RocksDBKeyBounds FulltextIndex(uint64_t indexId); - + ////////////////////////////////////////////////////////////////////////////// /// @brief Bounds for all entries of a fulltext index, matching prefixes ////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/RocksDBEngine/RocksDBTypes.cpp b/arangod/RocksDBEngine/RocksDBTypes.cpp index b65edf5c38..0af06df592 100644 --- a/arangod/RocksDBEngine/RocksDBTypes.cpp 
+++ b/arangod/RocksDBEngine/RocksDBTypes.cpp @@ -72,6 +72,20 @@ static rocksdb::Slice UniqueIndexValue( reinterpret_cast::type*>( &uniqueIndexValue), 1); + +static RocksDBEntryType fulltextIndexValue = +RocksDBEntryType::FulltextIndexValue; +static rocksdb::Slice FulltextIndexValue( + reinterpret_cast::type*>( + &fulltextIndexValue), + 1); + +static RocksDBEntryType geoIndexValue = +RocksDBEntryType::GeoIndexValue; +static rocksdb::Slice GeoIndexValue( + reinterpret_cast::type*>( + &geoIndexValue), + 1); static RocksDBEntryType view = RocksDBEntryType::View; static rocksdb::Slice View( @@ -89,13 +103,6 @@ static rocksdb::Slice ReplicationApplierConfig( reinterpret_cast::type*>( &replicationApplierConfig), 1); - -static RocksDBEntryType fulltextIndexValue = - RocksDBEntryType::FulltextIndexValue; -static rocksdb::Slice FulltextIndexValue( - reinterpret_cast::type*>( - &fulltextIndexValue), - 1); } rocksdb::Slice const& arangodb::rocksDBSlice(RocksDBEntryType const& type) { @@ -116,14 +123,16 @@ rocksdb::Slice const& arangodb::rocksDBSlice(RocksDBEntryType const& type) { return IndexValue; case RocksDBEntryType::UniqueIndexValue: return UniqueIndexValue; + case RocksDBEntryType::FulltextIndexValue: + return FulltextIndexValue; + case RocksDBEntryType::GeoIndexValue: + return GeoIndexValue; case RocksDBEntryType::View: return View; case RocksDBEntryType::SettingsValue: return SettingsValue; case RocksDBEntryType::ReplicationApplierConfig: return ReplicationApplierConfig; - case RocksDBEntryType::FulltextIndexValue: - return FulltextIndexValue; } return Document; // avoids warning - errorslice instead ?! 
From 90987765890b26015b5537d5a0419117aa764e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 14:49:58 +0200 Subject: [PATCH 08/18] Added geo index CRUD functionality --- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 5 +- arangod/RocksDBEngine/RocksDBGeoIndex.h | 4 +- arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 57 ++++++++++++- .../RocksDBEngine/RocksDBGeoIndexImplHelper.h | 83 +++++++++---------- 4 files changed, 97 insertions(+), 52 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index cc25903cfa..29078c7681 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -29,8 +29,8 @@ #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" #include "Logger/Logger.h" -#include "StorageEngine/TransactionState.h" #include "RocksDBEngine/RocksDBToken.h" +#include "StorageEngine/TransactionState.h" using namespace arangodb; @@ -384,7 +384,7 @@ bool RocksDBGeoIndex::matchesDefinition(VPackSlice const& info) const { return true; } -int RocksDBGeoIndex::insert(transaction::Methods *, TRI_voc_rid_t revisionId, +int RocksDBGeoIndex::insert(transaction::Methods*, TRI_voc_rid_t revisionId, VPackSlice const& doc, bool isRollback) { double latitude; double longitude; @@ -523,7 +523,6 @@ int RocksDBGeoIndex::removeRaw(rocksdb::WriteBatch*, TRI_voc_rid_t revisionId, return this->remove(nullptr, revisionId, doc, false); } - int RocksDBGeoIndex::unload() { // create a new, empty index auto empty = GeoIndex_new(_objectId); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.h b/arangod/RocksDBEngine/RocksDBGeoIndex.h index 1f03c142eb..d7624f3ce1 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.h @@ -188,7 +188,7 @@ class RocksDBGeoIndex final : public RocksDBIndex { /// @brief the actual geo index GeoIdx* _geoIndex; }; -} +} // namespace arangodb namespace std { template <> @@ 
-200,6 +200,6 @@ class default_delete { } } }; -} +} // namespace std #endif diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index 089daf5b0e..dbdb221f20 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -746,21 +746,70 @@ int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/) { //gc GeoCoordinate, element in point array of real geo index memcpy(gc,gix->gxc+slot,sizeof(GeoCoordinate)); - return 0; + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, true, slot); + RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); + + rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + + VpackToCoord(val.slice(), gc); + return 0; } void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc) { memcpy(gix->gxc+slot,gc,sizeof(GeoCoordinate)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + VPackBuilder serialized = CoordToVpack(gc); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, true, slot); + RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), val.string()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } } int PotRead(GeoIx * gix, int pot, GeoPot * gp) { memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); - return 0; + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, false, pot); + RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); + + 
rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + + VpackToPot(val.slice(), gp); + return 0; } -void PotWrite(GeoIx * gix,int pot, GeoPot * gp) -{ +void PotWrite(GeoIx * gix, int pot, GeoPot * gp) { memcpy(gix->ypots+pot,gp,sizeof(GeoPot)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + VPackBuilder serialized = PotToVpack(gp); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, false, pot); + RocksDBValue val = RocksDBValue::Document(serialized.slice()); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), val.string()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } } /* =================================================== */ diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h index 0cde7ac7cd..d3ce85fc0f 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImplHelper.h @@ -21,7 +21,6 @@ /// @author Jan Christoph Uhde //////////////////////////////////////////////////////////////////////////////// - // MUST BE ONLY INCLUDED IN RocksDBGeoIndexImpl.cpp after struct definitions! // IT CAN NOT BE USED IN OTHER // This file has only been added to keep Richards code clean. 
So it is easier @@ -36,85 +35,83 @@ #include #include -#include #include +#include #include -namespace arangodb { namespace rocksdbengine { +namespace arangodb { +namespace rocksdbengine { -VPackBuilder CoordToVpack(GeoCoordinate* coord){ +VPackBuilder CoordToVpack(GeoCoordinate* coord) { VPackBuilder rv{}; rv.openArray(); - rv.add(VPackValue(coord->latitude)); //double - rv.add(VPackValue(coord->longitude)); //double - rv.add(VPackValue(coord->data)); //uint64_t + rv.add(VPackValue(coord->latitude)); // double + rv.add(VPackValue(coord->longitude)); // double + rv.add(VPackValue(coord->data)); // uint64_t rv.close(); return rv; } -GeoCoordinate VpackToCoord(VPackSlice const& slice){ +void VpackToCoord(VPackSlice const& slice, GeoCoordinate* gc) { TRI_ASSERT(slice.isArray() && slice.length() == 3); - return GeoCoordinate{slice.at(0).getDouble() - ,slice.at(1).getDouble() - ,slice.at(2).getUInt() - }; + gc->latitude = slice.at(0).getDouble(); + gc->longitude = slice.at(1).getDouble(); + gc->data = slice.at(2).getUInt(); } -VPackBuilder PotToVpack(GeoPot* pot){ +VPackBuilder PotToVpack(GeoPot* pot) { VPackBuilder rv{}; - rv.openArray(); // open - rv.add(VPackValue(pot->LorLeaf)); // int - rv.add(VPackValue(pot->RorPoints)); // int - rv.add(VPackValue(pot->middle)); // GeoString + rv.openArray(); // open + rv.add(VPackValue(pot->LorLeaf)); // int + rv.add(VPackValue(pot->RorPoints)); // int + rv.add(VPackValue(pot->middle)); // GeoString { - rv.openArray(); // array GeoFix //uint 16/32 - for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){ - rv.add(VPackValue(pot->maxdist[i])); //unit 16/32 + rv.openArray(); // array GeoFix //uint 16/32 + for (std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++) { + rv.add(VPackValue(pot->maxdist[i])); // unit 16/32 } - rv.close(); // close array + rv.close(); // close array } - rv.add(VPackValue(pot->start)); // GeoString - rv.add(VPackValue(pot->end)); // GeoString - rv.add(VPackValue(pot->level)); // int + 
rv.add(VPackValue(pot->start)); // GeoString + rv.add(VPackValue(pot->end)); // GeoString + rv.add(VPackValue(pot->level)); // int { - rv.openArray(); // arrray of int - for(std::size_t i = 0; i < GeoIndexPOTSIZE; i++){ + rv.openArray(); // arrray of int + for (std::size_t i = 0; i < GeoIndexPOTSIZE; i++) { rv.add(VPackValue(pot->points[i])); // int } - rv.close(); // close array + rv.close(); // close array } - rv.close(); // close + rv.close(); // close return rv; } -GeoPot VpackToPot(VPackSlice const& slice){ - GeoPot rv{}; +void VpackToPot(VPackSlice const& slice, GeoPot* rv) { TRI_ASSERT(slice.isArray()); - rv.LorLeaf = (int) slice.at(0).getInt(); // int - rv.RorPoints = (int) slice.at(1).getInt(); // int - rv.middle = slice.at(2).getUInt(); // GeoString + rv->LorLeaf = (int)slice.at(0).getInt(); // int + rv->RorPoints = (int)slice.at(1).getInt(); // int + rv->middle = slice.at(2).getUInt(); // GeoString { auto maxdistSlice = slice.at(3); TRI_ASSERT(maxdistSlice.isArray()); TRI_ASSERT(maxdistSlice.length() == GeoIndexFIXEDPOINTS); - for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){ - rv.maxdist[i] = (int) maxdistSlice.at(i).getUInt(); //unit 16/33 + for (std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++) { + rv->maxdist[i] = (int)maxdistSlice.at(i).getUInt(); // unit 16/33 } } - rv.start = (int) slice.at(4).getUInt(); // GeoString - rv.end = slice.at(5).getUInt(); // GeoString - rv.level = (int) slice.at(6).getInt(); // int + rv->start = (int)slice.at(4).getUInt(); // GeoString + rv->end = slice.at(5).getUInt(); // GeoString + rv->level = (int)slice.at(6).getInt(); // int { auto pointsSlice = slice.at(7); TRI_ASSERT(pointsSlice.isArray()); TRI_ASSERT(pointsSlice.length() == GeoIndexFIXEDPOINTS); - for(std::size_t i = 0; i < GeoIndexPOTSIZE; i++){ - rv.points[i] = (int) pointsSlice.at(i).getInt(); //int + for (std::size_t i = 0; i < GeoIndexPOTSIZE; i++) { + rv->points[i] = (int)pointsSlice.at(i).getInt(); // int } } - return rv; } - -}} +} // namespace 
rocksdbengine +} // namespace arangodb #endif From f62c99188c4ec84bfa3cde2c22a733a6b94a0390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 17:48:08 +0200 Subject: [PATCH 09/18] Seemingly functional geo index --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 4 +- arangod/RocksDBEngine/RocksDBAqlFunctions.cpp | 228 +++++++++++++++++- arangod/RocksDBEngine/RocksDBCollection.cpp | 14 +- arangod/RocksDBEngine/RocksDBCollection.h | 15 +- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 16 +- arangod/RocksDBEngine/RocksDBGeoIndex.h | 5 - arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 101 ++++---- arangod/RocksDBEngine/RocksDBIndexFactory.cpp | 6 + arangod/RocksDBEngine/RocksDBKey.cpp | 34 +-- arangod/RocksDBEngine/RocksDBKey.h | 3 +- .../RocksDBTransactionCollection.cpp | 15 +- 11 files changed, 329 insertions(+), 112 deletions(-) diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK index 73077bd257..23e0806ebf 100644 --- a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -8,6 +8,8 @@ 8. SlotWrite - partial 9. CreatePot (take the next number) 10. CreateSlot (take the next number) -11. Also needs start and stop transaction routine and to use them Das sind die +11. Also needs start and stop transaction routine and to use them + +Das sind die Punkte, die Richard eingeplant hatte zum Anschluss. Über die obere Ebene weiß er praktisch nichts. 
diff --git a/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp b/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp index 736653188c..eab7b6540a 100644 --- a/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp +++ b/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp @@ -25,6 +25,8 @@ #include "Aql/AqlFunctionFeature.h" #include "Aql/Function.h" #include "RocksDBEngine/RocksDBFulltextIndex.h" +#include "RocksDBEngine/RocksDBGeoIndex.h" +#include "RocksDBEngine/RocksDBToken.h" #include "StorageEngine/DocumentIdentifierToken.h" #include "Transaction/Helpers.h" #include "Transaction/Methods.h" @@ -134,7 +136,7 @@ AqlValue RocksDBAqlFunctions::Fulltext( } // do we need this in rocksdb? trx->pinData(cid); - + transaction::BuilderLeaser builder(trx); FulltextQuery parsedQuery; Result res = fulltextIndex->parseQueryString(queryString, parsedQuery); @@ -149,20 +151,236 @@ AqlValue RocksDBAqlFunctions::Fulltext( return AqlValue(builder.get()); } +/// @brief Load geoindex for collection name +static arangodb::RocksDBGeoIndex* getGeoIndex( + transaction::Methods* trx, TRI_voc_cid_t const& cid, + std::string const& collectionName) { + // NOTE: + // Due to trx lock the shared_index stays valid + // as long as trx stays valid. + // It is save to return the Raw pointer. + // It can only be used until trx is finished. 
+ trx->addCollectionAtRuntime(cid, collectionName); + auto document = trx->documentCollection(cid); + if (document == nullptr) { + THROW_ARANGO_EXCEPTION_FORMAT(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, "'%s'", + collectionName.c_str()); + } + + arangodb::RocksDBGeoIndex* index = nullptr; + for (auto const& idx : document->getIndexes()) { + if (idx->type() == arangodb::Index::TRI_IDX_TYPE_GEO1_INDEX || + idx->type() == arangodb::Index::TRI_IDX_TYPE_GEO2_INDEX) { + index = static_cast(idx.get()); + break; + } + } + + if (index == nullptr) { + THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_GEO_INDEX_MISSING, + collectionName.c_str()); + } + + trx->pinData(cid); + return index; +} + +static AqlValue buildGeoResult(transaction::Methods* trx, + LogicalCollection* collection, + arangodb::aql::Query* query, + GeoCoordinates* cors, TRI_voc_cid_t const& cid, + std::string const& attributeName) { + if (cors == nullptr) { + return AqlValue(arangodb::basics::VelocyPackHelper::EmptyArrayValue()); + } + + size_t const nCoords = cors->length; + if (nCoords == 0) { + GeoIndex_CoordinatesFree(cors); + return AqlValue(arangodb::basics::VelocyPackHelper::EmptyArrayValue()); + } + + struct geo_coordinate_distance_t { + geo_coordinate_distance_t(double distance, RocksDBToken token) + : _distance(distance), _token(token) {} + double _distance; + RocksDBToken _token; + }; + + std::vector distances; + + try { + distances.reserve(nCoords); + + for (size_t i = 0; i < nCoords; ++i) { + distances.emplace_back(geo_coordinate_distance_t( + cors->distances[i], RocksDBToken(cors->coordinates[i].data))); + } + } catch (...) 
{ + GeoIndex_CoordinatesFree(cors); + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + GeoIndex_CoordinatesFree(cors); + + // sort result by distance + std::sort(distances.begin(), distances.end(), + [](geo_coordinate_distance_t const& left, + geo_coordinate_distance_t const& right) { + return left._distance < right._distance; + }); + + try { + ManagedDocumentResult mmdr; + transaction::BuilderLeaser builder(trx); + builder->openArray(); + if (!attributeName.empty()) { + // We have to copy the entire document + for (auto& it : distances) { + VPackObjectBuilder docGuard(builder.get()); + builder->add(attributeName, VPackValue(it._distance)); + if (collection->readDocument(trx, it._token, mmdr)) { + VPackSlice doc(mmdr.vpack()); + for (auto const& entry : VPackObjectIterator(doc)) { + std::string key = entry.key.copyString(); + if (key != attributeName) { + builder->add(key, entry.value); + } + } + } + } + + } else { + for (auto& it : distances) { + if (collection->readDocument(trx, it._token, mmdr)) { + mmdr.addToBuilder(*builder.get(), true); + } + } + } + builder->close(); + return AqlValue(builder.get()); + } catch (...) 
{ + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } +} + /// @brief function NEAR AqlValue RocksDBAqlFunctions::Near(arangodb::aql::Query* query, transaction::Methods* trx, VPackFunctionParameters const& parameters) { - // TODO: obi - THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_GEO_INDEX_MISSING, "NEAR"); + ValidateParameters(parameters, "NEAR", 3, 5); + + AqlValue collectionValue = ExtractFunctionParameterValue(trx, parameters, 0); + if (!collectionValue.isString()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "NEAR"); + } + + std::string const collectionName(collectionValue.slice().copyString()); + + AqlValue latitude = ExtractFunctionParameterValue(trx, parameters, 1); + AqlValue longitude = ExtractFunctionParameterValue(trx, parameters, 2); + + if (!latitude.isNumber() || !longitude.isNumber()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "NEAR"); + } + + // extract limit + int64_t limitValue = 100; + + if (parameters.size() > 3) { + AqlValue limit = ExtractFunctionParameterValue(trx, parameters, 3); + + if (limit.isNumber()) { + limitValue = limit.toInt64(trx); + } else if (!limit.isNull(true)) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "NEAR"); + } + } + + std::string attributeName; + if (parameters.size() > 4) { + // have a distance attribute + AqlValue distanceValue = ExtractFunctionParameterValue(trx, parameters, 4); + + if (!distanceValue.isNull(true) && !distanceValue.isString()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "NEAR"); + } + + if (distanceValue.isString()) { + attributeName = distanceValue.slice().copyString(); + } + } + + TRI_voc_cid_t cid = trx->resolver()->getCollectionIdLocal(collectionName); + arangodb::RocksDBGeoIndex* index = getGeoIndex(trx, cid, collectionName); + + TRI_ASSERT(index != nullptr); + TRI_ASSERT(trx->isPinned(cid)); + + GeoCoordinates* cors = + 
index->nearQuery(trx, latitude.toDouble(trx), longitude.toDouble(trx), + static_cast(limitValue)); + + return buildGeoResult(trx, index->collection(), query, cors, cid, + attributeName); } /// @brief function WITHIN AqlValue RocksDBAqlFunctions::Within( arangodb::aql::Query* query, transaction::Methods* trx, VPackFunctionParameters const& parameters) { - // TODO: obi - THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_GEO_INDEX_MISSING, "Within"); + ValidateParameters(parameters, "WITHIN", 4, 5); + + AqlValue collectionValue = ExtractFunctionParameterValue(trx, parameters, 0); + + if (!collectionValue.isString()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "WITHIN"); + } + + std::string const collectionName(collectionValue.slice().copyString()); + + AqlValue latitudeValue = ExtractFunctionParameterValue(trx, parameters, 1); + AqlValue longitudeValue = ExtractFunctionParameterValue(trx, parameters, 2); + AqlValue radiusValue = ExtractFunctionParameterValue(trx, parameters, 3); + + if (!latitudeValue.isNumber() || !longitudeValue.isNumber() || + !radiusValue.isNumber()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "WITHIN"); + } + + std::string attributeName; + if (parameters.size() > 4) { + // have a distance attribute + AqlValue distanceValue = ExtractFunctionParameterValue(trx, parameters, 4); + + if (!distanceValue.isNull(true) && !distanceValue.isString()) { + THROW_ARANGO_EXCEPTION_PARAMS( + TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "WITHIN"); + } + + if (distanceValue.isString()) { + attributeName = distanceValue.slice().copyString(); + } + } + + TRI_voc_cid_t cid = trx->resolver()->getCollectionIdLocal(collectionName); + arangodb::RocksDBGeoIndex* index = getGeoIndex(trx, cid, collectionName); + + TRI_ASSERT(index != nullptr); + TRI_ASSERT(trx->isPinned(cid)); + + GeoCoordinates* cors = index->withinQuery(trx, latitudeValue.toDouble(trx), + longitudeValue.toDouble(trx), + 
radiusValue.toDouble(trx)); + + return buildGeoResult(trx, index->collection(), query, cors, cid, + attributeName); } void RocksDBAqlFunctions::registerResources() { diff --git a/arangod/RocksDBEngine/RocksDBCollection.cpp b/arangod/RocksDBEngine/RocksDBCollection.cpp index 275cd48c0b..a6b68e5ce1 100644 --- a/arangod/RocksDBEngine/RocksDBCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBCollection.cpp @@ -183,11 +183,14 @@ void RocksDBCollection::open(bool ignoreErrors) { << " number of documents: " << counterValue.added(); _numberDocuments = counterValue.added() - counterValue.removed(); _revisionId = counterValue.revisionId(); - //_numberDocuments = countKeyRange(db, readOptions, - // RocksDBKeyBounds::CollectionDocuments(_objectId)); - for (auto it : getIndexes()) { + for (std::shared_ptr it : getIndexes()) { static_cast(it.get())->load(); + + if (it->type() == Index::TRI_IDX_TYPE_GEO1_INDEX || + it->type() == Index::TRI_IDX_TYPE_GEO2_INDEX) { + _hasGeoIndex = true; + } } } @@ -1263,8 +1266,11 @@ void RocksDBCollection::addIndex(std::shared_ptr idx) { } TRI_UpdateTickServer(static_cast(id)); - _indexes.emplace_back(idx); + if (idx->type() == Index::TRI_IDX_TYPE_GEO1_INDEX || + idx->type() == Index::TRI_IDX_TYPE_GEO2_INDEX) { + _hasGeoIndex = true; + } } void RocksDBCollection::addIndexCoordinator( diff --git a/arangod/RocksDBEngine/RocksDBCollection.h b/arangod/RocksDBEngine/RocksDBCollection.h index 84b9cccfd6..0ffcbe15e5 100644 --- a/arangod/RocksDBEngine/RocksDBCollection.h +++ b/arangod/RocksDBEngine/RocksDBCollection.h @@ -39,7 +39,7 @@ class Result; class RocksDBPrimaryIndex; class RocksDBVPackIndex; struct RocksDBToken; - + class RocksDBCollection final : public PhysicalCollection { friend class RocksDBEngine; friend class RocksDBVPackIndex; @@ -48,7 +48,6 @@ class RocksDBCollection final : public PhysicalCollection { constexpr static double defaultLockTimeout = 10.0 * 60.0; public: - public: explicit RocksDBCollection(LogicalCollection*, VPackSlice 
const& info); explicit RocksDBCollection(LogicalCollection*, @@ -121,7 +120,7 @@ class RocksDBCollection final : public PhysicalCollection { /// non transactional truncate, will continoiusly commit the deletes /// and no fully rollback on failure. Uses trx snapshots to isolate /// against newer PUTs - //void truncateNoTrx(transaction::Methods* trx); + // void truncateNoTrx(transaction::Methods* trx); DocumentIdentifierToken lookupKey( transaction::Methods* trx, @@ -185,10 +184,12 @@ class RocksDBCollection final : public PhysicalCollection { /// recalculte counts for collection in case of failure uint64_t recalculateCounts(); - + /// trigger rocksdb compaction for documentDB and indexes void compact(); - void estimateSize(velocypack::Builder &builder); + void estimateSize(velocypack::Builder& builder); + + bool hasGeoIndex() { return _hasGeoIndex; } private: /// @brief return engine-specific figures @@ -231,6 +232,8 @@ class RocksDBCollection final : public PhysicalCollection { std::atomic _numberDocuments; std::atomic _revisionId; + /// upgrade write locks to exclusive locks if this flag is set + bool _hasGeoIndex; basics::ReadWriteLock _exclusiveLock; }; @@ -246,6 +249,6 @@ inline RocksDBCollection* toRocksDBCollection(LogicalCollection* logical) { return toRocksDBCollection(phys); } -} +} // namespace arangodb #endif diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 29078c7681..93401d3c3c 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -192,8 +192,7 @@ bool RocksDBGeoIndexIterator::next(TokenCallback const& cb, size_t limit) { } for (size_t i = 0; i < numDocs; ++i) { - cb(::RocksDBGeoIndex::toDocumentIdentifierToken( - coords->coordinates[i].data)); + cb(RocksDBToken(coords->coordinates[i].data)); } // If we return less then limit many docs we are done. 
_done = numDocs < limit; @@ -214,17 +213,6 @@ void RocksDBGeoIndexIterator::createCursor(double lat, double lon) { replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &_coor)); } -uint64_t RocksDBGeoIndex::fromDocumentIdentifierToken( - DocumentIdentifierToken const& token) { - auto tkn = static_cast(&token); - return static_cast(tkn->revisionId()); -} - -DocumentIdentifierToken RocksDBGeoIndex::toDocumentIdentifierToken( - uint64_t internal) { - return RocksDBToken{internal}; -} - /// @brief creates an IndexIterator for the given Condition IndexIterator* RocksDBGeoIndex::iteratorForCondition( transaction::Methods* trx, ManagedDocumentResult* mmdr, @@ -300,7 +288,7 @@ void RocksDBGeoIndex::toVelocyPack(VPackBuilder& builder, bool withFigures, bool forPersistence) const { builder.openObject(); // Basic index - Index::toVelocyPack(builder, withFigures, forPersistence); + RocksDBIndex::toVelocyPack(builder, withFigures, forPersistence); if (_variant == INDEX_GEO_COMBINED_LAT_LON || _variant == INDEX_GEO_COMBINED_LON_LAT) { diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.h b/arangod/RocksDBEngine/RocksDBGeoIndex.h index d7624f3ce1..850dd1f39e 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.h @@ -167,11 +167,6 @@ class RocksDBGeoIndex final : public RocksDBIndex { _latitude == latitude && _longitude == longitude); } - static uint64_t fromDocumentIdentifierToken( - DocumentIdentifierToken const& token); - - static DocumentIdentifierToken toDocumentIdentifierToken(uint64_t internal); - private: /// @brief attribute paths std::vector _location; diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index dbdb221f20..ce00ac4418 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -747,69 +747,74 @@ int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/) //gc GeoCoordinate, element in point array 
of real geo index memcpy(gc,gix->gxc+slot,sizeof(GeoCoordinate)); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, true, slot); - RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); - - rocksdb::ReadOptions opts; - rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } - - VpackToCoord(val.slice(), gc); - return 0; + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, slot, true); + RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); + + rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); + if (s.IsNotFound()) { // TODO how to indicate not found? + memset(gc, 0, sizeof(GeoCoordinate)); + return -1; + } else if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + VpackToCoord(val.slice(), gc); + return 0; } void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc) { memcpy(gix->gxc+slot,gc,sizeof(GeoCoordinate)); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - VPackBuilder serialized = CoordToVpack(gc); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, true, slot); - RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); - - rocksdb::WriteOptions opts; - rocksdb::Status s = db->Put(opts, key.string(), val.string()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } + VPackBuilder serialized = CoordToVpack(gc); + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, 
slot, true); + RocksDBValue val = RocksDBValue::Document(serialized.slice()); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), val.string()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } } int PotRead(GeoIx * gix, int pot, GeoPot * gp) { memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, false, pot); - RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); - - rocksdb::ReadOptions opts; - rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } - - VpackToPot(val.slice(), gp); - return 0; + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); + + rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); + if (s.IsNotFound()) { // TODO how to indicate not found? 
+ gp->LorLeaf = 0; + gp->RorPoints = 0; + return -1; + } else if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + VpackToPot(val.slice(), gp); + return 0; } void PotWrite(GeoIx * gix, int pot, GeoPot * gp) { memcpy(gix->ypots+pot,gp,sizeof(GeoPot)); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - VPackBuilder serialized = PotToVpack(gp); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, false, pot); - RocksDBValue val = RocksDBValue::Document(serialized.slice()); - - rocksdb::WriteOptions opts; - rocksdb::Status s = db->Put(opts, key.string(), val.string()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } + VPackBuilder serialized = PotToVpack(gp); + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + RocksDBValue val = RocksDBValue::Document(serialized.slice()); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), val.string()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } } /* =================================================== */ diff --git a/arangod/RocksDBEngine/RocksDBIndexFactory.cpp b/arangod/RocksDBEngine/RocksDBIndexFactory.cpp index 1bff5fd940..d44968d01b 100644 --- a/arangod/RocksDBEngine/RocksDBIndexFactory.cpp +++ b/arangod/RocksDBEngine/RocksDBIndexFactory.cpp @@ -29,6 +29,7 @@ #include "RocksDBEngine/RocksDBEdgeIndex.h" #include "RocksDBEngine/RocksDBEngine.h" #include "RocksDBEngine/RocksDBFulltextIndex.h" +#include "RocksDBEngine/RocksDBGeoIndex.h" #include "RocksDBEngine/RocksDBHashIndex.h" #include "RocksDBEngine/RocksDBPersistentIndex.h" #include 
"RocksDBEngine/RocksDBPrimaryIndex.h" @@ -428,6 +429,11 @@ std::shared_ptr RocksDBIndexFactory::prepareIndexFromSlice( newIdx.reset(new arangodb::RocksDBPersistentIndex(iid, col, info)); break; } + case arangodb::Index::TRI_IDX_TYPE_GEO1_INDEX: + case arangodb::Index::TRI_IDX_TYPE_GEO2_INDEX:{ + newIdx.reset(new arangodb::RocksDBGeoIndex(iid, col, info)); + break; + } case arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX: { newIdx.reset(new arangodb::RocksDBFulltextIndex(iid, col, info)); break; diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index c5fd730d81..3b209339ec 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -83,8 +83,17 @@ RocksDBKey RocksDBKey::FulltextIndexValue(uint64_t indexId, return RocksDBKey(RocksDBEntryType::FulltextIndexValue, indexId, word, primaryKey); } -RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, bool isSlot, uint64_t offset){ - return RocksDBKey(RocksDBEntryType::GeoIndexValue, isSlot, offset); +RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, uint32_t offset, bool isSlot) { + RocksDBKey key(RocksDBEntryType::GeoIndexValue); + size_t length = sizeof(char) + sizeof(indexId) + sizeof(offset); + key._buffer.reserve(length); + key._buffer.push_back(static_cast(RocksDBEntryType::GeoIndexValue)); + uint64ToPersistent(key._buffer, indexId); + uint64_t norm = offset; + if (isSlot) norm |= uint64_t(1) << 63;//encode slot|pot in highest bit + uint64ToPersistent(key._buffer, norm); + + return key; } RocksDBKey RocksDBKey::View(TRI_voc_tick_t databaseId, TRI_voc_cid_t viewId) { @@ -194,6 +203,7 @@ std::string const& RocksDBKey::string() const { return _buffer; } RocksDBKey::RocksDBKey(RocksDBEntryType type) : _type(type), _buffer() { switch (_type) { + case RocksDBEntryType::GeoIndexValue: case RocksDBEntryType::SettingsValue: { _buffer.push_back(static_cast(_type)); break; @@ -315,26 +325,6 @@ RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t 
first, } } -RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint32_t offset, bool isSlot) - : _type(type), _buffer() { - switch (_type) { - case RocksDBEntryType::GeoIndexValue: { - - size_t length = sizeof(char) + sizeof(objectId) + sizeof(offset); - _buffer.reserve(length); - _buffer.push_back(static_cast(_type)); - uint64ToPersistent(_buffer, objectId); - uint64_t norm = offset; - if (isSlot) norm |= uint64_t(1) << 63;//encode slot|pot in highest bit - uint64ToPersistent(_buffer, norm); - break; - } - - default: - THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER); - } -} - RocksDBKey::RocksDBKey(RocksDBEntryType type, uint64_t first, arangodb::StringRef const& second, arangodb::StringRef const& third) diff --git a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index 4e2c4ba1f5..91c0061c62 100644 --- a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -113,7 +113,7 @@ class RocksDBKey { ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a geoIndexValue ////////////////////////////////////////////////////////////////////////////// - static RocksDBKey GeoIndexValue(uint64_t indexId, bool isSlot, uint64_t offset); + static RocksDBKey GeoIndexValue(uint64_t indexId, uint32_t offset, bool isSlot); ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a view @@ -247,7 +247,6 @@ class RocksDBKey { std::string const& third); RocksDBKey(RocksDBEntryType type, uint64_t first, arangodb::StringRef const& second, arangodb::StringRef const& third); - RocksDBKey(RocksDBEntryType type, uint64_t objectId, uint32_t index, bool isSlot); private: static RocksDBEntryType type(char const* data, size_t size); diff --git a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp index 41f2956e7a..9111a61b3f 100644 --- 
a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp @@ -164,6 +164,13 @@ int RocksDBTransactionCollection::use(int nestingLevel) { if (_collection != nullptr) { _usageLocked = true; } + + // geo index needs exclusive write access + RocksDBCollection* rc = static_cast(_collection->getPhysical()); + if (AccessMode::isWrite(_accessType) && rc->hasGeoIndex()) { + _accessType = AccessMode::Type::EXCLUSIVE; + } + } else { // use without usage-lock (lock already set externally) _collection = _transaction->vocbase()->lookupCollection(_cid); @@ -187,11 +194,9 @@ int RocksDBTransactionCollection::use(int nestingLevel) { return TRI_ERROR_ARANGO_READ_ONLY; } - _initialNumberDocuments = - static_cast(_collection->getPhysical()) - ->numberDocuments(); - _revision = - static_cast(_collection->getPhysical())->revision(); + RocksDBCollection* rc = static_cast(_collection->getPhysical()); + _initialNumberDocuments = rc->numberDocuments(); + _revision = rc->revision(); } if (AccessMode::isWriteOrExclusive(_accessType) && !isLocked()) { From 1e3368c76a2eff037f092f56e58ab75677f7977e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 18:27:51 +0200 Subject: [PATCH 10/18] Preliminary changes --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 9 +++++++++ js/common/tests/replication/replication.js | 15 --------------- ...ll-index-geo-mmfiles.js => shell-index-geo.js} | 0 ...index-mmfiles.js => aql-optimizer-geoindex.js} | 0 ...ctangle-mmfiles.js => aql-within-rectangle.js} | 0 js/server/tests/dump/dump-rocksdb.js | 8 +++----- .../{indexes-geo-mmfiles.js => indexes-geo.js} | 0 7 files changed, 12 insertions(+), 20 deletions(-) rename js/common/tests/shell/{shell-index-geo-mmfiles.js => shell-index-geo.js} (100%) rename js/server/tests/aql/{aql-optimizer-geoindex-mmfiles.js => aql-optimizer-geoindex.js} (100%) rename js/server/tests/aql/{aql-within-rectangle-mmfiles.js => 
aql-within-rectangle.js} (100%) rename js/server/tests/recovery/{indexes-geo-mmfiles.js => indexes-geo.js} (100%) diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK index 23e0806ebf..9bc4940d7a 100644 --- a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -13,3 +13,12 @@ Das sind die Punkte, die Richard eingeplant hatte zum Anschluss. Über die obere Ebene weiß er praktisch nichts. + +Questions for Richard: + +- How to communicate non existent values in SlotRead, PotRead ? + - Return value != 0 ? + +- The GeoIx struct contains some fields, `potct` and `slotct` which we would need to store and recover from RocksDB. + +- Above the functions `CreatePot` and `CreateSlot` are mentioned, but they are not in the source code ? \ No newline at end of file diff --git a/js/common/tests/replication/replication.js b/js/common/tests/replication/replication.js index ae7f7721d8..00bb2825f5 100644 --- a/js/common/tests/replication/replication.js +++ b/js/common/tests/replication/replication.js @@ -725,9 +725,6 @@ function ReplicationLoggerSuite () { //////////////////////////////////////////////////////////////////////////////// testLoggerCreateIndexGeo1 : function () { - if (db._engine().name === "rocksdb") { - return; - } var c = db._create(cn); var tick = getLastLogTick(); @@ -750,9 +747,6 @@ function ReplicationLoggerSuite () { //////////////////////////////////////////////////////////////////////////////// testLoggerCreateIndexGeo2 : function () { - if (db._engine().name === "rocksdb") { - return; - } var c = db._create(cn); var tick = getLastLogTick(); @@ -775,9 +769,6 @@ function ReplicationLoggerSuite () { //////////////////////////////////////////////////////////////////////////////// testLoggerCreateIndexGeo3 : function () { - if (db._engine().name === "rocksdb") { - return; - } var c = db._create(cn); var tick = getLastLogTick(); @@ -802,9 +793,6 @@ function 
ReplicationLoggerSuite () { //////////////////////////////////////////////////////////////////////////////// testLoggerCreateIndexGeo4 : function () { - if (db._engine().name === "rocksdb") { - return; - } var c = db._create(cn); var tick = getLastLogTick(); @@ -829,9 +817,6 @@ function ReplicationLoggerSuite () { //////////////////////////////////////////////////////////////////////////////// testLoggerCreateIndexGeo5 : function () { - if (db._engine().name === "rocksdb") { - return; - } var c = db._create(cn); var tick = getLastLogTick(); diff --git a/js/common/tests/shell/shell-index-geo-mmfiles.js b/js/common/tests/shell/shell-index-geo.js similarity index 100% rename from js/common/tests/shell/shell-index-geo-mmfiles.js rename to js/common/tests/shell/shell-index-geo.js diff --git a/js/server/tests/aql/aql-optimizer-geoindex-mmfiles.js b/js/server/tests/aql/aql-optimizer-geoindex.js similarity index 100% rename from js/server/tests/aql/aql-optimizer-geoindex-mmfiles.js rename to js/server/tests/aql/aql-optimizer-geoindex.js diff --git a/js/server/tests/aql/aql-within-rectangle-mmfiles.js b/js/server/tests/aql/aql-within-rectangle.js similarity index 100% rename from js/server/tests/aql/aql-within-rectangle-mmfiles.js rename to js/server/tests/aql/aql-within-rectangle.js diff --git a/js/server/tests/dump/dump-rocksdb.js b/js/server/tests/dump/dump-rocksdb.js index e4d5126c3e..7469b2e30d 100644 --- a/js/server/tests/dump/dump-rocksdb.js +++ b/js/server/tests/dump/dump-rocksdb.js @@ -222,11 +222,9 @@ function dumpTestSuite () { assertEqual("fulltext", c.getIndexes()[7].type); assertEqual([ "a_f" ], c.getIndexes()[7].fields); - if (db._engine().name !== "rocksdb") { - assertEqual("geo2", c.getIndexes()[8].type); - assertEqual([ "a_la", "a_lo" ], c.getIndexes()[8].fields); - assertFalse(c.getIndexes()[8].unique); - } + assertEqual("geo2", c.getIndexes()[8].type); + assertEqual([ "a_la", "a_lo" ], c.getIndexes()[8].fields); + assertFalse(c.getIndexes()[8].unique); 
assertEqual(0, c.count()); }, diff --git a/js/server/tests/recovery/indexes-geo-mmfiles.js b/js/server/tests/recovery/indexes-geo.js similarity index 100% rename from js/server/tests/recovery/indexes-geo-mmfiles.js rename to js/server/tests/recovery/indexes-geo.js From a13614277d9eac10fcac88ede175d741efa780e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Mon, 8 May 2017 18:36:25 +0200 Subject: [PATCH 11/18] Added newline --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK index 9bc4940d7a..65926ccc7c 100644 --- a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -21,4 +21,4 @@ Questions for Richard: - The GeoIx struct contains some fields, `potct` and `slotct` which we would need to store and recover from RocksDB. -- Above the functions `CreatePot` and `CreateSlot` are mentioned, but they are not in the source code ? \ No newline at end of file +- Above the functions `CreatePot` and `CreateSlot` are mentioned, but they are not in the source code ? 
From b0bc75c93e125eebe930715c37cc6d27429b6775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 14:47:13 +0200 Subject: [PATCH 12/18] Working geo index --- GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK | 4 +- arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 306 +++--- ...ries-geo-mmfiles.js => aql-queries-geo.js} | 0 .../shell/shell-shaped-mmfiles-noncluster.js | 129 --- .../tests/shell/shell-shaped-noncluster.js | 941 +----------------- 5 files changed, 214 insertions(+), 1166 deletions(-) rename js/server/tests/aql/{aql-queries-geo-mmfiles.js => aql-queries-geo.js} (100%) delete mode 100644 js/server/tests/shell/shell-shaped-mmfiles-noncluster.js diff --git a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK index 65926ccc7c..15e766d6d7 100644 --- a/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK +++ b/GEOINDEX_PLAN_DELETE_ME_BEFORE_MERGING_BACK @@ -16,8 +16,8 @@ Das sind die Questions for Richard: -- How to communicate non existent values in SlotRead, PotRead ? - - Return value != 0 ? +//- How to communicate non existent values in SlotRead, PotRead ? +// - Return value != 0 ? - The GeoIx struct contains some fields, `potct` and `slotct` which we would need to store and recover from RocksDB. 
diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index ce00ac4418..0bdd5151b0 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -128,11 +128,11 @@ typedef struct { typedef struct { uint64_t objectId; /* Rocksdb Index objectId */ GeoIndexFixed fixed; /* fixed point data */ - int potct; /* pots allocated */ - int slotct; /* slots allocated */ - GeoPot* ypots; /* the pots themselves */ - GeoCoordinate* gxc; /* the slots themselves */ - size_t _memoryUsed; /* the amount of memory currently used */ + int nextFreePot; /* pots allocated */ + int nextFreeSlot; /* slots allocated */ + //GeoPot* ypots; /* the pots themselves */// gone + //GeoCoordinate* gxc; /* the slots themselves */// gone + //size_t _memoryUsed; /* the amount of memory currently used */// gone } GeoIx; /* =================================================== */ /* GeoDetailedPoint structure */ @@ -173,7 +173,7 @@ typedef struct { /* The routine GeoPotJunk is used to test this, */ /* by comparing the distances in the pot the this array*/ /* =================================================== */ -typedef struct { +typedef struct {// stays GeoIx* gix; GeoCoordinate* gc; double x; @@ -256,9 +256,86 @@ typedef struct { }} // must be included here after struct definition -#include +#include +#include +#include +#include +#include +#include namespace arangodb { namespace rocksdbengine { + + +/* CRUD interface */ +int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/) +{ + //gc GeoCoordinate, element in point array of real geo index + //memcpy(gc,gix->gxc+slot,sizeof(GeoCoordinate)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, slot, true); + std::string slotValue; + + rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), &slotValue); + if (!s.ok()) { + arangodb::Result r = 
rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + //VpackToCoord(val.slice(), gc); + memcpy(gc, slotValue.data(), slotValue.size()); + + return 0; +} +void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc) +{ + //memcpy(gix->gxc+slot,gc,sizeof(GeoCoordinate)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, slot, true); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), + rocksdb::Slice((char*)gc, + sizeof(GeoCoordinate))); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } +} + +int PotRead(GeoIx * gix, int pot, GeoPot * gp) +{ + //memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + std::string potValue; + + rocksdb::ReadOptions opts; + rocksdb::Status s = db->Get(opts, key.string(), &potValue); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } + memcpy(gp, potValue.data(), potValue.size()); + return 0; +} +void PotWrite(GeoIx * gix, int pot, GeoPot * gp) { + //memcpy(gix->ypots+pot,gp,sizeof(GeoPot)); + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), + rocksdb::Slice((char*)gp, + sizeof(GeoPot))); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } +} /* =================================================== */ /* GeoIndex_Distance routine */ @@ -292,9 +369,19 @@ double 
GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2) { /* takes the supplied pot, and puts it back onto the */ /* free list. */ /* =================================================== */ -void GeoIndexFreePot(GeoIx* gix, int pot) { - gix->ypots[pot].LorLeaf = gix->ypots[0].LorLeaf; - gix->ypots[0].LorLeaf = pot; +void GeoIndexFreePot(GeoIx* gix, int pot) {// rewrite delete in rocksdb + // gix->ypots[pot].LorLeaf = gix->ypots[0].LorLeaf; + //gix->ypots[0].LorLeaf = pot; + + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Delete(opts, key.string()); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + } } /* =================================================== */ /* GeoIndexNewPot */ @@ -313,12 +400,12 @@ void GeoIndexFreePot(GeoIx* gix, int pot) { /* therefore careful to get the new pots (if any are */ /* needed) before it gets too far into things. 
*/ /* =================================================== */ -int GeoIndexNewPot(GeoIx* gix) { - int j; +int GeoIndexNewPot(GeoIx* gix) {// rocksdb initial put + /*int j; GeoPot* gp; if (gix->ypots[0].LorLeaf == 0) { - /* do the growth calculation in long long to make sure it doesn't */ - /* overflow when the size gets to be near 2^31 */ + // do the growth calculation in long long to make sure it doesn't + // overflow when the size gets to be near 2^31 long long x = gix->potct; long long y = 100 + GeoIndexGROW; x = x * y + 99; @@ -342,10 +429,27 @@ int GeoIndexNewPot(GeoIx* gix) { GeoIndexFreePot(gix, j); } gix->potct = newpotct; - } - j = gix->ypots[0].LorLeaf; - gix->ypots[0].LorLeaf = gix->ypots[j].LorLeaf; - return j; + }*/ + //j = gix->ypots[0].LorLeaf; + //gix->ypots[0].LorLeaf = gix->ypots[j].LorLeaf; + //return j; + + // + //gp.LorLeaf = pot - 1; + //gix->ypots[0].LorLeaf = pot; + + /*rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); + + GeoPot gp = {}; + rocksdb::WriteOptions opts; + rocksdb::Status s = db->Put(opts, key.string(), rocksdb::Slice((char*)(&gp), + sizeof(GeoPot))); + if (!s.ok()) { + arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); + THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); + }*/ + return gix->nextFreePot++; } /* =================================================== */ /* GeoIndex_new routine */ @@ -381,13 +485,13 @@ GeoIdx* GeoIndex_new(uint64_t objectId) { gix->objectId = objectId; /* try to allocate all the things we need */ - gix->ypots = static_cast( + /*gix->ypots = static_cast( TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, GEOPOTSTART * sizeof(GeoPot), false)); gix->gxc = static_cast(TRI_Allocate( TRI_UNKNOWN_MEM_ZONE, GEOSLOTSTART * sizeof(GeoCoordinate), false)); - /* if any of them fail, free the ones that succeeded */ - /* and then return the nullptr for our user */ + // if any of them fail, free the ones that 
succeeded + // and then return the nullptr for our user if ((gix->ypots == nullptr) || (gix->gxc == nullptr)) { if (gix->ypots != nullptr) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); @@ -400,31 +504,34 @@ GeoIdx* GeoIndex_new(uint64_t objectId) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix); return nullptr; - } + }*/ // set initial memory usage - gix->_memoryUsed = - GEOPOTSTART * sizeof(GeoPot) + GEOSLOTSTART * sizeof(GeoCoordinate); + //gix->_memoryUsed = + // GEOPOTSTART * sizeof(GeoPot) + GEOSLOTSTART * sizeof(GeoCoordinate); /* initialize chain of empty slots */ - for (i = 0; i < GEOSLOTSTART; i++) { + /*for (i = 0; i < GEOSLOTSTART; i++) { if (i < GEOSLOTSTART - 1) (gix->gxc[i]).latitude = i + 1; else (gix->gxc[i]).latitude = 0; } - /* similarly set up free chain of empty pots */ + // similarly set up free chain of empty pots for (i = 0; i < GEOPOTSTART; i++) { if (i < GEOPOTSTART - 1) gix->ypots[i].LorLeaf = i + 1; else gix->ypots[i].LorLeaf = 0; - } - - gix->potct = GEOPOTSTART; - gix->slotct = GEOSLOTSTART; + }*/ + + //RocksDBCounterManager *mgr = rocksutils::globalRocksEngine()->counterManager(); + //RocksDBCounterManager::CounterAdjustment adj = mgr->loadCounter(objectId); + gix->nextFreePot = 2;//(adj.added() & 0xffffffff); + gix->nextFreeSlot = 1;//adj.added() >> 32; + // leave intact /* set up the fixed points structure */ for (i = 0; i < GeoIndexFIXEDPOINTS; i++) { @@ -539,15 +646,27 @@ GeoIdx* GeoIndex_new(uint64_t objectId) { (gix->fixed.z)[i] = z; } /* set up the root pot */ + + GeoPot gp; + //j = GeoIndexNewPot(gix); + gp.LorLeaf = 0; //leaf pot + gp.RorPoints = 0; // with no points in it! + gp.middle = 0ll; + gp.start = 0ll; + gp.end = 0x1FFFFFFFFFFFFFll; + gp.level = 1; + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gp.maxdist[i] = 0; - j = GeoIndexNewPot(gix); - gix->ypots[j].LorLeaf = 0; /* leaf pot */ - gix->ypots[j].RorPoints = 0; /* with no points in it! 
*/ + PotWrite(gix, 1, &gp); + + /*j = GeoIndexNewPot(gix); + gix->ypots[j].LorLeaf = 0; //leaf pot + gix->ypots[j].RorPoints = 0; // with no points in it! gix->ypots[j].middle = 0ll; gix->ypots[j].start = 0ll; gix->ypots[j].end = 0x1FFFFFFFFFFFFFll; gix->ypots[j].level = 1; - for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gix->ypots[j].maxdist[i] = 0; + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gix->ypots[j].maxdist[i] = 0;*/ return (GeoIdx*)gix; } /* =================================================== */ @@ -557,7 +676,7 @@ GeoIdx* GeoIndex_new(uint64_t objectId) { /* objects that may have been pointed to by the user's */ /* data pointers are (of course) not freed by this call*/ /* =================================================== */ -void GeoIndex_free(GeoIdx* gi) { +void GeoIndex_free(GeoIdx* gi) {// like drop GeoIx* gix; if (gi == nullptr) { @@ -565,8 +684,8 @@ void GeoIndex_free(GeoIdx* gi) { } gix = (GeoIx*)gi; - TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->gxc); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); + //TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->gxc); + //TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix); } /* =================================================== */ @@ -741,82 +860,6 @@ void GeoSetDistance(GeoDetailedPoint* gd, double snmd) { } } -/* CRUD interface */ -int SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/) -{ - //gc GeoCoordinate, element in point array of real geo index - memcpy(gc,gix->gxc+slot,sizeof(GeoCoordinate)); - - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, slot, true); - RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); - - rocksdb::ReadOptions opts; - rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); - if (s.IsNotFound()) { // TODO how to indicate not found? 
- memset(gc, 0, sizeof(GeoCoordinate)); - return -1; - } else if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } - VpackToCoord(val.slice(), gc); - return 0; -} -void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc) -{ - memcpy(gix->gxc+slot,gc,sizeof(GeoCoordinate)); - - VPackBuilder serialized = CoordToVpack(gc); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, slot, true); - RocksDBValue val = RocksDBValue::Document(serialized.slice()); - - rocksdb::WriteOptions opts; - rocksdb::Status s = db->Put(opts, key.string(), val.string()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } -} - -int PotRead(GeoIx * gix, int pot, GeoPot * gp) -{ - memcpy(gp,gix->ypots+pot,sizeof(GeoPot)); - - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); - RocksDBValue val = RocksDBValue::Empty(RocksDBEntryType::GeoIndexValue); - - rocksdb::ReadOptions opts; - rocksdb::Status s = db->Get(opts, key.string(), val.buffer()); - if (s.IsNotFound()) { // TODO how to indicate not found? 
- gp->LorLeaf = 0; - gp->RorPoints = 0; - return -1; - } else if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } - VpackToPot(val.slice(), gp); - return 0; -} -void PotWrite(GeoIx * gix, int pot, GeoPot * gp) { - memcpy(gix->ypots+pot,gp,sizeof(GeoPot)); - - VPackBuilder serialized = PotToVpack(gp); - rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); - RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false); - RocksDBValue val = RocksDBValue::Document(serialized.slice()); - - rocksdb::WriteOptions opts; - rocksdb::Status s = db->Put(opts, key.string(), val.string()); - if (!s.ok()) { - arangodb::Result r = rocksutils::convertStatus(s, rocksutils::index); - THROW_ARANGO_EXCEPTION_MESSAGE(r.errorNumber(), r.errorMessage()); - } -} - /* =================================================== */ /* GeoStackSet routine */ /* The searches (by count and by distance) both start */ @@ -1267,8 +1310,9 @@ GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c, /* return the specified slot to the free list */ /* =================================================== */ void GeoIndexFreeSlot(GeoIx* gix, int slot) { - gix->gxc[slot].latitude = gix->gxc[0].latitude; - gix->gxc[0].latitude = slot; + //gix->gxc[slot].latitude = gix->gxc[0].latitude; + //gix->gxc[0].latitude = slot; + // TODO delete slot } /* =================================================== */ /* GeoIndexNewSlot */ @@ -1286,11 +1330,11 @@ void GeoIndexFreeSlot(GeoIx* gix, int slot) { /* added to the index. 
*/ /* =================================================== */ int GeoIndexNewSlot(GeoIx* gix) { - int j; + //int j; GeoCoordinate* gc; - if (gix->gxc[0].latitude == 0.0) { - /* do the growth calculation in long long to make sure it doesn't */ - /* overflow when the size gets to be near 2^31 */ + /*if (gix->gxc[0].latitude == 0.0) { + // do the growth calculation in long long to make sure it doesn't + // overflow when the size gets to be near 2^31 long long x = gix->slotct; long long y = 100 + GeoIndexGROW; x = x * y + 99; @@ -1314,8 +1358,9 @@ int GeoIndexNewSlot(GeoIx* gix) { gix->slotct = newslotct; } j = (int)(gix->gxc[0].latitude); - gix->gxc[0].latitude = gix->gxc[j].latitude; - return j; + gix->gxc[0].latitude = gix->gxc[j].latitude;*/ + + return gix->nextFreeSlot++; } /* =================================================== */ /* GeoFind */ @@ -1573,7 +1618,7 @@ int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c) { /* new point, so we try to put it in */ slot = GeoIndexNewSlot(gix); if (slot == -2) return -2; /* no room :( */ - SlotRead(gix,slot,&Xslot); + //SlotRead(gix,slot,&Xslot); Xslot.latitude = c->latitude; Xslot.longitude = c->longitude; Xslot.data = c->data; @@ -1585,7 +1630,7 @@ int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c) { rebalance = 1; pot1 = GeoIndexNewPot(gix); int pot2 = GeoIndexNewPot(gix); - PotRead(gix,pot,&gp); /* XQXQ won't have to do this on Rocks */ + //PotRead(gix,pot,&gp); /* XQXQ won't have to do this on Rocks */ if ((pot1 == -2) || (pot2 == -2)) { GeoIndexFreeSlot(gix, slot); if (pot1 != -2) GeoIndexFreePot(gix, pot1); @@ -1608,8 +1653,8 @@ int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c) { /* new point (which has still not been inserted) shoud */ /* go. Continued below . . . . */ /* =================================================== */ - PotRead(gix,pot1,&gp1); //XQXQ don't think this is even needed now! - PotRead(gix,pot2,&gp2); //XQXQ on Rocks it is reading a pot that is not there. 
+ //PotRead(gix,pot1,&gp1); //XQXQ don't think this is even needed now! + //PotRead(gix,pot2,&gp2); //XQXQ on Rocks it is reading a pot that is not there. /* pot is old one, pot1 and pot2 are the new ones */ gp1.LorLeaf = 0; /* leaf pot */ gp1.RorPoints = 0; /* no points in it yet */ @@ -2403,6 +2448,7 @@ int RecursivePotValidate(GeoIx* gix, int pot, int* usage) { } } + // rethink this function int GeoIndex_INDEXVALID(GeoIdx* gi) { int usage[2]; // pots and slots int j, pot, slot; @@ -2439,11 +2485,13 @@ int GeoIndex_INDEXVALID(GeoIdx* gi) { #endif + // change to Approximate memory size_t GeoIndex_MemoryUsage(void* theIndex) { - GeoIx* geoIndex = (GeoIx*)theIndex; - if (geoIndex != nullptr) { - return geoIndex->_memoryUsed; - } + //GeoIx* geoIndex = (GeoIx*)theIndex; + //if (geoIndex != nullptr) { + // return geoIndex->_memoryUsed; + //} +#warning FIXME return 0; } }} diff --git a/js/server/tests/aql/aql-queries-geo-mmfiles.js b/js/server/tests/aql/aql-queries-geo.js similarity index 100% rename from js/server/tests/aql/aql-queries-geo-mmfiles.js rename to js/server/tests/aql/aql-queries-geo.js diff --git a/js/server/tests/shell/shell-shaped-mmfiles-noncluster.js b/js/server/tests/shell/shell-shaped-mmfiles-noncluster.js deleted file mode 100644 index acfc52894c..0000000000 --- a/js/server/tests/shell/shell-shaped-mmfiles-noncluster.js +++ /dev/null @@ -1,129 +0,0 @@ -/*jshint globalstrict:false, strict:false */ -/*global fail, assertFalse, assertTrue, assertEqual, assertUndefined */ - -//////////////////////////////////////////////////////////////////////////////// -/// @brief test the shaped json behavior -/// -/// @file -/// -/// DISCLAIMER -/// -/// Copyright 2010-2012 triagens GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. 
-/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is triAGENS GmbH, Cologne, Germany -/// -/// @author Jan Steemann -/// @author Copyright 2012, triAGENS GmbH, Cologne, Germany -//////////////////////////////////////////////////////////////////////////////// - -var jsunity = require("jsunity"); - -var arangodb = require("@arangodb"); -var db = arangodb.db; -var internal = require("internal"); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief test suite -//////////////////////////////////////////////////////////////////////////////// - -function GeoShapedJsonSuite () { - 'use strict'; - var cn = "UnitTestsCollectionShaped"; - var c; - - return { - -//////////////////////////////////////////////////////////////////////////////// -/// @brief set up -//////////////////////////////////////////////////////////////////////////////// - - setUp : function () { - db._drop(cn); - c = db._create(cn); - c.ensureGeoIndex("lat", "lon"); - - for (var i = -3; i < 3; ++i) { - for (var j = -3; j < 3; ++j) { - c.save({ distance: 0, lat: 40 + 0.01 * i, lon: 40 + 0.01 * j, something: "test" }); - } - } - - - // wait until the documents are actually shaped json - internal.wal.flush(true, true); - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief tear down -//////////////////////////////////////////////////////////////////////////////// - - tearDown : function () { - db._drop(cn); - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief call within 
function with "distance" attribute -//////////////////////////////////////////////////////////////////////////////// - - testDistance : function () { - var result = db._query( - "FOR u IN WITHIN(" + cn + ", 40.0, 40.0, 5000000, 'distance') " + - "SORT u.distance "+ - "RETURN { lat: u.lat, lon: u.lon, distance: u.distance }" - ).toArray(); - - // skip first result (which has a distance of 0) - for (var i = 1; i < result.length; ++i) { - var doc = result[i]; - - assertTrue(doc.hasOwnProperty("lat")); - assertTrue(doc.hasOwnProperty("lon")); - assertTrue(doc.hasOwnProperty("distance")); - assertTrue(doc.distance > 0); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief call near function with "distance" attribute -//////////////////////////////////////////////////////////////////////////////// - - testNear : function () { - var result = db._query( - "FOR u IN NEAR(" + cn + ", 40.0, 40.0, 5, 'something') SORT u.something " + - "RETURN { lat: u.lat, lon: u.lon, distance: u.something }") - .toArray(); - - // skip first result (which has a distance of 0) - for (var i = 1; i < result.length; ++i) { - var doc = result[i]; - - assertTrue(doc.hasOwnProperty("lat")); - assertTrue(doc.hasOwnProperty("lon")); - assertTrue(doc.hasOwnProperty("distance")); - assertTrue(doc.distance >= 0); - } - } - - }; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief executes the test suite -//////////////////////////////////////////////////////////////////////////////// - -jsunity.run(GeoShapedJsonSuite); - -return jsunity.done(); - diff --git a/js/server/tests/shell/shell-shaped-noncluster.js b/js/server/tests/shell/shell-shaped-noncluster.js index d9ef8ef845..acfc52894c 100644 --- a/js/server/tests/shell/shell-shaped-noncluster.js +++ b/js/server/tests/shell/shell-shaped-noncluster.js @@ -38,7 +38,7 @@ var internal = require("internal"); /// @brief test suite 
//////////////////////////////////////////////////////////////////////////////// -function DocumentShapedJsonSuite () { +function GeoShapedJsonSuite () { 'use strict'; var cn = "UnitTestsCollectionShaped"; var c; @@ -52,15 +52,15 @@ function DocumentShapedJsonSuite () { setUp : function () { db._drop(cn); c = db._create(cn); + c.ensureGeoIndex("lat", "lon"); - for (var i = 0; i < 100; ++i) { - c.save({ _key: "test" + i, - value: i, - text: "Test" + i, - values: [ i ], - one: { two: { three: [ 1 ] } } }); + for (var i = -3; i < 3; ++i) { + for (var j = -3; j < 3; ++j) { + c.save({ distance: 0, lat: 40 + 0.01 * i, lon: 40 + 0.01 * j, something: "test" }); + } } + // wait until the documents are actually shaped json internal.wal.flush(true, true); }, @@ -74,915 +74,45 @@ function DocumentShapedJsonSuite () { }, //////////////////////////////////////////////////////////////////////////////// -/// @brief save a Buffer object +/// @brief call within function with "distance" attribute //////////////////////////////////////////////////////////////////////////////// - testBuffer : function () { - var b = new Buffer('abcdefg', 'binary'); - c.save({ _key: "buffer", value: b }); - var doc = c.document("buffer"); - assertTrue(doc.hasOwnProperty("value")); - assertEqual(b.toJSON(), doc.value); - assertEqual([ 97, 98, 99, 100, 101, 102, 103 ], doc.value); - }, + testDistance : function () { + var result = db._query( + "FOR u IN WITHIN(" + cn + ", 40.0, 40.0, 5000000, 'distance') " + + "SORT u.distance "+ + "RETURN { lat: u.lat, lon: u.lon, distance: u.distance }" + ).toArray(); -//////////////////////////////////////////////////////////////////////////////// -/// @brief save a date object -//////////////////////////////////////////////////////////////////////////////// + // skip first result (which has a distance of 0) + for (var i = 1; i < result.length; ++i) { + var doc = result[i]; - testDate : function () { - var dt = new Date(); - c.save({ _key: "date", value: dt }); - var 
doc = c.document("date"); - assertTrue(doc.hasOwnProperty("value")); - assertEqual(dt.toJSON(), doc.value); - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief save a regexp object -//////////////////////////////////////////////////////////////////////////////// - - testRegexp : function () { - try { - c.save({ _key: "date", regexp : /foobar/ }); - fail(); - } - catch (err) { - assertEqual(internal.errors.ERROR_BAD_PARAMETER.code, err.errorNum); + assertTrue(doc.hasOwnProperty("lat")); + assertTrue(doc.hasOwnProperty("lon")); + assertTrue(doc.hasOwnProperty("distance")); + assertTrue(doc.distance > 0); } }, //////////////////////////////////////////////////////////////////////////////// -/// @brief save a function object +/// @brief call near function with "distance" attribute //////////////////////////////////////////////////////////////////////////////// - testFunction : function () { - try { - c.save({ _key: "date", func : function () { } }); - fail(); - } - catch (err) { - assertEqual(internal.errors.ERROR_BAD_PARAMETER.code, err.errorNum); - } - }, + testNear : function () { + var result = db._query( + "FOR u IN NEAR(" + cn + ", 40.0, 40.0, 5, 'something') SORT u.something " + + "RETURN { lat: u.lat, lon: u.lon, distance: u.something }") + .toArray(); -//////////////////////////////////////////////////////////////////////////////// -/// @brief check getting keys -//////////////////////////////////////////////////////////////////////////////// - - testGet : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - assertTrue(doc.hasOwnProperty("one")); - - assertEqual(cn + "/test" + i, doc._id); - assertEqual("test" + i, doc._key); - 
assertEqual(i, doc.value); - assertEqual("Test" + i, doc.text); - assertEqual([ i ], doc.values); - assertEqual({ two: { three: [ 1 ] } }, doc.one); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check getting keys -//////////////////////////////////////////////////////////////////////////////// - - testGetKeys : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - var keys = Object.keys(doc).sort(); - assertEqual([ "_id", "_key", "_rev", "one", "text", "value", "values" ], keys); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdatePseudo : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual(cn + "/test" + i, doc._id); - assertEqual("test" + i, doc._key); - assertEqual(i, doc.value); - assertEqual("Test" + i, doc.text); - assertEqual([ i ], doc.values); - - doc._id = "foobarbaz"; - doc._key = "meow"; - doc._rev = null; - - assertEqual("foobarbaz", doc._id); - assertEqual("meow", doc._key); - assertEqual(null, doc._rev); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShaped1 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc.value = "Tester" + i; - doc.text = 42 + i; - doc.values.push(i); - - assertEqual(cn + "/test" + i, doc._id); - assertEqual("test" + i, doc._key); - assertEqual("Tester" + i, doc.value); - assertEqual(42 + i, doc.text); - assertEqual([ i, i ], doc.values); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped 
json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShaped2 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual(i, doc.value); - - doc.value = 99; - assertEqual(99, doc.value); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShaped3 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual([ i ], doc.values); - - doc.someValue = 1; // need to do this to trigger copying - doc.values.push(42); - assertEqual([ i, 42 ], doc.values); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShapedNested1 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual({ two: { three: [ 1 ] } }, doc.one); - - doc.one = "removing the nested structure"; - assertTrue(doc.hasOwnProperty("one")); - assertEqual("removing the nested structure", doc.one); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShapedNested2 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual({ two: { three: [ 1 ] } }, doc.one); - - doc.someValue = 1; // need to do this to trigger copying - doc.one.two.three = "removing the nested structure"; - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.one.hasOwnProperty("two")); - assertTrue(doc.one.two.hasOwnProperty("three")); - assertEqual("removing the nested structure", 
doc.one.two.three); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdateShapedNested3 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual({ two: { three: [ 1 ] } }, doc.one); - doc.someValue = 1; // need to do this to trigger copying - - doc.one.two.four = 42; - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.one.hasOwnProperty("two")); - assertTrue(doc.one.two.hasOwnProperty("three")); - assertTrue(doc.one.two.hasOwnProperty("four")); - assertEqual([ 1 ], doc.one.two.three); - assertEqual(42, doc.one.two.four); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttributes1 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc.thisIsAnAttribute = 99; - - assertTrue(doc.hasOwnProperty("thisIsAnAttribute")); - assertEqual(99, doc.thisIsAnAttribute); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttributes2 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc["some attribute set now"] = "aha"; - - assertTrue(doc.hasOwnProperty("some attribute set now")); - assertEqual("aha", doc["some attribute set now"]); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttributesIndexed : function () { - for 
(var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc[1] = "aha"; - - assertTrue(doc.hasOwnProperty(1)); - assertTrue(doc.hasOwnProperty("1")); - assertEqual("aha", doc[1]); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttributesNested1 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc.someValue = 1; // need to do this to trigger copying - doc.one.test = { foo: "bar" }; - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.one.hasOwnProperty("two")); - assertTrue(doc.one.two.hasOwnProperty("three")); - assertTrue(doc.one.hasOwnProperty("test")); - assertEqual({ foo: "bar" }, doc.one.test); - assertEqual({ three: [ 1 ] }, doc.one.two); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttributesNested2 : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc.something = { foo: "bar" }; - assertTrue(doc.hasOwnProperty("something")); - assertTrue(doc.something.hasOwnProperty("foo")); - assertEqual("bar", doc.something.foo); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionPseudoFirst : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - 
assertTrue(doc.hasOwnProperty("values")); - - // delete pseudo-attributes first - delete doc._key; - assertFalse(doc.hasOwnProperty("_key")); - - delete doc._rev; - assertFalse(doc.hasOwnProperty("_rev")); - - delete doc._id; - assertFalse(doc.hasOwnProperty("_id")); - - delete doc.value; - assertFalse(doc.hasOwnProperty("value")); - - delete doc.text; - assertFalse(doc.hasOwnProperty("text")); - - delete doc.values; - assertFalse(doc.hasOwnProperty("values")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of special attribute _id -//////////////////////////////////////////////////////////////////////////////// - - testDeletionShapedKeyId : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete special attribute _id - delete doc._id; - assertFalse(doc.hasOwnProperty("_id")); - assertUndefined(doc._id); - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of special attributes from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionShapedKeyRev : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - 
assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete special attribute _key - delete doc._key; - assertFalse(doc.hasOwnProperty("_key")); - assertUndefined(doc._key); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete special attribute _rev - delete doc._rev; - assertFalse(doc.hasOwnProperty("_rev")); - assertFalse(doc.hasOwnProperty("_key")); - assertUndefined(doc._rev); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionShapedFirst : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete shaped attributes first - delete doc.value; - assertFalse(doc.hasOwnProperty("value")); - assertUndefined(doc.value); - - delete doc.text; - assertFalse(doc.hasOwnProperty("text")); - assertUndefined(doc.text); - - delete doc.values; - assertFalse(doc.hasOwnProperty("values")); - assertUndefined(doc.values); - - delete doc._key; - assertFalse(doc.hasOwnProperty("_key")); - assertUndefined(doc._key); - - delete doc._rev; - assertFalse(doc.hasOwnProperty("_rev")); - assertUndefined(doc._rev); - - delete doc._id; - assertFalse(doc.hasOwnProperty("_id")); - assertUndefined(doc._id); - } - }, - 
-//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion after deletion -//////////////////////////////////////////////////////////////////////////////// - - testDeletionDeletion : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("values")); - - assertEqual([ "_id", "_key", "_rev", "one", "text", "value", "values" ], Object.keys(doc).sort()); - - // delete _key - delete doc._key; - assertEqual([ "_id", "_rev", "one", "text", "value", "values" ], Object.keys(doc).sort()); - - // delete text - delete doc.text; - assertEqual([ "_id", "_rev", "one", "value", "values" ], Object.keys(doc).sort()); - - // delete _id - delete doc._id; - assertEqual([ "_rev", "one", "value", "values" ], Object.keys(doc).sort()); - - // delete value - delete doc.value; - assertEqual([ "_rev", "one", "values" ], Object.keys(doc).sort()); - - // delete _rev - delete doc._rev; - assertEqual([ "one", "values" ], Object.keys(doc).sort()); - - // delete values - delete doc.values; - assertEqual([ "one" ], Object.keys(doc).sort()); - - // delete one - delete doc.one; - assertEqual([ ], Object.keys(doc).sort()); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionAfterUpdate : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - 
assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - doc._key = "foobar"; - assertEqual("foobar", doc._key); - doc._rev = 12345; - assertEqual(12345, doc._rev); - doc._id = "foo"; - assertEqual("foo", doc._id); - - delete doc._key; - delete doc._rev; - - assertFalse(doc.hasOwnProperty("_rev")); - assertFalse(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_id")); - assertEqual("foo", doc._id); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionSomeAttributes : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - delete doc._key; - delete doc.value; - - assertFalse(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertFalse(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionIndexed : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - delete doc._key; - doc[9] = "42!"; - - assertFalse(doc.hasOwnProperty("_key")); - assertEqual("42!", doc[9]); - - delete doc[9]; - assertFalse(doc.hasOwnProperty(9)); - assertFalse(doc.hasOwnProperty("9")); - assertUndefined(doc[9]); - } - }, - 
-//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionNested : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - delete doc.one.two.three; - - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.one.hasOwnProperty("two")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check access after deletion of documents -//////////////////////////////////////////////////////////////////////////////// - - testAccessAfterDeletion : function () { - var docs = [ ]; - for (var i = 0; i < 100; ++i) { - docs[i] = c.document("test" + i); - } - - c.truncate(); - if (c.rotate) { - c.rotate(); - internal.wait(5); - } - - for (i = 0; i < 100; ++i) { - assertEqual(cn + "/test" + i, docs[i]._id); - assertEqual("test" + i, docs[i]._key); - assertEqual("Test" + i, docs[i].text); - assertEqual([ i ], docs[i].values); - assertEqual({ two: { three: [ 1 ] } }, docs[i].one); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check access after dropping collection -//////////////////////////////////////////////////////////////////////////////// - - testAccessAfterDropping : function () { - var docs = [ ]; - for (var i = 0; i < 100; ++i) { - docs[i] = c.document("test" + i); - } - - c.drop(); - - internal.wait(5); - - for (i = 0; i < 100; ++i) { - assertEqual(cn + "/test" + i, docs[i]._id); - assertEqual("test" + i, docs[i]._key); - assertEqual("Test" + i, docs[i].text); - assertEqual([ i ], docs[i].values); - assertEqual({ two: { three: [ 1 ] } }, docs[i].one); - } - } - - }; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief test suite -//////////////////////////////////////////////////////////////////////////////// - 
-function EdgeShapedJsonSuite () { - 'use strict'; - var cn = "UnitTestsCollectionShaped"; - var c; - - return { - -//////////////////////////////////////////////////////////////////////////////// -/// @brief set up -//////////////////////////////////////////////////////////////////////////////// - - setUp : function () { - db._drop(cn); - c = db._createEdgeCollection(cn); - - for (var i = 0; i < 100; ++i) { - c.save(cn + "/from" + i, - cn + "/to" + i, - { _key: "test" + i, - value: i, - text: "Test" + i, - values: [ i ], - one: { two: { three: [ 1 ] } } }); - } - - // wait until the documents are actually shaped json - internal.wal.flush(true, true); - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief tear down -//////////////////////////////////////////////////////////////////////////////// - - tearDown : function () { - db._drop(cn); - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check updating of keys in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testUpdatePseudo : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - assertEqual(cn + "/from" + i, doc._from); - assertEqual(cn + "/to" + i, doc._to); - - doc._from = "foobarbaz"; - doc._to = "meow"; - - assertEqual("foobarbaz", doc._from); - assertEqual("meow", doc._to); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check adding attributes in shaped json -//////////////////////////////////////////////////////////////////////////////// - - testAddAttribute : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - doc["some attribute set now"] = "aha"; - - assertTrue(doc.hasOwnProperty("some attribute set now")); - assertEqual("aha", doc["some attribute set now"]); - } - }, - 
-//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionPseudoFirst : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - - // delete pseudo-attributes - delete doc._from; - assertFalse(doc.hasOwnProperty("_from")); - - delete doc._to; - assertFalse(doc.hasOwnProperty("_to")); - - delete doc._key; - assertFalse(doc.hasOwnProperty("_key")); - - delete doc._rev; - assertFalse(doc.hasOwnProperty("_rev")); - - delete doc._id; - assertFalse(doc.hasOwnProperty("_id")); - - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionShapedFirst : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - assertTrue(doc.hasOwnProperty("value")); - - // delete shaped attributes first - delete doc.value; - assertFalse(doc.hasOwnProperty("value")); - assertUndefined(doc.value); - - delete doc._from; - assertFalse(doc.hasOwnProperty("_from")); - assertUndefined(doc._from); - - delete doc._to; - assertFalse(doc.hasOwnProperty("_to")); - assertUndefined(doc._to); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of special attributes from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionShapedKeyRev : function 
() { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete special attribute _key - delete doc._key; - assertFalse(doc.hasOwnProperty("_key")); - assertUndefined(doc._key); - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - - // delete special attribute _rev - delete doc._rev; - assertFalse(doc.hasOwnProperty("_rev")); - assertFalse(doc.hasOwnProperty("_key")); - assertUndefined(doc._rev); - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("values")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion of keys from shaped json -//////////////////////////////////////////////////////////////////////////////// - - testDeletionAfterUpdate : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - - doc._from = "foobar"; - assertEqual("foobar", doc._from); - doc._from = 12345; - assertEqual(12345, doc._from); - doc._to = "foo"; - assertEqual("foo", doc._to); - - delete doc._from; - delete doc._to; - - assertFalse(doc.hasOwnProperty("_from")); - 
assertFalse(doc.hasOwnProperty("_to")); - } - }, - -//////////////////////////////////////////////////////////////////////////////// -/// @brief check deletion after deletion -//////////////////////////////////////////////////////////////////////////////// - - testDeletionDeletion : function () { - for (var i = 0; i < 100; ++i) { - var doc = c.document("test" + i); - - // initial state - assertTrue(doc.hasOwnProperty("_from")); - assertTrue(doc.hasOwnProperty("_to")); - assertTrue(doc.hasOwnProperty("_key")); - assertTrue(doc.hasOwnProperty("_rev")); - assertTrue(doc.hasOwnProperty("_id")); - assertTrue(doc.hasOwnProperty("one")); - assertTrue(doc.hasOwnProperty("text")); - assertTrue(doc.hasOwnProperty("value")); - assertTrue(doc.hasOwnProperty("values")); - - var keys = Object.keys(doc).sort(); - assertEqual([ "_from", "_id", "_key", "_rev", "_to", "one", "text", "value", "values" ], keys); - - // delete _from - delete doc._from; - assertEqual([ "_id", "_key", "_rev", "_to", "one", "text", "value", "values" ], Object.keys(doc).sort()); - - // delete _to - delete doc._to; - assertEqual([ "_id", "_key", "_rev", "one", "text", "value", "values" ], Object.keys(doc).sort()); - - // delete _key - delete doc._key; - assertEqual([ "_id", "_rev", "one", "text", "value", "values" ], Object.keys(doc).sort()); - - // delete text - delete doc.text; - assertEqual([ "_id", "_rev", "one", "value", "values" ], Object.keys(doc).sort()); - - // delete _id - delete doc._id; - assertEqual([ "_rev", "one", "value", "values" ], Object.keys(doc).sort()); - - // delete value - delete doc.value; - assertEqual([ "_rev", "one", "values" ], Object.keys(doc).sort()); - - // delete _rev - delete doc._rev; - assertEqual([ "one", "values" ], Object.keys(doc).sort()); - - // delete values - delete doc.values; - assertEqual([ "one" ], Object.keys(doc).sort()); + // skip first result (which has a distance of 0) + for (var i = 1; i < result.length; ++i) { + var doc = result[i]; - // delete one - 
delete doc.one; - assertEqual([ ], Object.keys(doc).sort()); + assertTrue(doc.hasOwnProperty("lat")); + assertTrue(doc.hasOwnProperty("lon")); + assertTrue(doc.hasOwnProperty("distance")); + assertTrue(doc.distance >= 0); } } @@ -993,8 +123,7 @@ function EdgeShapedJsonSuite () { /// @brief executes the test suite //////////////////////////////////////////////////////////////////////////////// -jsunity.run(DocumentShapedJsonSuite); -jsunity.run(EdgeShapedJsonSuite); +jsunity.run(GeoShapedJsonSuite); return jsunity.done(); From df7e12d79b1d7fb9461d5d9a0c4ee97ff2c8283e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 15:04:51 +0200 Subject: [PATCH 13/18] Working geo index + tests --- arangod/Aql/OptimizerRules.cpp | 935 +++++++++++++++--- arangod/Aql/OptimizerRules.h | 2 + arangod/Aql/OptimizerRulesFeature.cpp | 4 + arangod/MMFiles/MMFilesOptimizerRules.cpp | 537 ---------- arangod/MMFiles/MMFilesOptimizerRules.h | 2 - arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 7 +- 6 files changed, 796 insertions(+), 691 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 7e8ed01be5..d34a03cae6 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -23,10 +23,10 @@ //////////////////////////////////////////////////////////////////////////////// #include "OptimizerRules.h" -#include "Aql/CollectOptions.h" #include "Aql/ClusterNodes.h" -#include "Aql/Collection.h" #include "Aql/CollectNode.h" +#include "Aql/CollectOptions.h" +#include "Aql/Collection.h" #include "Aql/ConditionFinder.h" #include "Aql/ExecutionEngine.h" #include "Aql/ExecutionNode.h" @@ -60,7 +60,8 @@ using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; /// @brief adds a SORT operation for IN right-hand side operands -void arangodb::aql::sortInValuesRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::sortInValuesRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* 
rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -235,9 +236,9 @@ void arangodb::aql::sortInValuesRule(Optimizer* opt, std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::removeRedundantSortsRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::SORT, true); @@ -276,8 +277,8 @@ void arangodb::aql::removeRedundantSortsRule(Optimizer* opt, if (current->getType() == EN::SORT) { // we found another sort. now check if they are compatible! - auto other = static_cast(current) - ->getSortInformation(plan.get(), &buffer); + auto other = static_cast(current)->getSortInformation( + plan.get(), &buffer); switch (sortInfo.isCoveredBy(other)) { case SortInformation::unequal: { @@ -365,7 +366,8 @@ void arangodb::aql::removeRedundantSortsRule(Optimizer* opt, current->addDependencies(stack); } - if (toUnlink.find(n) == toUnlink.end() && sortNode->simplify(plan.get())) { + if (toUnlink.find(n) == toUnlink.end() && + sortNode->simplify(plan.get())) { // sort node had only constant expressions. 
it will make no difference // if we execute it or not // so we can remove it @@ -385,9 +387,9 @@ void arangodb::aql::removeRedundantSortsRule(Optimizer* opt, /// this rule modifies the plan in place: /// - filters that are always true are removed completely /// - filters that are always false will be replaced by a NoResults node -void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::removeUnnecessaryFiltersRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::FILTER, true); @@ -449,9 +451,9 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt, /// @brief remove INTO of a COLLECT if not used /// additionally remove all unused aggregate calculations from a COLLECT -void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::removeCollectVariablesRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::COLLECT, true); @@ -477,13 +479,13 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, std::pair> const& aggregate) -> bool { - if (varsUsedLater.find(aggregate.first) == varsUsedLater.end()) { - // result of aggregate function not used later - modified = true; - return true; - } - return false; - }); + if (varsUsedLater.find(aggregate.first) == varsUsedLater.end()) { + // result of aggregate function not used later + modified = true; + return true; + } + return false; + }); } opt->addPlan(std::move(plan), rule, modified); @@ -700,7 +702,8 @@ class PropagateConstantAttributesHelper { /// @brief propagate constant attributes in FILTERs void arangodb::aql::propagateConstantAttributesRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule 
const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { PropagateConstantAttributesHelper helper; helper.propagateConstants(plan.get()); @@ -711,7 +714,8 @@ void arangodb::aql::propagateConstantAttributesRule( /// this rule modifies the plan in place /// it aims to move up calculations as far up in the plan as possible, to /// avoid redundant calculations in inner loops -void arangodb::aql::moveCalculationsUpRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::moveCalculationsUpRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -779,9 +783,9 @@ void arangodb::aql::moveCalculationsUpRule(Optimizer* opt, std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::moveCalculationsDownRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::CALCULATION, true); @@ -837,8 +841,7 @@ void arangodb::aql::moveCalculationsDownRule(Optimizer* opt, currentType == EN::ENUMERATE_LIST || currentType == EN::TRAVERSAL || currentType == EN::SHORTEST_PATH || - currentType == EN::COLLECT || - currentType == EN::NORESULTS) { + currentType == EN::COLLECT || currentType == EN::NORESULTS) { // we will not push further down than such nodes shouldMove = false; break; @@ -867,7 +870,8 @@ void arangodb::aql::moveCalculationsDownRule(Optimizer* opt, /// @brief determine the "right" type of CollectNode and /// add a sort node for each COLLECT (note: the sort may be removed later) /// this rule cannot be turned off (otherwise, the query result might be wrong!) 
-void arangodb::aql::specializeCollectRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::specializeCollectRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -956,7 +960,8 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt, std::unique_ptrnextId(), sortElements, true); + auto sortNode = + new SortNode(plan.get(), plan->nextId(), sortElements, true); plan->registerNode(sortNode); TRI_ASSERT(collectNode->hasDependency()); @@ -973,7 +978,8 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::splitFiltersRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -1014,8 +1020,8 @@ void arangodb::aql::splitFiltersRule(Optimizer* opt, std::unique_ptrgetAst()->variables()->createTemporaryVariable(); auto expression = new Expression(plan->getAst(), current); try { - calculationNode = - new CalculationNode(plan.get(), plan->nextId(), expression, outVar); + calculationNode = new CalculationNode(plan.get(), plan->nextId(), + expression, outVar); } catch (...) 
{ delete expression; throw; @@ -1044,7 +1050,8 @@ void arangodb::aql::splitFiltersRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::moveFiltersUpRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -1128,17 +1135,18 @@ class arangodb::aql::RedundantCalculationsReplacer final public: explicit RedundantCalculationsReplacer( std::unordered_map const& replacements) - : _replacements(replacements) { - } + : _replacements(replacements) {} template void replaceStartTargetVariables(ExecutionNode* en) { auto node = static_cast(en); if (node->_inStartVariable != nullptr) { - node->_inStartVariable = Variable::replace(node->_inStartVariable, _replacements); + node->_inStartVariable = + Variable::replace(node->_inStartVariable, _replacements); } if (node->_inTargetVariable != nullptr) { - node->_inTargetVariable = Variable::replace(node->_inTargetVariable, _replacements); + node->_inTargetVariable = + Variable::replace(node->_inTargetVariable, _replacements); } } @@ -1202,17 +1210,20 @@ class arangodb::aql::RedundantCalculationsReplacer final variable.second = Variable::replace(variable.second, _replacements); } for (auto& variable : node->_aggregateVariables) { - variable.second.first = Variable::replace(variable.second.first, _replacements); + variable.second.first = + Variable::replace(variable.second.first, _replacements); } if (node->_expressionVariable != nullptr) { - node->_expressionVariable = Variable::replace(node->_expressionVariable, _replacements); + node->_expressionVariable = + Variable::replace(node->_expressionVariable, _replacements); } for (auto const& it : _replacements) { node->_variableMap.emplace(it.second->id, it.second->name); } // node->_keepVariables does not need to be updated at the moment as the - // "remove-redundant-calculations" rule will stop when it finds a COLLECT - // with an INTO, and the "inline-subqueries" rule will abort there as well 
+ // "remove-redundant-calculations" rule will stop when it finds a + // COLLECT with an INTO, and the "inline-subqueries" rule will abort + // there as well break; } @@ -1238,13 +1249,16 @@ class arangodb::aql::RedundantCalculationsReplacer final auto node = static_cast(en); if (node->_inDocVariable != nullptr) { - node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements); + node->_inDocVariable = + Variable::replace(node->_inDocVariable, _replacements); } if (node->_insertVariable != nullptr) { - node->_insertVariable = Variable::replace(node->_insertVariable, _replacements); + node->_insertVariable = + Variable::replace(node->_insertVariable, _replacements); } if (node->_updateVariable != nullptr) { - node->_updateVariable = Variable::replace(node->_updateVariable, _replacements); + node->_updateVariable = + Variable::replace(node->_updateVariable, _replacements); } break; } @@ -1253,10 +1267,12 @@ class arangodb::aql::RedundantCalculationsReplacer final auto node = static_cast(en); if (node->_inDocVariable != nullptr) { - node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements); + node->_inDocVariable = + Variable::replace(node->_inDocVariable, _replacements); } if (node->_inKeyVariable != nullptr) { - node->_inKeyVariable = Variable::replace(node->_inKeyVariable, _replacements); + node->_inKeyVariable = + Variable::replace(node->_inKeyVariable, _replacements); } break; } @@ -1265,10 +1281,12 @@ class arangodb::aql::RedundantCalculationsReplacer final auto node = static_cast(en); if (node->_inDocVariable != nullptr) { - node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements); + node->_inDocVariable = + Variable::replace(node->_inDocVariable, _replacements); } if (node->_inKeyVariable != nullptr) { - node->_inKeyVariable = Variable::replace(node->_inKeyVariable, _replacements); + node->_inKeyVariable = + Variable::replace(node->_inKeyVariable, _replacements); } break; } @@ -1289,7 +1307,8 @@ class 
arangodb::aql::RedundantCalculationsReplacer final /// @brief remove CalculationNode(s) that are repeatedly used in a query /// (i.e. common expressions) void arangodb::aql::removeRedundantCalculationsRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::CALCULATION, true); @@ -1346,8 +1365,10 @@ void arangodb::aql::removeRedundantCalculationsRule( continue; } - bool const isEqual = (buffer.length() == referenceExpression.size() && - memcmp(buffer.c_str(), referenceExpression.c_str(), buffer.length()) == 0); + bool const isEqual = + (buffer.length() == referenceExpression.size() && + memcmp(buffer.c_str(), referenceExpression.c_str(), + buffer.length()) == 0); buffer.reset(); if (isEqual) { @@ -1410,14 +1431,15 @@ void arangodb::aql::removeRedundantCalculationsRule( RedundantCalculationsReplacer finder(replacements); plan->root()->walk(&finder); } - + opt->addPlan(std::move(plan), rule, !replacements.empty()); } /// @brief remove CalculationNodes and SubqueryNodes that are never needed /// this modifies an existing plan in place void arangodb::aql::removeUnnecessaryCalculationsRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { std::vector const types{EN::CALCULATION, EN::SUBQUERY}; @@ -1471,7 +1493,8 @@ void arangodb::aql::removeUnnecessaryCalculationsRule( continue; } - AstNode const* rootNode = static_cast(n)->expression()->node(); + AstNode const* rootNode = + static_cast(n)->expression()->node(); if (rootNode->type == NODE_TYPE_REFERENCE) { // if the LET is a simple reference to another variable, e.g. 
LET a = b @@ -1494,7 +1517,8 @@ void arangodb::aql::removeUnnecessaryCalculationsRule( if (!hasCollectWithOutVariable) { // no COLLECT found, now replace std::unordered_map replacements; - replacements.emplace(outvars[0]->id, static_cast(rootNode->getData())); + replacements.emplace(outvars[0]->id, static_cast( + rootNode->getData())); RedundantCalculationsReplacer finder(replacements); plan->root()->walk(&finder); @@ -1559,9 +1583,8 @@ void arangodb::aql::removeUnnecessaryCalculationsRule( // expression types (V8 vs. non-V8) do not match. give up continue; } - - if (!n->isInInnerLoop() && - rootNode->callsFunction() && + + if (!n->isInInnerLoop() && rootNode->callsFunction() && other->isInInnerLoop()) { // original expression calls a function and is not contained in a loop // we're about to move this expression into a loop, but we don't want @@ -1585,7 +1608,8 @@ void arangodb::aql::removeUnnecessaryCalculationsRule( } /// @brief useIndex, try to use an index for filtering -void arangodb::aql::useIndexesRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::useIndexesRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { // These are all the nodes where we start traversing (including all // subqueries) @@ -1649,7 +1673,9 @@ struct SortToIndexNode final : public WalkerWorker { return true; } - SortCondition sortCondition(_sorts, std::vector>(), _variableDefinitions); + SortCondition sortCondition( + _sorts, std::vector>(), + _variableDefinitions); if (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess() && sortCondition.isUnidirectional()) { @@ -1661,9 +1687,8 @@ struct SortToIndexNode final : public WalkerWorker { auto trx = _plan->getAst()->query()->trx(); size_t coveredAttributes = 0; auto resultPair = trx->getIndexForSortCondition( - enumerateCollectionNode->collection()->getName(), - &sortCondition, outVariable, - enumerateCollectionNode->collection()->count(trx), + enumerateCollectionNode->collection()->getName(), 
&sortCondition, + outVariable, enumerateCollectionNode->collection()->count(trx), usedIndexes, coveredAttributes); if (resultPair.second) { // If this bit is set, then usedIndexes has length exactly one @@ -1673,8 +1698,8 @@ struct SortToIndexNode final : public WalkerWorker { std::unique_ptr newNode(new IndexNode( _plan, _plan->nextId(), enumerateCollectionNode->vocbase(), - enumerateCollectionNode->collection(), outVariable, - usedIndexes, condition.get(), sortCondition.isDescending())); + enumerateCollectionNode->collection(), outVariable, usedIndexes, + condition.get(), sortCondition.isDescending())); condition.release(); @@ -1749,14 +1774,17 @@ struct SortToIndexNode final : public WalkerWorker { bool handled = false; if (indexes.size() == 1 && isSorted) { - // if we have just a single index and we can use it for the filtering condition, - // then we can use the index for sorting, too. regardless of it the index is sparse or not. - // because the index would only return non-null attributes anyway, so we do not need - // to care about null values when sorting here + // if we have just a single index and we can use it for the filtering + // condition, then we can use the index for sorting, too. regardless of it + // the index is sparse or not. 
because the index would only return + // non-null attributes anyway, so we do not need to care about null values + // when sorting here isSparse = false; } - SortCondition sortCondition(_sorts, cond->getConstAttributes(outVariable, !isSparse), _variableDefinitions); + SortCondition sortCondition( + _sorts, cond->getConstAttributes(outVariable, !isSparse), + _variableDefinitions); bool const isOnlyAttributeAccess = (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess()); @@ -1805,7 +1833,8 @@ struct SortToIndexNode final : public WalkerWorker { _modified = true; } else if (numCovered > 0 && sortCondition.isUnidirectional()) { // remove the first few attributes if they are constant - SortNode* sortNode = static_cast(_plan->getNodeById(_sortNode->id())); + SortNode* sortNode = + static_cast(_plan->getNodeById(_sortNode->id())); sortNode->removeConditions(numCovered); _modified = true; } @@ -1876,7 +1905,8 @@ struct SortToIndexNode final : public WalkerWorker { } }; -void arangodb::aql::useIndexForSortRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::useIndexForSortRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -1900,8 +1930,8 @@ void arangodb::aql::useIndexForSortRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { - + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::FILTER, true); @@ -1955,8 +1985,8 @@ void arangodb::aql::removeFiltersCoveredByIndexRule( if (indexesUsed.size() == 1) { // single index. this is something that we can handle - auto newNode = condition->removeIndexCondition(plan.get(), - indexNode->outVariable(), indexCondition->root()); + auto newNode = condition->removeIndexCondition( + plan.get(), indexNode->outVariable(), indexCondition->root()); if (newNode == nullptr) { // no condition left... 
@@ -2037,12 +2067,13 @@ static bool NextPermutationTuple(std::vector& data, /// @brief interchange adjacent EnumerateCollectionNodes in all possible ways void arangodb::aql::interchangeAdjacentEnumerationsRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; std::vector const types = { - ExecutionNode::ENUMERATE_COLLECTION, ExecutionNode::ENUMERATE_LIST }; + ExecutionNode::ENUMERATE_COLLECTION, ExecutionNode::ENUMERATE_LIST}; plan->findNodesOfType(nodes, types, true); std::unordered_set nodesSet; @@ -2077,7 +2108,8 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule( break; } - if (n->getType() == EN::ENUMERATE_LIST && dep->getType() == EN::ENUMERATE_LIST) { + if (n->getType() == EN::ENUMERATE_LIST && + dep->getType() == EN::ENUMERATE_LIST) { break; } @@ -2151,7 +2183,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule( opt->addPlan(std::move(newPlan), rule, true); } while (NextPermutationTuple(permTuple, starts)); } - + opt->addPlan(std::move(plan), rule, false); } @@ -2159,7 +2191,8 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule( /// this rule inserts scatter, gather and remote nodes so operations on sharded /// collections actually work /// it will change plans in place -void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::scatterInClusterRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { bool wasModified = false; @@ -2179,9 +2212,13 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptr const types = { - ExecutionNode::ENUMERATE_COLLECTION, ExecutionNode::INDEX, - ExecutionNode::INSERT, ExecutionNode::UPDATE, ExecutionNode::REPLACE, - ExecutionNode::REMOVE, ExecutionNode::UPSERT}; + ExecutionNode::ENUMERATE_COLLECTION, + ExecutionNode::INDEX, + ExecutionNode::INSERT, + 
ExecutionNode::UPDATE, + ExecutionNode::REPLACE, + ExecutionNode::REMOVE, + ExecutionNode::UPSERT}; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -2267,8 +2304,8 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptraddDependency(deps[0]); // insert a remote node - ExecutionNode* remoteNode = - new RemoteNode(plan.get(), plan->nextId(), vocbase, collection, "", "", ""); + ExecutionNode* remoteNode = new RemoteNode( + plan.get(), plan->nextId(), vocbase, collection, "", "", ""); plan->registerNode(remoteNode); TRI_ASSERT(scatterNode); remoteNode->addDependency(scatterNode); @@ -2277,8 +2314,8 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptraddDependency(remoteNode); // insert another remote node - remoteNode = - new RemoteNode(plan.get(), plan->nextId(), vocbase, collection, "", "", ""); + remoteNode = new RemoteNode(plan.get(), plan->nextId(), vocbase, + collection, "", "", ""); plan->registerNode(remoteNode); TRI_ASSERT(node); remoteNode->addDependency(node); @@ -2323,7 +2360,8 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::distributeInClusterRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { bool wasModified = false; @@ -2388,8 +2426,8 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptrgetCollection(collection->vocbase->name(), - collection->name); + auto collInfo = + ci->getCollection(collection->vocbase->name(), collection->name); // Throws if collection is not found! 
if (collInfo->isSmart() && collInfo->type() == TRI_COL_TYPE_EDGE) { distributeInClusterRuleSmartEdgeCollection(opt, std::move(plan), rule); @@ -2439,8 +2477,9 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptrgetVariablesUsedHere()[0]; - distNode = new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, - inputVariable->id, createKeys, true); + distNode = + new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, + inputVariable->id, createKeys, true); } else if (nodeType == ExecutionNode::REPLACE) { std::vector v = node->getVariablesUsedHere(); if (defaultSharding && v.size() > 1) { @@ -2450,8 +2489,9 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptrnextId(), vocbase, collection, - inputVariable->id, false, v.size() > 1); + distNode = + new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, + inputVariable->id, false, v.size() > 1); } else if (nodeType == ExecutionNode::UPDATE) { std::vector v = node->getVariablesUsedHere(); if (v.size() > 1) { @@ -2463,15 +2503,16 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptr IN inputVariable = v[0]; } - distNode = new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, - inputVariable->id, false, v.size() > 1); + distNode = + new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, + inputVariable->id, false, v.size() > 1); } else if (nodeType == ExecutionNode::UPSERT) { // an UPSERT node has two input variables! 
std::vector v(node->getVariablesUsedHere()); TRI_ASSERT(v.size() >= 2); - auto d = new DistributeNode(plan.get(), plan->nextId(), vocbase, collection, - v[0]->id, v[1]->id, true, true); + auto d = new DistributeNode(plan.get(), plan->nextId(), vocbase, + collection, v[0]->id, v[1]->id, true, true); d->setAllowSpecifiedKeys(true); distNode = static_cast(d); } else { @@ -2485,8 +2526,8 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptraddDependency(deps[0]); // insert a remote node - ExecutionNode* remoteNode = - new RemoteNode(plan.get(), plan->nextId(), vocbase, collection, "", "", ""); + ExecutionNode* remoteNode = new RemoteNode(plan.get(), plan->nextId(), + vocbase, collection, "", "", ""); plan->registerNode(remoteNode); remoteNode->addDependency(distNode); @@ -2494,8 +2535,8 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptraddDependency(remoteNode); // insert another remote node - remoteNode = - new RemoteNode(plan.get(), plan->nextId(), vocbase, collection, "", "", ""); + remoteNode = new RemoteNode(plan.get(), plan->nextId(), vocbase, collection, + "", "", ""); plan->registerNode(remoteNode); remoteNode->addDependency(node); @@ -2524,7 +2565,8 @@ void arangodb::aql::distributeInClusterRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { bool modified = false; SmallVector::allocator_type::arena_type a; @@ -2627,9 +2669,9 @@ void arangodb::aql::distributeFilternCalcToClusterRule( /// as small as possible as early as possible /// /// filters are not pushed beyond limits -void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::distributeSortToClusterRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::GATHER, true); 
@@ -2691,7 +2733,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, // then unlink the filter/calculator from the plan plan->unlinkNode(inspectNode); // and re-insert into plan in front of the remoteNode - if (thisSortNode->_reinsertInCluster){ + if (thisSortNode->_reinsertInCluster) { plan->insertDependency(rn, inspectNode); } if (gatherNode->collection()->numberOfShards() > 1) { @@ -2713,8 +2755,8 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, /// @brief try to get rid of a RemoteNode->ScatterNode combination which has /// only a SingletonNode and possibly some CalculationNodes as dependencies void arangodb::aql::removeUnnecessaryRemoteScatterRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { - + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::REMOTE, true); @@ -2832,7 +2874,8 @@ class RemoveToEnumCollFinder final : public WalkerWorker { } // check the remove node's collection is sharded over _key std::vector shardKeys = rn->collection()->shardKeys(); - if (shardKeys.size() != 1 || shardKeys[0] != StaticStrings::KeyString) { + if (shardKeys.size() != 1 || + shardKeys[0] != StaticStrings::KeyString) { break; // abort . . . } @@ -2954,7 +2997,8 @@ class RemoveToEnumCollFinder final : public WalkerWorker { /// @brief recognizes that a RemoveNode can be moved to the shards. void arangodb::aql::undistributeRemoveAfterEnumCollRule( - Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::REMOVE, true); @@ -3090,8 +3134,7 @@ struct OrSimplifier { std::string stringifyNode(AstNode const* node) const { try { return node->toString(); - } - catch (...) { + } catch (...) 
{ } return std::string(); } @@ -3111,7 +3154,8 @@ struct OrSimplifier { return false; } - bool detect(AstNode const* node, bool preferRight, std::string& attributeName, AstNode const*& attr, AstNode const*& value) const { + bool detect(AstNode const* node, bool preferRight, std::string& attributeName, + AstNode const*& attr, AstNode const*& value) const { attributeName.clear(); if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) { @@ -3133,8 +3177,7 @@ struct OrSimplifier { } } // fallthrough intentional - } - else if (node->type == NODE_TYPE_OPERATOR_BINARY_IN) { + } else if (node->type == NODE_TYPE_OPERATOR_BINARY_IN) { auto lhs = node->getMember(0); auto rhs = node->getMember(1); if (rhs->isArray() && qualifies(lhs, attributeName)) { @@ -3150,15 +3193,16 @@ struct OrSimplifier { return false; } - AstNode* buildValues(AstNode const* attr, AstNode const* lhs, bool leftIsArray, AstNode const* rhs, bool rightIsArray) const { + AstNode* buildValues(AstNode const* attr, AstNode const* lhs, + bool leftIsArray, AstNode const* rhs, + bool rightIsArray) const { auto values = ast->createNodeArray(); if (leftIsArray) { size_t const n = lhs->numMembers(); for (size_t i = 0; i < n; ++i) { values->addMember(lhs->getMemberUnchecked(i)); } - } - else { + } else { values->addMember(lhs); } @@ -3167,12 +3211,12 @@ struct OrSimplifier { for (size_t i = 0; i < n; ++i) { values->addMember(rhs->getMemberUnchecked(i)); } - } - else { + } else { values->addMember(rhs); } - return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN, attr, values); + return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN, attr, + values); } AstNode* simplify(AstNode const* node) const { @@ -3192,8 +3236,10 @@ struct OrSimplifier { node = ast->createNodeBinaryOperator(node->type, lhsNew, rhsNew); } - if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) && - (rhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || rhsNew->type == 
NODE_TYPE_OPERATOR_BINARY_IN)) { + if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || + lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) && + (rhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || + rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN)) { std::string leftName; std::string rightName; AstNode const* leftAttr = nullptr; @@ -3205,7 +3251,10 @@ struct OrSimplifier { if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) && detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) && leftName == rightName) { - return buildValues(leftAttr, leftValue, lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN, rightValue, rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN); + return buildValues(leftAttr, leftValue, + lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN, + rightValue, + rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN); } } } @@ -3239,7 +3288,8 @@ struct OrSimplifier { // x.val IN [1,2,3] // when the OR conditions are present in the same FILTER node, and refer to the // same (single) attribute. -void arangodb::aql::replaceOrWithInRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::replaceOrWithInRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -3279,7 +3329,8 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, std::unique_ptrnextId(), expr, outVar[0]); + newNode = + new CalculationNode(plan.get(), plan->nextId(), expr, outVar[0]); } catch (...) { delete expr; throw; @@ -3302,13 +3353,13 @@ struct RemoveRedundantOr { CommonNodeFinder finder; AstNode const* commonNode = nullptr; std::string commonName; - + bool hasRedundantCondition(AstNode const* node) { try { if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_LT, commonNode, commonName)) { return hasRedundantConditionWalker(node); - } + } } catch (...) 
{ // ignore errors and simply return false } @@ -3424,7 +3475,8 @@ struct RemoveRedundantOr { } }; -void arangodb::aql::removeRedundantOrRule(Optimizer* opt, std::unique_ptr plan, +void arangodb::aql::removeRedundantOrRule(Optimizer* opt, + std::unique_ptr plan, OptimizerRule const* rule) { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -3462,7 +3514,8 @@ void arangodb::aql::removeRedundantOrRule(Optimizer* opt, std::unique_ptrgetAst(), astNode); try { - newNode = new CalculationNode(plan.get(), plan->nextId(), expr, outVar[0]); + newNode = + new CalculationNode(plan.get(), plan->nextId(), expr, outVar[0]); } catch (...) { delete expr; throw; @@ -3480,7 +3533,8 @@ void arangodb::aql::removeRedundantOrRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { bool modified = false; std::vector const types = { EN::REMOVE, EN::INSERT, EN::UPDATE, EN::REPLACE, EN::UPSERT}; @@ -3512,9 +3566,9 @@ void arangodb::aql::removeDataModificationOutVariablesRule( /// @brief patch UPDATE statement on single collection that iterates over the /// entire collection to operate in batches -void arangodb::aql::patchUpdateStatementsRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { +void arangodb::aql::patchUpdateStatementsRule( + Optimizer* opt, std::unique_ptr plan, + OptimizerRule const* rule) { // no need to dive into subqueries here, as UPDATE needs to be on the top // level SmallVector::allocator_type::arena_type a; @@ -3746,17 +3800,21 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, if (listNode->inVariable() == out) { // bingo! 
auto queryVariables = plan->getAst()->variables(); - std::vector subNodes(subqueryNode->getSubquery()->getDependencyChain(true)); + std::vector subNodes( + subqueryNode->getSubquery()->getDependencyChain(true)); - // check if the subquery result variable is used after the FOR loop as well - std::unordered_set varsUsedLater(listNode->getVarsUsedLater()); - if (varsUsedLater.find(listNode->inVariable()) != varsUsedLater.end()) { + // check if the subquery result variable is used after the FOR loop as + // well + std::unordered_set varsUsedLater( + listNode->getVarsUsedLater()); + if (varsUsedLater.find(listNode->inVariable()) != + varsUsedLater.end()) { // exit the loop current = nullptr; break; } - TRI_ASSERT(! subNodes.empty()); + TRI_ASSERT(!subNodes.empty()); auto returnNode = static_cast(subNodes[0]); TRI_ASSERT(returnNode->getType() == EN::RETURN); @@ -3779,14 +3837,15 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, // and now insert them one level up if (it != returnNode) { - // we skip over the subquery's return node. we don't need it anymore + // we skip over the subquery's return node. 
we don't need it + // anymore insert->removeDependencies(); TRI_ASSERT(it != nullptr); insert->addDependency(it); insert = it; - // additionally rename the variables from the subquery so they cannot - // conflict with the ones from the top query + // additionally rename the variables from the subquery so they + // cannot conflict with the ones from the top query for (auto const& variable : it->getVariablesSetHere()) { queryVariables->renameVariable(variable->id); } @@ -3801,11 +3860,13 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, // remove the list node from the plan plan->unlinkNode(listNode, false); - queryVariables->renameVariable(returnNode->inVariable()->id, listNode->outVariable()->name); + queryVariables->renameVariable(returnNode->inVariable()->id, + listNode->outVariable()->name); // finally replace the variables std::unordered_map replacements; - replacements.emplace(listNode->outVariable()->id, returnNode->inVariable()); + replacements.emplace(listNode->outVariable()->id, + returnNode->inVariable()); RedundantCalculationsReplacer finder(replacements); plan->root()->walk(&finder); @@ -3836,3 +3897,581 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, opt->addPlan(std::move(plan), rule, modified); } + +struct GeoIndexInfo { + operator bool() const { return distanceNode && valid; } + void invalidate() { valid = false; } + GeoIndexInfo() + : collectionNode(nullptr), + executionNode(nullptr), + indexNode(nullptr), + setter(nullptr), + expressionParent(nullptr), + expressionNode(nullptr), + distanceNode(nullptr), + index(nullptr), + range(nullptr), + executionNodeType(EN::NORESULTS), + within(false), + lessgreaterequal(false), + valid(true), + constantPair{nullptr, nullptr} {} + EnumerateCollectionNode* + collectionNode; // node that will be replaced by (geo) IndexNode + ExecutionNode* executionNode; // start node that is a sort or filter + IndexNode* indexNode; // AstNode that is the parent of the Node + CalculationNode* + setter; // 
node that has contains the condition for filter or sort + AstNode* expressionParent; // AstNode that is the parent of the Node + AstNode* expressionNode; // AstNode that contains the sort/filter condition + AstNode* distanceNode; // AstNode that contains the distance parameters + std::shared_ptr index; // pointer to geoindex + AstNode const* range; // range for within + ExecutionNode::NodeType + executionNodeType; // type of execution node sort or filter + bool within; // is this a within lookup + bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) + bool valid; // contains this node a valid condition + std::vector longitude; // access path to longitude + std::vector latitude; // access path to latitude + std::pair constantPair; +}; + +// candidate checking + +AstNode* isValueOrRefNode(AstNode* node) { + // TODO - implement me + return node; +} + +GeoIndexInfo isDistanceFunction(AstNode* distanceNode, + AstNode* expressionParent) { + // the expression must exist and it must be a function call + auto rv = GeoIndexInfo{}; + if (distanceNode->type != NODE_TYPE_FCALL) { + return rv; + } + + // get the ast node of the expression + auto func = static_cast(distanceNode->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if (func->externalName != "DISTANCE" || distanceNode->numMembers() != 1) { + return rv; + } + rv.distanceNode = distanceNode; + rv.expressionNode = distanceNode; + rv.expressionParent = expressionParent; + return rv; +} + +GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent) { + // binary compare must be on top + bool dist_first = true; + bool lessEqual = true; + auto rv = GeoIndexInfo{}; + if (node->type != NODE_TYPE_OPERATOR_BINARY_GE && + node->type != NODE_TYPE_OPERATOR_BINARY_GT && + node->type != NODE_TYPE_OPERATOR_BINARY_LE && + node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + return rv; + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GE 
|| + node->type == NODE_TYPE_OPERATOR_BINARY_GT) { + dist_first = false; + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || + node->type == NODE_TYPE_OPERATOR_BINARY_LT) { + lessEqual = false; + } + + if (node->numMembers() != 2) { + return rv; + } + + AstNode* first = node->getMember(0); + AstNode* second = node->getMember(1); + + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, + AstNode* value_node) { + if (dist_first && dist_fun && value_node) { + dist_fun.within = true; + dist_fun.range = value_node; + dist_fun.lessgreaterequal = lessEqual; + } else { + dist_fun.invalidate(); + } + return dist_fun; + }; + + rv = eval_stuff(dist_first, lessEqual, + isDistanceFunction(first, expressionParent), + isValueOrRefNode(second)); + if (!rv) { + rv = eval_stuff(dist_first, lessEqual, + isDistanceFunction(second, expressionParent), + isValueOrRefNode(first)); + } + + if (rv) { + // this must be set after checking if the node contains a distance node. + rv.expressionNode = node; + } + + return rv; +} + +GeoIndexInfo iterativePreorderWithCondition( + EN::NodeType type, AstNode* root, + GeoIndexInfo (*condition)(AstNode*, AstNode*)) { + // returns on first hit + if (!root) { + return GeoIndexInfo{}; + } + std::vector> nodestack; + nodestack.push_back({root, nullptr}); + + while (nodestack.size()) { + auto current = nodestack.back(); + nodestack.pop_back(); + GeoIndexInfo rv = condition(current.first, current.second); + if (rv) { + return rv; + } + + if (type == EN::FILTER) { + if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || + current.first->type == NODE_TYPE_OPERATOR_NARY_AND) { + for (std::size_t i = 0; i < current.first->numMembers(); ++i) { + nodestack.push_back({current.first->getMember(i), current.first}); + } + } + } else if (type == EN::SORT) { + // must be the only sort condition + } + } + return GeoIndexInfo{}; +} + +GeoIndexInfo geoDistanceFunctionArgCheck( + std::pair const& pair, ExecutionPlan* plan, + GeoIndexInfo 
info) { + std::pair> + attributeAccess1; + std::pair> + attributeAccess2; + + // first and second should be based on the same document - need to provide the + // document in order to see which collection is bound to it and if that + // collections supports geo-index + if (!pair.first->isAttributeAccessForVariable(attributeAccess1) || + !pair.second->isAttributeAccessForVariable(attributeAccess2)) { + info.invalidate(); + return info; + } + + TRI_ASSERT(attributeAccess1.first != nullptr); + TRI_ASSERT(attributeAccess2.first != nullptr); + + // expect access of the for doc.attribute + auto setter1 = plan->getVarSetBy(attributeAccess1.first->id); + auto setter2 = plan->getVarSetBy(attributeAccess2.first->id); + + if (setter1 != nullptr && setter2 != nullptr && setter1 == setter2 && + setter1->getType() == EN::ENUMERATE_COLLECTION) { + auto collNode = reinterpret_cast(setter1); + auto coll = collNode->collection(); // what kind of indexes does it have on + // what attributes + auto lcoll = coll->getCollection(); + // TODO - check collection for suitable geo-indexes + for (auto indexShardPtr : lcoll->getIndexes()) { + // get real index + arangodb::Index& index = *indexShardPtr.get(); + + // check if current index is a geo-index + if (index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX && + index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX) { + continue; + } + + TRI_ASSERT(index.fields().size() == 2); + + // check access paths of attributes in ast and those in index match + if (index.fields()[0] == attributeAccess1.second && + index.fields()[1] == attributeAccess2.second) { + info.collectionNode = collNode; + info.index = indexShardPtr; + TRI_AttributeNamesJoinNested(attributeAccess1.second, info.longitude, + true); + TRI_AttributeNamesJoinNested(attributeAccess2.second, info.latitude, + true); + return info; + } + } + } + + info.invalidate(); + return info; +} + +bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan) { + 
if (!info) { + return false; + } + + auto const& functionArguments = info.distanceNode->getMember(0); + if (functionArguments->numMembers() < 4) { + return false; + } + + std::pair argPair1 = {functionArguments->getMember(0), + functionArguments->getMember(1)}; + std::pair argPair2 = {functionArguments->getMember(2), + functionArguments->getMember(3)}; + + GeoIndexInfo result1 = + geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); + GeoIndexInfo result2 = + geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); + // info now conatins access path to collection + + // xor only one argument pair shall have a geoIndex + if ((!result1 && !result2) || (result1 && result2)) { + info.invalidate(); + return false; + } + + GeoIndexInfo res; + if (result1) { + info = std::move(result1); + info.constantPair = std::move(argPair2); + } else { + info = std::move(result2); + info.constantPair = std::move(argPair1); + } + + return true; +} + +// checks a single sort or filter node +GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, + ExecutionPlan* plan, + ExecutionNode* n) { + ExecutionNode* setter = nullptr; + auto rv = GeoIndexInfo{}; + switch (type) { + case EN::SORT: { + auto node = static_cast(n); + auto& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort + // criterion + if (!(elements.size() == 1 && elements[0].ascending)) { + // test on second makes sure the SORT is ascending + return rv; + } + + // variable of sort expression + auto variable = elements[0].var; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the calculation + setter = plan->getVarSetBy(variable->id); + } break; + + case EN::FILTER: { + auto node = static_cast(n); + + // filter nodes always have one input variable + auto varsUsedHere = node->getVariablesUsedHere(); + TRI_ASSERT(varsUsedHere.size() == 1); + + // now check who 
introduced our variable + auto variable = varsUsedHere[0]; + setter = plan->getVarSetBy(variable->id); + } break; + + default: + return rv; + } + + // common part - extract astNode from setter witch is a calculation node + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return rv; + } + + auto expression = static_cast(setter)->expression(); + + // the expression must exist and it must have an astNode + if (expression == nullptr || expression->node() == nullptr) { + // not the right type of node + return rv; + } + AstNode* node = expression->nodeForModification(); + + // FIXME -- technical debt -- code duplication / not all cases covered + switch (type) { + case EN::SORT: { + // check comma separated parts of condition cond0, cond1, cond2 + rv = isDistanceFunction(node, nullptr); + } break; + + case EN::FILTER: { + rv = iterativePreorderWithCondition(type, node, &isGeoFilterExpression); + } break; + + default: + rv.invalidate(); // not required but make sure the result is invalid + } + + rv.executionNode = n; + rv.executionNodeType = type; + rv.setter = static_cast(setter); + + checkDistanceArguments(rv, plan); + + return rv; +}; + +// modify plan + +// builds a condition that can be used with the index interface and +// contains all parameters required by the MMFilesGeoIndex +std::unique_ptr buildGeoCondition(ExecutionPlan* plan, + GeoIndexInfo& info) { + AstNode* lat = info.constantPair.first; + AstNode* lon = info.constantPair.second; + auto ast = plan->getAst(); + auto varAstNode = + ast->createNodeReference(info.collectionNode->outVariable()); + + auto args = ast->createNodeArray(info.within ? 
4 : 3); + args->addMember(varAstNode); // collection + args->addMember(lat); // latitude + args->addMember(lon); // longitude + + AstNode* cond = nullptr; + if (info.within) { + // WITHIN + args->addMember(info.range); + auto lessValue = ast->createNodeValueBool(info.lessgreaterequal); + args->addMember(lessValue); + cond = ast->createNodeFunctionCall("WITHIN", args); + } else { + // NEAR + cond = ast->createNodeFunctionCall("NEAR", args); + } + + TRI_ASSERT(cond != nullptr); + + auto condition = std::make_unique(ast); + condition->andCombine(cond); + condition->normalize(plan); + return condition; +} + +void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info) { + if (info.expressionParent && info.executionNodeType == EN::FILTER) { + auto ast = plan->getAst(); + CalculationNode* newNode = nullptr; + Expression* expr = + new Expression(ast, static_cast(info.setter) + ->expression() + ->nodeForModification() + ->clone(ast)); + + try { + newNode = new CalculationNode( + plan, plan->nextId(), expr, + static_cast(info.setter)->outVariable()); + } catch (...) { + delete expr; + throw; + } + + plan->registerNode(newNode); + plan->replaceNode(info.setter, newNode); + + bool done = false; + ast->traverseAndModify( + newNode->expression()->nodeForModification(), + [&done](AstNode* node, void* data) { + if (done) { + return node; + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { + for (std::size_t i = 0; i < node->numMembers(); i++) { + if (isGeoFilterExpression(node->getMemberUnchecked(i), node)) { + done = true; + return node->getMemberUnchecked(i ? 
0 : 1); + } + } + } + return node; + }, + nullptr); + + if (done) { + return; + } + + auto replaceInfo = iterativePreorderWithCondition( + EN::FILTER, newNode->expression()->nodeForModification(), + &isGeoFilterExpression); + if (newNode->expression()->nodeForModification() == + replaceInfo.expressionParent) { + if (replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND) { + for (std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); + ++i) { + if (replaceInfo.expressionParent->getMember(i) != + replaceInfo.expressionNode) { + newNode->expression()->replaceNode( + replaceInfo.expressionParent->getMember(i)); + return; + } + } + } + } + + // else { + // // COULD BE IMPROVED + // if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ + // // delete ast node - we would need the parent of expression parent to + // delete the node + // // we do not have it available here so we just replace the the node + // with true return; + // } + //} + + // fallback + auto replacement = ast->createNodeValueBool(true); + for (std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); + ++i) { + if (replaceInfo.expressionParent->getMember(i) == + replaceInfo.expressionNode) { + replaceInfo.expressionParent->removeMemberUnchecked(i); + replaceInfo.expressionParent->addMember(replacement); + } + } + } +} + +// applys the optimization for a candidate +bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, + GeoIndexInfo& second) { + if (!first && !second) { + return false; + } + + if (!first) { + first = std::move(second); + second.invalidate(); + } + + // We are not allowed to be a inner loop + if (first.collectionNode->isInInnerLoop() && + first.executionNodeType == EN::SORT) { + return false; + } + + std::unique_ptr condition(buildGeoCondition(plan, first)); + + auto inode = new IndexNode( + plan, plan->nextId(), first.collectionNode->vocbase(), + first.collectionNode->collection(), 
first.collectionNode->outVariable(), + std::vector{ + transaction::Methods::IndexHandle{first.index}}, + condition.get(), false); + plan->registerNode(inode); + condition.release(); + + plan->replaceNode(first.collectionNode, inode); + + replaceGeoCondition(plan, first); + replaceGeoCondition(plan, second); + + // if executionNode is sort OR a filter without further sub conditions + // the node can be unlinked + auto unlinkNode = [&](GeoIndexInfo& info) { + if (info && !info.expressionParent) { + if (!arangodb::ServerState::instance()->isCoordinator() || + info.executionNodeType == EN::FILTER) { + plan->unlinkNode(info.executionNode); + } else if (info.executionNodeType == EN::SORT) { + // make sure sort is not reinserted in cluster + static_cast(info.executionNode)->_reinsertInCluster = false; + } + } + }; + + unlinkNode(first); + unlinkNode(second); + + // signal that plan has been changed + return true; +} + +void arangodb::aql::geoIndexRule(Optimizer* opt, + std::unique_ptr plan, + OptimizerRule const* rule) { + SmallVector::allocator_type::arena_type a; + SmallVector nodes{a}; + bool modified = false; + // inspect each return node and work upwards to SingletonNode + plan->findEndNodes(nodes, true); + + for (auto& node : nodes) { + GeoIndexInfo sortInfo{}; + GeoIndexInfo filterInfo{}; + auto current = node; + + while (current) { + switch (current->getType()) { + case EN::SORT: { + sortInfo = + identifyGeoOptimizationCandidate(EN::SORT, plan.get(), current); + break; + } + case EN::FILTER: { + filterInfo = + identifyGeoOptimizationCandidate(EN::FILTER, plan.get(), current); + break; + } + case EN::ENUMERATE_COLLECTION: { + EnumerateCollectionNode* collnode = + static_cast(current); + if ((sortInfo && sortInfo.collectionNode != collnode) || + (filterInfo && filterInfo.collectionNode != collnode)) { + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + if (applyGeoOptimization(true, plan.get(), filterInfo, sortInfo)) { + modified = true; + 
filterInfo.invalidate(); + sortInfo.invalidate(); + } + break; + } + + case EN::INDEX: + case EN::COLLECT: { + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + + default: { + // skip - do nothing + break; + } + } + + current = current->getFirstDependency(); // inspect next node + } + } + + opt->addPlan(std::move(plan), rule, modified); +} diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index feaef40502..5969668fe8 100644 --- a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -198,6 +198,8 @@ void prepareTraversalsRule(Optimizer* opt, std::unique_ptr plan, /// @brief moves simple subqueries one level higher void inlineSubqueriesRule(Optimizer*, std::unique_ptr, OptimizerRule const*); + +void geoIndexRule(aql::Optimizer* opt, std::unique_ptr plan, aql::OptimizerRule const* rule); } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/OptimizerRulesFeature.cpp b/arangod/Aql/OptimizerRulesFeature.cpp index eacdec7fa0..e6943904c4 100644 --- a/arangod/Aql/OptimizerRulesFeature.cpp +++ b/arangod/Aql/OptimizerRulesFeature.cpp @@ -221,6 +221,10 @@ void OptimizerRulesFeature::addRules() { // patch update statements registerRule("patch-update-statements", patchUpdateStatementsRule, OptimizerRule::patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, CanBeDisabled); + + // patch update statements + OptimizerRulesFeature::registerRule("geo-index-optimizer", geoIndexRule, + OptimizerRule::applyGeoIndexRule, false, true); if (arangodb::ServerState::instance()->isCoordinator()) { // distribute operations in cluster diff --git a/arangod/MMFiles/MMFilesOptimizerRules.cpp b/arangod/MMFiles/MMFilesOptimizerRules.cpp index 5020e43266..365f40983d 100644 --- a/arangod/MMFiles/MMFilesOptimizerRules.cpp +++ b/arangod/MMFiles/MMFilesOptimizerRules.cpp @@ -41,549 +41,12 @@ using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; void MMFilesOptimizerRules::registerResources() { - // patch update 
statements - OptimizerRulesFeature::registerRule("geo-index-optimizer", geoIndexRule, - OptimizerRule::applyGeoIndexRule, false, true); // remove SORT RAND() if appropriate OptimizerRulesFeature::registerRule("remove-sort-rand", removeSortRandRule, OptimizerRule::removeSortRandRule_pass5, false, true); } -struct MMFilesGeoIndexInfo { - operator bool() const { return distanceNode && valid; } - void invalidate() { valid = false; } - MMFilesGeoIndexInfo() - : collectionNode(nullptr) - , executionNode(nullptr) - , indexNode(nullptr) - , setter(nullptr) - , expressionParent(nullptr) - , expressionNode(nullptr) - , distanceNode(nullptr) - , index(nullptr) - , range(nullptr) - , executionNodeType(EN::NORESULTS) - , within(false) - , lessgreaterequal(false) - , valid(true) - , constantPair{nullptr,nullptr} - {} - EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode - ExecutionNode* executionNode; // start node that is a sort or filter - IndexNode* indexNode; // AstNode that is the parent of the Node - CalculationNode* setter; // node that has contains the condition for filter or sort - AstNode* expressionParent; // AstNode that is the parent of the Node - AstNode* expressionNode; // AstNode that contains the sort/filter condition - AstNode* distanceNode; // AstNode that contains the distance parameters - std::shared_ptr index; //pointer to geoindex - AstNode const* range; // range for within - ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter - bool within; // is this a within lookup - bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) - bool valid; // contains this node a valid condition - std::vector longitude; // access path to longitude - std::vector latitude; // access path to latitude - std::pair constantPair; -}; - -//candidate checking - -AstNode* isValueOrRefNode(AstNode* node){ - //TODO - implement me - return node; -} - -MMFilesGeoIndexInfo isDistanceFunction(AstNode* 
distanceNode, AstNode* expressionParent){ - // the expression must exist and it must be a function call - auto rv = MMFilesGeoIndexInfo{}; - if(distanceNode->type != NODE_TYPE_FCALL) { - return rv; - } - - //get the ast node of the expression - auto func = static_cast(distanceNode->getData()); - - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array - if ( func->externalName != "DISTANCE" || distanceNode->numMembers() != 1 ) { - return rv; - } - rv.distanceNode = distanceNode; - rv.expressionNode = distanceNode; - rv.expressionParent = expressionParent; - return rv; -} - -MMFilesGeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ - // binary compare must be on top - bool dist_first = true; - bool lessEqual = true; - auto rv = MMFilesGeoIndexInfo{}; - if( node->type != NODE_TYPE_OPERATOR_BINARY_GE - && node->type != NODE_TYPE_OPERATOR_BINARY_GT - && node->type != NODE_TYPE_OPERATOR_BINARY_LE - && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { - - return rv; - } - if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT) { - dist_first = false; - } - if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT) { - lessEqual = false; - } - - if (node->numMembers() != 2){ - return rv; - } - - AstNode* first = node->getMember(0); - AstNode* second = node->getMember(1); - - auto eval_stuff = [](bool dist_first, bool lessEqual, MMFilesGeoIndexInfo&& dist_fun, AstNode* value_node){ - if (dist_first && dist_fun && value_node) { - dist_fun.within = true; - dist_fun.range = value_node; - dist_fun.lessgreaterequal = lessEqual; - } else { - dist_fun.invalidate(); - } - return dist_fun; - }; - - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, expressionParent), isValueOrRefNode(second)); - if (!rv) { - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, expressionParent), isValueOrRefNode(first)); - } - - if(rv){ 
- //this must be set after checking if the node contains a distance node. - rv.expressionNode = node; - } - - return rv; -} - -MMFilesGeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, MMFilesGeoIndexInfo(*condition)(AstNode*, AstNode*)){ - // returns on first hit - if (!root){ - return MMFilesGeoIndexInfo{}; - } - std::vector> nodestack; - nodestack.push_back({root, nullptr}); - - while(nodestack.size()){ - auto current = nodestack.back(); - nodestack.pop_back(); - MMFilesGeoIndexInfo rv = condition(current.first,current.second); - if (rv) { - return rv; - } - - if (type == EN::FILTER){ - if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ - for (std::size_t i = 0; i < current.first->numMembers(); ++i){ - nodestack.push_back({current.first->getMember(i),current.first}); - } - } - } else if (type == EN::SORT) { - // must be the only sort condition - } - } - return MMFilesGeoIndexInfo{}; -} - -MMFilesGeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, - ExecutionPlan* plan, MMFilesGeoIndexInfo info){ - std::pair> attributeAccess1; - std::pair> attributeAccess2; - - // first and second should be based on the same document - need to provide the document - // in order to see which collection is bound to it and if that collections supports geo-index - if (!pair.first->isAttributeAccessForVariable(attributeAccess1) || - !pair.second->isAttributeAccessForVariable(attributeAccess2)) { - info.invalidate(); - return info; - } - - TRI_ASSERT(attributeAccess1.first != nullptr); - TRI_ASSERT(attributeAccess2.first != nullptr); - - // expect access of the for doc.attribute - auto setter1 = plan->getVarSetBy(attributeAccess1.first->id); - auto setter2 = plan->getVarSetBy(attributeAccess2.first->id); - - if (setter1 != nullptr && - setter2 != nullptr && - setter1 == setter2 && - setter1->getType() == EN::ENUMERATE_COLLECTION) { - auto collNode = reinterpret_cast(setter1); - auto 
coll = collNode->collection(); //what kind of indexes does it have on what attributes - auto lcoll = coll->getCollection(); - // TODO - check collection for suitable geo-indexes - for(auto indexShardPtr : lcoll->getIndexes()){ - // get real index - arangodb::Index& index = *indexShardPtr.get(); - - // check if current index is a geo-index - if( index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX - && index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX) { - continue; - } - - TRI_ASSERT(index.fields().size() == 2); - - //check access paths of attributes in ast and those in index match - if (index.fields()[0] == attributeAccess1.second && - index.fields()[1] == attributeAccess2.second) { - info.collectionNode = collNode; - info.index = indexShardPtr; - TRI_AttributeNamesJoinNested(attributeAccess1.second, info.longitude, true); - TRI_AttributeNamesJoinNested(attributeAccess2.second, info.latitude, true); - return info; - } - } - } - - info.invalidate(); - return info; -} - -bool checkDistanceArguments(MMFilesGeoIndexInfo& info, ExecutionPlan* plan){ - if(!info){ - return false; - } - - auto const& functionArguments = info.distanceNode->getMember(0); - if(functionArguments->numMembers() < 4){ - return false; - } - - std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; - std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - - MMFilesGeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); - MMFilesGeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); - //info now conatins access path to collection - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - info.invalidate(); - return false; - } - - MMFilesGeoIndexInfo res; - if(result1){ - info = std::move(result1); - info.constantPair = std::move(argPair2); - } else { - info = std::move(result2); - 
info.constantPair = std::move(argPair1); - } - - return true; -} - -//checks a single sort or filter node -MMFilesGeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ - ExecutionNode* setter = nullptr; - auto rv = MMFilesGeoIndexInfo{}; - switch(type){ - case EN::SORT: { - auto node = static_cast(n); - auto& elements = node->getElements(); - - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].ascending)) { - //test on second makes sure the SORT is ascending - return rv; - } - - //variable of sort expression - auto variable = elements[0].var; - TRI_ASSERT(variable != nullptr); - - //// find the expression that is bound to the variable - // get the expression node that holds the calculation - setter = plan->getVarSetBy(variable->id); - } - break; - - case EN::FILTER: { - auto node = static_cast(n); - - // filter nodes always have one input variable - auto varsUsedHere = node->getVariablesUsedHere(); - TRI_ASSERT(varsUsedHere.size() == 1); - - // now check who introduced our variable - auto variable = varsUsedHere[0]; - setter = plan->getVarSetBy(variable->id); - } - break; - - default: - return rv; - } - - // common part - extract astNode from setter witch is a calculation node - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - return rv; - } - - auto expression = static_cast(setter)->expression(); - - // the expression must exist and it must have an astNode - if (expression == nullptr || expression->node() == nullptr){ - // not the right type of node - return rv; - } - AstNode* node = expression->nodeForModification(); - - //FIXME -- technical debt -- code duplication / not all cases covered - switch(type){ - case EN::SORT: { - // check comma separated parts of condition cond0, cond1, cond2 - rv = isDistanceFunction(node,nullptr); - } - break; - - case EN::FILTER: { - rv = 
iterativePreorderWithCondition(type, node, &isGeoFilterExpression); - } - break; - - default: - rv.invalidate(); // not required but make sure the result is invalid - } - - rv.executionNode = n; - rv.executionNodeType = type; - rv.setter = static_cast(setter); - - checkDistanceArguments(rv, plan); - - return rv; -}; - -//modify plan - -// builds a condition that can be used with the index interface and -// contains all parameters required by the MMFilesGeoIndex -std::unique_ptr buildGeoCondition(ExecutionPlan* plan, MMFilesGeoIndexInfo& info) { - AstNode* lat = info.constantPair.first; - AstNode* lon = info.constantPair.second; - auto ast = plan->getAst(); - auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); - - auto args = ast->createNodeArray(info.within ? 4 : 3); - args->addMember(varAstNode); // collection - args->addMember(lat); // latitude - args->addMember(lon); // longitude - - AstNode* cond = nullptr; - if (info.within) { - // WITHIN - args->addMember(info.range); - auto lessValue = ast->createNodeValueBool(info.lessgreaterequal); - args->addMember(lessValue); - cond = ast->createNodeFunctionCall("WITHIN", args); - } else { - // NEAR - cond = ast->createNodeFunctionCall("NEAR", args); - } - - TRI_ASSERT(cond != nullptr); - - auto condition = std::make_unique(ast); - condition->andCombine(cond); - condition->normalize(plan); - return condition; -} - -void replaceGeoCondition(ExecutionPlan* plan, MMFilesGeoIndexInfo& info){ - if (info.expressionParent && info.executionNodeType == EN::FILTER) { - auto ast = plan->getAst(); - CalculationNode* newNode = nullptr; - Expression* expr = new Expression(ast, static_cast(info.setter)->expression()->nodeForModification()->clone(ast)); - - try { - newNode = new CalculationNode(plan, plan->nextId(), expr, static_cast(info.setter)->outVariable()); - } catch (...) 
{ - delete expr; - throw; - } - - plan->registerNode(newNode); - plan->replaceNode(info.setter, newNode); - - bool done = false; - ast->traverseAndModify(newNode->expression()->nodeForModification(),[&done](AstNode* node, void* data) { - if (done) { - return node; - } - if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { - for (std::size_t i = 0; i < node->numMembers(); i++){ - if (isGeoFilterExpression(node->getMemberUnchecked(i),node)) { - done = true; - return node->getMemberUnchecked(i ? 0 : 1); - } - } - } - return node; - }, - nullptr); - - if(done){ - return; - } - - auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); - if (newNode->expression()->nodeForModification() == replaceInfo.expressionParent) { - if (replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ - for (std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i) { - if (replaceInfo.expressionParent->getMember(i) != replaceInfo.expressionNode) { - newNode->expression()->replaceNode(replaceInfo.expressionParent->getMember(i)); - return; - } - } - } - } - - //else { - // // COULD BE IMPROVED - // if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ - // // delete ast node - we would need the parent of expression parent to delete the node - // // we do not have it available here so we just replace the the node with true - // return; - // } - //} - - //fallback - auto replacement = ast->createNodeValueBool(true); - for (std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i) { - if (replaceInfo.expressionParent->getMember(i) == replaceInfo.expressionNode) { - replaceInfo.expressionParent->removeMemberUnchecked(i); - replaceInfo.expressionParent->addMember(replacement); - } - } - } -} - -// applys the optimization for a candidate -bool applyGeoOptimization(bool near, ExecutionPlan* plan, MMFilesGeoIndexInfo& first, MMFilesGeoIndexInfo& second) { - if (!first 
&& !second) { - return false; - } - - if (!first) { - first = std::move(second); - second.invalidate(); - } - - // We are not allowed to be a inner loop - if (first.collectionNode->isInInnerLoop() && first.executionNodeType == EN::SORT) { - return false; - } - - std::unique_ptr condition(buildGeoCondition(plan, first)); - - auto inode = new IndexNode( - plan, plan->nextId(), first.collectionNode->vocbase(), - first.collectionNode->collection(), first.collectionNode->outVariable(), - std::vector{transaction::Methods::IndexHandle{first.index}}, - condition.get(), false); - plan->registerNode(inode); - condition.release(); - - plan->replaceNode(first.collectionNode,inode); - - replaceGeoCondition(plan, first); - replaceGeoCondition(plan, second); - - // if executionNode is sort OR a filter without further sub conditions - // the node can be unlinked - auto unlinkNode = [&](MMFilesGeoIndexInfo& info) { - if (info && !info.expressionParent) { - if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { - plan->unlinkNode(info.executionNode); - } else if (info.executionNodeType == EN::SORT) { - //make sure sort is not reinserted in cluster - static_cast(info.executionNode)->_reinsertInCluster = false; - } - } - }; - - unlinkNode(first); - unlinkNode(second); - - //signal that plan has been changed - return true; -} - -void MMFilesOptimizerRules::geoIndexRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { - - SmallVector::allocator_type::arena_type a; - SmallVector nodes{a}; - bool modified = false; - //inspect each return node and work upwards to SingletonNode - plan->findEndNodes(nodes, true); - - for (auto& node : nodes) { - MMFilesGeoIndexInfo sortInfo{}; - MMFilesGeoIndexInfo filterInfo{}; - auto current = node; - - while (current) { - switch(current->getType()) { - case EN::SORT:{ - sortInfo = identifyGeoOptimizationCandidate(EN::SORT, plan.get(), current); - break; - } - case EN::FILTER: { - filterInfo 
= identifyGeoOptimizationCandidate(EN::FILTER, plan.get(), current); - break; - } - case EN::ENUMERATE_COLLECTION: { - EnumerateCollectionNode* collnode = static_cast(current); - if( (sortInfo && sortInfo.collectionNode!= collnode) - ||(filterInfo && filterInfo.collectionNode != collnode) - ){ - filterInfo.invalidate(); - sortInfo.invalidate(); - break; - } - if (applyGeoOptimization(true, plan.get(), filterInfo, sortInfo)){ - modified = true; - filterInfo.invalidate(); - sortInfo.invalidate(); - } - break; - } - - case EN::INDEX: - case EN::COLLECT:{ - filterInfo.invalidate(); - sortInfo.invalidate(); - break; - } - - default: { - //skip - do nothing - break; - } - } - - current = current->getFirstDependency(); //inspect next node - } - } - - opt->addPlan(std::move(plan), rule, modified); -} - /// @brief remove SORT RAND() if appropriate void MMFilesOptimizerRules::removeSortRandRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { diff --git a/arangod/MMFiles/MMFilesOptimizerRules.h b/arangod/MMFiles/MMFilesOptimizerRules.h index 07bef8808b..3689493b3c 100644 --- a/arangod/MMFiles/MMFilesOptimizerRules.h +++ b/arangod/MMFiles/MMFilesOptimizerRules.h @@ -35,8 +35,6 @@ struct OptimizerRule; struct MMFilesOptimizerRules { static void registerResources(); - - static void geoIndexRule(aql::Optimizer* opt, std::unique_ptr plan, aql::OptimizerRule const* rule); static void removeSortRandRule(aql::Optimizer* opt, std::unique_ptr plan, aql::OptimizerRule const* rule); }; diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index 0bdd5151b0..c7eb9d05a0 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -473,7 +473,7 @@ int GeoIndexNewPot(GeoIx* gix) {// rocksdb initial put /* =================================================== */ GeoIdx* GeoIndex_new(uint64_t objectId) { GeoIx* gix; - int i, j; + int i; gix = static_cast( 
TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(GeoIx), false)); @@ -1330,9 +1330,9 @@ void GeoIndexFreeSlot(GeoIx* gix, int slot) { /* added to the index. */ /* =================================================== */ int GeoIndexNewSlot(GeoIx* gix) { - //int j; + /*int j; GeoCoordinate* gc; - /*if (gix->gxc[0].latitude == 0.0) { + if (gix->gxc[0].latitude == 0.0) { // do the growth calculation in long long to make sure it doesn't // overflow when the size gets to be near 2^31 long long x = gix->slotct; @@ -1359,7 +1359,6 @@ int GeoIndexNewSlot(GeoIx* gix) { } j = (int)(gix->gxc[0].latitude); gix->gxc[0].latitude = gix->gxc[j].latitude;*/ - return gix->nextFreeSlot++; } /* =================================================== */ From ce5f5c0c7d9030f393c6b350c19e5e60661f77e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 15:13:15 +0200 Subject: [PATCH 14/18] Fixed nullptr --- .../RocksDBEngine/RocksDBTransactionCollection.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp index 9111a61b3f..18f9824c54 100644 --- a/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBTransactionCollection.cpp @@ -163,14 +163,13 @@ int RocksDBTransactionCollection::use(int nestingLevel) { _collection = _transaction->vocbase()->useCollection(_cid, status); if (_collection != nullptr) { _usageLocked = true; + + // geo index needs exclusive write access + RocksDBCollection* rc = static_cast(_collection->getPhysical()); + if (AccessMode::isWrite(_accessType) && rc->hasGeoIndex()) { + _accessType = AccessMode::Type::EXCLUSIVE; + } } - - // geo index needs exclusive write access - RocksDBCollection* rc = static_cast(_collection->getPhysical()); - if (AccessMode::isWrite(_accessType) && rc->hasGeoIndex()) { - _accessType = AccessMode::Type::EXCLUSIVE; - } - } else { // use without 
usage-lock (lock already set externally) _collection = _transaction->vocbase()->lookupCollection(_cid); From c016c543201c068414a2cc3d643c60df1c3c7352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 15:44:19 +0200 Subject: [PATCH 15/18] Fixing AQL query-geo test --- arangod/RocksDBEngine/RocksDBAqlFunctions.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp b/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp index eab7b6540a..d606c45837 100644 --- a/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp +++ b/arangod/RocksDBEngine/RocksDBAqlFunctions.cpp @@ -30,6 +30,7 @@ #include "StorageEngine/DocumentIdentifierToken.h" #include "Transaction/Helpers.h" #include "Transaction/Methods.h" +#include "StorageEngine/TransactionState.h" #include "Utils/CollectionNameResolver.h" #include "VocBase/LogicalCollection.h" #include "VocBase/ManagedDocumentResult.h" @@ -161,6 +162,12 @@ static arangodb::RocksDBGeoIndex* getGeoIndex( // It is save to return the Raw pointer. // It can only be used until trx is finished. 
trx->addCollectionAtRuntime(cid, collectionName); + Result res = trx->state()->ensureCollections(); + if (!res.ok()) { + THROW_ARANGO_EXCEPTION_MESSAGE(res.errorNumber(), + res.errorMessage()); + } + auto document = trx->documentCollection(cid); if (document == nullptr) { THROW_ARANGO_EXCEPTION_FORMAT(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, "'%s'", From dd584c0aa4c895637ca190b41536e5446ca19b16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 17:28:08 +0200 Subject: [PATCH 16/18] Swapping key format, fixing recovery --- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 45 +++++++++++++- arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 58 ++++--------------- arangod/RocksDBEngine/RocksDBGeoIndexImpl.h | 2 +- arangod/RocksDBEngine/RocksDBKey.cpp | 9 +-- arangod/RocksDBEngine/RocksDBKey.h | 4 +- arangod/RocksDBEngine/RocksDBKeyBounds.cpp | 20 ++++++- arangod/RocksDBEngine/RocksDBKeyBounds.h | 1 + 7 files changed, 79 insertions(+), 60 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 93401d3c3c..5e334aff8e 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -29,8 +29,10 @@ #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" #include "Logger/Logger.h" +#include "RocksDBEngine/RocksDBCommon.h" #include "RocksDBEngine/RocksDBToken.h" #include "StorageEngine/TransactionState.h" +#include using namespace arangodb; @@ -265,9 +267,46 @@ RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, TRI_ERROR_BAD_PARAMETER, "RocksDBGeoIndex can only be created with one or two fields."); } + + + // cheap trick to get the last inserted pot and slot number + rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); + rocksdb::ReadOptions opts; + std::unique_ptr iter(db->NewIterator(opts)); + + usleep(1000000); + usleep(1000000); + usleep(1000000); + usleep(1000000); + usleep(1000000); + usleep(1000000); + + // cheap trick to 
find the number of last use pots and slots + int numPots = 0; + RocksDBKeyBounds b1 = RocksDBKeyBounds::GeoIndex(_objectId, false); + iter->SeekForPrev(b1.end()); + if (iter->Valid() + && _cmp->Compare(iter->key(), b1.start()) >= 0 + && _cmp->Compare(iter->key(), b1.end()) < 0) { + // found a key smaller than bounds end + std::pair pair = RocksDBKey::geoValues(iter->key()); + TRI_ASSERT(pair.first == false); + numPots = pair.second; + } + + int numSlots = 0; + RocksDBKeyBounds b2 = RocksDBKeyBounds::GeoIndex(_objectId, true); + iter->SeekForPrev(b2.end()); + if (iter->Valid() + && _cmp->Compare(iter->key(), b2.start()) >= 0 + && _cmp->Compare(iter->key(), b2.end()) < 0) { + // found a key smaller than bounds end + std::pair pair = RocksDBKey::geoValues(iter->key()); + TRI_ASSERT(pair.first); + numSlots = pair.second; + } - _geoIndex = GeoIndex_new(_objectId); - + _geoIndex = GeoIndex_new(_objectId, numPots, numSlots); if (_geoIndex == nullptr) { THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); } @@ -513,7 +552,7 @@ int RocksDBGeoIndex::removeRaw(rocksdb::WriteBatch*, TRI_voc_rid_t revisionId, int RocksDBGeoIndex::unload() { // create a new, empty index - auto empty = GeoIndex_new(_objectId); + auto empty = GeoIndex_new(_objectId, 0, 0); if (empty == nullptr) { THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index c7eb9d05a0..b98a96a6b0 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -471,7 +471,10 @@ int GeoIndexNewPot(GeoIx* gix) {// rocksdb initial put /* GeoString values of real (latitude, longitude) */ /* points */ /* =================================================== */ -GeoIdx* GeoIndex_new(uint64_t objectId) { +GeoIdx* GeoIndex_new(uint64_t objectId, + int numPots, + int numSlots) { + TRI_ASSERT(objectId != 0); GeoIx* gix; int i; @@ -483,55 +486,14 @@ GeoIdx* GeoIndex_new(uint64_t 
objectId) { } gix->objectId = objectId; - - /* try to allocate all the things we need */ - /*gix->ypots = static_cast( - TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, GEOPOTSTART * sizeof(GeoPot), false)); - gix->gxc = static_cast(TRI_Allocate( - TRI_UNKNOWN_MEM_ZONE, GEOSLOTSTART * sizeof(GeoCoordinate), false)); - - // if any of them fail, free the ones that succeeded - // and then return the nullptr for our user - if ((gix->ypots == nullptr) || (gix->gxc == nullptr)) { - if (gix->ypots != nullptr) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->ypots); - } - - if (gix->gxc != nullptr) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix->gxc); - } - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, gix); - - return nullptr; - }*/ - - // set initial memory usage - //gix->_memoryUsed = - // GEOPOTSTART * sizeof(GeoPot) + GEOSLOTSTART * sizeof(GeoCoordinate); - - /* initialize chain of empty slots */ - /*for (i = 0; i < GEOSLOTSTART; i++) { - if (i < GEOSLOTSTART - 1) - (gix->gxc[i]).latitude = i + 1; - else - (gix->gxc[i]).latitude = 0; + if (numPots == 0 || numSlots == 0) { // first run + gix->nextFreePot = 2; + gix->nextFreeSlot = 1; + } else { + gix->nextFreePot = numPots + 1; + gix->nextFreeSlot = numSlots + 1; } - // similarly set up free chain of empty pots - for (i = 0; i < GEOPOTSTART; i++) { - if (i < GEOPOTSTART - 1) - gix->ypots[i].LorLeaf = i + 1; - else - gix->ypots[i].LorLeaf = 0; - }*/ - - //RocksDBCounterManager *mgr = rocksutils::globalRocksEngine()->counterManager(); - //RocksDBCounterManager::CounterAdjustment adj = mgr->loadCounter(objectId); - gix->nextFreePot = 2;//(adj.added() & 0xffffffff); - gix->nextFreeSlot = 1;//adj.added() >> 32; - - // leave intact /* set up the fixed points structure */ for (i = 0; i < GeoIndexFIXEDPOINTS; i++) { diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h index 4365202820..a18c764b6e 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h @@ -93,7 +93,7 @@ 
typedef void GeoCursor; /* to keep the structure private */ size_t GeoIndex_MemoryUsage(void*); -GeoIdx* GeoIndex_new(uint64_t objectId); +GeoIdx* GeoIndex_new(uint64_t objectId, int slo, int); void GeoIndex_free(GeoIdx* gi); double GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2); int GeoIndex_insert(GeoIdx* gi, GeoCoordinate* c); diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index 3b209339ec..d928684cd7 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -83,12 +83,13 @@ RocksDBKey RocksDBKey::FulltextIndexValue(uint64_t indexId, return RocksDBKey(RocksDBEntryType::FulltextIndexValue, indexId, word, primaryKey); } -RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, uint32_t offset, bool isSlot) { +RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, int32_t offset, bool isSlot) { RocksDBKey key(RocksDBEntryType::GeoIndexValue); size_t length = sizeof(char) + sizeof(indexId) + sizeof(offset); key._buffer.reserve(length); key._buffer.push_back(static_cast(RocksDBEntryType::GeoIndexValue)); uint64ToPersistent(key._buffer, indexId); + uint64_t norm = offset; if (isSlot) norm |= uint64_t(1) << 63;//encode slot|pot in highest bit uint64ToPersistent(key._buffer, norm); @@ -190,13 +191,13 @@ VPackSlice RocksDBKey::indexedVPack(rocksdb::Slice const& slice) { return indexedVPack(slice.data(), slice.size()); } -std::pair RocksDBKey::geoValues(rocksdb::Slice const& slice) { +std::pair RocksDBKey::geoValues(rocksdb::Slice const& slice) { TRI_ASSERT(slice.size() >= sizeof(char) + sizeof(uint64_t) * 2); RocksDBEntryType type = static_cast(*slice.data()); TRI_ASSERT(type == RocksDBEntryType::GeoIndexValue); uint64_t val = uint64FromPersistent(slice.data() + sizeof(char) + sizeof(uint64_t)); - bool isSlot = (val >> 63) & 0x1; - return std::pair(isSlot, (val & 0xffffffff)); + bool isSlot = val & 0xFF;// lowest byte is 0xFF if true + return std::pair(isSlot, (val >> 32)); } 
std::string const& RocksDBKey::string() const { return _buffer; } diff --git a/arangod/RocksDBEngine/RocksDBKey.h b/arangod/RocksDBEngine/RocksDBKey.h index 91c0061c62..fff3f13196 100644 --- a/arangod/RocksDBEngine/RocksDBKey.h +++ b/arangod/RocksDBEngine/RocksDBKey.h @@ -113,7 +113,7 @@ class RocksDBKey { ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a geoIndexValue ////////////////////////////////////////////////////////////////////////////// - static RocksDBKey GeoIndexValue(uint64_t indexId, uint32_t offset, bool isSlot); + static RocksDBKey GeoIndexValue(uint64_t indexId, int32_t offset, bool isSlot); ////////////////////////////////////////////////////////////////////////////// /// @brief Create a fully-specified key for a view @@ -226,7 +226,7 @@ class RocksDBKey { /// /// May be called only on GeoIndexValues ////////////////////////////////////////////////////////////////////////////// - std::pair geoValues(rocksdb::Slice const& slice); + static std::pair geoValues(rocksdb::Slice const& slice); public: ////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp index 8ea793f163..66e0de4c35 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp @@ -81,6 +81,22 @@ RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId) { return RocksDBKeyBounds(RocksDBEntryType::GeoIndexValue, indexId); } +RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId, bool isSlot) { + RocksDBKeyBounds b; + size_t length = sizeof(char) + sizeof(uint64_t) * 2; + b._startBuffer.reserve(length); + b._startBuffer.push_back(static_cast(RocksDBEntryType::GeoIndexValue)); + uint64ToPersistent(b._startBuffer, indexId); + b._endBuffer.append(b._startBuffer);// append common prefix + + uint64_t norm = isSlot ? 
0xFF : 0;//encode slot|pot in lowest bit + uint64ToPersistent(b._startBuffer, norm);// lower endian + norm |= 0xFFFFFFFFULL << 32; + uint64ToPersistent(b._endBuffer, norm); + return b; +} + + RocksDBKeyBounds RocksDBKeyBounds::IndexRange(uint64_t indexId, VPackSlice const& left, VPackSlice const& right) { @@ -212,7 +228,8 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first) } case RocksDBEntryType::Collection: - case RocksDBEntryType::Document:{ + case RocksDBEntryType::Document: + case RocksDBEntryType::GeoIndexValue: { // Collections are stored as follows: // Key: 1 + 8-byte ArangoDB database ID + 8-byte ArangoDB collection ID // @@ -236,7 +253,6 @@ RocksDBKeyBounds::RocksDBKeyBounds(RocksDBEntryType type, uint64_t first) case RocksDBEntryType::PrimaryIndexValue: case RocksDBEntryType::EdgeIndexValue: case RocksDBEntryType::FulltextIndexValue: - case RocksDBEntryType::GeoIndexValue: case RocksDBEntryType::View: { size_t length = sizeof(char) + sizeof(uint64_t); _startBuffer.reserve(length); diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.h b/arangod/RocksDBEngine/RocksDBKeyBounds.h index f402f0acbf..f101080e26 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.h +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.h @@ -94,6 +94,7 @@ class RocksDBKeyBounds { /// @brief Bounds for all entries belonging to a specified unique index ////////////////////////////////////////////////////////////////////////////// static RocksDBKeyBounds GeoIndex(uint64_t indexId); + static RocksDBKeyBounds GeoIndex(uint64_t indexId, bool isSlot); ////////////////////////////////////////////////////////////////////////////// /// @brief Bounds for all index-entries within a value range belonging to a From 42d230e4ae90c211d8ceedac0b738190cba9a17a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 22:29:29 +0200 Subject: [PATCH 17/18] Fixing recovery for geo index --- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 15 +++-- 
arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 57 ++++++------------- arangod/RocksDBEngine/RocksDBGeoIndexImpl.h | 2 - arangod/RocksDBEngine/RocksDBKey.cpp | 10 ++-- arangod/RocksDBEngine/RocksDBKeyBounds.cpp | 9 ++- tests/Basics/RocksDBKeyTest.cpp | 28 ++++++++- 6 files changed, 63 insertions(+), 58 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 5e334aff8e..6a6913f34d 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -286,7 +286,7 @@ RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, RocksDBKeyBounds b1 = RocksDBKeyBounds::GeoIndex(_objectId, false); iter->SeekForPrev(b1.end()); if (iter->Valid() - && _cmp->Compare(iter->key(), b1.start()) >= 0 + && _cmp->Compare(b1.start(), iter->key()) < 0 && _cmp->Compare(iter->key(), b1.end()) < 0) { // found a key smaller than bounds end std::pair pair = RocksDBKey::geoValues(iter->key()); @@ -298,7 +298,7 @@ RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, RocksDBKeyBounds b2 = RocksDBKeyBounds::GeoIndex(_objectId, true); iter->SeekForPrev(b2.end()); if (iter->Valid() - && _cmp->Compare(iter->key(), b2.start()) >= 0 + && _cmp->Compare(b2.start(), iter->key()) < 0 && _cmp->Compare(iter->key(), b2.end()) < 0) { // found a key smaller than bounds end std::pair pair = RocksDBKey::geoValues(iter->key()); @@ -319,7 +319,12 @@ RocksDBGeoIndex::~RocksDBGeoIndex() { } size_t RocksDBGeoIndex::memory() const { - return GeoIndex_MemoryUsage(_geoIndex); + rocksdb::TransactionDB* db = rocksutils::globalRocksDB(); + RocksDBKeyBounds bounds = RocksDBKeyBounds::GeoIndex(_objectId); + rocksdb::Range r(bounds.start(), bounds.end()); + uint64_t out; + db->GetApproximateSizes(&r, 1, &out, true); + return (size_t)out; } /// @brief return a JSON representation of the index @@ -552,7 +557,7 @@ int RocksDBGeoIndex::removeRaw(rocksdb::WriteBatch*, TRI_voc_rid_t revisionId, int RocksDBGeoIndex::unload() { // create a 
new, empty index - auto empty = GeoIndex_new(_objectId, 0, 0); + /*auto empty = GeoIndex_new(_objectId, 0, 0); if (empty == nullptr) { THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); @@ -564,7 +569,7 @@ int RocksDBGeoIndex::unload() { } // and assign it - _geoIndex = empty; + _geoIndex = empty;*/ return TRI_ERROR_NO_ERROR; } diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp index b98a96a6b0..507ef189be 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp @@ -485,15 +485,6 @@ GeoIdx* GeoIndex_new(uint64_t objectId, return (GeoIdx*)gix; } - gix->objectId = objectId; - if (numPots == 0 || numSlots == 0) { // first run - gix->nextFreePot = 2; - gix->nextFreeSlot = 1; - } else { - gix->nextFreePot = numPots + 1; - gix->nextFreeSlot = numSlots + 1; - } - /* set up the fixed points structure */ for (i = 0; i < GeoIndexFIXEDPOINTS; i++) { @@ -609,26 +600,24 @@ GeoIdx* GeoIndex_new(uint64_t objectId, } /* set up the root pot */ - GeoPot gp; - //j = GeoIndexNewPot(gix); - gp.LorLeaf = 0; //leaf pot - gp.RorPoints = 0; // with no points in it! - gp.middle = 0ll; - gp.start = 0ll; - gp.end = 0x1FFFFFFFFFFFFFll; - gp.level = 1; - for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gp.maxdist[i] = 0; - - PotWrite(gix, 1, &gp); - - /*j = GeoIndexNewPot(gix); - gix->ypots[j].LorLeaf = 0; //leaf pot - gix->ypots[j].RorPoints = 0; // with no points in it! - gix->ypots[j].middle = 0ll; - gix->ypots[j].start = 0ll; - gix->ypots[j].end = 0x1FFFFFFFFFFFFFll; - gix->ypots[j].level = 1; - for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gix->ypots[j].maxdist[i] = 0;*/ + gix->objectId = objectId; + if (numPots == 0 || numSlots == 0) { // first run + gix->nextFreePot = 2; + gix->nextFreeSlot = 1; + + GeoPot gp; + gp.LorLeaf = 0; //leaf pot + gp.RorPoints = 0; // with no points in it! 
+ gp.middle = 0ll; + gp.start = 0ll; + gp.end = 0x1FFFFFFFFFFFFFll; + gp.level = 1; + for (i = 0; i < GeoIndexFIXEDPOINTS; i++) gp.maxdist[i] = 0; + PotWrite(gix, 1, &gp); + } else { + gix->nextFreePot = numPots + 1; + gix->nextFreeSlot = numSlots + 1; + } return (GeoIdx*)gix; } /* =================================================== */ @@ -2445,15 +2434,5 @@ int GeoIndex_INDEXVALID(GeoIdx* gi) { } #endif - - // change to Approximate memory -size_t GeoIndex_MemoryUsage(void* theIndex) { - //GeoIx* geoIndex = (GeoIx*)theIndex; - //if (geoIndex != nullptr) { - // return geoIndex->_memoryUsed; - //} -#warning FIXME - return 0; -} }} /* end of GeoIndex.c */ diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h index a18c764b6e..3cb9088fec 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h +++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.h @@ -91,8 +91,6 @@ typedef struct { typedef void GeoIdx; /* to keep the structure private */ typedef void GeoCursor; /* to keep the structure private */ -size_t GeoIndex_MemoryUsage(void*); - GeoIdx* GeoIndex_new(uint64_t objectId, int slo, int); void GeoIndex_free(GeoIdx* gi); double GeoIndex_distance(GeoCoordinate* c1, GeoCoordinate* c2); diff --git a/arangod/RocksDBEngine/RocksDBKey.cpp b/arangod/RocksDBEngine/RocksDBKey.cpp index d928684cd7..258c18a1eb 100644 --- a/arangod/RocksDBEngine/RocksDBKey.cpp +++ b/arangod/RocksDBEngine/RocksDBKey.cpp @@ -87,13 +87,11 @@ RocksDBKey RocksDBKey::GeoIndexValue(uint64_t indexId, int32_t offset, bool isSl RocksDBKey key(RocksDBEntryType::GeoIndexValue); size_t length = sizeof(char) + sizeof(indexId) + sizeof(offset); key._buffer.reserve(length); - key._buffer.push_back(static_cast(RocksDBEntryType::GeoIndexValue)); uint64ToPersistent(key._buffer, indexId); - - uint64_t norm = offset; - if (isSlot) norm |= uint64_t(1) << 63;//encode slot|pot in highest bit + + uint64_t norm = uint64_t(offset) << 32; + norm |= isSlot ? 
0xFFU : 0; //encode slot|pot in lowest bit uint64ToPersistent(key._buffer, norm); - return key; } @@ -196,7 +194,7 @@ std::pair RocksDBKey::geoValues(rocksdb::Slice const& slice) { RocksDBEntryType type = static_cast(*slice.data()); TRI_ASSERT(type == RocksDBEntryType::GeoIndexValue); uint64_t val = uint64FromPersistent(slice.data() + sizeof(char) + sizeof(uint64_t)); - bool isSlot = val & 0xFF;// lowest byte is 0xFF if true + bool isSlot = val & 0xFFU;// lowest byte is 0xFF if true return std::pair(isSlot, (val >> 32)); } diff --git a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp index 66e0de4c35..b2d089f86a 100644 --- a/arangod/RocksDBEngine/RocksDBKeyBounds.cpp +++ b/arangod/RocksDBEngine/RocksDBKeyBounds.cpp @@ -87,11 +87,13 @@ RocksDBKeyBounds RocksDBKeyBounds::GeoIndex(uint64_t indexId, bool isSlot) { b._startBuffer.reserve(length); b._startBuffer.push_back(static_cast(RocksDBEntryType::GeoIndexValue)); uint64ToPersistent(b._startBuffer, indexId); + + b._endBuffer.clear(); b._endBuffer.append(b._startBuffer);// append common prefix - uint64_t norm = isSlot ? 0xFF : 0;//encode slot|pot in lowest bit + uint64_t norm = isSlot ? 
0xFFU : 0;//encode slot|pot in lowest bit uint64ToPersistent(b._startBuffer, norm);// lower endian - norm |= 0xFFFFFFFFULL << 32; + norm = norm | (0xFFFFFFFFULL << 32); uint64ToPersistent(b._endBuffer, norm); return b; } @@ -129,8 +131,9 @@ RocksDBKeyBounds RocksDBKeyBounds::FulltextIndexPrefix(uint64_t indexId, uint64ToPersistent(bounds._startBuffer, indexId); bounds._startBuffer.append(word.data(), word.length()); + bounds._endBuffer.clear(); bounds._endBuffer.append(bounds._startBuffer); - bounds._endBuffer.push_back(0xFF);// invalid UTF-8 character, higher than with memcmp + bounds._endBuffer.push_back(0xFFU);// invalid UTF-8 character, higher than with memcmp return bounds; } diff --git a/tests/Basics/RocksDBKeyTest.cpp b/tests/Basics/RocksDBKeyTest.cpp index 493f14d3cf..18f24ccb2f 100644 --- a/tests/Basics/RocksDBKeyTest.cpp +++ b/tests/Basics/RocksDBKeyTest.cpp @@ -29,7 +29,9 @@ #include "catch.hpp" +#include "RocksDBEngine/RocksDBComparator.h" #include "RocksDBEngine/RocksDBKey.h" +#include "RocksDBEngine/RocksDBKeyBounds.h" #include "RocksDBEngine/RocksDBTypes.h" #include "Basics/Exceptions.h" @@ -39,7 +41,7 @@ using namespace arangodb; // --SECTION-- test suite // ----------------------------------------------------------------------------- -/// @brief setup +/// @brief test RocksDBKey class TEST_CASE("RocksDBKeyTest", "[rocksdbkeytest]") { /// @brief test database @@ -227,6 +229,26 @@ SECTION("test_edge_index") { CHECK(s1 == std::string("5\0\0\0\0\0\0\0\0a/1\0foobar\x06", 20)); } - - +} + +/// @brief test RocksDBKeyBounds class +TEST_CASE("RocksDBKeyBoundsTest", "[rocksdbkeybounds]") { + +/// @brief test geo index key and bounds consistency +SECTION("test_geo_index") { + + RocksDBComparator cmp; + + RocksDBKey k1 = RocksDBKey::GeoIndexValue(256, 128, false); + RocksDBKeyBounds bb1 = RocksDBKeyBounds::GeoIndex(256, false); + + CHECK(cmp.Compare(k1.string(), bb1.start()) > 0); + CHECK(cmp.Compare(k1.string(), bb1.end()) < 0); + + RocksDBKey k2 = 
RocksDBKey::GeoIndexValue(256, 128, true); + RocksDBKeyBounds bb2 = RocksDBKeyBounds::GeoIndex(256, true); + CHECK(cmp.Compare(k2.string(), bb2.start()) > 0); + CHECK(cmp.Compare(k2.string(), bb2.end()) < 0); +} + } From f82745b61726e46a65968be33bc00b9c5ee72e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Gra=CC=88tzer?= Date: Tue, 9 May 2017 23:02:16 +0200 Subject: [PATCH 18/18] Oops --- arangod/RocksDBEngine/RocksDBGeoIndex.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 6a6913f34d..7433a7bc25 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -273,15 +273,7 @@ RocksDBGeoIndex::RocksDBGeoIndex(TRI_idx_iid_t iid, rocksdb::TransactionDB *db = rocksutils::globalRocksDB(); rocksdb::ReadOptions opts; std::unique_ptr iter(db->NewIterator(opts)); - - usleep(1000000); - usleep(1000000); - usleep(1000000); - usleep(1000000); - usleep(1000000); - usleep(1000000); - - // cheap trick to find the number of last use pots and slots + int numPots = 0; RocksDBKeyBounds b1 = RocksDBKeyBounds::GeoIndex(_objectId, false); iter->SeekForPrev(b1.end());