1
0
Fork 0

optimizations for geo index

This commit is contained in:
jsteemann 2017-01-03 14:17:49 +01:00
parent 9e7a98332d
commit 16f16b768d
3 changed files with 64 additions and 23 deletions

View File

@ -654,10 +654,13 @@ void GeoMkDetail(GeoIx* gix, GeoDetailedPoint* gd, GeoCoordinate* c) {
gd->gc = c; gd->gc = c;
/* The GeoString computation takes about 0.17 microseconds */ /* The GeoString computation takes about 0.17 microseconds */
gd->gs = GeoMkHilbert(c); gd->gs = GeoMkHilbert(c);
double const lat = c->latitude * M_PI / 180.0;
double const lon = c->longitude * M_PI / 180.0;
double const latCos = cos(lat);
/* This part takes about 0.32 microseconds */ /* This part takes about 0.32 microseconds */
gd->z = sin(c->latitude * M_PI / 180.0); gd->z = sin(lat);
gd->x = cos(c->latitude * M_PI / 180.0) * cos(c->longitude * M_PI / 180.0); gd->x = latCos * cos(lon);
gd->y = cos(c->latitude * M_PI / 180.0) * sin(c->longitude * M_PI / 180.0); gd->y = latCos * sin(lon);
/* And this bit takes about 0.45 microseconds */ /* And this bit takes about 0.45 microseconds */
for (int i = 0; i < GeoIndexFIXEDPOINTS; i++) { for (int i = 0; i < GeoIndexFIXEDPOINTS; i++) {
double xx1 = (gix->fixed.x)[i]; double xx1 = (gix->fixed.x)[i];
@ -927,7 +930,7 @@ int GeoResultsGrow(GeoResults* gr) {
/* distances that could be calculated by a separate */ /* distances that could be calculated by a separate */
/* call to GeoIndex_distance because of rounding errors*/ /* call to GeoIndex_distance because of rounding errors*/
/* =================================================== */ /* =================================================== */
GeoCoordinates* GeoAnswers(GeoIx* gix, GeoResults* gr) { GeoCoordinates* GeoAnswers(GeoIx* gix, GeoResults* gr, bool withDistances) {
GeoCoordinates* ans; GeoCoordinates* ans;
GeoCoordinate* gc; GeoCoordinate* gc;
int i, j; int i, j;
@ -967,11 +970,14 @@ GeoCoordinates* GeoAnswers(GeoIx* gix, GeoResults* gr) {
ans->coordinates[j].latitude = (gix->gc)[slot].latitude; ans->coordinates[j].latitude = (gix->gc)[slot].latitude;
ans->coordinates[j].longitude = (gix->gc)[slot].longitude; ans->coordinates[j].longitude = (gix->gc)[slot].longitude;
ans->coordinates[j].data = (gix->gc)[slot].data; ans->coordinates[j].data = (gix->gc)[slot].data;
mole = sqrt(gr->snmd[i]); if (withDistances) {
if (mole > 2.0) mole = 2.0; /* make sure arcsin succeeds! */ mole = sqrt(gr->snmd[i]);
gr->snmd[j] = 2.0 * EARTHRADIAN * asin(mole / 2.0); if (mole > 2.0) mole = 2.0; /* make sure arcsin succeeds! */
gr->snmd[j] = 2.0 * EARTHRADIAN * asin(mole / 2.0);
}
j++; j++;
} }
// note that these are uncalculated if withDistances is false!
ans->distances = gr->snmd; ans->distances = gr->snmd;
TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot); TRI_Free(TRI_UNKNOWN_MEM_ZONE, gr->slot);
@ -1094,7 +1100,7 @@ GeoCoordinates* GeoIndex_PointsWithinRadius(GeoIdx* gi, GeoCoordinate* c,
gk.potid[gk.stacksize++] = gp->RorPoints; gk.potid[gk.stacksize++] = gp->RorPoints;
} }
} }
answer = GeoAnswers(gix, gres); answer = GeoAnswers(gix, gres, true);
return answer; /* note - this may be NULL */ return answer; /* note - this may be NULL */
} }
/* =================================================== */ /* =================================================== */
@ -1159,7 +1165,7 @@ GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c,
} }
} }
} }
answer = GeoAnswers(gix, gr); answer = GeoAnswers(gix, gr, true);
return answer; /* note - this may be NULL */ return answer; /* note - this may be NULL */
} }
/* =================================================== */ /* =================================================== */
@ -2060,7 +2066,7 @@ GeoCursor* GeoIndex_NewCursor(GeoIdx* gi, GeoCoordinate* c) {
return (GeoCursor*)gcr; return (GeoCursor*)gcr;
} }
GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count) { GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count, bool withDistances) {
int i, j, r; int i, j, r;
GeoCoordinate* ct; GeoCoordinate* ct;
GeoResults* gr; GeoResults* gr;
@ -2137,7 +2143,7 @@ GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count) {
} }
} }
} }
gcts = GeoAnswers(gcr->Ix, gr); gcts = GeoAnswers(gcr->Ix, gr, withDistances);
return gcts; return gcts;
} }

View File

@ -101,7 +101,7 @@ GeoCoordinates* GeoIndex_PointsWithinRadius(GeoIdx* gi, GeoCoordinate* c,
GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c, GeoCoordinates* GeoIndex_NearestCountPoints(GeoIdx* gi, GeoCoordinate* c,
int count); int count);
GeoCursor* GeoIndex_NewCursor(GeoIdx* gi, GeoCoordinate* c); GeoCursor* GeoIndex_NewCursor(GeoIdx* gi, GeoCoordinate* c);
GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count); GeoCoordinates* GeoIndex_ReadCursor(GeoCursor* gc, int count, bool withDistances = true);
void GeoIndex_CursorFree(GeoCursor* gc); void GeoIndex_CursorFree(GeoCursor* gc);
void GeoIndex_CoordinatesFree(GeoCoordinates* clist); void GeoIndex_CoordinatesFree(GeoCoordinates* clist);
#ifdef TRI_GEO_DEBUG #ifdef TRI_GEO_DEBUG

View File

@ -127,31 +127,66 @@ void GeoIndexIterator::nextBabies(std::vector<IndexLookupResult>& result, size_t
} }
if (batchSize > 0) { if (batchSize > 0) {
auto coords = std::unique_ptr<GeoCoordinates>(::GeoIndex_ReadCursor(_cursor, batchSize)); // only need to calculate distances for WITHIN queries, but not for NEAR queries
bool const withDistances = !_near;
auto coords = std::unique_ptr<GeoCoordinates>(::GeoIndex_ReadCursor(_cursor, batchSize, withDistances));
size_t const length = coords ? coords->length : 0; size_t const length = coords ? coords->length : 0;
if (!length) {
if (length == 0) {
return; return;
} }
// determine which documents to return... // determine which documents to return...
size_t numDocs = length; size_t numDocs = length;
if (!_near) { if (!_near) {
// WITHIN // WITHIN
// only return those documents that are within the specified radius // only return those documents that are within the specified radius
TRI_ASSERT(numDocs > 0); TRI_ASSERT(numDocs > 0);
// scan backwards because documents with higher distances are more interesting
// this can be improved to use a binary search if block size is increased in the future if (numDocs <= 8) {
while ((_inclusive && coords->distances[numDocs - 1] > _radius) || // linear scan for the first document outside the specified radius
(!_inclusive && coords->distances[numDocs - 1] >= _radius)) { // scan backwards because documents with higher distances are more interesting
// document is outside the specified radius! while ((_inclusive && coords->distances[numDocs - 1] > _radius) ||
--numDocs; (!_inclusive && coords->distances[numDocs - 1] >= _radius)) {
if (numDocs == 0) { // document is outside the specified radius!
break; --numDocs;
if (numDocs == 0) {
break;
}
}
} else {
// binary search for documents inside/outside the specified radius
size_t l = 0;
size_t r = numDocs - 1;
while (true) {
// determine midpoint
size_t m = l + ((r - l) / 2);
if ((_inclusive && coords->distances[m] > _radius) ||
(!_inclusive && coords->distances[m] >= _radius)) {
// document is outside the specified radius!
if (m == 0) {
numDocs = 0;
break;
}
r = m - 1;
} else {
// still inside the radius
numDocs = m + 1;
l = m + 1;
}
if (r < l) {
break;
}
} }
} }
} }
result.reserve(numDocs);
for (size_t i = 0; i < numDocs; ++i) { for (size_t i = 0; i < numDocs; ++i) {
result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[i].data))); result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[i].data)));
} }