From 404bcc8b0c144a6ee42c7422d5e43833a81f8d91 Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Mon, 13 May 2019 18:25:46 +0300 Subject: [PATCH] switch to tiered consolidation by default (#8981) * switch to tiered consolidation by default * fix copy/paste error * fix tests * fix tests --- .../IResearch/IResearchAnalyzerFeature.cpp | 4 +- arangod/IResearch/IResearchViewMeta.cpp | 43 ++++++---- tests/IResearch/IResearchViewMeta-test.cpp | 78 +++++++++++++++---- .../aql/aql-view-arangosearch-ddl-cluster.js | 20 +++-- .../aql-view-arangosearch-ddl-noncluster.js | 20 +++-- 5 files changed, 124 insertions(+), 41 deletions(-) diff --git a/arangod/IResearch/IResearchAnalyzerFeature.cpp b/arangod/IResearch/IResearchAnalyzerFeature.cpp index a4c952ba2c..4ca9dde814 100644 --- a/arangod/IResearch/IResearchAnalyzerFeature.cpp +++ b/arangod/IResearch/IResearchAnalyzerFeature.cpp @@ -1619,7 +1619,7 @@ arangodb::Result IResearchAnalyzerFeature::remove( // remove analyzer if (split.first.null()) { return arangodb::Result( // result TRI_ERROR_FORBIDDEN, // code - "static analyzers cannot be removed" // message + "built-in analyzers cannot be removed" // message ); } @@ -2056,4 +2056,4 @@ bool IResearchAnalyzerFeature::visit( // visit analyzers // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE -// ----------------------------------------------------------------------------- \ No newline at end of file +// ----------------------------------------------------------------------------- diff --git a/arangod/IResearch/IResearchViewMeta.cpp b/arangod/IResearch/IResearchViewMeta.cpp index b837aa9e9d..ce86f6915b 100644 --- a/arangod/IResearch/IResearchViewMeta.cpp +++ b/arangod/IResearch/IResearchViewMeta.cpp @@ -36,10 +36,11 @@ namespace { -const std::string POLICY_BYTES_ACCUM = - "bytes_accum"; // {threshold} > (segment_bytes + - // sum_of_merge_candidate_segment_bytes) / all_segment_bytes -const std::string POLICY_TIER = "tier"; // scoring policy based on byte size and live docs +// {threshold} > (segment_bytes + // sum_of_merge_candidate_segment_bytes) / all_segment_bytes +const std::string POLICY_BYTES_ACCUM = "bytes_accum"; + +// scoring policy based on byte size and live docs +const std::string POLICY_TIER = "tier"; template arangodb::iresearch::IResearchViewMeta::ConsolidationPolicy createConsolidationPolicy( @@ -158,14 +159,30 @@ arangodb::iresearch::IResearchViewMeta::ConsolidationPolicy createConsolidationP } } + { + // optional double + static const std::string fieldName("minScore"); + + if (slice.hasKey(fieldName)) { + auto field = slice.get(fieldName); + + if (!field.isNumber()) { + errorField = fieldName; + + return arangodb::iresearch::IResearchViewMeta::ConsolidationPolicy(); + } + + options.min_score = field.getNumber(); + } + } + properties.openObject(); - properties.add("type", arangodb::velocypack::Value(POLICY_TIER)); - properties.add("lookahead", arangodb::velocypack::Value(size_t(1))); // FIXME remove in 3.5 - properties.add("segmentsBytesFloor", - arangodb::velocypack::Value(options.floor_segment_bytes)); - properties.add("segmentsBytesMax", arangodb::velocypack::Value(options.max_segments_bytes)); - properties.add("segmentsMax", arangodb::velocypack::Value(options.max_segments)); - properties.add("segmentsMin", arangodb::velocypack::Value(options.min_segments)); + properties.add("type", VPackValue(POLICY_TIER)); + properties.add("segmentsBytesFloor", VPackValue(options.floor_segment_bytes)); + properties.add("segmentsBytesMax", VPackValue(options.max_segments_bytes)); + properties.add("segmentsMax", VPackValue(options.max_segments)); + properties.add("segmentsMin", VPackValue(options.min_segments)); + properties.add("minScore", VPackValue(options.min_score)); properties.close(); return arangodb::iresearch::IResearchViewMeta::ConsolidationPolicy{ @@ -202,9 +219,9 @@ IResearchViewMeta::IResearchViewMeta() std::string errorField; _consolidationPolicy = - createConsolidationPolicy( + createConsolidationPolicy( arangodb::velocypack::Parser::fromJson( - "{ \"type\": \"bytes_accum\", \"threshold\": 0.1 }") + "{ \"type\": \"tier\" }") ->slice(), errorField); assert(_consolidationPolicy.policy()); // ensure above syntax is correct diff --git a/tests/IResearch/IResearchViewMeta-test.cpp b/tests/IResearch/IResearchViewMeta-test.cpp index c09aa5f40a..93a7708919 100644 --- a/tests/IResearch/IResearchViewMeta-test.cpp +++ b/tests/IResearch/IResearchViewMeta-test.cpp @@ -68,17 +68,20 @@ SECTION("test_defaults") { arangodb::iresearch::IResearchViewMeta meta; arangodb::iresearch::IResearchViewMetaState metaState; - CHECK((true == metaState._collections.empty())); + CHECK(true == metaState._collections.empty()); CHECK(true == (10 == meta._cleanupIntervalStep)); - CHECK((true == (1000 == meta._commitIntervalMsec))); + CHECK(true == (1000 == meta._commitIntervalMsec)); CHECK(true == (60 * 1000 == meta._consolidationIntervalMsec)); - CHECK((std::string("bytes_accum") == meta._consolidationPolicy.properties().get("type").copyString())); - CHECK((false == !meta._consolidationPolicy.policy())); - CHECK((0.1f == meta._consolidationPolicy.properties().get("threshold").getNumber())); + CHECK(std::string("tier") == meta._consolidationPolicy.properties().get("type").copyString()); + CHECK(false == !meta._consolidationPolicy.policy()); + CHECK(1 == meta._consolidationPolicy.properties().get("segmentsMin").getNumber()); + CHECK(10 == meta._consolidationPolicy.properties().get("segmentsMax").getNumber()); + CHECK(size_t(2)*(1<<20) == meta._consolidationPolicy.properties().get("segmentsBytesFloor").getNumber()); + CHECK(size_t(5)*(1<<30) == meta._consolidationPolicy.properties().get("segmentsBytesMax").getNumber()); CHECK(std::string("C") == irs::locale_utils::name(meta._locale)); - CHECK((0 == meta._writebufferActive)); - CHECK((64 == meta._writebufferIdle)); - CHECK((32*(size_t(1)<<20) == meta._writebufferSizeMax)); + CHECK(0 == meta._writebufferActive); + CHECK(64 == meta._writebufferIdle); + CHECK(32*(size_t(1)<<20) == meta._writebufferSizeMax); CHECK(meta._primarySort.empty()); } @@ -148,9 +151,12 @@ SECTION("test_readDefaults") { CHECK(10 == meta._cleanupIntervalStep); CHECK((1000 == meta._commitIntervalMsec)); CHECK(60 * 1000 == meta._consolidationIntervalMsec); - CHECK((std::string("bytes_accum") == meta._consolidationPolicy.properties().get("type").copyString())); + CHECK((std::string("tier") == meta._consolidationPolicy.properties().get("type").copyString())); CHECK((false == !meta._consolidationPolicy.policy())); - CHECK((0.1f == meta._consolidationPolicy.properties().get("threshold").getNumber())); + CHECK(1 == meta._consolidationPolicy.properties().get("segmentsMin").getNumber()); + CHECK(10 == meta._consolidationPolicy.properties().get("segmentsMax").getNumber()); + CHECK(size_t(2)*(1<<20) == meta._consolidationPolicy.properties().get("segmentsBytesFloor").getNumber()); + CHECK(size_t(5)*(1<<30) == meta._consolidationPolicy.properties().get("segmentsBytesMax").getNumber()); CHECK(std::string("C") == irs::locale_utils::name(meta._locale)); CHECK((0 == meta._writebufferActive)); CHECK((64 == meta._writebufferIdle)); @@ -215,6 +221,8 @@ SECTION("test_readCustomizedValues") { CHECK((std::string("consolidationPolicy") == errorField)); } + // consolidation policy "bytes_accum" + { std::string errorField; auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"bytes_accum\", \"threshold\": -0.5 } }"); @@ -231,6 +239,40 @@ SECTION("test_readCustomizedValues") { CHECK((std::string("consolidationPolicy=>threshold") == errorField)); } + // consolidation policy "tier" + + { + std::string errorField; + auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"tier\", \"segmentsMin\": -1 } }"); + CHECK((true == metaState.init(json->slice(), errorField))); + CHECK(false == meta.init(json->slice(), errorField)); + CHECK((std::string("consolidationPolicy=>segmentsMin") == errorField)); + } + + { + std::string errorField; + auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"tier\", \"segmentsMax\": -1 } }"); + CHECK((true == metaState.init(json->slice(), errorField))); + CHECK(false == meta.init(json->slice(), errorField)); + CHECK((std::string("consolidationPolicy=>segmentsMax") == errorField)); + } + + { + std::string errorField; + auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"tier\", \"segmentsBytesFloor\": -1 } }"); + CHECK((true == metaState.init(json->slice(), errorField))); + CHECK(false == meta.init(json->slice(), errorField)); + CHECK((std::string("consolidationPolicy=>segmentsBytesFloor") == errorField)); + } + + { + std::string errorField; + auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"tier\", \"segmentsBytesMax\": -1 } }"); + CHECK((true == metaState.init(json->slice(), errorField))); + CHECK(false == meta.init(json->slice(), errorField)); + CHECK((std::string("consolidationPolicy=>segmentsBytesMax") == errorField)); + } + { std::string errorField; auto json = arangodb::velocypack::Parser::fromJson("{ \"consolidationPolicy\": { \"type\": \"invalid\" } }"); @@ -433,11 +475,19 @@ SECTION("test_writeDefaults") { tmpSlice = slice.get("consolidationIntervalMsec"); CHECK((true == tmpSlice.isNumber() && 60000 == tmpSlice.getNumber())); tmpSlice = slice.get("consolidationPolicy"); - CHECK((true == tmpSlice.isObject() && 2 == tmpSlice.length())); - tmpSlice2 = tmpSlice.get("threshold"); - CHECK((tmpSlice2.isNumber() && .1f == tmpSlice2.getNumber())); + CHECK((true == tmpSlice.isObject() && 6 == tmpSlice.length())); tmpSlice2 = tmpSlice.get("type"); - CHECK((tmpSlice2.isString() && std::string("bytes_accum") == tmpSlice2.copyString())); + CHECK((tmpSlice2.isString() && std::string("tier") == tmpSlice2.copyString())); + tmpSlice2 = tmpSlice.get("segmentsMin"); + CHECK((tmpSlice2.isNumber() && 1 == tmpSlice2.getNumber())); + tmpSlice2 = tmpSlice.get("segmentsMax"); + CHECK((tmpSlice2.isNumber() && 10 == tmpSlice2.getNumber())); + tmpSlice2 = tmpSlice.get("segmentsBytesFloor"); + CHECK((tmpSlice2.isNumber() && (size_t(2)*(1<<20)) == tmpSlice2.getNumber())); + tmpSlice2 = tmpSlice.get("segmentsBytesMax"); + CHECK((tmpSlice2.isNumber() && (size_t(5)*(1<<30)) == tmpSlice2.getNumber())); + tmpSlice2 = tmpSlice.get("minScore"); + CHECK((tmpSlice2.isNumber() && (0. == tmpSlice2.getNumber()))); tmpSlice = slice.get("version"); CHECK((true == tmpSlice.isNumber() && 1 == tmpSlice.getNumber())); tmpSlice = slice.get("writebufferActive"); diff --git a/tests/js/common/aql/aql-view-arangosearch-ddl-cluster.js b/tests/js/common/aql/aql-view-arangosearch-ddl-cluster.js index b52f075acd..f1c73398c1 100644 --- a/tests/js/common/aql/aql-view-arangosearch-ddl-cluster.js +++ b/tests/js/common/aql/aql-view-arangosearch-ddl-cluster.js @@ -215,9 +215,13 @@ function IResearchFeatureDDLTestSuite () { assertEqual(1000, properties.commitIntervalMsec); assertEqual(60000, properties.consolidationIntervalMsec); assertTrue(Object === properties.consolidationPolicy.constructor); - assertEqual(2, Object.keys(properties.consolidationPolicy).length); - assertEqual("bytes_accum", properties.consolidationPolicy.type); - assertEqual((0.1).toFixed(6), properties.consolidationPolicy.threshold.toFixed(6)); + assertEqual(6, Object.keys(properties.consolidationPolicy).length); + assertEqual("tier", properties.consolidationPolicy.type); + assertEqual(1, properties.consolidationPolicy.segmentsMin); + assertEqual(10, properties.consolidationPolicy.segmentsMax); + assertEqual(5*(1 << 30), properties.consolidationPolicy.segmentsBytesMax); + assertEqual(2*(1 << 20), properties.consolidationPolicy.segmentsBytesFloor); + assertEqual((0.0).toFixed(6), properties.consolidationPolicy.minScore.toFixed(6)); meta = { commitIntervalMsec: 12345, @@ -685,9 +689,13 @@ function IResearchFeatureDDLTestSuite () { assertEqual(1000, properties.commitIntervalMsec); assertEqual(60000, properties.consolidationIntervalMsec); assertTrue(Object === properties.consolidationPolicy.constructor); - assertEqual(2, Object.keys(properties.consolidationPolicy).length); - assertEqual("bytes_accum", properties.consolidationPolicy.type); - assertEqual((0.1).toFixed(6), properties.consolidationPolicy.threshold.toFixed(6)); + assertEqual(6, Object.keys(properties.consolidationPolicy).length); + assertEqual("tier", properties.consolidationPolicy.type); + assertEqual(1, properties.consolidationPolicy.segmentsMin); + assertEqual(10, properties.consolidationPolicy.segmentsMax); + assertEqual(5*(1 << 30), properties.consolidationPolicy.segmentsBytesMax); + assertEqual(2*(1 << 20), properties.consolidationPolicy.segmentsBytesFloor); + assertEqual((0.0).toFixed(6), properties.consolidationPolicy.minScore.toFixed(6)); assertTrue(Object === properties.links.constructor); assertEqual(0, Object.keys(properties.links).length); }, diff --git a/tests/js/common/aql/aql-view-arangosearch-ddl-noncluster.js b/tests/js/common/aql/aql-view-arangosearch-ddl-noncluster.js index b3939311a6..4b58164032 100644 --- a/tests/js/common/aql/aql-view-arangosearch-ddl-noncluster.js +++ b/tests/js/common/aql/aql-view-arangosearch-ddl-noncluster.js @@ -215,9 +215,13 @@ function IResearchFeatureDDLTestSuite () { assertEqual(1000, properties.commitIntervalMsec); assertEqual(60000, properties.consolidationIntervalMsec); assertTrue(Object === properties.consolidationPolicy.constructor); - assertEqual(2, Object.keys(properties.consolidationPolicy).length); - assertEqual("bytes_accum", properties.consolidationPolicy.type); - assertEqual((0.1).toFixed(6), properties.consolidationPolicy.threshold.toFixed(6)); + assertEqual(6, Object.keys(properties.consolidationPolicy).length); + assertEqual("tier", properties.consolidationPolicy.type); + assertEqual(1, properties.consolidationPolicy.segmentsMin); + assertEqual(10, properties.consolidationPolicy.segmentsMax); + assertEqual(5*(1 << 30), properties.consolidationPolicy.segmentsBytesMax); + assertEqual(2*(1 << 20), properties.consolidationPolicy.segmentsBytesFloor); + assertEqual((0.0).toFixed(6), properties.consolidationPolicy.minScore.toFixed(6)); meta = { commitIntervalMsec: 12345, @@ -685,9 +689,13 @@ function IResearchFeatureDDLTestSuite () { assertEqual(1000, properties.commitIntervalMsec); assertEqual(60000, properties.consolidationIntervalMsec); assertTrue(Object === properties.consolidationPolicy.constructor); - assertEqual(2, Object.keys(properties.consolidationPolicy).length); - assertEqual("bytes_accum", properties.consolidationPolicy.type); - assertEqual((0.1).toFixed(6), properties.consolidationPolicy.threshold.toFixed(6)); + assertEqual(6, Object.keys(properties.consolidationPolicy).length); + assertEqual("tier", properties.consolidationPolicy.type); + assertEqual(1, properties.consolidationPolicy.segmentsMin); + assertEqual(10, properties.consolidationPolicy.segmentsMax); + assertEqual(5*(1 << 30), properties.consolidationPolicy.segmentsBytesMax); + assertEqual(2*(1 << 20), properties.consolidationPolicy.segmentsBytesFloor); + assertEqual((0.0).toFixed(6), properties.consolidationPolicy.minScore.toFixed(6)); assertTrue(Object === properties.links.constructor); assertEqual(0, Object.keys(properties.links).length); },