mirror of https://gitee.com/bigwinds/arangodb
396 lines
14 KiB
C++
396 lines
14 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Andrey Abramov
|
|
/// @author Vasiliy Nabatchikov
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "IResearchQueryCommon.h"
|
|
|
|
#include "IResearch/IResearchView.h"
|
|
#include "Transaction/StandaloneContext.h"
|
|
#include "Utils/OperationOptions.h"
|
|
#include "Utils/SingleCollectionTransaction.h"
|
|
#include "VocBase/LogicalCollection.h"
|
|
#include "VocBase/ManagedDocumentResult.h"
|
|
|
|
#include <velocypack/Iterator.h>
|
|
|
|
namespace {
|
|
static const VPackBuilder systemDatabaseBuilder = dbArgsBuilder();
|
|
static const VPackSlice systemDatabaseArgs = systemDatabaseBuilder.slice();
|
|
|
|
struct TestTermAttribute : public irs::term_attribute {
|
|
public:
|
|
void value(irs::bytes_ref const& value) { value_ = value; }
|
|
};
|
|
|
|
class TestDelimAnalyzer : public irs::analysis::analyzer {
|
|
public:
|
|
DECLARE_ANALYZER_TYPE();
|
|
|
|
static ptr make(irs::string_ref const& args) {
|
|
auto slice = arangodb::iresearch::slice(args);
|
|
if (slice.isNull()) throw std::exception();
|
|
if (slice.isNone()) return nullptr;
|
|
if (slice.isString()) {
|
|
PTR_NAMED(TestDelimAnalyzer, ptr, arangodb::iresearch::getStringRef(slice));
|
|
return ptr;
|
|
} else if (slice.isObject() && slice.hasKey("args") && slice.get("args").isString()) {
|
|
PTR_NAMED(TestDelimAnalyzer, ptr,
|
|
arangodb::iresearch::getStringRef(slice.get("args")));
|
|
return ptr;
|
|
} else {
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
static bool normalize(irs::string_ref const& args, std::string& out) {
|
|
auto slice = arangodb::iresearch::slice(args);
|
|
if (slice.isNull()) throw std::exception();
|
|
if (slice.isNone()) return false;
|
|
arangodb::velocypack::Builder builder;
|
|
if (slice.isString()) {
|
|
VPackObjectBuilder scope(&builder);
|
|
arangodb::iresearch::addStringRef(builder, "args",
|
|
arangodb::iresearch::getStringRef(slice));
|
|
} else if (slice.isObject() && slice.hasKey("args") && slice.get("args").isString()) {
|
|
VPackObjectBuilder scope(&builder);
|
|
arangodb::iresearch::addStringRef(builder, "args",
|
|
arangodb::iresearch::getStringRef(slice.get("args")));
|
|
} else {
|
|
return false;
|
|
}
|
|
|
|
out = builder.buffer()->toString();
|
|
return true;
|
|
}
|
|
|
|
TestDelimAnalyzer(irs::string_ref const& delim)
|
|
: irs::analysis::analyzer(TestDelimAnalyzer::type()),
|
|
_delim(irs::ref_cast<irs::byte_type>(delim)) {
|
|
_attrs.emplace(_term);
|
|
}
|
|
|
|
virtual irs::attribute_view const& attributes() const NOEXCEPT override {
|
|
return _attrs;
|
|
}
|
|
|
|
virtual bool next() override {
|
|
if (_data.empty()) {
|
|
return false;
|
|
}
|
|
|
|
size_t i = 0;
|
|
|
|
for (size_t count = _data.size(); i < count; ++i) {
|
|
auto data = irs::ref_cast<char>(_data);
|
|
auto delim = irs::ref_cast<char>(_delim);
|
|
|
|
if (0 == strncmp(&(data.c_str()[i]), delim.c_str(), delim.size())) {
|
|
_term.value(irs::bytes_ref(_data.c_str(), i));
|
|
_data =
|
|
irs::bytes_ref(_data.c_str() + i + (std::max)(size_t(1), _delim.size()),
|
|
_data.size() - i - (std::max)(size_t(1), _delim.size()));
|
|
return true;
|
|
}
|
|
}
|
|
|
|
_term.value(_data);
|
|
_data = irs::bytes_ref::NIL;
|
|
return true;
|
|
}
|
|
|
|
virtual bool reset(irs::string_ref const& data) override {
|
|
_data = irs::ref_cast<irs::byte_type>(data);
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
irs::attribute_view _attrs;
|
|
std::basic_string<irs::byte_type> _delim;
|
|
irs::bytes_ref _data;
|
|
TestTermAttribute _term;
|
|
};
|
|
|
|
DEFINE_ANALYZER_TYPE_NAMED(TestDelimAnalyzer, "TestDelimAnalyzer");
|
|
REGISTER_ANALYZER_VPACK(TestDelimAnalyzer, TestDelimAnalyzer::make, TestDelimAnalyzer::normalize);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- setup / tear-down
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// Fixture for the TOKENS() query test in this file; it adds nothing of its
// own to IResearchQueryTest.
class IResearchQueryTokensTest : public IResearchQueryTest {};
|
|
|
|
} // namespace
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- test suite
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief test TOKENS() in SEARCH conditions on an arangosearch view
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Populates two collections, links both to an arangosearch view and then runs
// `d.prefix IN TOKENS(..., 'test_csv_analyzer')` queries (in both `d.prefix`
// and `d['prefix']` spelling), comparing the returned documents, in order,
// against the documents that were inserted.
TEST_F(IResearchQueryTokensTest, test) {
  TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, testDBInfo(server.server()));
  // every inserted document, in insertion order: the 6 documents of
  // testCollection0 first, then the contents of simple_sequential.json
  std::vector<arangodb::velocypack::Builder> insertedDocs;

  // create collection0
  {
    auto createJson = arangodb::velocypack::Parser::fromJson(
        "{ \"name\": \"testCollection0\" }");
    auto collection = vocbase.createCollection(createJson->slice());
    ASSERT_NE(nullptr, collection);

    std::vector<std::shared_ptr<arangodb::velocypack::Builder>> docs{
        VPackParser::fromJson("{ \"seq\": -6, \"value\": null }"),
        VPackParser::fromJson("{ \"seq\": -5, \"value\": true }"),
        VPackParser::fromJson("{ \"seq\": -4, \"value\": \"abc\" }"),
        VPackParser::fromJson("{ \"seq\": -3, \"value\": 3.14 }"),
        VPackParser::fromJson("{ \"seq\": -2, \"value\": [ 1, \"abc\" ] }"),
        VPackParser::fromJson("{ \"seq\": -1, \"value\": { \"a\": 7, \"b\": \"c\" } }"),
    };

    arangodb::OperationOptions options;
    options.returnNew = true;
    arangodb::SingleCollectionTransaction trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                              *collection,
                                              arangodb::AccessMode::Type::WRITE);
    EXPECT_TRUE(trx.begin().ok());

    for (auto& entry : docs) {
      auto res = trx.insert(collection->name(), entry->slice(), options);
      // fatal: the result slice is read on the next line
      ASSERT_TRUE(res.ok());
      insertedDocs.emplace_back(res.slice().get("new"));
    }

    EXPECT_TRUE(trx.commit().ok());
  }

  // create collection1
  {
    auto createJson = arangodb::velocypack::Parser::fromJson(
        "{ \"name\": \"testCollection1\" }");
    auto collection = vocbase.createCollection(createJson->slice());
    ASSERT_NE(nullptr, collection);

    irs::utf8_path resource;
    resource /= irs::string_ref(arangodb::tests::testResourceDir);
    resource /= irs::string_ref("simple_sequential.json");

    auto builder =
        arangodb::basics::VelocyPackHelper::velocyPackFromFile(resource.utf8());
    auto slice = builder.slice();
    ASSERT_TRUE(slice.isArray());

    arangodb::OperationOptions options;
    options.returnNew = true;
    arangodb::SingleCollectionTransaction trx(arangodb::transaction::StandaloneContext::Create(vocbase),
                                              *collection,
                                              arangodb::AccessMode::Type::WRITE);
    EXPECT_TRUE(trx.begin().ok());

    for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) {
      auto res = trx.insert(collection->name(), itr.value(), options);
      // fatal: the result slice is read on the next line
      ASSERT_TRUE(res.ok());
      insertedDocs.emplace_back(res.slice().get("new"));
    }

    EXPECT_TRUE(trx.commit().ok());
  }

  // create view
  {
    auto createJson = arangodb::velocypack::Parser::fromJson(
        "{ \"name\": \"testView\", \"type\": \"arangosearch\" }");
    auto logicalView = vocbase.createView(createJson->slice());
    ASSERT_FALSE(!logicalView);

    auto* impl = dynamic_cast<arangodb::iresearch::IResearchView*>(logicalView.get());
    ASSERT_FALSE(!impl);

    auto updateJson = arangodb::velocypack::Parser::fromJson(
        "{ \"links\": {"
        "\"testCollection0\": { \"includeAllFields\": true, "
        "\"trackListPositions\": true },"
        "\"testCollection1\": { \"includeAllFields\": true }"
        "}}");
    EXPECT_TRUE(impl->properties(updateJson->slice(), true).ok());
    std::set<TRI_voc_cid_t> cids;
    impl->visitCollections([&cids](TRI_voc_cid_t cid) -> bool {
      cids.emplace(cid);
      return true;
    });
    EXPECT_EQ(size_t(2), cids.size());
    EXPECT_TRUE(
        (arangodb::tests::executeQuery(vocbase,
                                       "FOR d IN testView SEARCH 1 ==1 OPTIONS "
                                       "{ waitForSync: true } RETURN d")
             .result.ok()));  // commit
  }

  // the expectations below index insertedDocs up to position 37
  ASSERT_GT(insertedDocs.size(), size_t(37));

  // Runs `query` and checks that it returns exactly the documents in
  // `expected`, in that order. A fatal failure only leaves this helper, so
  // the remaining queries are still checked.
  auto const expectQueryResult =
      [&vocbase](char const* query,
                 std::vector<arangodb::velocypack::Slice> const& expected) -> void {
    SCOPED_TRACE(query);  // identify the failing query in the test output
    auto result = arangodb::tests::executeQuery(vocbase, query);
    ASSERT_TRUE(result.result.ok());
    auto slice = result.data->slice();
    ASSERT_TRUE(slice.isArray());
    size_t i = 0;

    for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) {
      auto const resolved = itr.value().resolveExternals();
      // fatal: a surplus result row must not index past the end of `expected`
      ASSERT_LT(i, expected.size());
      EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++],
                                                                    resolved, true)));
    }

    EXPECT_EQ(i, expected.size());
  };

  // test no-match
  expectQueryResult(
      "FOR d IN testView SEARCH d.prefix IN TOKENS('def', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {});

  // test no-match via []
  expectQueryResult(
      "FOR d IN testView SEARCH d['prefix'] IN TOKENS('def', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {});

  // test single match
  expectQueryResult(
      "FOR d IN testView SEARCH d.prefix IN TOKENS('ab,abcde,de', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {
          insertedDocs[9].slice(),
      });

  // test single match via []
  expectQueryResult(
      "FOR d IN testView SEARCH d['prefix'] IN TOKENS('ab,abcde,de', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {
          insertedDocs[9].slice(),
      });

  // test multiple match
  expectQueryResult(
      "FOR d IN testView SEARCH d.prefix IN TOKENS('z,xy,abcy,abcd,abc', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {
          insertedDocs[36].slice(),  // (duplicate term)
          insertedDocs[37].slice(),  // (duplicate term)
          insertedDocs[6].slice(),   // (unique term)
          insertedDocs[26].slice(),  // (unique term)
      });

  // test multiple match via []
  expectQueryResult(
      "FOR d IN testView SEARCH d['prefix'] IN TOKENS('z,xy,abcy,abcd,abc', "
      "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d",
      {
          insertedDocs[36].slice(),  // (duplicate term)
          insertedDocs[37].slice(),  // (duplicate term)
          insertedDocs[6].slice(),   // (unique term)
          insertedDocs[26].slice(),  // (unique term)
      });
}
|