1
0
Fork 0

deduplicate column names (exclude expansions)

This commit is contained in:
Yuriy Popov 2019-11-29 14:51:52 +01:00
parent c4a9c88b9a
commit df906277a2
6 changed files with 67 additions and 20 deletions

View File

@ -908,8 +908,7 @@ bool IResearchViewExecutor<ordered, materializeType>::resetIterator() {
auto const& columns = storedValue.columns();
auto const storedColumnNumber = static_cast<decltype(columns.size())>(columnfieldsRegs.first);
TRI_ASSERT(storedColumnNumber < columns.size());
// column name is equal to the first field name (TODO: two can have the same)
auto storedValueColumn = ::storedValueColumn(segmentReader, columns[storedColumnNumber].back().first);
auto storedValueColumn = ::storedValueColumn(segmentReader, columns[storedColumnNumber].name);
if (!storedValueColumn) {
LOG_TOPIC("af7ec", WARN, arangodb::iresearch::TOPIC)
<< "encountered a sub-reader without a stored value column while "

View File

@ -1095,9 +1095,9 @@ void IResearchViewNode::toVelocyPackHelper(VPackBuilder& nodes, unsigned flags,
for (auto const& fieldVar : columnFieldsVars.second) {
VPackObjectBuilder objectScope(&nodes);
fieldName.clear();
TRI_ASSERT(fieldVar.first < columns[storedColumnNumber].size());
TRI_ASSERT(fieldVar.first < columns[storedColumnNumber].fields.size());
nodes.add(NODE_VIEW_VALUES_VAR_COLUMN_NUMBER, VPackValue(columnFieldsVars.first));
fieldName = columns[storedColumnNumber][fieldVar.first].first;
fieldName = columns[storedColumnNumber].fields[fieldVar.first].first;
addViewValuesVar(nodes, fieldName, fieldVar);
}
} else { // SortColumnNumber

View File

@ -300,7 +300,7 @@ bool attributesMatch(IResearchViewSort const& primarySort, IResearchViewStoredVa
int columnNum = 0;
fieldNum = 0;
for (auto const& column : storedValue.columns()) {
for (auto const& field : column) {
for (auto const& field : column.fields) {
if (arangodb::basics::AttributeName::isIdentical(nodeAttr.attr, field.second, false)) {
nodeAttr.afData.number = fieldNum;
nodeAttr.afData.field = &field.second;

View File

@ -172,12 +172,8 @@ inline arangodb::Result insertDocument(irs::index_writer::documents_context& ctx
} field; // StoredValue
for (auto const& column : meta._storedValue.columns()) {
field.fieldName = irs::string_ref{};
for (auto const& storedValue : column) {
// column name is equal to the first field name (TODO: two can have the same)
if (field.fieldName.empty()) {
field.fieldName = irs::string_ref(storedValue.first);
}
field.fieldName = column.name;
for (auto const& storedValue : column.fields) {
field.slice = arangodb::iresearch::get(document, storedValue.second, VPackSlice::nullSlice());
}
doc.insert<irs::Action::STORE>(field);

View File

@ -27,6 +27,8 @@
#include "VelocyPackHelper.h"
#include <unordered_set>
namespace arangodb {
namespace iresearch {
@ -40,13 +42,15 @@ namespace iresearch {
}
*/
// Separator inserted between field names when they are joined into a single
// column name.
constexpr char FIELDS_DELIMITER = '\1';
bool IResearchViewStoredValue::toVelocyPack(velocypack::Builder& builder) const {
if (!builder.isOpenArray()) {
return false;
}
for (auto const& column : _storedColumns) {
velocypack::ArrayBuilder arrayScope(&builder);
for (auto const& field : column) {
for (auto const& field : column.fields) {
builder.add(VPackValue(field.first));
}
}
@ -58,17 +62,31 @@ bool IResearchViewStoredValue::fromVelocyPack(
clear();
if (slice.isArray()) {
_storedColumns.reserve(slice.length());
std::unordered_set<std::string> uniqueColumns;
std::unordered_set<irs::string_ref> uniqueFields;
std::vector<irs::string_ref> fieldNames;
std::vector<basics::AttributeName> field;
for (auto columnSlice : VPackArrayIterator(slice)) {
if (columnSlice.isArray()) {
uniqueFields.clear();
fieldNames.clear();
size_t columnLength = 0;
StoredColumn sc;
sc.reserve(columnSlice.length());
sc.fields.reserve(columnSlice.length());
for (auto fieldSlice : VPackArrayIterator(columnSlice)) {
if (!fieldSlice.isString()) {
clear();
return false;
}
auto fieldName = arangodb::iresearch::getStringRef(slice);
std::vector<basics::AttributeName> field;
// check field uniqueness
if (uniqueFields.find(fieldName) != uniqueFields.cend()) { // TODO: expansions
continue;
}
uniqueFields.emplace_hint(uniqueFields.cend(), fieldName);
columnLength += fieldName.size();
fieldNames.emplace_back(std::move(fieldName));
field.clear();
try {
arangodb::basics::TRI_ParseAttributeString(fieldName, field, false);
} catch (...) {
@ -76,19 +94,41 @@ bool IResearchViewStoredValue::fromVelocyPack(
clear();
return false;
}
sc.emplace_back(fieldName, std::move(field));
sc.fields.emplace_back(fieldName, std::move(field));
}
// check column uniqueness
std::sort(fieldNames.begin(), fieldNames.end());
std::string columnName;
columnName.reserve(columnLength);
for (auto const& fieldName : fieldNames) {
if (!columnName.empty()) {
columnName += FIELDS_DELIMITER;
}
columnName += fieldName;
}
if (uniqueColumns.find(columnName) != uniqueColumns.cend()) {
continue;
}
uniqueColumns.emplace_hint(uniqueColumns.cend(), columnName);
sc.name = std::move(columnName);
_storedColumns.emplace_back(std::move(sc));
} else if (columnSlice.isString()) {
auto fieldName = arangodb::iresearch::getStringRef(slice);
std::vector<basics::AttributeName> field;
field.clear();
try {
arangodb::basics::TRI_ParseAttributeString(fieldName, field, false);
} catch (...) {
error = "." + std::string(fieldName);
return false;
}
_storedColumns.emplace_back(StoredColumn{{fieldName, std::move(field)}});
if (uniqueColumns.find(fieldName) != uniqueColumns.cend()) {
continue;
}
uniqueColumns.emplace_hint(uniqueColumns.cend(), fieldName);
StoredColumn sc;
sc.fields.emplace_back(fieldName, std::move(field));
sc.name = std::move(fieldName);
_storedColumns.emplace_back(std::move(sc));
} else {
clear();
return false;
@ -103,8 +143,9 @@ size_t IResearchViewStoredValue::memory() const noexcept {
size_t size = sizeof(IResearchViewStoredValue);
size += sizeof(StoredColumn)*_storedColumns.size();
for (auto const& column : _storedColumns) {
size += sizeof(std::pair<std::string, std::vector<basics::AttributeName>>)*column.size();
for (auto const& field : column) {
size += column.name.size();
size += sizeof(std::pair<std::string, std::vector<basics::AttributeName>>)*column.fields.size();
for (auto const& field : column.fields) {
size += field.first.size();
size += sizeof(basics::AttributeName)*field.second.size();
for (auto const& attribute : field.second) {

View File

@ -48,7 +48,18 @@ namespace iresearch {
class IResearchViewStoredValue {
public:
using StoredColumn = std::vector<std::pair<std::string, std::vector<basics::AttributeName>>>;
/// One stored-values column: a name plus the fields that belong to it.
struct StoredColumn {
  /// Column name; the only member taken into account by the comparisons below.
  std::string name;
  /// Entries of (field name, parsed attribute names) held by this column.
  std::vector<std::pair<std::string, std::vector<basics::AttributeName>>> fields;

  /// Two columns compare equal when their names match; `fields` is not compared.
  bool operator==(StoredColumn const& other) const noexcept {
    return 0 == name.compare(other.name);
  }

  /// Exact negation of operator==.
  bool operator!=(StoredColumn const& other) const noexcept {
    return 0 != name.compare(other.name);
  }
};
bool operator==(IResearchViewStoredValue const& rhs) const noexcept {
return _storedColumns == rhs._storedColumns;