mirror of https://gitee.com/bigwinds/arangodb
444 lines
14 KiB
C++
444 lines
14 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Jan Steemann
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "json-utilities.h"
|
|
#include "Basics/fasthash.h"
|
|
#include "Basics/hashes.h"
|
|
#include "Basics/StringBuffer.h"
|
|
#include "Basics/Utf8Helper.h"
|
|
#include "Basics/VelocyPackHelper.h"
|
|
|
|
#include <velocypack/Builder.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief recursively merge the attributes of rhs into a copy of lhs
///
/// A copy of `lhs` is created in `zone` and then modified according to the
/// entries of `rhs`. The entries of `rhs` are read in steps of two: the
/// element at the even position is used as the attribute name, the element
/// following it as the replacement value.
///
/// - if `nullMeansRemove` is set and the replacement value is a JSON null,
///   the attribute is deleted from the copy
/// - if `lhs` does not have the attribute, the replacement value is inserted
///   (sub-objects are first merged into an empty object when
///   `nullMeansRemove` is set, so the same rules apply to them)
/// - if `lhs` has the attribute, it is replaced; when both values are objects
///   and `mergeObjects` is set, the two objects are merged recursively
///
/// Returns the merged copy (ownership is released to the caller), or a
/// nullptr if a copy or a nested merge returned a nullptr.
////////////////////////////////////////////////////////////////////////////////

static TRI_json_t* MergeRecursive(TRI_memory_zone_t* zone,
                                  TRI_json_t const* lhs, TRI_json_t const* rhs,
                                  bool nullMeansRemove, bool mergeObjects) {
  TRI_ASSERT(lhs != nullptr);

  // all modifications are applied to a private copy of the left-hand side
  std::unique_ptr<TRI_json_t> merged(TRI_CopyJson(zone, lhs));

  if (merged == nullptr) {
    return nullptr;
  }

  TRI_json_t* const target = merged.get();
  size_t const numSlots = TRI_LengthVector(&rhs->_value._objects);

  for (size_t pos = 0; pos < numSlots; pos += 2) {
    // fetch the next name / replacement value pair
    auto attribute = static_cast<TRI_json_t const*>(
        TRI_AtVector(&rhs->_value._objects, pos));
    auto replacement = static_cast<TRI_json_t const*>(
        TRI_AtVector(&rhs->_value._objects, pos + 1));
    auto name = attribute->_value._string.data;

    if (nullMeansRemove && replacement->_type == TRI_JSON_NULL) {
      // a null replacement requests removal of the attribute
      TRI_DeleteObjectJson(zone, target, name);
      continue;
    }

    // note: the lookup is done in the original lhs, not in the copy
    TRI_json_t const* existing = TRI_LookupObjectJson(lhs, name);

    if (existing != nullptr) {
      // lhs already has the attribute => replace it
      bool const bothObjects = existing->_type == TRI_JSON_OBJECT &&
                               replacement->_type == TRI_JSON_OBJECT;

      if (!(bothObjects && mergeObjects)) {
        // plain overwrite with the replacement value
        TRI_ReplaceObjectJson(zone, target, name, replacement);
        continue;
      }

      TRI_json_t* nested = MergeRecursive(zone, existing, replacement,
                                          nullMeansRemove, mergeObjects);

      if (nested == nullptr) {
        return nullptr;
      }

      // the nested result is freed right after the replace call, so
      // TRI_ReplaceObjectJson presumably stores its own copy (TODO confirm)
      TRI_ReplaceObjectJson(zone, target, name, nested);
      TRI_FreeJson(zone, nested);
      continue;
    }

    // lhs does not have the attribute => build the value to append
    TRI_json_t* addition = nullptr;

    if (nullMeansRemove && replacement->_type == TRI_JSON_OBJECT) {
      // merge into an empty object so that nulls inside the sub-object are
      // handled the same way as on this level
      TRI_json_t empty;
      TRI_InitObjectJson(TRI_UNKNOWN_MEM_ZONE, &empty);
      addition = MergeRecursive(zone, &empty, replacement, nullMeansRemove,
                                mergeObjects);
    } else {
      addition = TRI_CopyJson(zone, replacement);
    }

    if (addition == nullptr) {
      return nullptr;
    }

    // the inserted value is not freed here, unlike in the replace path above;
    // presumably TRI_Insert3ObjectJson takes it over (TODO confirm)
    TRI_Insert3ObjectJson(zone, target, name, addition);
  }

  return merged.release();
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief get type weight of a json value usable for comparison and sorting
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static int TypeWeight(TRI_json_t const* value) {
|
|
if (value != nullptr) {
|
|
switch (value->_type) {
|
|
case TRI_JSON_BOOLEAN:
|
|
return 1;
|
|
case TRI_JSON_NUMBER:
|
|
return 2;
|
|
case TRI_JSON_STRING:
|
|
case TRI_JSON_STRING_REFERENCE:
|
|
// a string reference has the same weight as a regular string
|
|
return 3;
|
|
case TRI_JSON_ARRAY:
|
|
return 4;
|
|
case TRI_JSON_OBJECT:
|
|
return 5;
|
|
case TRI_JSON_NULL:
|
|
case TRI_JSON_UNUSED:
|
|
break;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief uniquify a sorted json list into a new array
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static TRI_json_t* UniquifyArrayJson(TRI_json_t const* array) {
|
|
TRI_ASSERT(array != nullptr);
|
|
TRI_ASSERT(array->_type == TRI_JSON_ARRAY);
|
|
|
|
// create result array
|
|
std::unique_ptr<TRI_json_t> result(TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE));
|
|
|
|
if (result == nullptr) {
|
|
return nullptr;
|
|
}
|
|
|
|
size_t const n = TRI_LengthVector(&array->_value._objects);
|
|
|
|
TRI_json_t const* last = nullptr;
|
|
for (size_t i = 0; i < n; ++i) {
|
|
auto p = static_cast<TRI_json_t const*>(
|
|
TRI_AtVector(&array->_value._objects, i));
|
|
|
|
// don't push value if it is the same as the last value
|
|
if (last == nullptr || TRI_CompareValuesJson(p, last, false) != 0) {
|
|
TRI_PushBackArrayJson(TRI_UNKNOWN_MEM_ZONE, result.get(), p);
|
|
|
|
// remember last element
|
|
last = p;
|
|
}
|
|
}
|
|
|
|
return result.release();
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief callback function used for json value sorting
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static int CompareJson(void const* lhs, void const* rhs) {
|
|
return TRI_CompareValuesJson(static_cast<TRI_json_t const*>(lhs),
|
|
static_cast<TRI_json_t const*>(rhs), true);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief sorts a json array in place
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static TRI_json_t* SortArrayJson(TRI_json_t* array) {
|
|
TRI_ASSERT(array != nullptr);
|
|
TRI_ASSERT(array->_type == TRI_JSON_ARRAY);
|
|
|
|
size_t const n = TRI_LengthVector(&array->_value._objects);
|
|
|
|
if (n > 1) {
|
|
// only sort if more than one value in array
|
|
qsort(TRI_BeginVector(&array->_value._objects), n, sizeof(TRI_json_t),
|
|
&CompareJson);
|
|
}
|
|
|
|
return array;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief merge two arrays of array keys, sort them and return a combined array
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static TRI_json_t* GetMergedKeyArray(TRI_json_t const* lhs,
|
|
TRI_json_t const* rhs) {
|
|
TRI_ASSERT(lhs->_type == TRI_JSON_OBJECT);
|
|
TRI_ASSERT(rhs->_type == TRI_JSON_OBJECT);
|
|
|
|
size_t n = TRI_LengthVector(&lhs->_value._objects) +
|
|
TRI_LengthVector(&rhs->_value._objects);
|
|
|
|
std::unique_ptr<TRI_json_t> keys(
|
|
TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE, n));
|
|
|
|
if (keys == nullptr) {
|
|
return nullptr;
|
|
}
|
|
|
|
if (TRI_CapacityVector(&(keys.get()->_value._objects)) < n) {
|
|
return nullptr;
|
|
}
|
|
|
|
n = TRI_LengthVector(&lhs->_value._objects);
|
|
|
|
for (size_t i = 0; i < n; i += 2) {
|
|
auto key =
|
|
static_cast<TRI_json_t const*>(TRI_AtVector(&lhs->_value._objects, i));
|
|
|
|
TRI_ASSERT(TRI_IsStringJson(key));
|
|
TRI_PushBackArrayJson(TRI_UNKNOWN_MEM_ZONE, keys.get(), key);
|
|
}
|
|
|
|
n = TRI_LengthVector(&rhs->_value._objects);
|
|
|
|
for (size_t i = 0; i < n; i += 2) {
|
|
auto key =
|
|
static_cast<TRI_json_t const*>(TRI_AtVector(&rhs->_value._objects, i));
|
|
|
|
TRI_ASSERT(TRI_IsStringJson(key));
|
|
TRI_PushBackArrayJson(TRI_UNKNOWN_MEM_ZONE, keys.get(), key);
|
|
}
|
|
|
|
// sort the key array in place
|
|
SortArrayJson(keys.get());
|
|
|
|
// array is now sorted
|
|
return UniquifyArrayJson(keys.get());
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief compare two json values
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int TRI_CompareValuesJson(TRI_json_t const* lhs, TRI_json_t const* rhs,
|
|
bool useUTF8) {
|
|
// note: both lhs and rhs may be NULL!
|
|
{
|
|
int lWeight = TypeWeight(lhs);
|
|
int rWeight = TypeWeight(rhs);
|
|
|
|
if (lWeight < rWeight) {
|
|
return -1;
|
|
}
|
|
|
|
if (lWeight > rWeight) {
|
|
return 1;
|
|
}
|
|
|
|
TRI_ASSERT(lWeight == rWeight);
|
|
}
|
|
|
|
// lhs and rhs have equal weights
|
|
|
|
if (lhs == nullptr || rhs == nullptr) {
|
|
// either lhs or rhs is a nullptr. we cannot be sure here that both are
|
|
// nullptrs.
|
|
// there can also exist the situation that lhs is a nullptr and rhs is a
|
|
// JSON null value
|
|
// (or vice versa). Anyway, the compare value is the same for both,
|
|
return 0;
|
|
}
|
|
|
|
switch (lhs->_type) {
|
|
case TRI_JSON_UNUSED:
|
|
case TRI_JSON_NULL: {
|
|
return 0; // null == null;
|
|
}
|
|
|
|
case TRI_JSON_BOOLEAN: {
|
|
if (lhs->_value._boolean == rhs->_value._boolean) {
|
|
return 0;
|
|
}
|
|
|
|
if (!lhs->_value._boolean && rhs->_value._boolean) {
|
|
return -1;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
case TRI_JSON_NUMBER: {
|
|
if (lhs->_value._number == rhs->_value._number) {
|
|
return 0;
|
|
}
|
|
|
|
if (lhs->_value._number < rhs->_value._number) {
|
|
return -1;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
case TRI_JSON_STRING:
|
|
case TRI_JSON_STRING_REFERENCE: {
|
|
// same for STRING and STRING_REFERENCE
|
|
TRI_ASSERT(lhs->_value._string.data != nullptr);
|
|
TRI_ASSERT(rhs->_value._string.data != nullptr);
|
|
int res;
|
|
size_t const nl = lhs->_value._string.length - 1;
|
|
size_t const nr = rhs->_value._string.length - 1;
|
|
if (useUTF8) {
|
|
res = TRI_compare_utf8(lhs->_value._string.data, nl,
|
|
rhs->_value._string.data, nr);
|
|
} else {
|
|
// beware of strings containing NUL bytes
|
|
size_t len = nl < nr ? nl : nr;
|
|
res = memcmp(lhs->_value._string.data, rhs->_value._string.data, len);
|
|
}
|
|
if (res < 0) {
|
|
return -1;
|
|
} else if (res > 0) {
|
|
return 1;
|
|
}
|
|
// res == 0
|
|
if (nl == nr) {
|
|
return 0;
|
|
}
|
|
// res == 0, but different string lengths
|
|
return nl < nr ? -1 : 1;
|
|
}
|
|
|
|
case TRI_JSON_ARRAY: {
|
|
size_t const nl = TRI_LengthVector(&lhs->_value._objects);
|
|
size_t const nr = TRI_LengthVector(&rhs->_value._objects);
|
|
size_t n;
|
|
|
|
if (nl > nr) {
|
|
n = nl;
|
|
} else {
|
|
n = nr;
|
|
}
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
auto lhsValue =
|
|
(i >= nl) ? nullptr : static_cast<TRI_json_t const*>(
|
|
TRI_AtVector(&lhs->_value._objects, i));
|
|
auto rhsValue =
|
|
(i >= nr) ? nullptr : static_cast<TRI_json_t const*>(
|
|
TRI_AtVector(&rhs->_value._objects, i));
|
|
|
|
int result = TRI_CompareValuesJson(lhsValue, rhsValue, useUTF8);
|
|
|
|
if (result != 0) {
|
|
return result;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
case TRI_JSON_OBJECT: {
|
|
TRI_ASSERT(lhs->_type == TRI_JSON_OBJECT);
|
|
TRI_ASSERT(rhs->_type == TRI_JSON_OBJECT);
|
|
|
|
std::unique_ptr<TRI_json_t> keys(GetMergedKeyArray(lhs, rhs));
|
|
|
|
if (keys != nullptr) {
|
|
auto json = keys.get();
|
|
size_t const n = TRI_LengthVector(&json->_value._objects);
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
auto keyElement = static_cast<TRI_json_t const*>(
|
|
TRI_AtVector(&json->_value._objects, i));
|
|
TRI_ASSERT(TRI_IsStringJson(keyElement));
|
|
|
|
TRI_json_t const* lhsValue = TRI_LookupObjectJson(
|
|
lhs, keyElement->_value._string.data); // may be NULL
|
|
TRI_json_t const* rhsValue = TRI_LookupObjectJson(
|
|
rhs, keyElement->_value._string.data); // may be NULL
|
|
|
|
int result = TRI_CompareValuesJson(lhsValue, rhsValue, useUTF8);
|
|
|
|
if (result != 0) {
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
// fall-through to returning 0
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief merge two JSON documents into one
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_json_t* TRI_MergeJson(TRI_memory_zone_t* zone, TRI_json_t const* lhs,
|
|
TRI_json_t const* rhs, bool nullMeansRemove,
|
|
bool mergeObjects) {
|
|
TRI_ASSERT(lhs->_type == TRI_JSON_OBJECT);
|
|
TRI_ASSERT(rhs->_type == TRI_JSON_OBJECT);
|
|
|
|
return MergeRecursive(zone, lhs, rhs, nullMeansRemove, mergeObjects);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief compute a hash value for a JSON document depending on a list
|
|
/// of attributes. This is used for sharding to map documents to shards.
|
|
///
|
|
/// The attributes array `attributes` has to contain exactly `nrAttributes`
|
|
/// pointers to zero-terminated strings.
|
|
/// Note that all JSON values given for `json` that are not JSON arrays
|
|
/// hash to the same value, which is not the same value a JSON array gets
|
|
/// that does not contain any of the specified attributes.
|
|
/// If the flag `docComplete` is false, it is an error if the document
|
|
/// does not contain explicit values for all attributes. An error
|
|
/// is reported by setting *error to
|
|
/// TRI_CLUSTER_NOT_ALL_SHARDING_ATTRIBUTES_GIVEN instead of
|
|
/// TRI_ERROR_NO_ERROR. It is allowed to give NULL as error in which
|
|
/// case no error is reported.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
uint64_t TRI_HashJsonByAttributes(TRI_json_t const* json,
|
|
char const* attributes[], int nrAttributes,
|
|
bool docComplete, int& error) {
|
|
error = TRI_ERROR_NO_ERROR;
|
|
std::shared_ptr<VPackBuilder> transformed = arangodb::basics::JsonHelper::toVelocyPack(json);
|
|
std::vector<std::string> attr;
|
|
|
|
for (int i = 0; i < nrAttributes; i++) {
|
|
attr.emplace_back(attributes[i]);
|
|
}
|
|
return arangodb::basics::VelocyPackHelper::hashByAttributes(
|
|
transformed->slice(), attr, docComplete, error);
|
|
}
|