//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2018 ArangoDB GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Andrey Abramov /// @author Vasiliy Nabatchikov //////////////////////////////////////////////////////////////////////////////// #ifndef IRESEARCH_BYTES_UTILS_H #define IRESEARCH_BYTES_UTILS_H #include "shared.hpp" #include "utils/bit_utils.hpp" #include "utils/math_utils.hpp" #include "utils/numeric_utils.hpp" NS_ROOT template struct bytes_io; template struct bytes_io { static const T const_max_vsize = 1; template static T read(InputIterator& in, std::input_iterator_tag) { T out = static_cast(*in); ++in; return out; } template static T vread(InputIterator& in, std::input_iterator_tag) { // read direct same as writen in vwrite(...) return read(in, typename std::iterator_traits::iterator_category()); } template static void write(OutputIterator& out, T value) { *out = static_cast(value); ++out; } template static void vwrite(OutputIterator& out, T value) { // write direct since no benefit from variable-size encoding write(out, value); } }; // bytes_io template struct bytes_io { static const T const_max_vsize = 2; template static T read(InputIterator& in, std::input_iterator_tag) { T out = static_cast(*in) << 8; ++in; out |= static_cast(*in); ++in; return out; } template static T vread(InputIterator& in, std::input_iterator_tag) { // read direct same as writen in vwrite(...) return read(in, typename std::iterator_traits::iterator_category()); } template static void write(OutputIterator& out, T value) { *out = static_cast(value >> 8); ++out; *out = static_cast(value); ++out; } template static void vwrite(OutputIterator& out, T value) { // write direct since no benefit from variable-size encoding write(out, value); } }; // bytes_io template struct bytes_io { static const T const_max_vsize = 5; template static void vwrite(OutputIterator& out, T in) { while (in >= 0x80) { *out = static_cast(in | 0x80); ++out; in >>= 7; } *out = static_cast(in); ++out; } template static void write(OutputIterator& out, T in) { *out = static_cast(in >> 24); ++out; *out = static_cast(in >> 16); ++out; *out = static_cast(in >> 8); ++out; *out = static_cast(in); ++out; } static void write(byte_type*& out, T in) { if (!numeric_utils::is_big_endian()) { in = numeric_utils::hton32(in); } *reinterpret_cast(out) = in; out += sizeof(T); } template static T vread(InputIterator& in, std::input_iterator_tag) { T out = *in; ++in; if (!(out & 0x80)) return out; T b; out -= 0x80; b = *in; ++in; out += b << 7; if (!(b & 0x80)) return out; out -= 0x80 << 7; b = *in; ++in; out += b << 14; if (!(b & 0x80)) return out; out -= 0x80 << 14; b = *in; ++in; out += b << 21; if (!(b & 0x80)) return out; out -= 0x80 << 21; b = *in; ++in; out += b << 28; // last byte always has MSB == 0, so we don't need to check and subtract 0x80 return out; } template static T read(InputIterator& in, std::input_iterator_tag) { T out = static_cast(*in) << 24; ++in; out |= static_cast(*in) << 16; ++in; out |= static_cast(*in) << 8; ++in; out |= static_cast(*in); ++in; return out; } static T read(byte_type*& in) { auto value = *reinterpret_cast(in); if (!numeric_utils::is_big_endian()) { value = numeric_utils::ntoh32(value); } in += sizeof(uint32_t); return value; } //////////////////////////////////////////////////////////////////////////////// /// @returns number of bytes required to store value in variable length format //////////////////////////////////////////////////////////////////////////////// FORCE_INLINE static uint32_t vsize(uint32_t value) { // compute 0 == value ? 1 : 1 + floor(log2(value)) / 7 // OR 0x1 since log2_floor_32 does not accept 0 const uint32_t log2 = math::log2_floor_32(value | 0x1); // division within range [1;31] return (73 + 9*log2) >> 6; } template static int32_t zvread(InputIterator& in, std::input_iterator_tag) { return irs::zig_zag_decode32(vread( in, typename std::iterator_traits::iterator_category() )); } template static void zvwrite(OutputIterator& out, int32_t value) { vwrite(out, zig_zag_encode32(value)); } }; // bytes_io template struct bytes_io { static const T const_max_vsize = 10; template static void vwrite(OutputIterator& out, T in) { while (in >= T(0x80)) { *out = static_cast(in | T(0x80)); ++out; in >>= 7; } *out = static_cast(in); ++out; } template static void write(OutputIterator& out, T in) { typedef bytes_io bytes_io_t; bytes_io_t::write(out, static_cast(in >> 32)); bytes_io_t::write(out, static_cast(in)); } static void write(byte_type*& out, T in) { if (!numeric_utils::is_big_endian()) { in = numeric_utils::hton64(in); } *reinterpret_cast(out) = in; out += sizeof(T); } template static T vread(InputIterator& in, std::input_iterator_tag) { const T MASK = 0x80; T out = *in; ++in; if (!(out & MASK)) return out; T b; out -= MASK; b = *in; ++in; out += b << 7; if (!(b & MASK)) return out; out -= MASK << 7; b = *in; ++in; out += b << 14; if (!(b & MASK)) return out; out -= MASK << 14; b = *in; ++in; out += b << 21; if (!(b & MASK)) return out; out -= MASK << 21; b = *in; ++in; out += b << 28; if (!(b & MASK)) return out; out -= MASK << 28; b = *in; ++in; out += b << 35; if (!(b & MASK)) return out; out -= MASK << 35; b = *in; ++in; out += b << 42; if (!(b & MASK)) return out; out -= MASK << 42; b = *in; ++in; out += b << 49; if (!(b & MASK)) return out; out -= MASK << 49; b = *in; ++in; out += b << 56; if (!(b & MASK)) return out; out -= MASK << 56; b = *in; ++in; out += b << 63; // last byte always has MSB == 0, so we don't need to check and subtract 0x80 return out; } template static T read(InputIterator& in, std::input_iterator_tag) { typedef bytes_io bytes_io_t; T out = static_cast(bytes_io_t::read(in, std::input_iterator_tag{})) << 32; return out | static_cast(bytes_io_t::read(in, std::input_iterator_tag{})); } static T read(byte_type*& in) { auto value = *reinterpret_cast(in); if (!numeric_utils::is_big_endian()) { value = numeric_utils::ntoh64(value); } in += sizeof(uint64_t); return value; } //////////////////////////////////////////////////////////////////////////////// /// @returns number of bytes required to store value in variable length format //////////////////////////////////////////////////////////////////////////////// FORCE_INLINE static uint64_t vsize(uint64_t value) { // compute 0 == value ? 1 : 1 + floor(log2(value)) / 7 // OR 0x1 since log2_floor_64 does not accept 0 const uint64_t log2 = math::log2_floor_64(value | 0x1); // division within range [1;63] return (73 + 9*log2) >> 6; } template static int64_t zvread(InputIterator& in, std::input_iterator_tag) { return zig_zag_decode64(vread( in, typename std::iterator_traits::iterator_category() )); } template static void zvwrite(OutputIterator& out, int64_t value) { vwrite(out, zig_zag_encode64(value)); } }; // bytes_io // ----------------------------------------------------------------------------- // --SECTION-- exported functions for reading bytes // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief read a raw value of type T from 'in' /// will increment 'in' to position after the end of the read value //////////////////////////////////////////////////////////////////////////////// template inline T read(Iterator& in) { return bytes_io::read(in, typename std::iterator_traits::iterator_category()); } //////////////////////////////////////////////////////////////////////////////// /// @brief read a variable-size encoded value of type T from 'in' /// will increment 'in' to position after the end of the read value /// variable-size encoding allows using less bytes for small values //////////////////////////////////////////////////////////////////////////////// template inline T vread(Iterator& in) { return bytes_io::vread(in, typename std::iterator_traits::iterator_category()); } //////////////////////////////////////////////////////////////////////////////// /// @brief read a variable-size zig-zag encoded value of type T from 'in' /// will increment 'in' to position after the end of the read value /// variable-size encoding allows using less bytes for small values //////////////////////////////////////////////////////////////////////////////// template inline T zvread(Iterator& in) { return bytes_io::zvread(in, typename std::iterator_traits::iterator_category()); } // ----------------------------------------------------------------------------- // --SECTION-- exported functions for writing bytes // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief write a raw value 'value' to 'out' /// will increment 'out' to position after the end of the written value //////////////////////////////////////////////////////////////////////////////// template inline void write(Iterator& out, T value) { bytes_io::write(out, value); } //////////////////////////////////////////////////////////////////////////////// /// @brief write a variable-size encoded value 'value' to 'out' /// will increment 'out' to position after the end of the written value //////////////////////////////////////////////////////////////////////////////// template inline void vwrite(Iterator& out, T value) { bytes_io::vwrite(out, value); } //////////////////////////////////////////////////////////////////////////////// /// @brief write a variable-size zig-zag encoded value 'value' to 'out' /// will increment 'out' to position after the end of the written value //////////////////////////////////////////////////////////////////////////////// template inline void zvwrite(Iterator& out, T value) { bytes_io::zvwrite(out, value); } NS_END #endif // IRESEARCH_BYTES_UTILS_H