Files
cup_edit/ios/include/tsl/array-hash/array_hash.h
2022-02-06 13:28:28 +08:00

1767 lines
64 KiB
C++

/**
* MIT License
*
* Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ARRAY_HASH_H
#define TSL_ARRAY_HASH_H
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>
#include "array_growth_policy.h"
/*
* __has_include is a bit useless (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79433),
* check also __cplusplus version.
*/
#ifdef __has_include
# if __has_include(<string_view>) && __cplusplus >= 201703L
# define TSL_AH_HAS_STRING_VIEW
# endif
#endif
#ifdef TSL_AH_HAS_STRING_VIEW
# include <string_view>
#endif
#ifdef TSL_DEBUG
# define tsl_ah_assert(expr) assert(expr)
#else
# define tsl_ah_assert(expr) (static_cast<void>(0))
#endif
/**
* Implementation of the array hash structure described in the
* "Cache-conscious collision resolution in string hash tables." (Askitis Nikolas and Justin Zobel, 2005) paper.
*/
namespace tsl {
namespace ah {
/**
 * Default hash functor for a key given as a (pointer, length) pair.
 *
 * When TSL_AH_HAS_STRING_VIEW is defined the work is delegated to
 * std::hash<std::basic_string_view<CharT>>; otherwise the hash is computed
 * with the FNV-1a loop below.
 */
template<class CharT>
struct str_hash {
#ifndef TSL_AH_HAS_STRING_VIEW
    /**
     * FNV-1a hash
     */
    std::size_t operator()(const CharT* key, std::size_t key_size) const {
        // Offset basis and prime are chosen according to the width of std::size_t.
        static const std::size_t offset_basis = std::size_t((sizeof(std::size_t) == 8)?0xcbf29ce484222325:0x811c9dc5);
        static const std::size_t prime = std::size_t((sizeof(std::size_t) == 8)?0x100000001b3:0x1000193);

        std::size_t result = offset_basis;
        const CharT* const last = key + key_size;
        for(const CharT* cur = key; cur != last; ++cur) {
            // xor the character in, then multiply by the prime
            result = (result ^ *cur) * prime;
        }

        return result;
    }
#else
    std::size_t operator()(const CharT* key, std::size_t key_size) const {
        using view_type = std::basic_string_view<CharT>;
        return std::hash<view_type>()(view_type(key, key_size));
    }
#endif
};
/**
 * Default equality functor for two keys given as (pointer, length) pairs.
 *
 * Two keys are equal when they have the same length and the same bytes.
 * Two empty keys compare equal without touching the pointers: an empty key may be
 * described by a null pointer, and passing a null pointer to std::memcmp is
 * undefined behaviour even when the byte count is zero.
 */
template<class CharT>
struct str_equal {
    bool operator()(const CharT* key_lhs, std::size_t key_size_lhs,
                    const CharT* key_rhs, std::size_t key_size_rhs) const
    {
        if(key_size_lhs != key_size_rhs) {
            return false;
        }

        if(key_size_lhs == 0) {
            // Both keys are empty; nothing to compare.
            return true;
        }

        return std::memcmp(key_lhs, key_rhs, key_size_lhs * sizeof(CharT)) == 0;
    }
};
}
namespace detail_array_hash {
/**
 * Type trait: true when std::iterator_traits<T>::iterator_category exists and is not void,
 * false otherwise.
 */
template<typename T, typename Enable = void>
struct is_iterator: std::integral_constant<bool, false> {
};

template<typename T>
struct is_iterator<T,
                   typename std::enable_if<
                       !std::is_same<typename std::iterator_traits<T>::iterator_category, void>::value
                   >::type
                  >: std::integral_constant<bool, true> {
};
/**
 * Return true when 'value' has exactly one bit set. Zero is not a power of two.
 */
static constexpr bool is_power_of_two(std::size_t value) {
    // A power of two shares no bit with its predecessor.
    return (value == 0) ? false : ((value & (value - 1)) == 0);
}
/**
 * Convert 'value' to T and report a std::runtime_error (through THROW) carrying
 * 'error_message' when the conversion does not preserve the value.
 *
 * Two checks are performed: the converted value must convert back to the original,
 * and, when T and U do not have the same signedness, the sign of the result must
 * match the sign of the input.
 */
template<typename T, typename U>
static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") {
    T converted = static_cast<T>(value);

    // Round-trip check: catches truncation.
    if(static_cast<U>(converted) != value) {
        THROW(std::runtime_error, error_message);
    }

    const bool both_unsigned = std::is_unsigned<T>::value && std::is_unsigned<U>::value;
    const bool both_signed = std::is_signed<T>::value && std::is_signed<U>::value;
    if(!(both_unsigned || both_signed)) {
        // Mixed signedness: a value can round-trip and still change sign.
        const bool result_is_negative = converted < T{};
        const bool input_is_negative = value < U{};
        if(result_is_negative != input_is_negative) {
            THROW(std::runtime_error, error_message);
        }
    }

    return converted;
}
/**
* Fixed-size type used to represent size_type values on serialization. It needs to be big enough
* to represent a std::size_t on 32 and 64 bits platforms, and must be the same size on both platforms.
*/
using slz_size_type = std::uint64_t;
/**
 * Ask 'deserializer' for a value of type T by calling its templated operator()<T>()
 * with no argument, and return the result.
 */
template<class T, class Deserializer>
static T deserialize_value(Deserializer& deserializer) {
#if !defined (_MSC_VER) || _MSC_VER >= 1910
    return deserializer.Deserializer::template operator()<T>();
#else
    // MSVC < 2017 is not conformant, circumvent the problem by removing the template keyword
    return deserializer.Deserializer::operator()<T>();
#endif
}
/**
* For each string in the bucket, store the size of the string, the chars of the string
* and T, if it's not void. T should be either void or an unsigned type.
*
* End the buffer with END_OF_BUCKET flag. END_OF_BUCKET has the same type as the string size variable.
*
* m_buffer (CharT*):
* | size of str1 (KeySizeT) | str1 (const CharT*) | value (T if T != void) | ... |
* | size of strN (KeySizeT) | strN (const CharT*) | value (T if T != void) | END_OF_BUCKET (KeySizeT) |
*
* m_buffer is null if there is no string in the bucket.
*
* KeySizeT and T are extended to be a multiple of CharT when stored in the buffer.
*
* Use std::malloc and std::free instead of new and delete so we can have access to std::realloc.
*/
template<class CharT,
class T,
class KeyEqual,
class KeySizeT,
bool StoreNullTerminator>
class array_bucket {
template<typename U>
using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
static_assert(!has_mapped_type<T>::value || std::is_unsigned<T>::value,
"T should be either void or an unsigned type.");
static_assert(std::is_unsigned<KeySizeT>::value, "KeySizeT should be an unsigned type.");
public:
template<bool IsConst>
class array_bucket_iterator;
using char_type = CharT;
using key_size_type = KeySizeT;
using mapped_type = T;
using size_type = std::size_t;
using key_equal = KeyEqual;
using iterator = array_bucket_iterator<false>;
using const_iterator = array_bucket_iterator<true>;
static_assert(sizeof(KeySizeT) <= sizeof(size_type), "sizeof(KeySizeT) should be <= sizeof(std::size_t;)");
static_assert(std::is_unsigned<size_type>::value, "");
private:
/**
 * Number of bytes a value of type U occupies once stored in the buffer.
 * The buffer is made of CharT, so a U smaller than a CharT still takes a whole CharT.
 *
 * Example: sizeof(CharT) = 4, sizeof(U) = 2 => U will take 4 bytes in the buffer instead of 2.
 */
template<typename U>
static constexpr size_type sizeof_in_buff() noexcept {
    static_assert(is_power_of_two(sizeof(U)), "sizeof(U) should be a power of two.");
    static_assert(is_power_of_two(sizeof(CharT)), "sizeof(CharT) should be a power of two.");

    // The larger of the two sizes.
    return (sizeof(U) < sizeof(CharT)) ? sizeof(CharT) : sizeof(U);
}

/**
 * Same quantity as sizeof_in_buff<U>(), expressed as a number of CharT instead of bytes.
 */
template<typename U>
static constexpr size_type size_as_char_t() noexcept {
    return size_type(sizeof_in_buff<U>() / sizeof(CharT));
}
static key_size_type read_key_size(const CharT* buffer) noexcept {
key_size_type key_size;
std::memcpy(&key_size, buffer, sizeof(key_size));
return key_size;
}
static mapped_type read_value(const CharT* buffer) noexcept {
mapped_type value;
std::memcpy(&value, buffer, sizeof(value));
return value;
}
static bool is_end_of_bucket(const CharT* buffer) noexcept {
return read_key_size(buffer) == END_OF_BUCKET;
}
public:
/**
 * Number of bytes an entry takes in the buffer for a key of 'key_size' characters:
 * the key-size slot, the key characters plus KEY_EXTRA_SIZE, and, when there is
 * a mapped type, the value slot.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
static size_type entry_required_bytes(size_type key_size) noexcept {
    const size_type key_bytes = (key_size + KEY_EXTRA_SIZE) * sizeof(CharT);
    return sizeof_in_buff<key_size_type>() + key_bytes;
}

template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
static size_type entry_required_bytes(size_type key_size) noexcept {
    const size_type key_bytes = (key_size + KEY_EXTRA_SIZE) * sizeof(CharT);
    return sizeof_in_buff<key_size_type>() + key_bytes + sizeof_in_buff<mapped_type>();
}
private:
/**
 * Size in bytes of the entry starting at 'buffer', derived from the key size stored there.
 */
static size_type entry_size_bytes(const CharT* buffer) noexcept {
    const key_size_type stored_key_size = read_key_size(buffer);
    return entry_required_bytes(stored_key_size);
}
public:
template<bool IsConst>
class array_bucket_iterator {
friend class array_bucket;
using buffer_type = typename std::conditional<IsConst, const CharT, CharT>::type;
explicit array_bucket_iterator(buffer_type* position) noexcept: m_position(position) {
}
public:
using iterator_category = std::forward_iterator_tag;
using value_type = void;
using difference_type = std::ptrdiff_t;
using reference = void;
using pointer = void;
public:
array_bucket_iterator() noexcept: m_position(nullptr) {
}
const CharT* key() const {
return m_position + size_as_char_t<key_size_type>();
}
size_type key_size() const {
return read_key_size(m_position);
}
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
U value() const {
return read_value(m_position + size_as_char_t<key_size_type>() + key_size() + KEY_EXTRA_SIZE);
}
template<class U = T, typename std::enable_if<has_mapped_type<U>::value && !IsConst && std::is_same<U, T>::value>::type* = nullptr>
void set_value(U value) noexcept {
std::memcpy(m_position + size_as_char_t<key_size_type>() + key_size() + KEY_EXTRA_SIZE,
&value, sizeof(value));
}
array_bucket_iterator& operator++() {
m_position += entry_size_bytes(m_position)/sizeof(CharT);
if(is_end_of_bucket(m_position)) {
m_position = nullptr;
}
return *this;
}
array_bucket_iterator operator++(int) {
array_bucket_iterator tmp(*this);
++*this;
return tmp;
}
friend bool operator==(const array_bucket_iterator& lhs, const array_bucket_iterator& rhs) {
return lhs.m_position == rhs.m_position;
}
friend bool operator!=(const array_bucket_iterator& lhs, const array_bucket_iterator& rhs) {
return !(lhs == rhs);
}
private:
buffer_type* m_position;
};
/**
 * Past-the-end iterator (null position), obtainable without a bucket instance.
 */
static iterator end_it() noexcept {
    return iterator(static_cast<CharT*>(nullptr));
}

/**
 * Const counterpart of end_it().
 */
static const_iterator cend_it() noexcept {
    return const_iterator(static_cast<const CharT*>(nullptr));
}
public:
array_bucket(): m_buffer(nullptr) {
}
/**
* Reserve 'size' in the buffer of the bucket. The created bucket is empty.
*/
array_bucket(std::size_t size): m_buffer(nullptr) {
if(size == 0) {
return;
}
m_buffer = static_cast<CharT*>(std::malloc(size*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
if(m_buffer == nullptr) {
THROW(std::runtime_error, "Out of memory");
}
const auto end_of_bucket = END_OF_BUCKET;
std::memcpy(m_buffer, &end_of_bucket, sizeof(end_of_bucket));
}
~array_bucket() {
clear();
}
array_bucket(const array_bucket& other) {
if(other.m_buffer == nullptr) {
m_buffer = nullptr;
return;
}
const size_type other_buffer_size = other.size();
m_buffer = static_cast<CharT*>(std::malloc(other_buffer_size*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
if(m_buffer == nullptr) {
THROW(std::runtime_error, "Out of memory");
}
std::memcpy(m_buffer, other.m_buffer, other_buffer_size*sizeof(CharT));
const auto end_of_bucket = END_OF_BUCKET;
std::memcpy(m_buffer + other_buffer_size, &end_of_bucket, sizeof(end_of_bucket));
}
/**
 * Move constructor: take over the buffer of 'other', which is left without buffer.
 */
array_bucket(array_bucket&& other) noexcept: m_buffer(nullptr) {
    std::swap(m_buffer, other.m_buffer);
}

/**
 * Assignment, for both copy and move: the argument is taken by value and its
 * buffer is exchanged with ours; our previous buffer is released when the
 * argument is destroyed.
 */
array_bucket& operator=(array_bucket other) noexcept {
    this->swap(other);

    return *this;
}

/**
 * Exchange the buffers of the two buckets.
 */
void swap(array_bucket& other) noexcept {
    CharT* const previous_buffer = m_buffer;
    m_buffer = other.m_buffer;
    other.m_buffer = previous_buffer;
}
/**
 * begin iterators point to the start of the buffer; end iterators hold a null position.
 */
iterator begin() noexcept {
    return iterator(m_buffer);
}

iterator end() noexcept {
    return end_it();
}

const_iterator begin() const noexcept {
    return const_iterator(m_buffer);
}

const_iterator end() const noexcept {
    return cend_it();
}

const_iterator cbegin() const noexcept {
    return const_iterator(m_buffer);
}

const_iterator cend() const noexcept {
    return cend_it();
}
/**
* Return an iterator pointing to the key entry if presents or, if not there, to the position
* past the last element of the bucket. Return end() if the bucket has not be initialized yet.
*
* The boolean of the pair is set to true if the key is there, false otherwise.
*/
std::pair<const_iterator, bool> find_or_end_of_bucket(const CharT* key, size_type key_size) const noexcept {
if(m_buffer == nullptr) {
return std::make_pair(cend(), false);
}
const CharT* buffer_ptr_in_out = m_buffer;
const bool found = find_or_end_of_bucket_impl(key, key_size, buffer_ptr_in_out);
return std::make_pair(const_iterator(buffer_ptr_in_out), found);
}
/**
 * Append the element 'key' with its potential value at the end of the bucket.
 * 'end_of_bucket' should point past the end of the last element in the bucket, end() if the bucket
 * was not initialized yet. You usually get this value from find_or_end_of_bucket.
 *
 * The key size is validated through as_key_size_type before anything is allocated.
 *
 * Return the position where the element was actually inserted.
 */
template<class... ValueArgs>
const_iterator append(const_iterator end_of_bucket, const CharT* key, size_type key_size,
                      ValueArgs&&... value)
{
    const key_size_type checked_key_size = as_key_size_type(key_size);
    const size_type new_entry_bytes = entry_required_bytes(checked_key_size);

    if(end_of_bucket != cend()) {
        tsl_ah_assert(is_end_of_bucket(end_of_bucket.m_position));

        // Offset, in CharT, of the END_OF_BUCKET marker. Taken before std::realloc,
        // which may move the buffer.
        const size_type marker_offset = end_of_bucket.m_position - m_buffer;
        const size_type used_bytes = (marker_offset + size_as_char_t<decltype(END_OF_BUCKET)>()) * sizeof(CharT);

        CharT* const grown_buffer = static_cast<CharT*>(std::realloc(m_buffer, used_bytes + new_entry_bytes));
        if(grown_buffer == nullptr) {
            THROW(std::runtime_error, "Out of memory");
        }
        m_buffer = grown_buffer;

        // The new entry overwrites the old marker; append_impl writes a new one after it.
        CharT* const insert_pos = m_buffer + marker_offset;
        append_impl(key, checked_key_size, insert_pos, std::forward<ValueArgs>(value)...);

        return const_iterator(insert_pos);
    }

    // No buffer yet: allocate room for exactly one entry and the END_OF_BUCKET marker.
    tsl_ah_assert(m_buffer == nullptr);

    m_buffer = static_cast<CharT*>(std::malloc(new_entry_bytes + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
    if(m_buffer == nullptr) {
        THROW(std::runtime_error, "Out of memory");
    }

    append_impl(key, checked_key_size, m_buffer, std::forward<ValueArgs>(value)...);

    return const_iterator(m_buffer);
}
/**
 * Remove the entry at 'position' by moving everything after it (END_OF_BUCKET marker
 * included) over it. The buffer is not shrunk, but it is released when the bucket
 * becomes empty.
 *
 * Return an iterator to the entry that followed the erased one, or cend() if there is none.
 */
const_iterator erase(const_iterator position) noexcept {
    tsl_ah_assert(position.m_position != nullptr && !is_end_of_bucket(position.m_position));

    // get mutable pointers
    CharT* const erased_entry = m_buffer + (position.m_position - m_buffer);
    CharT* const following_entry = erased_entry + entry_size_bytes(erased_entry) / sizeof(CharT);

    // Walk to the END_OF_BUCKET marker, then step over it.
    CharT* tail_end = following_entry;
    while(!is_end_of_bucket(tail_end)) {
        tail_end += entry_size_bytes(tail_end) / sizeof(CharT);
    }
    tail_end += size_as_char_t<decltype(END_OF_BUCKET)>();

    const size_type tail_bytes = (tail_end - following_entry) * sizeof(CharT);
    std::memmove(erased_entry, following_entry, tail_bytes);

    if(is_end_of_bucket(m_buffer)) {
        // That was the only entry.
        clear();
        return cend();
    }

    if(is_end_of_bucket(erased_entry)) {
        // That was the last entry.
        return cend();
    }

    return const_iterator(erased_entry);
}
/**
* Return true if an element has been erased
*/
bool erase(const CharT* key, size_type key_size) noexcept {
if(m_buffer == nullptr) {
return false;
}
const CharT* entry_buffer_ptr_in_out = m_buffer;
bool found = find_or_end_of_bucket_impl(key, key_size, entry_buffer_ptr_in_out);
if(found) {
erase(const_iterator(entry_buffer_ptr_in_out));
return true;
}
else {
return false;
}
}
/**
* Bucket should be big enough and there is no check to see if the key already exists.
* No check on key_size.
*/
template<class... ValueArgs>
void append_in_reserved_bucket_no_check(const CharT* key, size_type key_size, ValueArgs&&... value) noexcept {
CharT* buffer_ptr = m_buffer;
while(!is_end_of_bucket(buffer_ptr)) {
buffer_ptr += entry_size_bytes(buffer_ptr)/sizeof(CharT);
}
append_impl(key, key_size_type(key_size), buffer_ptr, std::forward<ValueArgs>(value)...);
}
/**
 * Return true when the bucket has no buffer or when its buffer starts with the END_OF_BUCKET marker.
 */
bool empty() const noexcept {
    if(m_buffer == nullptr) {
        return true;
    }

    return is_end_of_bucket(m_buffer);
}

/**
 * Release the buffer and leave the bucket without buffer.
 */
void clear() noexcept {
    std::free(m_buffer);
    m_buffer = nullptr;
}

/**
 * Return a mutable iterator at the same offset in the buffer as 'pos'.
 */
iterator mutable_iterator(const_iterator pos) noexcept {
    const auto offset = pos.m_position - m_buffer;
    return iterator(m_buffer + offset);
}
template<class Serializer>
void serialize(Serializer& serializer) const {
const slz_size_type bucket_size = size();
tsl_ah_assert(m_buffer != nullptr || bucket_size == 0);
serializer(bucket_size);
serializer(m_buffer, bucket_size);
}
/**
 * Build a bucket from 'deserializer': read the number of CharT as a slz_size_type,
 * allocate room for them plus the END_OF_BUCKET marker, let the deserializer fill
 * the buffer and write the marker after the data.
 *
 * A stored size of 0 gives a bucket without buffer.
 */
template<class Deserializer>
static array_bucket deserialize(Deserializer& deserializer) {
    array_bucket bucket;

    const slz_size_type stored_size = deserialize_value<slz_size_type>(deserializer);
    if(stored_size != 0) {
        const std::size_t nb_chars = numeric_cast<std::size_t>(stored_size, "Deserialized bucket_size is too big.");
        const std::size_t alloc_bytes = nb_chars*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>();

        bucket.m_buffer = static_cast<CharT*>(std::malloc(alloc_bytes));
        if(bucket.m_buffer == nullptr) {
            THROW(std::runtime_error, "Out of memory");
        }

        deserializer(bucket.m_buffer, nb_chars);

        const auto end_marker = END_OF_BUCKET;
        std::memcpy(bucket.m_buffer + nb_chars, &end_marker, sizeof(end_marker));

        tsl_ah_assert(bucket.size() == nb_chars);
    }

    return bucket;
}
private:
/**
 * Convert 'key_size' to key_size_type. A std::length_error is reported through THROW
 * when 'key_size' is greater than MAX_KEY_SIZE.
 */
key_size_type as_key_size_type(size_type key_size) const {
    const bool too_long = key_size > MAX_KEY_SIZE;
    if(too_long) {
        THROW(std::length_error, "Key is too long.");
    }

    return static_cast<key_size_type>(key_size);
}
/*
* Return true if found, false otherwise.
* If true, buffer_ptr_in_out points to the start of the entry matching 'key'.
* If false, buffer_ptr_in_out points to where the 'key' should be inserted.
*
* Start search from buffer_ptr_in_out.
*/
bool find_or_end_of_bucket_impl(const CharT* key, size_type key_size,
const CharT* & buffer_ptr_in_out) const noexcept
{
while(!is_end_of_bucket(buffer_ptr_in_out)) {
const key_size_type buffer_key_size = read_key_size(buffer_ptr_in_out);
const CharT* buffer_str = buffer_ptr_in_out + size_as_char_t<key_size_type>();
if(KeyEqual()(buffer_str, buffer_key_size, key, key_size)) {
return true;
}
buffer_ptr_in_out += entry_size_bytes(buffer_ptr_in_out)/sizeof(CharT);
}
return false;
}
template<typename U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void append_impl(const CharT* key, key_size_type key_size, CharT* buffer_append_pos) noexcept {
std::memcpy(buffer_append_pos, &key_size, sizeof(key_size));
buffer_append_pos += size_as_char_t<key_size_type>();
std::memcpy(buffer_append_pos, key, key_size * sizeof(CharT));
buffer_append_pos += key_size;
const CharT zero = 0;
std::memcpy(buffer_append_pos, &zero, KEY_EXTRA_SIZE * sizeof(CharT));
buffer_append_pos += KEY_EXTRA_SIZE;
const auto end_of_bucket = END_OF_BUCKET;
std::memcpy(buffer_append_pos, &end_of_bucket, sizeof(end_of_bucket));
}
template<typename U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void append_impl(const CharT* key, key_size_type key_size, CharT* buffer_append_pos,
typename array_bucket<CharT, U, KeyEqual, KeySizeT, StoreNullTerminator>::mapped_type value) noexcept
{
std::memcpy(buffer_append_pos, &key_size, sizeof(key_size));
buffer_append_pos += size_as_char_t<key_size_type>();
std::memcpy(buffer_append_pos, key, key_size * sizeof(CharT));
buffer_append_pos += key_size;
const CharT zero = 0;
std::memcpy(buffer_append_pos, &zero, KEY_EXTRA_SIZE * sizeof(CharT));
buffer_append_pos += KEY_EXTRA_SIZE;
std::memcpy(buffer_append_pos, &value, sizeof(value));
buffer_append_pos += size_as_char_t<mapped_type>();
const auto end_of_bucket = END_OF_BUCKET;
std::memcpy(buffer_append_pos, &end_of_bucket, sizeof(end_of_bucket));
}
/**
 * Return the number of CharT used by the entries of m_buffer, END_OF_BUCKET marker excluded.
 * As the size of the buffer is not stored to gain some space, the method needs to walk
 * the entries until it finds the END_OF_BUCKET marker and is thus in O(n).
 */
size_type size() const noexcept {
    if(m_buffer == nullptr) {
        return 0;
    }

    const CharT* cursor = m_buffer;
    for(; !is_end_of_bucket(cursor); cursor += entry_size_bytes(cursor)/sizeof(CharT)) {
    }

    return cursor - m_buffer;
}
private:
// Marker stored in the key-size slot that follows the last entry of the buffer.
// It is the largest key_size_type value, which is why no key may have that size.
static const key_size_type END_OF_BUCKET = std::numeric_limits<key_size_type>::max();
// Number of extra CharT written after each key: 1 when StoreNullTerminator is true, 0 otherwise.
static const key_size_type KEY_EXTRA_SIZE = StoreNullTerminator?1:0;
// Buffer holding the entries, allocated with std::malloc/std::realloc; null when the bucket has no buffer.
CharT* m_buffer;
public:
// Largest key size accepted by as_key_size_type.
static const key_size_type MAX_KEY_SIZE =
// -1 for END_OF_BUCKET
key_size_type(std::numeric_limits<key_size_type>::max() - KEY_EXTRA_SIZE - 1);
};
/**
 * Storage for the mapped values, kept in a std::vector<T>.
 * The specialization for void below stores nothing and turns every operation into a no-op.
 */
template<class T>
class value_container {
public:
    void clear() noexcept {
        m_values.clear();
    }

    void reserve(std::size_t new_cap) {
        m_values.reserve(new_cap);
    }

    void shrink_to_fit() {
        m_values.shrink_to_fit();
    }

    /**
     * Exchange the stored values. Only the vectors are swapped, which does not throw
     * with the default allocator, hence the noexcept.
     */
    friend void swap(value_container& lhs, value_container& rhs) noexcept {
        lhs.m_values.swap(rhs.m_values);
    }

protected:
    static constexpr float VECTOR_GROWTH_RATE = 1.5f;

    // TODO use a sparse array? or a std::deque
    std::vector<T> m_values;
};

/**
 * Specialization used when there is no mapped type: nothing is stored.
 */
template<>
class value_container<void> {
public:
    void clear() noexcept {
    }

    void shrink_to_fit() {
    }

    void reserve(std::size_t /*new_cap*/) {
    }

    /**
     * Nothing to exchange; provided so that both variants offer the same interface.
     */
    friend void swap(value_container& /*lhs*/, value_container& /*rhs*/) noexcept {
    }
};
/**
* If there is no value in the array_hash (in the case of a set for example), T should be void.
*
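* The size of a key string is limited to std::numeric_limits<KeySizeT>::max() - 1, or to
* std::numeric_limits<KeySizeT>::max() - 2 when StoreNullTerminator is true (see array_bucket::MAX_KEY_SIZE).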
*
* The number of elements in the map is limited to std::numeric_limits<IndexSizeT>::max().
*/
template<class CharT,
class T,
class Hash,
class KeyEqual,
bool StoreNullTerminator,
class KeySizeT,
class IndexSizeT,
class GrowthPolicy>
class array_hash: private value_container<T>, private Hash, private GrowthPolicy {
private:
template<typename U>
using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
/**
* If there is a mapped type in array_hash, we store the values in m_values of value_container class
* and we store an index to m_values in the bucket. The index is of type IndexSizeT.
*/
using array_bucket = tsl::detail_array_hash::array_bucket<CharT,
typename std::conditional<has_mapped_type<T>::value,
IndexSizeT,
void>::type,
KeyEqual, KeySizeT, StoreNullTerminator>;
public:
template<bool IsConst>
class array_hash_iterator;
using char_type = CharT;
using key_size_type = KeySizeT;
using index_size_type = IndexSizeT;
using size_type = std::size_t;
using hasher = Hash;
using key_equal = KeyEqual;
using iterator = array_hash_iterator<false>;
using const_iterator = array_hash_iterator<true>;
/*
* Iterator classes
*/
public:
template<bool IsConst>
class array_hash_iterator {
friend class array_hash;
private:
using iterator_array_bucket = typename array_bucket::const_iterator;
using iterator_buckets = typename std::conditional<IsConst,
typename std::vector<array_bucket>::const_iterator,
typename std::vector<array_bucket>::iterator>::type;
using array_hash_ptr = typename std::conditional<IsConst,
const array_hash*,
array_hash*>::type;
public:
using iterator_category = std::forward_iterator_tag;
using value_type = typename std::conditional<has_mapped_type<T>::value, T, void>::type;
using difference_type = std::ptrdiff_t;
using reference = typename std::conditional<has_mapped_type<T>::value,
typename std::conditional<
IsConst,
typename std::add_lvalue_reference<const T>::type,
typename std::add_lvalue_reference<T>::type>::type,
void>::type;
using pointer = typename std::conditional<has_mapped_type<T>::value,
typename std::conditional<IsConst, const T*, T*>::type,
void>::type;
private:
array_hash_iterator(iterator_buckets buckets_iterator, iterator_array_bucket array_bucket_iterator,
array_hash_ptr array_hash_p) noexcept:
m_buckets_iterator(buckets_iterator),
m_array_bucket_iterator(array_bucket_iterator),
m_array_hash(array_hash_p)
{
tsl_ah_assert(m_array_hash != nullptr);
}
public:
array_hash_iterator() noexcept: m_array_hash(nullptr) {
}
template<bool TIsConst = IsConst, typename std::enable_if<TIsConst>::type* = nullptr>
array_hash_iterator(const array_hash_iterator<!TIsConst>& other) noexcept :
m_buckets_iterator(other.m_buckets_iterator),
m_array_bucket_iterator(other.m_array_bucket_iterator),
m_array_hash(other.m_array_hash)
{
}
array_hash_iterator(const array_hash_iterator& other) = default;
array_hash_iterator(array_hash_iterator&& other) = default;
array_hash_iterator& operator=(const array_hash_iterator& other) = default;
array_hash_iterator& operator=(array_hash_iterator&& other) = default;
const CharT* key() const {
return m_array_bucket_iterator.key();
}
size_type key_size() const {
return m_array_bucket_iterator.key_size();
}
#ifdef TSL_AH_HAS_STRING_VIEW
std::basic_string_view<CharT> key_sv() const {
return std::basic_string_view<CharT>(key(), key_size());
}
#endif
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
reference value() const {
return this->m_array_hash->m_values[value_position()];
}
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
reference operator*() const {
return value();
}
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
pointer operator->() const {
return std::addressof(value());
}
array_hash_iterator& operator++() {
tsl_ah_assert(m_buckets_iterator != m_array_hash->m_buckets_data.end());
tsl_ah_assert(m_array_bucket_iterator != m_buckets_iterator->cend());
++m_array_bucket_iterator;
if(m_array_bucket_iterator == m_buckets_iterator->cend()) {
do {
++m_buckets_iterator;
} while(m_buckets_iterator != m_array_hash->m_buckets_data.end() &&
m_buckets_iterator->empty());
if(m_buckets_iterator != m_array_hash->m_buckets_data.end()) {
m_array_bucket_iterator = m_buckets_iterator->cbegin();
}
}
return *this;
}
array_hash_iterator operator++(int) {
array_hash_iterator tmp(*this);
++*this;
return tmp;
}
friend bool operator==(const array_hash_iterator& lhs, const array_hash_iterator& rhs) {
return lhs.m_buckets_iterator == rhs.m_buckets_iterator &&
lhs.m_array_bucket_iterator == rhs.m_array_bucket_iterator &&
lhs.m_array_hash == rhs.m_array_hash;
}
friend bool operator!=(const array_hash_iterator& lhs, const array_hash_iterator& rhs) {
return !(lhs == rhs);
}
private:
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
IndexSizeT value_position() const {
return this->m_array_bucket_iterator.value();
}
private:
iterator_buckets m_buckets_iterator;
iterator_array_bucket m_array_bucket_iterator;
array_hash_ptr m_array_hash;
};
public:
/**
 * Construct an empty array_hash.
 *
 * bucket_count is first handed to GrowthPolicy, then used as the number of buckets to
 * create, capped at max_bucket_count(). 'hash' is copied into the Hash base and the
 * maximum load factor is set through max_load_factor(float).
 *
 * NOTE(review): GrowthPolicy presumably takes bucket_count by reference and may adjust
 * it before the buckets are created - confirm in array_growth_policy.h.
 */
array_hash(size_type bucket_count,
const Hash& hash,
float max_load_factor): value_container<T>(),
Hash(hash),
GrowthPolicy(bucket_count),
// Never create more than max_bucket_count() buckets.
m_buckets_data(bucket_count > max_bucket_count()?
max_bucket_count():
bucket_count),
// m_buckets points to a static empty bucket when there is no bucket at all.
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_nb_elements(0)
{
this->max_load_factor(max_load_factor);
}
/**
 * Copy constructor: copy the values, the hasher, the growth policy, the buckets, the
 * number of elements and the load factor settings of 'other'.
 */
array_hash(const array_hash& other): value_container<T>(other),
Hash(other),
GrowthPolicy(other),
m_buckets_data(other.m_buckets_data),
// Point to our own copy of the buckets, or to the static empty bucket when there is none.
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_nb_elements(other.m_nb_elements),
m_max_load_factor(other.m_max_load_factor),
m_load_threshold(other.m_load_threshold)
{
}
/**
 * Move constructor: take over the values, the hasher, the growth policy and the buckets
 * of 'other', and copy its counters.
 *
 * 'other' is then reset: its values, growth policy and buckets are cleared, its bucket
 * pointer is set to the static empty bucket, and its number of elements and load
 * threshold are set to 0. Its m_max_load_factor is left untouched.
 */
array_hash(array_hash&& other) noexcept(std::is_nothrow_move_constructible<value_container<T>>::value &&
std::is_nothrow_move_constructible<Hash>::value &&
std::is_nothrow_move_constructible<GrowthPolicy>::value &&
std::is_nothrow_move_constructible<std::vector<array_bucket>>::value)
: value_container<T>(std::move(other)),
Hash(std::move(other)),
GrowthPolicy(std::move(other)),
m_buckets_data(std::move(other.m_buckets_data)),
// Point to the buckets we now own, or to the static empty bucket when there is none.
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_nb_elements(other.m_nb_elements),
m_max_load_factor(other.m_max_load_factor),
m_load_threshold(other.m_load_threshold)
{
// Reset the moved-from object.
other.value_container<T>::clear();
other.GrowthPolicy::clear();
other.m_buckets_data.clear();
other.m_buckets = static_empty_bucket_ptr();
other.m_nb_elements = 0;
other.m_load_threshold = 0;
}
/**
 * Copy assignment: copy every base and member of 'other', then make m_buckets point
 * to our own buckets (or to the static empty bucket when there is none).
 * Self-assignment is a no-op.
 */
array_hash& operator=(const array_hash& other) {
    if(this == &other) {
        return *this;
    }

    value_container<T>::operator=(other);
    Hash::operator=(other);
    GrowthPolicy::operator=(other);

    m_buckets_data = other.m_buckets_data;
    if(m_buckets_data.empty()) {
        m_buckets = static_empty_bucket_ptr();
    }
    else {
        m_buckets = m_buckets_data.data();
    }

    m_nb_elements = other.m_nb_elements;
    m_max_load_factor = other.m_max_load_factor;
    m_load_threshold = other.m_load_threshold;

    return *this;
}

/**
 * Move assignment: exchange our content with the content of 'other', then clear 'other'.
 */
array_hash& operator=(array_hash&& other) {
    this->swap(other);
    other.clear();

    return *this;
}
/*
* Iterators
*/
/**
 * Iterator to the first entry of the first bucket which is not empty,
 * or end() when every bucket is empty.
 */
iterator begin() noexcept {
    const auto first_used = std::find_if(m_buckets_data.begin(), m_buckets_data.end(),
                                         [](const array_bucket& bucket) { return !bucket.empty(); });
    if(first_used == m_buckets_data.end()) {
        return end();
    }

    return iterator(first_used, first_used->cbegin(), this);
}

const_iterator begin() const noexcept {
    return cbegin();
}

/**
 * Const counterpart of begin().
 */
const_iterator cbegin() const noexcept {
    const auto first_used = std::find_if(m_buckets_data.cbegin(), m_buckets_data.cend(),
                                         [](const array_bucket& bucket) { return !bucket.empty(); });
    if(first_used == m_buckets_data.cend()) {
        return cend();
    }

    return const_iterator(first_used, first_used->cbegin(), this);
}
/**
 * End iterator: past the last bucket, paired with the end iterator of array_bucket.
 */
iterator end() noexcept {
    const auto past_last_bucket = m_buckets_data.end();
    return iterator(past_last_bucket, array_bucket::cend_it(), this);
}

const_iterator end() const noexcept {
    return cend();
}

/**
 * Const counterpart of end().
 */
const_iterator cend() const noexcept {
    const auto past_last_bucket = m_buckets_data.end();
    return const_iterator(past_last_bucket, array_bucket::cend_it(), this);
}
/*
* Capacity
*/
bool empty() const noexcept {
return m_nb_elements == 0;
}
size_type size() const noexcept {
return m_nb_elements;
}
size_type max_size() const noexcept {
return std::numeric_limits<IndexSizeT>::max();
}
size_type max_key_size() const noexcept {
return MAX_KEY_SIZE;
}
/**
 * Drop the values left behind by erased elements, shrink the storage of the values,
 * then rehash to ceil(size() / max_load_factor()) buckets.
 */
void shrink_to_fit() {
    clear_old_erased_values();
    value_container<T>::shrink_to_fit();

    const float wanted_buckets = std::ceil(float(size())/max_load_factor());
    rehash_impl(size_type(wanted_buckets));
}
/*
* Modifiers
*/
void clear() noexcept {
value_container<T>::clear();
for(auto& bucket: m_buckets_data) {
bucket.clear();
}
m_nb_elements = 0;
}
template<class... ValueArgs>
std::pair<iterator, bool> emplace(const CharT* key, size_type key_size, ValueArgs&&... value_args) {
const std::size_t hash = hash_key(key, key_size);
std::size_t ibucket = bucket_for_hash(hash);
auto it_find = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
if(it_find.second) {
return std::make_pair(iterator(m_buckets_data.begin() + ibucket, it_find.first, this), false);
}
if(grow_on_high_load()) {
ibucket = bucket_for_hash(hash);
it_find = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
}
return emplace_impl(ibucket, it_find.first, key, key_size, std::forward<ValueArgs>(value_args)...);
}
template<class M>
std::pair<iterator, bool> insert_or_assign(const CharT* key, size_type key_size, M&& obj) {
auto it = emplace(key, key_size, std::forward<M>(obj));
if(!it.second) {
it.first.value() = std::forward<M>(obj);
}
return it;
}
/**
 * Erase the element at 'pos' and return what erase_from_bucket returns for it.
 * The values of previously erased elements are cleaned up first when
 * should_clear_old_erased_values() asks for it.
 */
iterator erase(const_iterator pos) {
    const bool cleanup_wanted = should_clear_old_erased_values();
    if(cleanup_wanted) {
        clear_old_erased_values();
    }

    iterator target = mutable_iterator(pos);
    return erase_from_bucket(target);
}
/**
 * Erase the elements in [first, last) and return a mutable iterator equivalent to 'last'.
 */
iterator erase(const_iterator first, const_iterator last) {
    if(first == last) {
        return mutable_iterator(first);
    }

    /**
     * When erasing an element from a bucket with erase_from_bucket, it invalidates all the iterators
     * in the array bucket of the element (m_array_bucket_iterator) but not the iterators of the buckets
     * itself (m_buckets_iterator).
     *
     * So first erase all the values between first and last which are not part of the bucket of last,
     * and then erase carefully the values in last's bucket.
     */
    iterator to_delete = mutable_iterator(first);
    while(to_delete.m_buckets_iterator != last.m_buckets_iterator) {
        to_delete = erase_from_bucket(to_delete);
    }

    // Count the elements left before 'last' in its bucket prior to erasing any of them,
    // as the position held by 'last' is not reliable once the bucket is modified.
    for(std::size_t remaining = std::distance(to_delete.m_array_bucket_iterator,
                                              last.m_array_bucket_iterator);
        remaining > 0;
        remaining--)
    {
        to_delete = erase_from_bucket(to_delete);
    }

    if(should_clear_old_erased_values()) {
        clear_old_erased_values();
    }

    return to_delete;
}
/**
 * Erase the element with the given key. Return the number of erased elements (0 or 1).
 */
size_type erase(const CharT* key, size_type key_size) {
    const std::size_t hash = hash_key(key, key_size);
    return erase(key, key_size, hash);
}

/**
 * Same as erase(key, key_size), with the hash of the key provided by the caller
 * and used to select the bucket.
 */
size_type erase(const CharT* key, size_type key_size, std::size_t hash) {
    if(should_clear_old_erased_values()) {
        clear_old_erased_values();
    }

    const std::size_t ibucket = bucket_for_hash(hash);
    const bool erased = m_buckets[ibucket].erase(key, key_size);
    if(!erased) {
        return 0;
    }

    m_nb_elements--;
    return 1;
}
/**
 * Exchange the whole state of this table with `other`: the three base sub-objects
 * (value container, hasher, growth policy) and every data member.
 */
void swap(array_hash& other) {
    using std::swap;

    // Base sub-objects.
    swap(static_cast<value_container<T>&>(*this),
         static_cast<value_container<T>&>(other));
    swap(static_cast<Hash&>(*this),
         static_cast<Hash&>(other));
    swap(static_cast<GrowthPolicy&>(*this),
         static_cast<GrowthPolicy&>(other));

    // Bucket storage and the pointer used to access it.
    swap(m_buckets_data, other.m_buckets_data);
    swap(m_buckets, other.m_buckets);

    // Bookkeeping.
    swap(m_nb_elements, other.m_nb_elements);
    swap(m_max_load_factor, other.m_max_load_factor);
    swap(m_load_threshold, other.m_load_threshold);
}
/*
* Lookup
*/
/**
 * Return a reference to the value mapped to key. The hash is computed with hash_key and the
 * lookup is delegated to the overload taking a precalculated hash.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
U& at(const CharT* key, size_type key_size) {
    const std::size_t hash = hash_key(key, key_size);
    return at(key, key_size, hash);
}
/**
 * Const counterpart: return a const reference to the value mapped to key. The hash is computed
 * with hash_key and the lookup is delegated to the overload taking a precalculated hash.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
const U& at(const CharT* key, size_type key_size) const {
    const std::size_t hash = hash_key(key, key_size);
    return at(key, key_size, hash);
}
/**
 * Return a reference to the value mapped to key, using a precalculated hash.
 *
 * Implemented on top of the const overload; the constness of its result is cast away,
 * which is fine as *this is not const here.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
U& at(const CharT* key, size_type key_size, std::size_t hash) {
    const array_hash* const_this = static_cast<const array_hash*>(this);
    return const_cast<U&>(const_this->at(key, key_size, hash));
}
/**
 * Return a const reference to the value mapped to key, using `hash` to select the bucket.
 *
 * Raise std::out_of_range through the THROW macro when the key is not in the bucket.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
const U& at(const CharT* key, size_type key_size, std::size_t hash) const {
    auto lookup = m_buckets[bucket_for_hash(hash)].find_or_end_of_bucket(key, key_size);
    if(!lookup.second) {
        THROW(std::out_of_range, "Couldn't find key.");
    }

    // The bucket entry stores the position of the value inside m_values.
    return this->m_values[lookup.first.value()];
}
/**
 * Return a reference to the value mapped to key. When the key is absent, a value-initialized U
 * is inserted for it first (growing the table beforehand if the load threshold is reached).
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
U& access_operator(const CharT* key, size_type key_size) {
    const std::size_t hash = hash_key(key, key_size);
    std::size_t ibucket = bucket_for_hash(hash);

    auto lookup = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
    if(lookup.second) {
        return this->m_values[lookup.first.value()];
    }

    // A rehash rebuilds the buckets: the target bucket and its end position must be recomputed.
    if(grow_on_high_load()) {
        ibucket = bucket_for_hash(hash);
        lookup = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
    }

    return emplace_impl(ibucket, lookup.first, key, key_size, U{}).first.value();
}
/**
 * Return the number of elements matching key (0 or 1). The hash is computed with hash_key
 * and the lookup is delegated to the overload taking a precalculated hash.
 */
size_type count(const CharT* key, size_type key_size) const {
    const std::size_t hash = hash_key(key, key_size);
    return count(key, key_size, hash);
}
/**
 * Return 1 when key is found in the bucket selected by `hash`, 0 otherwise.
 */
size_type count(const CharT* key, size_type key_size, std::size_t hash) const {
    auto lookup = m_buckets[bucket_for_hash(hash)].find_or_end_of_bucket(key, key_size);
    return lookup.second ? 1 : 0;
}
/**
 * Return an iterator to the element matching key, or end() when there is none. The hash is
 * computed with hash_key and the lookup is delegated to the overload taking a precalculated hash.
 */
iterator find(const CharT* key, size_type key_size) {
    const std::size_t hash = hash_key(key, key_size);
    return find(key, key_size, hash);
}
/**
 * Const counterpart: return a const_iterator to the element matching key, or cend() when there
 * is none. The hash is computed with hash_key before delegating to the precalculated-hash overload.
 */
const_iterator find(const CharT* key, size_type key_size) const {
    const std::size_t hash = hash_key(key, key_size);
    return find(key, key_size, hash);
}
/**
 * Look for key in the bucket selected by `hash`.
 *
 * Return an iterator to the matching element, or end() when the bucket doesn't contain the key.
 */
iterator find(const CharT* key, size_type key_size, std::size_t hash) {
    const std::size_t ibucket = bucket_for_hash(hash);

    auto lookup = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
    if(!lookup.second) {
        return end();
    }

    return iterator(m_buckets_data.begin() + ibucket, lookup.first, this);
}
/**
 * Const counterpart: look for key in the bucket selected by `hash`.
 *
 * Return a const_iterator to the matching element, or cend() when the bucket doesn't contain the key.
 */
const_iterator find(const CharT* key, size_type key_size, std::size_t hash) const {
    const std::size_t ibucket = bucket_for_hash(hash);

    auto lookup = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
    if(!lookup.second) {
        return cend();
    }

    return const_iterator(m_buckets_data.cbegin() + ibucket, lookup.first, this);
}
std::pair<iterator, iterator> equal_range(const CharT* key, size_type key_size) {
return equal_range(key, key_size, hash_key(key, key_size));
}
std::pair<const_iterator, const_iterator> equal_range(const CharT* key, size_type key_size) const {
return equal_range(key, key_size, hash_key(key, key_size));
}
std::pair<iterator, iterator> equal_range(const CharT* key, size_type key_size, std::size_t hash) {
iterator it = find(key, key_size, hash);
return std::make_pair(it, (it == end())?it:std::next(it));
}
std::pair<const_iterator, const_iterator> equal_range(const CharT* key, size_type key_size,
std::size_t hash) const
{
const_iterator it = find(key, key_size, hash);
return std::make_pair(it, (it == cend())?it:std::next(it));
}
/*
* Bucket interface
*/
/**
 * Number of buckets currently held by the table, i.e. the size of m_buckets_data.
 */
size_type bucket_count() const {
    return m_buckets_data.size();
}
/**
 * Upper bound on the number of buckets: the smaller of the limit reported by the growth policy
 * and the maximum size of the vector storing the buckets.
 */
size_type max_bucket_count() const {
    const auto policy_limit = GrowthPolicy::max_bucket_count();
    const auto storage_limit = m_buckets_data.max_size();

    return std::min(policy_limit, storage_limit);
}
/*
* Hash policy
*/
float load_factor() const {
if(bucket_count() == 0) {
return 0;
}
return float(m_nb_elements) / float(bucket_count());
}
/**
 * Current maximum load factor (see grow_on_high_load for how the derived threshold is used).
 */
float max_load_factor() const {
    return m_max_load_factor;
}
void max_load_factor(float ml) {
m_max_load_factor = std::max(0.1f, ml);
m_load_threshold = size_type(float(bucket_count())*m_max_load_factor);
}
/**
 * Rebuild the table with at least `count` buckets. The requested count is raised, if needed,
 * to ceil(size() / max_load_factor()) so that the current elements fit under the max load factor.
 */
void rehash(size_type count) {
    const size_type min_count_for_size = size_type(std::ceil(float(size())/max_load_factor()));
    rehash_impl(std::max(count, min_count_for_size));
}
/**
 * Make room for `count` elements: rehash with ceil(count / max_load_factor()) buckets.
 */
void reserve(size_type count) {
    const size_type nb_buckets_needed = size_type(std::ceil(float(count)/max_load_factor()));
    rehash(nb_buckets_needed);
}
/*
* Observers
*/
/**
 * Return a copy of the hasher, obtained by viewing *this as a hasher.
 */
hasher hash_function() const {
    const hasher& self_as_hasher = static_cast<const hasher&>(*this);
    return self_as_hasher;
}
/**
 * Return a default-constructed key comparison object.
 *
 * TODO add support for stateful KeyEqual
 */
key_equal key_eq() const {
    return KeyEqual();
}
/*
* Other
*/
/**
 * Convert a const_iterator into an iterator designating the same element.
 *
 * The bucket iterator is rebuilt from its offset in m_buckets_data; the position inside
 * the array bucket is reused as is.
 */
iterator mutable_iterator(const_iterator it) noexcept {
    const auto bucket_offset = std::distance(m_buckets_data.cbegin(), it.m_buckets_iterator);
    return iterator(m_buckets_data.begin() + bucket_offset, it.m_array_bucket_iterator, this);
}
template<class Serializer>
void serialize(Serializer& serializer) const {
serialize_impl(serializer);
}
template<class Deserializer>
void deserialize(Deserializer& deserializer, bool hash_compatible) {
deserialize_impl(deserializer, hash_compatible);
}
private:
/**
 * Hash the key_size characters starting at key with the Hash base class.
 */
std::size_t hash_key(const CharT* key, size_type key_size) const {
    const std::size_t hash = Hash::operator()(key, key_size);
    return hash;
}
/**
 * Map a hash to a bucket index by asking the GrowthPolicy base class.
 */
std::size_t bucket_for_hash(std::size_t hash) const {
    const std::size_t ibucket = GrowthPolicy::bucket_for_hash(hash);
    return ibucket;
}
/**
 * Erase the element designated by pos from its array bucket and return an iterator to the
 * element that follows it: the next element of the same bucket, else the first element of the
 * next non-empty bucket, else end().
 *
 * If there is a mapped_type, the mapped value in m_values is not erased now.
 * It will be erased when the ratio between the size of the map and
 * the size of the map + the number of deleted values still stored is low enough (see clear_old_erased_values).
 */
iterator erase_from_bucket(iterator pos) noexcept {
    auto next_in_bucket = pos.m_buckets_iterator->erase(pos.m_array_bucket_iterator);
    m_nb_elements--;

    if(next_in_bucket != pos.m_buckets_iterator->cend()) {
        return iterator(pos.m_buckets_iterator, next_in_bucket, this);
    }

    // The bucket is exhausted: move forward to the next non-empty bucket, if any.
    for(++pos.m_buckets_iterator;
        pos.m_buckets_iterator != m_buckets_data.end() && pos.m_buckets_iterator->empty();
        ++pos.m_buckets_iterator)
    {
    }

    if(pos.m_buckets_iterator == m_buckets_data.end()) {
        return end();
    }

    return iterator(pos.m_buckets_iterator, pos.m_buckets_iterator->cbegin(), this);
}
/**
 * Overload selected when there is no mapped_type: always false, the threshold is ignored.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
bool should_clear_old_erased_values(float /*threshold*/ = DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD) const {
    const bool nothing_to_clear = false;
    return nothing_to_clear;
}
/**
 * Overload selected when there is a mapped_type.
 *
 * Return true when the ratio between the number of elements and the number of values stored in
 * m_values (erased ones included) is below `threshold`. An empty m_values never needs clearing.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
bool should_clear_old_erased_values(float threshold = DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD) const {
    const auto nb_stored_values = this->m_values.size();
    if(nb_stored_values == 0) {
        return false;
    }

    return float(m_nb_elements)/float(nb_stored_values) < threshold;
}
/**
 * Overload selected when there is no mapped_type: nothing to do.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void clear_old_erased_values() {
    // Intentionally empty.
}
/**
* Overload selected when there is a mapped_type.
*
* Rebuild m_values so that it only contains the values of the elements still in the table,
* stored in iteration order, and update the value index of each bucket entry accordingly.
* Do nothing when m_values holds exactly one value per element.
*/
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void clear_old_erased_values() {
static_assert(std::is_nothrow_move_constructible<U>::value ||
std::is_copy_constructible<U>::value,
"mapped_value must be either copy constructible or nothrow move constructible.");
// No erased value is lingering in m_values.
if(m_nb_elements == this->m_values.size()) {
return;
}
// First pass: transfer the live values into a new vector, in iteration order.
// NOTE(review): the bucket indexes are presumably left untouched during this pass so that an
// exception thrown by a copy leaves the table consistent; confirm before merging the two loops.
std::vector<T> new_values;
new_values.reserve(size());
for(auto it = begin(); it != end(); ++it) {
new_values.push_back(std::move_if_noexcept(it.value()));
}
// Second pass: same iteration order, so the i-th visited element now owns new_values[i].
IndexSizeT ivalue = 0;
for(auto it = begin(); it != end(); ++it) {
auto it_array_bucket = it.m_buckets_iterator->mutable_iterator(it.m_array_bucket_iterator);
it_array_bucket.set_value(ivalue);
ivalue++;
}
// Install the compacted vector; the old one is destroyed with new_values.
new_values.swap(this->m_values);
tsl_ah_assert(m_nb_elements == this->m_values.size());
}
/**
* Return true if a rehash occurred.
*/
bool grow_on_high_load() {
if(size() >= m_load_threshold) {
rehash_impl(GrowthPolicy::next_bucket_count());
return true;
}
return false;
}
/**
 * Overload selected when there is a mapped_type.
 *
 * Construct a new value at the back of m_values from value_args, then append the key along with
 * the index of that value at end_of_bucket in bucket ibucket. The caller must have checked that
 * the key is not already in the bucket.
 *
 * Raise std::length_error through the THROW macro when m_values has reached max_size(), even
 * after the lingering erased values were cleared.
 *
 * Return an iterator to the new element and true.
 */
template<class... ValueArgs, class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
std::pair<iterator, bool> emplace_impl(std::size_t ibucket, typename array_bucket::const_iterator end_of_bucket,
                                       const CharT* key, size_type key_size, ValueArgs&&... value_args)
{
    auto& values = this->m_values;

    if(values.size() >= max_size()) {
        // Try to clear old erased values lingering in m_values. Throw if it doesn't change anything.
        clear_old_erased_values();
        if(values.size() >= max_size()) {
            THROW(std::length_error, "Can't insert value, too much values in the map.");
        }
    }

    // Grow the storage with the container's own growth rate before it gets full.
    if(values.size() == values.capacity()) {
        values.reserve(std::size_t(float(values.size()) * value_container<T>::VECTOR_GROWTH_RATE));
    }

    values.emplace_back(std::forward<ValueArgs>(value_args)...);

    auto it_new_entry = m_buckets[ibucket].append(end_of_bucket, key, key_size, IndexSizeT(values.size() - 1));
    m_nb_elements++;

    return std::make_pair(iterator(m_buckets_data.begin() + ibucket, it_new_entry, this), true);
}
/**
 * Overload selected when there is no mapped_type.
 *
 * Append the key at end_of_bucket in bucket ibucket. The caller must have checked that the key
 * is not already in the bucket.
 *
 * Raise std::length_error through the THROW macro when the table already holds max_size() elements.
 *
 * Return an iterator to the new element and true.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
std::pair<iterator, bool> emplace_impl(std::size_t ibucket, typename array_bucket::const_iterator end_of_bucket,
                                       const CharT* key, size_type key_size)
{
    const bool table_is_full = m_nb_elements >= max_size();
    if(table_is_full) {
        THROW(std::length_error, "Can't insert value, too much values in the map.");
    }

    auto it_new_entry = m_buckets[ibucket].append(end_of_bucket, key, key_size);
    m_nb_elements++;

    return std::make_pair(iterator(m_buckets_data.begin() + ibucket, it_new_entry, this), true);
}
/**
* Rebuild the table so that its elements are distributed over bucket_count buckets.
*
* The new buckets are fully built on the side and only swapped with the current ones at the end.
* Nothing is done when the table already has bucket_count buckets.
*/
void rehash_impl(size_type bucket_count) {
// NOTE(review): the policy is constructed before bucket_count is compared below; presumably its
// constructor may adjust bucket_count (e.g. round it up). Confirm in array_growth_policy.h.
GrowthPolicy new_growth_policy(bucket_count);
if(bucket_count == this->bucket_count()) {
return;
}
// A rehash is a good occasion to compact the values, hence the dedicated threshold.
if(should_clear_old_erased_values(REHASH_CLEAR_OLD_ERASED_VALUE_THRESHOLD)) {
clear_old_erased_values();
}
// First pass: for each element, in iteration order, remember its destination bucket and
// accumulate the number of bytes each new bucket will need.
std::vector<std::size_t> required_size_for_bucket(bucket_count, 0);
std::vector<std::size_t> bucket_for_ivalue(size(), 0);
std::size_t ivalue = 0;
for(auto it = begin(); it != end(); ++it) {
const std::size_t hash = hash_key(it.key(), it.key_size());
const std::size_t ibucket = new_growth_policy.bucket_for_hash(hash);
bucket_for_ivalue[ivalue] = ibucket;
required_size_for_bucket[ibucket] += array_bucket::entry_required_bytes(it.key_size());
ivalue++;
}
// Create every new bucket with the size computed above.
std::vector<array_bucket> new_buckets;
new_buckets.reserve(bucket_count);
for(std::size_t ibucket = 0; ibucket < bucket_count; ibucket++) {
new_buckets.emplace_back(required_size_for_bucket[ibucket]);
}
// Second pass: same iteration order as the first one, so bucket_for_ivalue[ivalue] is the
// destination of the current element. The buckets are already sized, hence the no_check append.
ivalue = 0;
for(auto it = begin(); it != end(); ++it) {
const std::size_t ibucket = bucket_for_ivalue[ivalue];
append_iterator_in_reserved_bucket_no_check(new_buckets[ibucket], it);
ivalue++;
}
// Install the new policy and the new buckets.
using std::swap;
swap(static_cast<GrowthPolicy&>(*this), new_growth_policy);
m_buckets_data.swap(new_buckets);
// m_buckets must always be dereferenceable, even when there is no bucket.
m_buckets = !m_buckets_data.empty()?m_buckets_data.data():
static_empty_bucket_ptr();
// Call max_load_factor to change m_load_threshold
max_load_factor(m_max_load_factor);
}
/**
 * Overload selected when there is no mapped_type: append the key designated by `it` to `bucket`
 * with append_in_reserved_bucket_no_check.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void append_iterator_in_reserved_bucket_no_check(array_bucket& bucket, iterator it) {
    bucket.append_in_reserved_bucket_no_check(it.key(),
                                              it.key_size());
}
/**
 * Overload selected when there is a mapped_type: append the key designated by `it` to `bucket`
 * with append_in_reserved_bucket_no_check, along with the position of its value (the value
 * itself is not touched).
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void append_iterator_in_reserved_bucket_no_check(array_bucket& bucket, iterator it) {
    bucket.append_in_reserved_bucket_no_check(it.key(),
                                              it.key_size(),
                                              it.value_position());
}
/**
* On serialization the values of each bucket (if has_mapped_type is true) are serialized
* next to the bucket. The potential old erased values in value_container are thus not serialized.
*
* On deserialization, when hash_compatible is true, we reaffect the value index (IndexSizeT) of each
* bucket with set_value as the position of each value is no more the same in value_container compared
* to when they were serialized.
*
* It's done this way as we can't call clear_old_erased_values() because we want the serialize
* method to remain const and we don't want to serialize/deserialize old erased values. As we may
* not serialize all the values in value_container, the values we keep can change of index.
* We thus have to modify the value indexes in the buckets.
*/
template<class Serializer>
void serialize_impl(Serializer& serializer) const {
const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION;
serializer(version);
const slz_size_type bucket_count = m_buckets_data.size();
serializer(bucket_count);
const slz_size_type nb_elements = m_nb_elements;
serializer(nb_elements);
const float max_load_factor = m_max_load_factor;
serializer(max_load_factor);
for(const array_bucket& bucket: m_buckets_data) {
bucket.serialize(serializer);
serialize_bucket_values(serializer, bucket);
}
}
/**
 * Overload selected when there is no mapped_type: there is no value to serialize.
 */
template<class Serializer, class U = T,
         typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void serialize_bucket_values(Serializer& /*serializer*/, const array_bucket& /*bucket*/) const {
    // Intentionally empty.
}
/**
 * Overload selected when there is a mapped_type: serialize, in bucket order, the value
 * referenced by each entry of `bucket`.
 */
template<class Serializer, class U = T,
         typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void serialize_bucket_values(Serializer& serializer, const array_bucket& bucket) const {
    auto it_entry = bucket.begin();
    while(it_entry != bucket.end()) {
        // The entry stores the index of its value inside m_values.
        serializer(this->m_values[it_entry.value()]);
        ++it_entry;
    }
}
template<class Deserializer>
void deserialize_impl(Deserializer& deserializer, bool hash_compatible) {
tsl_ah_assert(m_buckets_data.empty()); // Current hash table must be empty
const slz_size_type version = deserialize_value<slz_size_type>(deserializer);
// For now we only have one version of the serialization protocol.
// If it doesn't match there is a problem with the file.
if(version != SERIALIZATION_PROTOCOL_VERSION) {
THROW(std::runtime_error, "Can't deserialize the array_map/set. The protocol version header is invalid.");
}
const slz_size_type bucket_count_ds = deserialize_value<slz_size_type>(deserializer);
const slz_size_type nb_elements = deserialize_value<slz_size_type>(deserializer);
const float max_load_factor = deserialize_value<float>(deserializer);
m_nb_elements = numeric_cast<IndexSizeT>(nb_elements, "Deserialized nb_elements is too big.");
size_type bucket_count = numeric_cast<size_type>(bucket_count_ds, "Deserialized bucket_count is too big.");
GrowthPolicy::operator=(GrowthPolicy(bucket_count));
this->max_load_factor(max_load_factor);
value_container<T>::reserve(m_nb_elements);
if(hash_compatible) {
if(bucket_count != bucket_count_ds) {
THROW(std::runtime_error, "The GrowthPolicy is not the same even though hash_compatible is true.");
}
m_buckets_data.reserve(bucket_count);
for(size_type i = 0; i < bucket_count; i++) {
m_buckets_data.push_back(array_bucket::deserialize(deserializer));
deserialize_bucket_values(deserializer, m_buckets_data.back());
}
}
else {
m_buckets_data.resize(bucket_count);
for(size_type i = 0; i < bucket_count; i++) {
// TODO use buffer to avoid reallocation on each deserialization.
array_bucket bucket = array_bucket::deserialize(deserializer);
deserialize_bucket_values(deserializer, bucket);
for(auto it_val = bucket.cbegin(); it_val != bucket.cend(); ++it_val) {
const std::size_t ibucket = bucket_for_hash(hash_key(it_val.key(), it_val.key_size()));
auto it_find = m_buckets_data[ibucket].find_or_end_of_bucket(it_val.key(), it_val.key_size());
if(it_find.second) {
THROW(std::runtime_error, "Error on deserialization, the same key is presents multiple times.");
}
append_array_bucket_iterator_in_bucket(m_buckets_data[ibucket], it_find.first, it_val);
}
}
}
m_buckets = m_buckets_data.data();
if(load_factor() > this->max_load_factor()) {
THROW(std::runtime_error, "Invalid max_load_factor. Check that the serializer and deserializer support "
"floats correctly as they can be converted implicitely to ints.");
}
}
/**
 * Overload selected when there is no mapped_type: there is no value to deserialize.
 */
template<class Deserializer, class U = T,
         typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void deserialize_bucket_values(Deserializer& /*deserializer*/, array_bucket& /*bucket*/) {
    // Intentionally empty.
}
/**
 * Overload selected when there is a mapped_type: for each entry of `bucket`, in bucket order,
 * deserialize one value at the back of m_values and make the entry reference its index.
 */
template<class Deserializer, class U = T,
         typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void deserialize_bucket_values(Deserializer& deserializer, array_bucket& bucket) {
    auto it_entry = bucket.begin();
    while(it_entry != bucket.end()) {
        this->m_values.emplace_back(deserialize_value<U>(deserializer));

        // The index of the value that was just added must fit in IndexSizeT.
        tsl_ah_assert(this->m_values.size() - 1 <= std::numeric_limits<IndexSizeT>::max());
        it_entry.set_value(static_cast<IndexSizeT>(this->m_values.size() - 1));

        ++it_entry;
    }
}
/**
 * Overload selected when there is no mapped_type: append the key designated by it_val
 * at end_of_bucket in `bucket`.
 */
template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
void append_array_bucket_iterator_in_bucket(array_bucket& bucket,
                                            typename array_bucket::const_iterator end_of_bucket,
                                            typename array_bucket::const_iterator it_val)
{
    bucket.append(end_of_bucket,
                  it_val.key(),
                  it_val.key_size());
}
/**
 * Overload selected when there is a mapped_type: append the key designated by it_val
 * at end_of_bucket in `bucket`, along with the value index stored in it_val.
 */
template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
void append_array_bucket_iterator_in_bucket(array_bucket& bucket,
                                            typename array_bucket::const_iterator end_of_bucket,
                                            typename array_bucket::const_iterator it_val)
{
    bucket.append(end_of_bucket,
                  it_val.key(),
                  it_val.key_size(),
                  it_val.value());
}
public:
// Public defaults and limits of the hash table.
static const size_type DEFAULT_INIT_BUCKET_COUNT = 0;
static constexpr float DEFAULT_MAX_LOAD_FACTOR = 2.0f;
// Same limit as the one of the array buckets.
static const size_type MAX_KEY_SIZE = array_bucket::MAX_KEY_SIZE;
private:
/**
* Protocol version currently used for serialization.
*/
static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1;
/**
* Thresholds passed to should_clear_old_erased_values: the old erased values are cleared when the
* ratio between the number of elements and the number of stored values is below the threshold.
* The first one is the default, the second one is used by rehash_impl.
*/
static constexpr float DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD = 0.6f;
static constexpr float REHASH_CLEAR_OLD_ERASED_VALUE_THRESHOLD = 0.9f;
/**
 * Return an always valid pointer to a static empty array_bucket.
 *
 * m_buckets points to it when the table has no bucket, so that m_buckets can be
 * dereferenced without checking first whether m_buckets_data is empty.
 */
array_bucket* static_empty_bucket_ptr() {
    static array_bucket shared_empty_bucket;
    array_bucket* const bucket_ptr = &shared_empty_bucket;

    return bucket_ptr;
}
private:
// Storage of the buckets; bucket_count() is its size.
std::vector<array_bucket> m_buckets_data;
/**
* Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr.
* This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying
* to find an element.
*
* TODO Remove m_buckets_data and only use a pointer+size instead of a pointer+vector
* to save some space in the array_hash object.
*/
array_bucket* m_buckets;
// Number of elements in the table; incremented on insertion, decremented on erasure.
IndexSizeT m_nb_elements;
// Maximum load factor, at least 0.1 once set through max_load_factor(float).
float m_max_load_factor;
// bucket_count() * m_max_load_factor; grow_on_high_load rehashes once size() reaches it.
size_type m_load_threshold;
};
} // end namespace detail_array_hash
} //end namespace tsl
#endif