X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/4127fe802ff334591a6ba5b315190523fc2a660e..b1245e5ecc0f1eb4e2b76edac0004005e8c9a514:/src/3rd-party/xxhash.hpp diff --git a/src/3rd-party/xxhash.hpp b/src/3rd-party/xxhash.hpp index 5917a50007..a3dbe98912 100644 --- a/src/3rd-party/xxhash.hpp +++ b/src/3rd-party/xxhash.hpp @@ -1,16 +1,18 @@ #pragma once +#include #include #include -#include +#include #include #include -#include + +#include /* xxHash - Extremely Fast Hash algorithm Header File -Copyright (C) 2012-2022, Yann Collet. -Copyright (C) 2017-2022, Red Gavin. +Copyright (C) 2012-2018, Yann Collet. +Copyright (C) 2017-2018, Piotr Pliszka. All rights reserved. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -39,2213 +41,717 @@ You can contact the author at : - xxHash C++ port repository : https://github.com/RedSpah/xxhash_cpp */ -/* Intrinsics -* Sadly has to be included in the global namespace or literally everything breaks -*/ -#include - -namespace xxh -{ - /* ************************************* - * Versioning - ***************************************/ - - namespace version - { - constexpr int cpp_version_major = 0; - constexpr int cpp_version_minor = 8; - constexpr int cpp_version_release = 1; - } - - constexpr uint32_t version_number() - { - return version::cpp_version_major * 10000 + version::cpp_version_minor * 100 + version::cpp_version_release; - } - - - /* ************************************* - * Basic Types - Predefining uint128_t for intrin - ***************************************/ - - namespace typedefs - { - struct alignas(16) uint128_t - { - uint64_t low64 = 0; - uint64_t high64 = 0; - - bool operator==(const uint128_t & other) - { - return (low64 == other.low64 && high64 == other.high64); - } - - bool operator>(const uint128_t & other) - { - return (high64 > other.high64 || low64 > other.low64); - } - - bool operator>=(const uint128_t & other) - { - return (*this > other || *this == other); - } - - bool operator<(const uint128_t & other) - { - return !(*this >= other); - } - - bool operator<=(const uint128_t & other) - { - return !(*this > other); - } - - bool operator!=(const uint128_t & other) - { - return !(*this == other); - } - - uint128_t(uint64_t low, uint64_t high) : low64(low), high64(high) {} - - uint128_t() {} - }; - - } - - using uint128_t = typedefs::uint128_t; - - - /* ************************************* - * Compiler / Platform Specific Features - ***************************************/ - - namespace intrin - { - /*!XXH_CPU_LITTLE_ENDIAN : - * This is a CPU endian detection macro, will be - * automatically set to 1 (little endian) if it is left undefined. - * If compiling for a big endian system (why), XXH_CPU_LITTLE_ENDIAN has to be explicitly defined as 0. - */ -#ifndef XXH_CPU_LITTLE_ENDIAN -# define XXH_CPU_LITTLE_ENDIAN 1 +/* ************************************* + * Tuning parameters + ***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. 
+ * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+#if defined(__GNUC__) && (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
+    defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__))
+#define XXH_FORCE_MEMORY_ACCESS 2
+#elif defined(__INTEL_COMPILER) || \
+    (defined(__GNUC__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
+     defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__)))
+#define XXH_FORCE_MEMORY_ACCESS 1
+#endif
#endif
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, the xxHash library provides endian-independent hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPUs.
+ * This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate the little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for big-endian CPUs.
+ * This option has no impact on little-endian CPUs.
+ */
+#if !defined(XXH_FORCE_NATIVE_FORMAT) || (XXH_FORCE_NATIVE_FORMAT == 0) /* can be defined externally */
+#define XXH_FORCE_NATIVE_FORMAT 0
+#define XXH_CPU_LITTLE_ENDIAN 1
+#endif
- /* Vectorization Detection
- * NOTE: XXH_NEON and XXH_VSX aren't supported in this C++ port.
- * The primary reason is that I don't have access to ARM and PowerPC
- * machines to test them, and the secondary reason is that I even doubt anyone writing
- * code for such machines would bother using a C++ port rather than the original C version.
- */
-#ifndef XXH_VECTOR /* can be predefined on command line */
-# if defined(__AVX512F__)
-# define XXH_VECTOR 3 /* AVX512 for Skylake and Icelake */
-# elif defined(__AVX2__)
-# define XXH_VECTOR 2 /* AVX2 for Haswell and Bulldozer */
-# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-# define XXH_VECTOR 1 /* SSE2 for Pentium 4 and all x86_64 */
-# else
-# define XXH_VECTOR 0 /* Portable scalar version */
-# endif
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means: check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
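// [Editorial sketch -- not part of the diff above.] The three access methods
// described in the XXH_FORCE_MEMORY_ACCESS comment boil down to the following
// alternatives for reading an unaligned 32-bit word. The helper names are
// hypothetical; only the techniques themselves come from the comment.
#include <cstdint>
#include <cstring>

// Method 0: memcpy -- safe and portable; optimizers emit a single load.
inline uint32_t read32_v0(const void* p)
{
    uint32_t v;
    std::memcpy(&v, p, sizeof(v));
    return v;
}

#if defined(__GNUC__) || defined(__INTEL_COMPILER)
// Method 1: a packed struct -- a compiler extension, not portable C++.
struct __attribute__((packed)) unalign32 { uint32_t v; };
inline uint32_t read32_v1(const void* p)
{
    return reinterpret_cast<const unalign32*>(p)->v;
}
#endif

// Method 2: direct dereference -- fastest on some targets (e.g. GCC + ARMv6),
// but undefined behavior on CPUs that trap on unaligned accesses.
inline uint32_t read32_v2(const void* p)
{
    return *reinterpret_cast<const uint32_t*>(p);
}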
+ */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +#define XXH_FORCE_ALIGN_CHECK 0 +#else +#define XXH_FORCE_ALIGN_CHECK 1 +#endif #endif - constexpr int vector_mode = XXH_VECTOR; - -#if XXH_VECTOR == 3 /* AVX512 for Skylake and Icelake */ - constexpr int acc_align = 64; - using avx512_underlying = __m512i; - using avx2_underlying = __m256i; - using sse2_underlying = __m128i; -#elif XXH_VECTOR == 2 /* AVX2 for Haswell and Bulldozer */ - constexpr int acc_align = 32; - using avx512_underlying = void; - using avx2_underlying = __m256i; - using sse2_underlying = __m128i; -#elif XXH_VECTOR == 1 /* SSE2 for Pentium 4 and all x86_64 */ - using avx512_underlying = void; - using avx2_underlying = void; //std::array<__m128i, 2>; - using sse2_underlying = __m128i; - constexpr int acc_align = 16; -#else /* Portable scalar version */ - using avx512_underlying = void; - using avx2_underlying = void; //std::array; - using sse2_underlying = void; //std::array; - constexpr int acc_align = 8; +/*!XXH_CPU_LITTLE_ENDIAN : + * This is a CPU endian detection macro, will be + * automatically set to 1 (little endian) if XXH_FORCE_NATIVE_FORMAT + * is left undefined, XXH_FORCE_NATIVE_FORMAT is defined to 0, or if an x86/x86_64 compiler macro is defined. + * If left undefined, endianness will be determined at runtime, at the cost of a slight one-time overhead + * and a larger overhead due to get_endian() not being constexpr. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +#define XXH_CPU_LITTLE_ENDIAN 1 +#endif #endif +/* ************************************* + * Compiler Specific Options + ***************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +namespace xxh { +/* ************************************* + * Version + ***************************************/ +constexpr int cpp_version_major = 0; +constexpr int cpp_version_minor = 6; +constexpr int cpp_version_release = 5; +constexpr uint32_t version_number() +{ + return cpp_version_major * 10000 + cpp_version_minor * 100 + cpp_version_release; +} - /* Compiler Specifics - * Defines inline macros and includes specific compiler's instrinsics. 
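// [Editorial sketch -- not part of the diff above.] When XXH_CPU_LITTLE_ENDIAN
// is left undefined, the header determines endianness at runtime, as the
// comment above explains. The probe reduces to storing the integer 1 and
// inspecting its lowest-addressed byte; probe_little_endian is a hypothetical
// stand-alone version of that test.
#include <cstdint>

inline bool probe_little_endian()
{
    const uint32_t one = 1;
    // Little-endian CPUs place the least significant byte first, so the
    // first byte of the stored value reads back as 1; big-endian reads 0.
    return *reinterpret_cast<const unsigned char*>(&one) == 1;
}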
- * */ -#ifdef XXH_FORCE_INLINE /* First undefining the symbols in case they're already defined */ -# undef XXH_FORCE_INLINE -#endif -#ifdef XXH_NO_INLINE -# undef XXH_NO_INLINE -#endif +namespace hash_t_impl { +/* ************************************* + * Basic Types - Detail + ***************************************/ + +using _hash32_underlying = uint32_t; +using _hash64_underlying = uint64_t; + +template struct hash_type { + using type = void; +}; +template <> struct hash_type<32> { + using type = _hash32_underlying; +}; +template <> struct hash_type<64> { + using type = _hash64_underlying; +}; +} // namespace hash_t_impl + +/* ************************************* + * Basic Types - Public + ***************************************/ + +template using hash_t = typename hash_t_impl::hash_type::type; +using hash32_t = hash_t<32>; +using hash64_t = hash_t<64>; + +/* ************************************* + * Bit Functions - Public + ***************************************/ + +namespace bit_ops { +/* **************************************** + * Intrinsics and Bit Operations + ******************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) -# define XXH_FORCE_INLINE static __forceinline -# define XXH_NO_INLINE static __declspec(noinline) -# include -#elif defined(__GNUC__) /* Clang / GCC */ -# define XXH_FORCE_INLINE static inline __attribute__((always_inline)) -# define XXH_NO_INLINE static __attribute__((noinline)) -# include +#if defined(_MSC_VER) +inline uint32_t rotl32(uint32_t x, int32_t r) +{ + return _rotl(x, r); +} +inline uint64_t rotl64(uint64_t x, int32_t r) +{ + return _rotl64(x, r); +} #else -# define XXH_FORCE_INLINE static inline -# define XXH_NO_INLINE static +inline uint32_t rotl32(uint32_t x, int32_t r) +{ + return ((x << r) | (x >> (32 - r))); +} +inline uint64_t rotl64(uint64_t x, int32_t r) +{ + return ((x << r) | (x >> (64 - r))); +} #endif - - /* Prefetch - * Can be disabled by defining XXH_NO_PREFETCH - */ -#if defined(XXH_NO_PREFETCH) - XXH_FORCE_INLINE void prefetch(const void* ptr) {} -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) - XXH_FORCE_INLINE void prefetch(const void* ptr) { _mm_prefetch((const char*)(ptr), _MM_HINT_T0); } -#elif defined(__GNUC__) - XXH_FORCE_INLINE void prefetch(const void* ptr) { __builtin_prefetch((ptr), 0, 3); } +#if defined(_MSC_VER) /* Visual Studio */ +inline uint32_t swap32(uint32_t x) +{ + return _byteswap_ulong(x); +} +inline uint64_t swap64(uint64_t x) +{ + return _byteswap_uint64(x); +} +#elif XXH_GCC_VERSION >= 403 +inline uint32_t swap32(uint32_t x) +{ + return __builtin_bswap32(x); +} +inline uint64_t swap64(uint64_t x) +{ + return __builtin_bswap64(x); +} #else - XXH_FORCE_INLINE void prefetch(const void* ptr) {} +inline uint32_t swap32(uint32_t x) +{ + return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); +} +inline uint64_t swap64(uint64_t x) +{ + return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} #endif +template inline hash_t rotl(hash_t n, int32_t r){}; +template <> inline hash_t<32> rotl<32>(hash_t<32> n, int32_t r) +{ + return rotl32(n, r); +}; - /* Restrict - * Defines macro for restrict, which in C++ is sadly just a compiler 
extension (for now). - * Can be disabled by defining XXH_NO_RESTRICT - */ -#ifdef XXH_RESTRICT -# undef XXH_RESTRICT -#endif +template <> inline hash_t<64> rotl<64>(hash_t<64> n, int32_t r) +{ + return rotl64(n, r); +}; -#if (defined(__GNUC__) || defined(_MSC_VER)) && defined(__cplusplus) && !defined(XXH_NO_RESTRICT) -# define XXH_RESTRICT __restrict -#else -# define XXH_RESTRICT -#endif +template inline hash_t swap(hash_t n){}; +template <> inline hash_t<32> swap<32>(hash_t<32> n) +{ + return swap32(n); +}; - /* Likely / Unlikely - * Defines macros for Likely / Unlikely, which are official in C++20, but sadly this library aims the previous standard. - * Not present on MSVC. - * Can be disabled by defining XXH_NO_BRANCH_HINTS - */ -#if ((defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)) && !defined(XXH_NO_BRANCH_HINTS) -# define XXH_likely(x) __builtin_expect(x, 1) -# define XXH_unlikely(x) __builtin_expect(x, 0) -#else -# define XXH_likely(x) (x) -# define XXH_unlikely(x) (x) -#endif +template <> inline hash_t<64> swap<64>(hash_t<64> n) +{ + return swap64(n); +}; +} // namespace bit_ops +/* ************************************* + * Memory Functions - Public + ***************************************/ - namespace bit_ops - { -#if defined(_MSC_VER) - static inline uint32_t rotl32(uint32_t x, int32_t r) { return _rotl(x, r); } - static inline uint64_t rotl64(uint64_t x, int32_t r) { return _rotl64(x, r); } - static inline uint32_t rotr32(uint32_t x, int32_t r) { return _rotr(x, r); } - static inline uint64_t rotr64(uint64_t x, int32_t r) { return _rotr64(x, r); } -#else - static inline uint32_t rotl32(uint32_t x, int32_t r) { return ((x << r) | (x >> (32 - r))); } - static inline uint64_t rotl64(uint64_t x, int32_t r) { return ((x << r) | (x >> (64 - r))); } - static inline uint32_t rotr32(uint32_t x, int32_t r) { return ((x >> r) | (x << (32 - r))); } - static inline uint64_t rotr64(uint64_t x, int32_t r) { return ((x >> r) | (x << (64 - r))); } -#endif +enum class alignment : uint8_t { aligned, unaligned }; +enum class endianness : uint8_t { big_endian = 0, little_endian = 1, unspecified = 2 }; +namespace mem_ops { +/* ************************************* + * Memory Access + ***************************************/ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) -#if defined(_MSC_VER) /* Visual Studio */ - static inline uint32_t swap32(uint32_t x) { return _byteswap_ulong(x); } - static inline uint64_t swap64(uint64_t x) { return _byteswap_uint64(x); } -#elif defined(__GNUC__) - static inline uint32_t swap32(uint32_t x) { return __builtin_bswap32(x); } - static inline uint64_t swap64(uint64_t x) { return __builtin_bswap64(x); } +/* Force direct memory access. 
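// [Editorial sketch -- not part of the diff above.] The portable rotl/swap
// fallbacks defined in xxh::bit_ops are plain shift-and-mask expressions.
// A quick sanity check of what they compute (the demo function itself is
// hypothetical; note the fallback rotations assume 0 < r < width):
#include <cassert>
#include <cstdint>

inline void bit_ops_demo()
{
    // rotl32: bits shifted out on the left re-enter on the right.
    assert(xxh::bit_ops::rotl32(0x80000001u, 1) == 0x00000003u);
    // swap32: byte order is reversed end to end.
    assert(xxh::bit_ops::swap32(0x11223344u) == 0x44332211u);
}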
Only works on CPU which support unaligned memory access in hardware */ +template inline hash_t read_unaligned(const void* memPtr) +{ + return *(const hash_t*)memPtr; +} + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +template using unalign = union { + hash_t uval; +} __attribute((packed)); + +template inline hash_t read_unaligned(const void* memPtr) +{ + return ((const unalign*)memPtr)->uval; +} #else - static inline uint32_t swap32(uint32_t x) { return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); } - static inline uint64_t swap64(uint64_t x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } -#endif +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ +template inline hash_t read_unaligned(const void* memPtr) +{ + hash_t val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +inline hash_t<32> read32(const void* memPtr) +{ + return read_unaligned<32>(memPtr); +} +inline hash_t<64> read64(const void* memPtr) +{ + return read_unaligned<64>(memPtr); +} + +/* ************************************* + * Architecture Macros + ***************************************/ + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ + +#ifndef XXH_CPU_LITTLE_ENDIAN + +inline endianness get_endian(endianness endian) +{ + static struct _dummy_t { + std::array endian_lookup = {endianness::big_endian, endianness::little_endian, + endianness::unspecified}; + const int g_one = 1; + _dummy_t() { endian_lookup[2] = static_cast(*(const char*)(&g_one)); } + } _dummy; + + return _dummy.endian_lookup[(uint8_t)endian]; +} + +inline bool is_little_endian() +{ + return get_endian(endianness::unspecified) == endianness::little_endian; +} -#if defined(_MSC_VER) && defined(_M_IX86) // Only for 32-bit MSVC. - XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return __emulu(x, y); } #else - XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return (uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y); } -#endif +constexpr endianness get_endian(endianness endian) +{ + constexpr std::array endian_lookup = { + {endianness::big_endian, endianness::little_endian, + (XXH_CPU_LITTLE_ENDIAN) ? 
endianness::little_endian : endianness::big_endian}}; + return endian_lookup[static_cast(endian)]; +} +constexpr bool is_little_endian() +{ + return get_endian(endianness::unspecified) == endianness::little_endian; +} -#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) - __attribute__((__target__("no-sse"))) #endif - static inline uint128_t mult64to128(uint64_t lhs, uint64_t rhs) - { -#if defined(__GNUC__) && !defined(__wasm__) \ - && defined(__SIZEOF_INT128__) \ - || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) +/* *************************** + * Memory reads + *****************************/ - __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; - uint128_t r128; - r128.low64 = (uint64_t)(product); - r128.high64 = (uint64_t)(product >> 64); - return r128; +template inline hash_t readLE_align(const void* ptr, endianness endian, alignment align) +{ + if (align == alignment::unaligned) { + return endian == endianness::little_endian ? read_unaligned(ptr) : bit_ops::swap(read_unaligned(ptr)); + } else { + return endian == endianness::little_endian ? *reinterpret_cast*>(ptr) + : bit_ops::swap(*reinterpret_cast*>(ptr)); + } +} -#elif defined(_M_X64) || defined(_M_IA64) +template inline hash_t readLE(const void* ptr, endianness endian) +{ + return readLE_align(ptr, endian, alignment::unaligned); +} -#ifndef _MSC_VER -# pragma intrinsic(_umul128) -#endif - uint64_t product_high; - uint64_t const product_low = _umul128(lhs, rhs, &product_high); - uint128_t r128; - r128.low64 = product_low; - r128.high64 = product_high; - return r128; +template inline hash_t readBE(const void* ptr) +{ + return is_little_endian() ? bit_ops::swap(read_unaligned(ptr)) : read_unaligned(ptr); +} + +template inline alignment get_alignment(const void* input) +{ + return ((XXH_FORCE_ALIGN_CHECK) && ((reinterpret_cast(input) & ((N / 8) - 1)) == 0)) + ? xxh::alignment::aligned + : xxh::alignment::unaligned; +} +} // namespace mem_ops + +/* ******************************************************************* + * Hash functions + *********************************************************************/ + +namespace detail { +/* ******************************************************************* + * Hash functions - Implementation + *********************************************************************/ + +constexpr static std::array primes32 = {{2654435761U, 2246822519U, 3266489917U, 668265263U, 374761393U}}; +constexpr static std::array primes64 = {{11400714785074694791ULL, 14029467366897019727ULL, + 1609587929392839161ULL, 9650029242287828579ULL, + 2870177450012600261ULL}}; + +template constexpr hash_t PRIME(int32_t n){}; + +template <> constexpr hash32_t PRIME<32>(int32_t n) +{ + return primes32[n - 1]; +} + +template <> constexpr hash64_t PRIME<64>(int32_t n) +{ + return primes64[n - 1]; +} + +template inline hash_t round(hash_t seed, hash_t input) +{ + seed += input * PRIME(2); + seed = bit_ops::rotl(seed, ((N == 32) ? 
13 : 31)); + seed *= PRIME(1); + return seed; +} +inline hash64_t mergeRound64(hash64_t acc, hash64_t val) +{ + val = round<64>(0, val); + acc ^= val; + acc = acc * PRIME<64>(1) + PRIME<64>(4); + return acc; +} + +template +inline void endian_align_sub_mergeround( +#if __cplusplus < 201703L + __attribute__((unused)) #else - uint64_t const lo_lo = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - uint64_t const hi_lo = bit_ops::mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - uint64_t const lo_hi = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - uint64_t const hi_hi = bit_ops::mult32to64(lhs >> 32, rhs >> 32); - - /* Now add the products together. These will never overflow. */ - uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); - - uint128_t r128; - r128.low64 = lower; - r128.high64 = upper; - return r128; + [[maybe_unused]] #endif - } - } - } - - - /* ************************************* - * Basic Types - Everything else - ***************************************/ - - namespace typedefs - { - /* ************************************* - * Basic Types - Detail - ***************************************/ - - template - struct hash_type - { - using type = void; - }; - - template <> - struct hash_type<32> - { - using type = uint32_t; - }; - - template <> - struct hash_type<64> - { - using type = uint64_t; - }; - - template <> - struct hash_type<128> - { - using type = uint128_t; - }; - - - template - struct vec_type - { - using type = void; - }; - - template <> - struct vec_type<64> - { - using type = uint64_t; - }; - - template <> - struct vec_type<128> - { - using type = intrin::sse2_underlying; - }; - - template <> - struct vec_type<256> - { - using type = intrin::avx2_underlying; - }; - - template <> - struct vec_type<512> - { - using type = intrin::avx512_underlying; - }; - - /* Rationale - * On the surface level uint_type appears to be pointless, - * as it is just a copy of hash_type. They do use the same types, - * that is true, but the reasoning for the difference is aimed at humans, - * not the compiler, as a difference between values that are 'just' numbers, - * and those that represent actual hash values. 
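// [Editorial sketch -- not part of the diff above.] round<64>() and
// mergeRound64() implement the XXH64 lane mixing: each of four lanes absorbs
// 8 input bytes per round, and finalization folds the lanes into one 64-bit
// value exactly as endian_align_sub_mergeround does above. fold_four_lanes
// is a hypothetical stand-alone view of that fold.
#include <cstdint>

inline uint64_t fold_four_lanes(uint64_t v1, uint64_t v2, uint64_t v3, uint64_t v4)
{
    // Rotations by 1, 7, 12 and 18 decorrelate the lanes before merging.
    uint64_t h = xxh::bit_ops::rotl64(v1, 1) + xxh::bit_ops::rotl64(v2, 7) +
                 xxh::bit_ops::rotl64(v3, 12) + xxh::bit_ops::rotl64(v4, 18);
    h = xxh::detail::mergeRound64(h, v1);
    h = xxh::detail::mergeRound64(h, v2);
    h = xxh::detail::mergeRound64(h, v3);
    h = xxh::detail::mergeRound64(h, v4);
    return h;
}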
- */ - template - struct uint_type - { - using type = void; - }; - - template <> - struct uint_type<32> - { - using type = uint32_t; - }; - - template <> - struct uint_type<64> - { - using type = uint64_t; - }; - - template <> - struct uint_type<128> - { - using type = uint128_t; - }; - } - - template - using hash_t = typename typedefs::hash_type::type; - using hash32_t = hash_t<32>; - using hash64_t = hash_t<64>; - using hash128_t = hash_t<128>; - - template - using vec_t = typename typedefs::vec_type::type; - using vec64_t = vec_t<64>; - using vec128_t = vec_t<128>; - using vec256_t = vec_t<256>; - using vec512_t = vec_t<512>; - - template - using uint_t = typename typedefs::uint_type::type; - - - - /* ************************************* - * Bit Operations - ***************************************/ - - namespace bit_ops - { - /* **************************************** - * Bit Operations - ******************************************/ - - template - static inline uint_t rotl(uint_t n, int32_t r) - { - if constexpr (N == 32) - { - return intrin::bit_ops::rotl32(n, r); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::rotl64(n, r); - } - } - - template - static inline uint_t rotr(uint_t n, int32_t r) - { - if constexpr (N == 32) - { - return intrin::bit_ops::rotr32(n, r); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::rotr64(n, r); - } - } - - template - static inline uint_t swap(uint_t n) - { - if constexpr (N == 32) - { - return intrin::bit_ops::swap32(n); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::swap64(n); - } - } - - template - static inline vec_t mul32to64(vec_t x, vec_t y) - { - if constexpr (N == 64) - { - return intrin::bit_ops::mult32to64(static_cast(x), static_cast(y)); - } - else - { - return 0; - } - } - - static inline uint128_t mul64to128(uint64_t x, uint64_t y) - { - return intrin::bit_ops::mult64to128(x, y); - } - - static inline uint64_t mul128fold64(uint64_t x, uint64_t y) - { - uint128_t product = mul64to128(x, y); - - return (product.low64 ^ product.high64); - } - } - - - /* ************************************* - * Memory Functions - ***************************************/ - - namespace mem_ops - { - - /* ************************************* - * Endianness - ***************************************/ - - constexpr bool is_little_endian() - { - return (XXH_CPU_LITTLE_ENDIAN == 1); - } - - - /* ************************************* - * Memory Access - ***************************************/ - - template - static inline uint_t read(const void* memPtr) - { - uint_t val; - - memcpy(&val, memPtr, sizeof(val)); - return val; - } - - template - static inline uint_t readLE(const void* ptr) - { - if constexpr (is_little_endian()) - { - return read(ptr); - } - else - { - return bit_ops::swap(read(ptr)); - } - } - - template - static inline uint_t readBE(const void* ptr) - { - if constexpr (is_little_endian()) - { - return bit_ops::swap(read(ptr)); - } - else - { - return read(ptr); - } - } - - template - static void writeLE(void* dst, uint_t v) - { - if constexpr (!is_little_endian()) - { - v = bit_ops::swap(v); - } - - memcpy(dst, &v, sizeof(v)); - } - } - - - /* ************************************* - * Vector Functions - ***************************************/ - - namespace vec_ops - { - template - XXH_FORCE_INLINE vec_t loadu(const vec_t* input) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid template argument passed to xxh::vec_ops::loadu"); - - if constexpr (N == 128) - { - return 
_mm_loadu_si128(input); - } - - if constexpr (N == 256) - { - return _mm256_loadu_si256(input); - } - - if constexpr (N == 512) - { - return _mm512_loadu_si512(input); - } - - if constexpr (N == 64) - { - return mem_ops::readLE<64>(input); - } - - } - - - // 'xorv' instead of 'xor' because 'xor' is a weird wacky alternate operator expression thing. - template - XXH_FORCE_INLINE vec_t xorv(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::xorv"); - - if constexpr (N == 128) - { - return _mm_xor_si128(a, b); - } - - if constexpr (N == 256) - { - return _mm256_xor_si256(a, b); - } - - if constexpr (N == 512) - { - return _mm512_xor_si512(a, b); - } - - if constexpr (N == 64) - { - return a ^ b; - } - } - - - template - XXH_FORCE_INLINE vec_t mul(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::mul"); - - if constexpr (N == 128) - { - return _mm_mul_epu32(a, b); - } - - if constexpr (N == 256) - { - return _mm256_mul_epu32(a, b); - } - - if constexpr (N == 512) - { - return _mm512_mul_epu32(a, b); - } - - if constexpr (N == 64) - { - return a * b; - } - } - - - template - XXH_FORCE_INLINE vec_t add(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::add"); - - if constexpr (N == 128) - { - return _mm_add_epi64(a, b); - } - - if constexpr (N == 256) - { - return _mm256_add_epi64(a, b); - } - - if constexpr (N == 512) - { - return _mm512_add_epi64(a, b); - } - - if constexpr (N == 64) - { - return a + b; - } - } - - - template - XXH_FORCE_INLINE vec_t shuffle(vec_t a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::shuffle"); - - if constexpr (N == 128) - { - return _mm_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 256) - { - return _mm256_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 512) - { - return _mm512_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 64) - { - return a; - } - } - - - template - XXH_FORCE_INLINE vec_t set1(int64_t a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::set1"); - - if constexpr (N == 128) - { - return _mm_set1_epi32(static_cast(a)); - } - - if constexpr (N == 256) - { - return _mm256_set1_epi32(static_cast(a)); - } - - if constexpr (N == 512) - { - return _mm512_set1_epi32(static_cast(a)); - } - - if constexpr (N == 64) - { - return a; - } - } - - - template - XXH_FORCE_INLINE vec_t srli(vec_t n, int a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::srli"); - - if constexpr (N == 128) - { - return _mm_srli_epi64(n, a); - } - - if constexpr (N == 256) - { - return _mm256_srli_epi64(n, a); - } - - if constexpr (N == 512) - { - return _mm512_srli_epi64(n, a); - } - - if constexpr (N == 64) - { - return n >> a; - } - } - - - template - XXH_FORCE_INLINE vec_t slli(vec_t n, int a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::slli"); - - if constexpr (N == 128) - { - return _mm_slli_epi64(n, a); - } - - if constexpr (N == 256) - { - return _mm256_slli_epi64(n, a); - } - - if constexpr (N == 512) - { - return _mm512_slli_epi64(n, a); - } - - if constexpr (N == 64) - { - return n << a; - } - } - } - - /* 
************************************* - * Canonical represenation - ***************************************/ - - template - struct canonical_t - { - std::array digest{ 0 }; - - canonical_t(hash_t hash) - { - if constexpr (bit_mode < 128) - { - if (mem_ops::is_little_endian()) - { - hash = bit_ops::swap(hash); - } - - memcpy(digest.data(), &hash, sizeof(canonical_t)); - } - else - { - if (mem_ops::is_little_endian()) - { - hash.low64 = bit_ops::swap<64>(hash.low64); - hash.high64 = bit_ops::swap<64>(hash.high64); - } - - memcpy(digest.data(), &hash.high64, sizeof(hash.high64)); - memcpy(digest.data() + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); - } - } - - hash_t get_hash() const - { - if constexpr (bit_mode < 128) - { - return mem_ops::readBE(&digest); - } - else - { - return { mem_ops::readBE<64>(&digest[8]), mem_ops::readBE<64>(&digest) }; - } - } - }; - - using canonical32_t = canonical_t<32>; - using canonical64_t = canonical_t<64>; - using canonical128_t = canonical_t<128>; - - template - inline hash_t to_canonical(hash_t hash) - { - static_assert(!(bit_mode != 128 && bit_mode != 64 && bit_mode != 32), "Canonical form can only be obtained from 32, 64 and 128 bit hashes."); - canonical_t canon(hash); - hash_t res; - memcpy(&res, &canon, bit_mode / 4); - - return res; - } - - - /* ************************************* - * Algorithm Implementation - xxhash - ***************************************/ - - namespace detail - { - using namespace mem_ops; - using namespace bit_ops; - - - /* ************************************* - * Constants - ***************************************/ - - constexpr static std::array primes32 = { 2654435761U, 2246822519U, 3266489917U, 668265263U, 374761393U }; - constexpr static std::array primes64 = { 11400714785074694791ULL, 14029467366897019727ULL, 1609587929392839161ULL, 9650029242287828579ULL, 2870177450012600261ULL }; - - template - constexpr uint_t PRIME(uint64_t n) - { - if constexpr (N == 32) - { - return primes32[n - 1]; - } - else - { - return primes64[n - 1]; - } - } - - - /* ************************************* - * Functions - ***************************************/ - - template - XXH_FORCE_INLINE uint_t avalanche(uint_t hash) - { - if constexpr (N == 32) - { - hash ^= hash >> 15; - hash *= PRIME<32>(2); - hash ^= hash >> 13; - hash *= PRIME<32>(3); - hash ^= hash >> 16; - return hash; - } - else if constexpr (N == 64) - { - hash ^= hash >> 33; - hash *= PRIME<64>(2); - hash ^= hash >> 29; - hash *= PRIME<64>(3); - hash ^= hash >> 32; - return hash; - } - else return 0; - } - - template - XXH_FORCE_INLINE uint_t round(uint_t seed, uint_t input) - { - seed += input * PRIME(2); - - if constexpr (N == 32) - { - seed = rotl(seed, 13); - } - else - { - seed = rotl(seed, 31); - } - - seed *= PRIME(1); - return seed; - } - - XXH_FORCE_INLINE uint64_t mergeRound64(hash64_t acc, uint64_t val) - { - val = round<64>(0, val); - acc ^= val; - acc = acc * PRIME<64>(1) + PRIME<64>(4); - return acc; - } - - XXH_FORCE_INLINE void endian_align_sub_mergeround(hash64_t& hash_ret, uint64_t v1, uint64_t v2, uint64_t v3, uint64_t v4) - { - hash_ret = mergeRound64(hash_ret, v1); - hash_ret = mergeRound64(hash_ret, v2); - hash_ret = mergeRound64(hash_ret, v3); - hash_ret = mergeRound64(hash_ret, v4); - } - - template - static inline hash_t endian_align_sub_ending(hash_t hash_ret, const uint8_t* p, const uint8_t* bEnd) - { - if constexpr (N == 32) - { - while ((p + 4) <= bEnd) - { - hash_ret += readLE<32>(p) * PRIME<32>(3); - hash_ret = rotl<32>(hash_ret, 17) * 
PRIME<32>(4); - p += 4; - } - - while (p < bEnd) - { - hash_ret += (*p) * PRIME<32>(5); - hash_ret = rotl<32>(hash_ret, 11) * PRIME<32>(1); - p++; - } - - return avalanche<32>(hash_ret); - } - else - { - while (p + 8 <= bEnd) - { - const uint64_t k1 = round<64>(0, readLE<64>(p)); - - hash_ret ^= k1; - hash_ret = rotl<64>(hash_ret, 27) * PRIME<64>(1) + PRIME<64>(4); - p += 8; - } - - if (p + 4 <= bEnd) - { - hash_ret ^= static_cast(readLE<32>(p))* PRIME<64>(1); - hash_ret = rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3); - p += 4; - } - - while (p < bEnd) - { - hash_ret ^= (*p) * PRIME<64>(5); - hash_ret = rotl<64>(hash_ret, 11) * PRIME<64>(1); - p++; - } - - return avalanche<64>(hash_ret); - } - } - - template - static inline hash_t endian_align(const void* input, size_t len, uint_t seed) - { - static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode."); - - const uint8_t* p = static_cast(input); - const uint8_t* bEnd = p + len; - hash_t hash_ret; - - if (len >= (N / 2)) - { - const uint8_t* const limit = bEnd - (N / 2); - uint_t v1 = seed + PRIME(1) + PRIME(2); - uint_t v2 = seed + PRIME(2); - uint_t v3 = seed + 0; - uint_t v4 = seed - PRIME(1); - - do - { - v1 = round(v1, readLE(p)); - p += (N / 8); - v2 = round(v2, readLE(p)); - p += (N / 8); - v3 = round(v3, readLE(p)); - p += (N / 8); - v4 = round(v4, readLE(p)); - p += (N / 8); - } - while (p <= limit); - - hash_ret = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18); - - if constexpr (N == 64) - { - endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); - } - } - else - { - hash_ret = seed + PRIME(5); - } - - hash_ret += static_cast>(len); - - return endian_align_sub_ending(hash_ret, p, bEnd); - } - } - - - /* ************************************* - * Algorithm Implementation - xxhash3 - ***************************************/ - - namespace detail3 - { - using namespace vec_ops; - using namespace detail; - using namespace mem_ops; - using namespace bit_ops; - - - /* ************************************* - * Enums - ***************************************/ - - enum class vec_mode : uint8_t { scalar = 0, sse2 = 1, avx2 = 2, avx512 = 3 }; - - - /* ************************************* - * Constants - ***************************************/ - - constexpr uint64_t secret_default_size = 192; - constexpr uint64_t secret_size_min = 136; - constexpr uint64_t secret_consume_rate = 8; - constexpr uint64_t stripe_len = 64; - constexpr uint64_t acc_nb = 8; - constexpr uint64_t prefetch_distance = 384; - constexpr uint64_t secret_lastacc_start = 7; - constexpr uint64_t secret_mergeaccs_start = 11; - constexpr uint64_t midsize_max = 240; - constexpr uint64_t midsize_startoffset = 3; - constexpr uint64_t midsize_lastoffset = 17; - - constexpr vec_mode vector_mode = static_cast(intrin::vector_mode); - constexpr uint64_t acc_align = intrin::acc_align; - constexpr std::array vector_bit_width { 64, 128, 256, 512 }; - - - /* ************************************* - * Defaults - ***************************************/ - - alignas(64) constexpr uint8_t default_secret[secret_default_size] = { - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 
0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, - }; - - constexpr std::array init_acc = { PRIME<32>(3), PRIME<64>(1), PRIME<64>(2), PRIME<64>(3), PRIME<64>(4), PRIME<32>(2), PRIME<64>(5), PRIME<32>(1) }; - - - /* ************************************* - * Functions - ***************************************/ - - XXH_FORCE_INLINE hash_t<64> avalanche(hash_t<64> h64) - { - constexpr uint64_t avalanche_mul_prime = 0x165667919E3779F9ULL; - - h64 ^= h64 >> 37; - h64 *= avalanche_mul_prime; - h64 ^= h64 >> 32; - return h64; - } - - XXH_FORCE_INLINE hash_t<64> rrmxmx(hash_t<64> h64, uint64_t len) - { - h64 ^= rotl<64>(h64, 49) ^ rotl<64>(h64, 24); - h64 *= 0x9FB21C651E98DF25ULL; - h64 ^= (h64 >> 35) + len; - h64 *= 0x9FB21C651E98DF25ULL; - h64 ^= (h64 >> 28); - return h64; - } - - XXH_FORCE_INLINE void combine_16(void* dest, hash128_t h128) - { - writeLE<64>(dest, readLE<64>(dest) ^ h128.low64); - writeLE<64>((uint8_t*)dest + 8, readLE<64>((uint8_t*)dest + 8) ^ h128.high64); - } - - XXH_FORCE_INLINE void accumulate_512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret) - { - constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)]; - - using vec_t = vec_t; - - alignas(sizeof(vec_t)) vec_t* const xacc = static_cast(acc); - const vec_t* const xinput = static_cast(input); - const vec_t* const xsecret = static_cast(secret); - - for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) - { - vec_t const data_vec = loadu(xinput + i); - vec_t const key_vec = loadu(xsecret + i); - vec_t const data_key = xorv(data_vec, key_vec); - vec_t product = set1(0); - - if constexpr (vector_mode == vec_mode::scalar) - { - product = mul32to64(srli(slli(data_key, 32),32), srli(data_key, 32)); - xacc[i ^ 1] = add(xacc[i ^ 1], data_vec); - xacc[i] = add(xacc[i], product); - } - else - { - vec_t const data_key_lo = shuffle(data_key); - product = mul(data_key, data_key_lo); - - vec_t const data_swap = shuffle(data_vec); - vec_t const sum = add(xacc[i], data_swap); - xacc[i] = add(sum, product); - } - } - } - - XXH_FORCE_INLINE void scramble_acc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) - { - constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)];; - - using vec_t = vec_t; - - alignas(sizeof(vec_t)) vec_t* const xacc = (vec_t*)acc; - const vec_t* const xsecret = (const vec_t*)secret; - - for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) - { - vec_t const acc_vec = xacc[i]; - vec_t const shifted = srli(acc_vec, 47); - vec_t const data_vec = xorv(acc_vec, shifted); - vec_t const key_vec = loadu(xsecret + i); - vec_t const data_key = xorv(data_vec, key_vec); - - if constexpr (vector_mode == vec_mode::scalar) - { - xacc[i] = mul(data_key, set1(PRIME<32>(1))); - } - else - { - vec_t const prime32 = set1(PRIME<32>(1)); - vec_t const data_key_hi = 
shuffle(data_key); - vec_t const prod_lo = mul(data_key, prime32); - vec_t const prod_hi = mul(data_key_hi, prime32); - - xacc[i] = add(prod_lo, vec_ops::slli(prod_hi, 32)); - } - } - } - - XXH_FORCE_INLINE void accumulate(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, size_t nbStripes) - { - for (size_t n = 0; n < nbStripes; n++) - { - const uint8_t* const in = input + n * stripe_len; - - intrin::prefetch(in + prefetch_distance); - accumulate_512(acc, in, secret + n * secret_consume_rate); - } - } - - XXH_FORCE_INLINE void hash_long_internal_loop(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, size_t secretSize) - { - size_t const nb_rounds = (secretSize - stripe_len) / secret_consume_rate; - size_t const block_len = stripe_len * nb_rounds; - size_t const nb_blocks = (len-1) / block_len; - - for (size_t n = 0; n < nb_blocks; n++) - { - accumulate(acc, input + n * block_len, secret, nb_rounds); - scramble_acc(acc, secret + secretSize - stripe_len); - } - - /* last partial block */ - size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / stripe_len; - - accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); - - /* last stripe */ - const uint8_t* const p = input + len - stripe_len; - - accumulate_512(acc, p, secret + secretSize - stripe_len - secret_lastacc_start); - } - - XXH_FORCE_INLINE uint64_t mix_2_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret) - { - return mul128fold64(acc[0] ^ readLE<64>(secret), acc[1] ^ readLE<64>(secret + 8)); - } - - XXH_FORCE_INLINE uint64_t merge_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret, uint64_t start) - { - uint64_t result64 = start; - - result64 += mix_2_accs(acc + 0, secret + 0); - result64 += mix_2_accs(acc + 2, secret + 16); - result64 += mix_2_accs(acc + 4, secret + 32); - result64 += mix_2_accs(acc + 6, secret + 48); - - return avalanche(result64); - } - - XXH_FORCE_INLINE void init_custom_secret(uint8_t* customSecret, uint64_t seed) - { - for (uint64_t i = 0; i < secret_default_size / 16; i++) - { - writeLE<64>(customSecret + i * 16, readLE<64>(default_secret + i * 16) + seed); - writeLE<64>(customSecret + i * 16 + 8, readLE<64>(default_secret + i * 16 + 8) - seed); - } - } - - template - XXH_FORCE_INLINE hash_t len_1to3(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combined = ((uint32_t)c1 << 16) | (((uint32_t)c2) << 24) | (((uint32_t)c3) << 0) | (((uint32_t)len) << 8); - uint64_t const bitflip = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; - uint64_t const keyed = (uint64_t)combined ^ bitflip; - return detail::avalanche<64>(keyed); - } - else - { - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combinedl = ((uint32_t)c1 << 16) + (((uint32_t)c2) << 24) + (((uint32_t)c3) << 0) + (((uint32_t)len) << 8); - uint32_t const combinedh = rotl<32>(swap<32>(combinedl), 13); - uint64_t const bitflipl = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; - uint64_t const bitfliph = (readLE<32>(secret + 8) ^ readLE<32>(secret + 12)) - seed; - uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; - uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; - hash128_t const h128 = { 
detail::avalanche<64>(keyed_lo), detail::avalanche<64>(keyed_hi)}; - - return h128; - } - } - - template - XXH_FORCE_INLINE hash_t len_4to8(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - constexpr uint64_t mix_constant = 0x9FB21C651E98DF25ULL; - - seed ^= (uint64_t)swap<32>((uint32_t)seed) << 32; - - if constexpr (N == 64) - { - uint32_t const input1 = readLE<32>(input); - uint32_t const input2 = readLE<32>(input + len - 4); - uint64_t const bitflip = (readLE<64>(secret + 8) ^ readLE<64>(secret + 16)) - seed; - uint64_t const input64 = input2 + ((uint64_t)input1 << 32); - uint64_t keyed = input64 ^ bitflip; - - return rrmxmx(keyed, len); - } - else - { - uint32_t const input_lo = readLE<32>(input); - uint32_t const input_hi = readLE<32>(input + len - 4); - uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32); - uint64_t const bitflip = (readLE<64>(secret + 16) ^ readLE<64>(secret + 24)) + seed; - uint64_t const keyed = input_64 ^ bitflip; - uint128_t m128 = mul64to128(keyed, PRIME<64>(1) + (len << 2)); - - m128.high64 += (m128.low64 << 1); - m128.low64 ^= (m128.high64 >> 3); - m128.low64 ^= (m128.low64 >> 35); - m128.low64 *= mix_constant; - m128.low64 ^= (m128.low64 >> 28); - m128.high64 = avalanche(m128.high64); - - return m128; - } - } - - template - XXH_FORCE_INLINE hash_t len_9to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint64_t const bitflip1 = (readLE<64>(secret + 24) ^ readLE<64>(secret + 32)) + seed; - uint64_t const bitflip2 = (readLE<64>(secret + 40) ^ readLE<64>(secret + 48)) - seed; - uint64_t const input_lo = readLE<64>(input) ^ bitflip1; - uint64_t const input_hi = readLE<64>(input + len - 8) ^ bitflip2; - uint64_t const acc = len + swap<64>(input_lo) + input_hi + mul128fold64(input_lo, input_hi); - - return avalanche(acc); - } - else - { - uint64_t const bitflipl = (readLE<64>(secret + 32) ^ readLE<64>(secret + 40)) - seed; - uint64_t const bitfliph = (readLE<64>(secret + 48) ^ readLE<64>(secret + 56)) + seed; - uint64_t const input_lo = readLE<64>(input); - uint64_t input_hi = readLE<64>(input + len - 8); - uint128_t m128 = mul64to128(input_lo ^ input_hi ^ bitflipl, PRIME<64>(1)); - - m128.low64 += (uint64_t)(len - 1) << 54; - input_hi ^= bitfliph; - - if constexpr (sizeof(void*) < sizeof(uint64_t)) // 32-bit version - { - m128.high64 += (input_hi & 0xFFFFFFFF00000000) + mul32to64((uint32_t)input_hi, PRIME<32>(2)); - } - else - { - m128.high64 += input_hi + mul32to64((uint32_t)input_hi, PRIME<32>(2) - 1); - } - - m128.low64 ^= swap<64>(m128.high64); - - hash128_t h128 = mul64to128(m128.low64, PRIME<64>(2)); - - h128.high64 += m128.high64 * PRIME<64>(2); - h128.low64 = avalanche(h128.low64); - h128.high64 = avalanche(h128.high64); - - return h128; - } - } - - template - XXH_FORCE_INLINE hash_t len_0to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if (XXH_likely(len > 8)) - { - return len_9to16(input, len, secret, seed); - } - else if (XXH_likely(len >= 4)) - { - return len_4to8(input, len, secret, seed); - } - else if (len) - { - return len_1to3(input, len, secret, seed); - } - else - { - if constexpr (N == 64) - { - return detail::avalanche<64>((seed) ^ (readLE<64>(secret + 56) ^ readLE<64>(secret + 64))); - } - else - { - uint64_t const bitflipl = readLE<64>(secret + 64) ^ readLE<64>(secret + 72); - uint64_t const bitfliph = readLE<64>(secret + 80) ^ readLE<64>(secret + 88); - - return hash128_t(detail::avalanche<64>(( seed) ^ 
bitflipl), detail::avalanche<64>(( seed) ^ bitfliph)); - } - } - } - - template - XXH_FORCE_INLINE hash_t hash_long_internal(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret = default_secret, size_t secretSize = sizeof(default_secret)) - { - alignas(acc_align) std::array acc = init_acc; - - if constexpr (N == 64) - { - hash_long_internal_loop(acc.data(), input, len, secret, secretSize); - - /* converge into final hash */ - return merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); - } - else - { - hash_long_internal_loop(acc.data(), input, len, secret, secretSize); - - /* converge into final hash */ - uint64_t const low64 = merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); - uint64_t const high64 = merge_accs(acc.data(), secret + secretSize - sizeof(acc) - secret_mergeaccs_start, ~((uint64_t)len * PRIME<64>(2))); - - return hash128_t(low64, high64); - } - } - - XXH_FORCE_INLINE uint64_t mix_16b(const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - uint64_t const input_lo = readLE<64>(input); - uint64_t const input_hi = readLE<64>(input + 8); - - return mul128fold64(input_lo ^ (readLE<64>(secret) + seed), input_hi ^ (readLE<64>(secret + 8) - seed)); - } - - XXH_FORCE_INLINE uint128_t mix_32b(uint128_t acc, const uint8_t* input1, const uint8_t* input2, const uint8_t* secret, uint64_t seed) - { - acc.low64 += mix_16b(input1, secret + 0, seed); - acc.low64 ^= readLE<64>(input2) + readLE<64>(input2 + 8); - acc.high64 += mix_16b(input2, secret + 16, seed); - acc.high64 ^= readLE<64>(input1) + readLE<64>(input1 + 8); - - return acc; - } - - template - XXH_FORCE_INLINE hash_t len_17to128(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - if constexpr (N == 64) - { - hash64_t acc = len * PRIME<64>(1); - - if (len > 32) - { - if (len > 64) - { - if (len > 96) - { - acc += mix_16b(input + 48, secret + 96, seed); - acc += mix_16b(input + len - 64, secret + 112, seed); - } - - acc += mix_16b(input + 32, secret + 64, seed); - acc += mix_16b(input + len - 48, secret + 80, seed); - } - - acc += mix_16b(input + 16, secret + 32, seed); - acc += mix_16b(input + len - 32, secret + 48, seed); - } - - acc += mix_16b(input + 0, secret + 0, seed); - acc += mix_16b(input + len - 16, secret + 16, seed); - - return avalanche(acc); - } - else - { - hash128_t acc = { len * PRIME<64>(1), 0 }; - - if (len > 32) - { - if (len > 64) - { - if (len > 96) - { - acc = mix_32b(acc, input + 48, input + len - 64, secret + 96, seed); - } - - acc = mix_32b(acc, input + 32, input + len - 48, secret + 64, seed); - } - - acc = mix_32b(acc, input + 16, input + len - 32, secret + 32, seed); - } - - acc = mix_32b(acc, input, input + len - 16, secret, seed); - - uint64_t const low64 = acc.low64 + acc.high64; - uint64_t const high64 = (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); - - return { avalanche(low64), (uint64_t)0 - avalanche(high64) }; - } - } - - template - XXH_NO_INLINE hash_t len_129to240(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint64_t acc = len * PRIME<64>(1); - size_t const nbRounds = len / 16; - - for (size_t i = 0; i < 8; i++) - { - acc += mix_16b(input + (i * 16), secret + (i * 16), seed); - } - - acc = avalanche(acc); - - for (size_t i = 8; i < nbRounds; i++) - { - acc += mix_16b(input + (i * 
16), secret + ((i - 8) * 16) + midsize_startoffset, seed); - } - - /* last bytes */ - acc += mix_16b(input + len - 16, secret + secret_size_min - midsize_lastoffset, seed); - - return avalanche(acc); - } - else - { - hash128_t acc; - uint64_t const nbRounds = len / 32; - - acc.low64 = len * PRIME<64>(1); - acc.high64 = 0; - - for (size_t i = 0; i < 4; i++) - { - acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, secret + (i * 32), seed); - } - - acc.low64 = avalanche(acc.low64); - acc.high64 = avalanche(acc.high64); - - for (size_t i = 4; i < nbRounds; i++) - { - acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, secret + midsize_startoffset + ((i - 4) * 32), seed); - } - - /* last bytes */ - acc = mix_32b(acc, input + len - 16, input + len - 32, secret + secret_size_min - midsize_lastoffset - 16, 0ULL - seed); - - uint64_t const low64 = acc.low64 + acc.high64; - uint64_t const high64 = (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); - - return { avalanche(low64), (uint64_t)0 - avalanche(high64) }; - } - - } - - template - XXH_NO_INLINE hash_t xxhash3_impl(const void* XXH_RESTRICT input, size_t len, hash64_t seed, const void* XXH_RESTRICT secret = default_secret, size_t secretSize = secret_default_size) - { - - alignas(64) uint8_t custom_secret[secret_default_size]; - - const void* short_secret = secret; - - if (seed != 0) - { - init_custom_secret(custom_secret, seed); - short_secret = default_secret; - } - - if (len <= 16) - { - return len_0to16(static_cast(input), len, static_cast(short_secret), seed); - } - else if (len <= 128) - { - return len_17to128(static_cast(input), len, static_cast(short_secret), seed); - } - else if (len <= midsize_max) - { - return len_129to240(static_cast(input), len, static_cast(short_secret), seed); - } - else - { - return hash_long_internal(static_cast(input), len, static_cast(((seed == 0) ? secret : ((secret == default_secret) ? custom_secret : secret))), ((seed == 0) ? secretSize : ((secret == default_secret) ? 
secret_default_size : secretSize))); - } - } - - XXH_NO_INLINE void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed, size_t seed_size) - { - if (seed_size == 0) - { - custom_seed = default_secret; - seed_size = secret_default_size; - } - - size_t pos = 0; - while (pos < secret_size) - { - size_t const copy_len = std::min(secret_size - pos, seed_size); - memcpy((uint8_t*)secret_buffer + pos, custom_seed, copy_len); - pos += copy_len; - } - - size_t const nbseg16 = secret_size / 16; - canonical128_t scrambled(xxhash3_impl<128>(custom_seed, seed_size, 0)); - for (size_t n = 0; n < nbseg16; n++) - { - hash128_t const h128 = xxhash3_impl<128>(&scrambled, sizeof(scrambled), n); - combine_16((uint8_t*)secret_buffer + n * 16, h128); - } - - combine_16((uint8_t*)secret_buffer + secret_size - 16, scrambled.get_hash()); - } - } - - - /* ************************************* - * Public Access Point - xxhash - ***************************************/ - - template - inline hash_t xxhash(const void* input, size_t len, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(input, len, seed); - } - - template - inline hash_t xxhash(const std::basic_string& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), input.length() * sizeof(T), seed); - } - - template - inline hash_t xxhash(ContiguousIterator begin, ContiguousIterator end, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - using T = typename std::decay_t; - return detail::endian_align(static_cast(&*begin), (end - begin) * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::vector& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), input.size() * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::array& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), AN * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::initializer_list& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.begin()), input.size() * sizeof(T), seed); - } - - - /* ************************************* - * Public Access Point - xxhash3 - ***************************************/ - - template - inline hash_t xxhash3(const void* input, size_t len, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(input, len, seed); - } - - template - inline hash_t xxhash3(const void* input, size_t len, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(input, len, seed, secret, secretSize); - } - - template - inline hash_t xxhash3(const std::basic_string& input, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can 
-
-
-/* *************************************
- * Public Access Point - xxhash3
- ***************************************/
-
-template <size_t bit_mode>
-inline hash_t<bit_mode> xxhash3(const void* input, size_t len, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(input, len, seed);
-}
-
-template <size_t bit_mode>
-inline hash_t<bit_mode> xxhash3(const void* input, size_t len, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(input, len, seed, secret, secretSize);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed, secret, secretSize);
-}
-
-template <size_t bit_mode, typename ContiguousIterator>
-inline hash_t<bit_mode> xxhash3(ContiguousIterator begin, ContiguousIterator end, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  using T = typename std::decay_t<decltype(*end)>;
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed);
-}
-
-template <size_t bit_mode, typename ContiguousIterator>
-inline hash_t<bit_mode> xxhash3(ContiguousIterator begin, ContiguousIterator end, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  using T = typename std::decay_t<decltype(*end)>;
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed, secret, secretSize);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::vector<T>& input, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::vector<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed, secret, secretSize);
-}
-
-template <size_t bit_mode, typename T, size_t AN>
-inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), seed);
-}
-
-template <size_t bit_mode, typename T, size_t AN>
-inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), seed, secret, secretSize);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed);
-}
-
-template <size_t bit_mode, typename T>
-inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
-{
-  static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
-  return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed, secret, secretSize);
-}
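/*
 * Usage sketch for the XXH3 front end above (removed by this change); the
 * buffer contents and seed are illustrative:
 *
 *   std::vector<char> buf(1024, 'x');
 *   auto h64  = xxh::xxhash3<64>(buf);          // one-shot, default secret
 *   auto h128 = xxh::xxhash3<128>(buf, 0xabcd); // seeded 128-bit variant
 */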
-
-
-/* *************************************
- * Secret Generation Functions
- ***************************************/
-
-inline void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed = detail3::default_secret, size_t seed_length = 0)
-{
-  detail3::generate_secret(secret_buffer, secret_size, custom_seed, seed_length);
-}
-
-template <typename T, size_t AN>
-inline void generate_secret(void* secret_buffer, size_t secret_size, const std::array<T, AN>& custom_seed)
-{
-  detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), AN * sizeof(T));
-}
-
-template <typename T>
-inline void generate_secret(void* secret_buffer, size_t secret_size, const std::initializer_list<T>& custom_seed)
-{
-  detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.begin()), custom_seed.size() * sizeof(T));
-}
-
-template <typename T>
-inline void generate_secret(void* secret_buffer, size_t secret_size, const std::vector<T>& custom_seed)
-{
-  detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), custom_seed.size() * sizeof(T));
-}
-
-template <typename T>
-inline void generate_secret(void* secret_buffer, size_t secret_size, const std::basic_string<T>& custom_seed)
-{
-  detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), custom_seed.length() * sizeof(T));
-}
-
-template <typename ContiguousIterator>
-inline void generate_secret(void* secret_buffer, size_t secret_size, ContiguousIterator begin, ContiguousIterator end)
-{
-  using T = typename std::decay_t<decltype(*end)>;
-  detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
-}
-
-inline void generate_secret_from_seed(void* secret_buffer, uint64_t seed = 0)
-{
-  alignas(64) uint8_t custom_secret[detail3::secret_default_size];
-  detail3::init_custom_secret(custom_secret, seed);
-  memcpy(secret_buffer, custom_secret, detail3::secret_default_size);
-}
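/*
 * Usage sketch for the secret helpers above (removed by this change); the
 * 192-byte size matches the assumed upstream secret_default_size, and the
 * seed string is illustrative:
 *
 *   std::array<uint8_t, 192> secret;
 *   xxh::generate_secret(secret.data(), secret.size(), std::string("my seed material"));
 *   auto h = xxh::xxhash3<64>(data, size, secret.data(), secret.size());
 */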
-
-
-/* *************************************
- * Hash streaming - xxhash
- ***************************************/
-
-template <size_t bit_mode>
-class hash_state_t
-{
-  uint64_t total_len = 0;
-  uint_t<bit_mode> v1 = 0, v2 = 0, v3 = 0, v4 = 0;
-  std::array<uint_t<bit_mode>, 4> mem = {0, 0, 0, 0};
-  uint32_t memsize = 0;
-
-  inline void update_impl(const void* input, size_t length)
-  {
-    const uint8_t* p = reinterpret_cast<const uint8_t*>(input);
-    const uint8_t* const bEnd = p + length;
-
-    total_len += length;
-
-    if (memsize + length < (bit_mode / 2))
-    { /* fill in tmp buffer */
-      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, length);
-      memsize += static_cast<uint32_t>(length);
-      return;
-    }
-
-    if (memsize > 0)
-    { /* some data left from previous update */
-      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, (bit_mode / 2) - memsize);
-
-      const uint_t<bit_mode>* ptr = mem.data();
-
-      v1 = detail::round<bit_mode>(v1, mem_ops::readLE<bit_mode>(ptr));
-      ptr++;
-      v2 = detail::round<bit_mode>(v2, mem_ops::readLE<bit_mode>(ptr));
-      ptr++;
-      v3 = detail::round<bit_mode>(v3, mem_ops::readLE<bit_mode>(ptr));
-      ptr++;
-      v4 = detail::round<bit_mode>(v4, mem_ops::readLE<bit_mode>(ptr));
-
-      p += (bit_mode / 2) - memsize;
-      memsize = 0;
-    }
-
-    if (p <= bEnd - (bit_mode / 2))
-    {
-      const uint8_t* const limit = bEnd - (bit_mode / 2);
-
-      do
-      {
-        v1 = detail::round<bit_mode>(v1, mem_ops::readLE<bit_mode>(p));
-        p += (bit_mode / 8);
-        v2 = detail::round<bit_mode>(v2, mem_ops::readLE<bit_mode>(p));
-        p += (bit_mode / 8);
-        v3 = detail::round<bit_mode>(v3, mem_ops::readLE<bit_mode>(p));
-        p += (bit_mode / 8);
-        v4 = detail::round<bit_mode>(v4, mem_ops::readLE<bit_mode>(p));
-        p += (bit_mode / 8);
-      }
-      while (p <= limit);
-    }
-
-    if (p < bEnd)
-    {
-      memcpy(mem.data(), p, static_cast<size_t>(bEnd - p));
-      memsize = static_cast<uint32_t>(bEnd - p);
-    }
-  }
-
-  inline hash_t<bit_mode> digest_impl() const
-  {
-    const uint8_t* p = reinterpret_cast<const uint8_t*>(mem.data());
-    const uint8_t* const bEnd = reinterpret_cast<const uint8_t*>(mem.data()) + memsize;
-    hash_t<bit_mode> hash_ret;
-
-    if (total_len >= (bit_mode / 2))
-    {
-      hash_ret = bit_ops::rotl<bit_mode>(v1, 1) + bit_ops::rotl<bit_mode>(v2, 7) + bit_ops::rotl<bit_mode>(v3, 12) + bit_ops::rotl<bit_mode>(v4, 18);
-
-      if constexpr (bit_mode == 64)
-      {
-        detail::endian_align_sub_mergeround<bit_mode>(hash_ret, v1, v2, v3, v4);
-      }
-    }
-    else
-    {
-      hash_ret = v3 + detail::PRIME<bit_mode>(5);
-    }
-
-    hash_ret += static_cast<hash_t<bit_mode>>(total_len);
-
-    return detail::endian_align_sub_ending<bit_mode>(hash_ret, p, bEnd);
-  }
-
-public:
-
-  hash_state_t(uint_t<bit_mode> seed = 0)
-  {
-    static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash streaming can only be used in 32 and 64 bit modes.");
-    v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
-    v2 = seed + detail::PRIME<bit_mode>(2);
-    v3 = seed + 0;
-    v4 = seed - detail::PRIME<bit_mode>(1);
-  };
-
-  hash_state_t operator=(hash_state_t& other)
-  {
-    memcpy(this, &other, sizeof(hash_state_t));
-  }
-
-  void reset(uint_t<bit_mode> seed = 0)
-  {
-    memset(this, 0, sizeof(hash_state_t));
-    v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
-    v2 = seed + detail::PRIME<bit_mode>(2);
-    v3 = seed + 0;
-    v4 = seed - detail::PRIME<bit_mode>(1);
-  }
-
-  void update(const void* input, size_t length)
-  {
-    return update_impl(input, length);
-  }
-
-  template <typename T>
-  void update(const std::basic_string<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
-  }
-
-  template <typename ContiguousIterator>
-  void update(ContiguousIterator begin, ContiguousIterator end)
-  {
-    using T = typename std::decay_t<decltype(*end)>;
-    return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
-  }
-
-  template <typename T>
-  void update(const std::vector<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
-  }
-
-  template <typename T, size_t AN>
-  void update(const std::array<T, AN>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
-  }
-
-  template <typename T>
-  void update(const std::initializer_list<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
-  }
-
-  hash_t<bit_mode> digest() const
-  {
-    return digest_impl();
-  }
-};
-
-using hash_state32_t = hash_state_t<32>;
-using hash_state64_t = hash_state_t<64>;
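/*
 * Usage sketch for the streaming interface above (removed by this change);
 * the chunk buffers are illustrative:
 *
 *   xxh::hash_state64_t state(42);      // seeded state
 *   state.update(chunk1, chunk1_len);   // feed data incrementally
 *   state.update(chunk2, chunk2_len);
 *   auto h = state.digest();            // digest() is const: the state can keep ingesting
 */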
-
-
-/* *************************************
- * Hash streaming - xxhash3
- ***************************************/
-
-template <size_t bit_mode>
-class alignas(64) hash3_state_t
-{
-  constexpr static int internal_buffer_size = 256;
-  constexpr static int internal_buffer_stripes = (internal_buffer_size / detail3::stripe_len);
-
-  alignas(64) uint64_t acc[8];
-  alignas(64) uint8_t customSecret[detail3::secret_default_size]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
-  alignas(64) uint8_t buffer[internal_buffer_size];
-  uint32_t bufferedSize = 0;
-  uint32_t nbStripesPerBlock = 0;
-  uint32_t nbStripesSoFar = 0;
-  uint32_t secretLimit = 0;
-  uint32_t reserved32 = 0;
-  uint32_t reserved32_2 = 0;
-  uint64_t totalLen = 0;
-  uint64_t seed = 0;
-  bool useSeed = false;
-  uint64_t reserved64 = 0;
-  const uint8_t* secret = nullptr; /* note : there is some padding after, due to alignment on 64 bytes */
-
-  void consume_stripes(uint64_t* acc, uint32_t& nbStripesSoFar, size_t totalStripes, const uint8_t* input)
-  {
-    if (nbStripesPerBlock - nbStripesSoFar <= totalStripes) /* need a scrambling operation */
-    {
-      size_t const nbStripes = nbStripesPerBlock - nbStripesSoFar;
-
-      detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), nbStripes);
-      detail3::scramble_acc(acc, secret + secretLimit);
-      detail3::accumulate(acc, input + nbStripes * detail3::stripe_len, secret, totalStripes - nbStripes);
-      nbStripesSoFar = (uint32_t)(totalStripes - nbStripes);
-    }
-    else
-    {
-      detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), totalStripes);
-      nbStripesSoFar += (uint32_t)totalStripes;
-    }
-  }
-
-  void update_impl(const void* input_, size_t len)
-  {
-    const uint8_t* input = static_cast<const uint8_t*>(input_);
-    const uint8_t* const bEnd = input + len;
-
-    totalLen += len;
-
-    if (bufferedSize + len <= internal_buffer_size)
-    { /* fill in tmp buffer */
-      memcpy(buffer + bufferedSize, input, len);
-      bufferedSize += (uint32_t)len;
-      return;
-    }
-    /* input now > XXH3_INTERNALBUFFER_SIZE */
-
-    if (bufferedSize > 0)
-    { /* some input within internal buffer: fill then consume it */
-      size_t const loadSize = internal_buffer_size - bufferedSize;
-
-      memcpy(buffer + bufferedSize, input, loadSize);
-      input += loadSize;
-      consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, buffer);
-      bufferedSize = 0;
-    }
-
-    /* consume input by full buffer quantities */
-    if (input + internal_buffer_size <= bEnd)
-    {
-      const uint8_t* const limit = bEnd - internal_buffer_size;
-
-      do
-      {
-        consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, input);
-        input += internal_buffer_size;
-      }
-      while (input < limit);
-
-      memcpy(buffer + sizeof(buffer) - detail3::stripe_len, input - detail3::stripe_len, detail3::stripe_len);
-    }
-
-    if (input < bEnd)
-    { /* some remaining input: buffer it */
-      memcpy(buffer, input, (size_t)(bEnd - input));
-      bufferedSize = (uint32_t)(bEnd - input);
-    }
-  }
-
-  void digest_long(uint64_t* acc_)
-  {
-    memcpy(acc_, acc, sizeof(acc)); /* digest locally, state remains unaltered, and can continue ingesting more input afterwards */
-
-    if (bufferedSize >= detail3::stripe_len)
-    {
-      size_t const totalNbStripes = (bufferedSize - 1) / detail3::stripe_len;
-      uint32_t nbStripesSoFar = this->nbStripesSoFar;
-
-      consume_stripes(acc_, nbStripesSoFar, totalNbStripes, buffer);
-
-      /* one last partial stripe */
-      detail3::accumulate_512(acc_, buffer + bufferedSize - detail3::stripe_len, secret + secretLimit - detail3::secret_lastacc_start);
-    }
-    else
-    { /* bufferedSize < STRIPE_LEN */
-      /* one last stripe */
-      uint8_t lastStripe[detail3::stripe_len];
-      size_t const catchupSize = detail3::stripe_len - bufferedSize;
-
-      memcpy(lastStripe, buffer + sizeof(buffer) - catchupSize, catchupSize);
-      memcpy(lastStripe + catchupSize, buffer, bufferedSize);
-      detail3::accumulate_512(acc_, lastStripe, secret + secretLimit - detail3::secret_lastacc_start);
-    }
-  }
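  /*
   * Worked example of the bookkeeping above, assuming the upstream XXH3
   * constants (secret_default_size == 192, stripe_len == 64,
   * secret_consume_rate == 8): secretLimit = 192 - 64 = 128, so
   * nbStripesPerBlock = 128 / 8 = 16. Every 16 stripes, consume_stripes()
   * runs scramble_acc() once and restarts reading the secret from its start.
   */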
-
-  void reset_internal(uint64_t seed_reset, const void* secret_reset, size_t secret_size)
-  {
-    memset(this, 0, sizeof(*this));
-    memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
-    seed = seed_reset;
-    useSeed = (seed != 0);
-    secret = (const uint8_t*)secret_reset;
-    secretLimit = (uint32_t)(secret_size - detail3::stripe_len);
-    nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;
-  }
-
-public:
-
-  hash3_state_t operator=(hash3_state_t& other)
-  {
-    memcpy(this, &other, sizeof(hash3_state_t));
-  }
-
-  hash3_state_t(uint64_t seed = 0)
-  {
-    static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes.");
-    reset(seed);
-  }
-
-  hash3_state_t(const void* secret, size_t secretSize, uint64_t seed = 0)
-  {
-    static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes.");
-    reset(secret, secretSize, seed);
-  }
-
-  void reset(uint64_t seed = 0)
-  {
-    reset_internal(seed, detail3::default_secret, detail3::secret_default_size);
-    detail3::init_custom_secret(customSecret, seed);
-    secret = customSecret;
-    /*
-    memset(this, 0, sizeof(*this));
-    memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
-    (*this).seed = seed;
-
-    if (seed == 0)
-    {
-      secret = detail3::default_secret;
-    }
-    else
-    {
-      detail3::init_custom_secret(customSecret, seed);
-      secret = customSecret;
-    }
-
-    secretLimit = (uint32_t)(detail3::secret_default_size - detail3::stripe_len);
-    nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;*/
-  }
-
-  void reset(const void* secret, size_t secretSize, uint64_t seed = 0)
-  {
-    reset_internal(seed, secret, secretSize);
-    useSeed = true;
-    /*
-    memset(this, 0, sizeof(*this));
-    memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
-    seed = 0;
-
-    (*this).secret = (const uint8_t*)secret;
-    secretLimit = (uint32_t)(secretSize - detail3::stripe_len);
-    nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;*/
-  }
-
-  void update(const void* input, size_t len)
-  {
-    return update_impl(static_cast<const void*>(input), len);
-  }
-
-  template <typename T>
-  void update(const std::basic_string<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
-  }
-
-  template <typename ContiguousIterator>
-  void update(ContiguousIterator begin, ContiguousIterator end)
-  {
-    using T = typename std::decay_t<decltype(*end)>;
-    return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
-  }
-
-  template <typename T>
-  void update(const std::vector<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
-  }
-
-  template <typename T, size_t AN>
-  void update(const std::array<T, AN>& input)
-  {
-    return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
-  }
-
-  template <typename T>
-  void update(const std::initializer_list<T>& input)
-  {
-    return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
-  }
-
-  hash_t<bit_mode> digest()
-  {
-    if (totalLen > detail3::midsize_max)
-    {
-      alignas(128) hash64_t acc[detail3::acc_nb];
-
-      digest_long(acc);
-
-      if constexpr (bit_mode == 64)
-      {
-        return detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1));
-      }
-      else
-      {
-        uint64_t const low64 = detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1));
-        uint64_t const high64 = detail3::merge_accs(acc, secret + secretLimit + detail3::stripe_len - sizeof(acc) - detail3::secret_mergeaccs_start, ~((uint64_t)totalLen * detail::PRIME<64>(2)));
-
-        return { low64, high64 };
-      }
-    }
-    else
-    {
-      return detail3::xxhash3_impl<bit_mode>(buffer, totalLen, seed, secret, secretLimit + detail3::stripe_len);
-    }
-  }
-};
-
-using hash3_state64_t = hash3_state_t<64>;
-using hash3_state128_t = hash3_state_t<128>;
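/*
 * Usage sketch for the XXH3 streaming interface above (removed by this
 * change); the buffer names are illustrative:
 *
 *   xxh::hash3_state128_t st;        // default secret, seed 0
 *   st.update(header, header_len);
 *   st.update(payload, payload_len);
 *   auto h = st.digest();            // state stays valid for further update()s
 */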
+                                        hash_t<N>& hash_ret,
+                                        hash_t<N> v1, hash_t<N> v2, hash_t<N> v3, hash_t<N> v4){};
+
+template <>
+inline void endian_align_sub_mergeround<64>(hash_t<64>& hash_ret, hash_t<64> v1, hash_t<64> v2, hash_t<64> v3,
+                                            hash_t<64> v4)
+{
+  hash_ret = mergeRound64(hash_ret, v1);
+  hash_ret = mergeRound64(hash_ret, v2);
+  hash_ret = mergeRound64(hash_ret, v3);
+  hash_ret = mergeRound64(hash_ret, v4);
+}
+
+template <size_t N>
+inline hash_t<N> endian_align_sub_ending(hash_t<N> hash_ret, const uint8_t* p, const uint8_t* bEnd,
+                                         xxh::endianness endian, xxh::alignment align){};
+
+template <>
+inline hash_t<32> endian_align_sub_ending<32>(hash_t<32> hash_ret, const uint8_t* p, const uint8_t* bEnd,
+                                              xxh::endianness endian, xxh::alignment align)
+{
+  while ((p + 4) <= bEnd) {
+    hash_ret += mem_ops::readLE_align<32>(p, endian, align) * PRIME<32>(3);
+    hash_ret = bit_ops::rotl<32>(hash_ret, 17) * PRIME<32>(4);
+    p += 4;
+  }
+
+  while (p < bEnd) {
+    hash_ret += (*p) * PRIME<32>(5);
+    hash_ret = bit_ops::rotl<32>(hash_ret, 11) * PRIME<32>(1);
+    p++;
+  }
+
+  hash_ret ^= hash_ret >> 15;
+  hash_ret *= PRIME<32>(2);
+  hash_ret ^= hash_ret >> 13;
+  hash_ret *= PRIME<32>(3);
+  hash_ret ^= hash_ret >> 16;
+
+  return hash_ret;
+}
+
+template <>
+inline hash_t<64> endian_align_sub_ending<64>(hash_t<64> hash_ret, const uint8_t* p, const uint8_t* bEnd,
+                                              xxh::endianness endian, xxh::alignment align)
+{
+  while (p + 8 <= bEnd) {
+    const hash64_t k1 = round<64>(0, mem_ops::readLE_align<64>(p, endian, align));
+    hash_ret ^= k1;
+    hash_ret = bit_ops::rotl<64>(hash_ret, 27) * PRIME<64>(1) + PRIME<64>(4);
+    p += 8;
+  }
+
+  if (p + 4 <= bEnd) {
+    hash_ret ^= static_cast<hash64_t>(mem_ops::readLE_align<32>(p, endian, align)) * PRIME<64>(1);
+    hash_ret = bit_ops::rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3);
+    p += 4;
+  }
+
+  while (p < bEnd) {
+    hash_ret ^= (*p) * PRIME<64>(5);
+    hash_ret = bit_ops::rotl<64>(hash_ret, 11) * PRIME<64>(1);
+    p++;
+  }
+
+  hash_ret ^= hash_ret >> 33;
+  hash_ret *= PRIME<64>(2);
+  hash_ret ^= hash_ret >> 29;
+  hash_ret *= PRIME<64>(3);
+  hash_ret ^= hash_ret >> 32;
+
+  return hash_ret;
+}
+
+template <size_t N>
+inline hash_t<N> endian_align(const void* input, size_t len, hash_t<N> seed, xxh::endianness endian,
+                              xxh::alignment align)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode.");
+
+  const uint8_t* p = static_cast<const uint8_t*>(input);
+  const uint8_t* bEnd = p + len;
+  hash_t<N> hash_ret;
+
+  if (len >= (N / 2)) {
+    const uint8_t* const limit = bEnd - (N / 2);
+    hash_t<N> v1 = seed + PRIME<N>(1) + PRIME<N>(2);
+    hash_t<N> v2 = seed + PRIME<N>(2);
+    hash_t<N> v3 = seed + 0;
+    hash_t<N> v4 = seed - PRIME<N>(1);
+
+    do {
+      v1 = round<N>(v1, mem_ops::readLE_align<N>(p, endian, align));
+      p += (N / 8);
+      v2 = round<N>(v2, mem_ops::readLE_align<N>(p, endian, align));
+      p += (N / 8);
+      v3 = round<N>(v3, mem_ops::readLE_align<N>(p, endian, align));
+      p += (N / 8);
+      v4 = round<N>(v4, mem_ops::readLE_align<N>(p, endian, align));
+      p += (N / 8);
+    } while (p <= limit);
+
+    hash_ret = bit_ops::rotl<N>(v1, 1) + bit_ops::rotl<N>(v2, 7) + bit_ops::rotl<N>(v3, 12) + bit_ops::rotl<N>(v4, 18);
+
+    endian_align_sub_mergeround<N>(hash_ret, v1, v2, v3, v4);
+  } else {
+    hash_ret = seed + PRIME<N>(5);
+  }
+
+  hash_ret += static_cast<hash_t<N>>(len);
+
+  return endian_align_sub_ending<N>(hash_ret, p, bEnd, endian, align);
+}
+} // namespace detail
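/*
 * The tail of endian_align_sub_ending<64> above is the XXH64 "avalanche":
 * alternating xor-shift and multiply rounds that spread every input bit
 * across the whole word. A standalone sketch of just that finalizer
 * (avalanche64 is an illustrative name, not part of this header):
 *
 *   inline uint64_t avalanche64(uint64_t h)
 *   {
 *     h ^= h >> 33;
 *     h *= 0xC2B2AE3D27D4EB4FULL; // PRIME<64>(2)
 *     h ^= h >> 29;
 *     h *= 0x165667B19E3779F9ULL; // PRIME<64>(3)
 *     h ^= h >> 32;
 *     return h;
 *   }
 */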
+
+template <size_t N>
+hash_t<N> xxhash(const void* input, size_t len, hash_t<N> seed = 0, endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  return detail::endian_align<N>(input, len, seed, mem_ops::get_endian(endian), mem_ops::get_alignment<N>(input));
+}
+
+template <size_t N, typename T>
+hash_t<N> xxhash(const std::basic_string<T>& input, hash_t<N> seed = 0, endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  return detail::endian_align<N>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed,
+                                 mem_ops::get_endian(endian),
+                                 mem_ops::get_alignment<N>(static_cast<const void*>(input.data())));
+}
+
+template <size_t N, typename ContiguousIterator>
+hash_t<N> xxhash(ContiguousIterator begin, ContiguousIterator end, hash_t<N> seed = 0,
+                 endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  using T = typename std::decay_t<decltype(*end)>;
+  return detail::endian_align<N>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed,
+                                 mem_ops::get_endian(endian),
+                                 mem_ops::get_alignment<N>(static_cast<const void*>(&*begin)));
+}
+
+template <size_t N, typename T>
+hash_t<N> xxhash(const std::vector<T>& input, hash_t<N> seed = 0, endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  return detail::endian_align<N>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed,
+                                 mem_ops::get_endian(endian),
+                                 mem_ops::get_alignment<N>(static_cast<const void*>(input.data())));
+}
+
+template <size_t N, typename T, size_t AN>
+hash_t<N> xxhash(const std::array<T, AN>& input, hash_t<N> seed = 0, endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  return detail::endian_align<N>(static_cast<const void*>(input.data()), AN * sizeof(T), seed,
+                                 mem_ops::get_endian(endian),
+                                 mem_ops::get_alignment<N>(static_cast<const void*>(input.data())));
+}
+
+template <size_t N, typename T>
+hash_t<N> xxhash(const std::initializer_list<T>& input, hash_t<N> seed = 0, endianness endian = endianness::unspecified)
+{
+  static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode.");
+  return detail::endian_align<N>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed,
+                                 mem_ops::get_endian(endian),
+                                 mem_ops::get_alignment<N>(static_cast<const void*>(input.begin())));
+}
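/*
 * Usage sketch for the restored front end above; the trailing endianness
 * argument is optional and defaults to runtime detection (the data and seed
 * shown are illustrative):
 *
 *   std::vector<uint8_t> data(256, 0);
 *   auto h64 = xxh::xxhash<64>(data);        // container overload, native endianness
 *   auto h32 = xxh::xxhash<32>(data.data(), data.size(), 7, xxh::endianness::unspecified);
 */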
+
+/* *******************************************************************
+ * Hash streaming
+ *********************************************************************/
+enum class error_code : uint8_t { ok = 0, error };
+
+template <size_t N> class hash_state_t {
+  uint64_t total_len = 0;
+  hash_t<N> v1 = 0, v2 = 0, v3 = 0, v4 = 0;
+  std::array<hash_t<N>, 4> mem = {{0, 0, 0, 0}};
+  uint32_t memsize = 0;
+
+  inline error_code _update_impl(const void* input, size_t length, endianness endian)
+  {
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(input);
+    const uint8_t* const bEnd = p + length;
+
+    if (!input) {
+      return xxh::error_code::error;
+    }
+
+    total_len += length;
+
+    if (memsize + length < (N / 2)) { /* fill in tmp buffer */
+      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, length);
+      memsize += static_cast<uint32_t>(length);
+      return error_code::ok;
+    }
+
+    if (memsize) { /* some data left from previous update */
+      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, (N / 2) - memsize);
+
+      const hash_t<N>* ptr = mem.data();
+      v1 = detail::round<N>(v1, mem_ops::readLE<N>(ptr, endian));
+      ptr++;
+      v2 = detail::round<N>(v2, mem_ops::readLE<N>(ptr, endian));
+      ptr++;
+      v3 = detail::round<N>(v3, mem_ops::readLE<N>(ptr, endian));
+      ptr++;
+      v4 = detail::round<N>(v4, mem_ops::readLE<N>(ptr, endian));
+
+      p += (N / 2) - memsize;
+      memsize = 0;
+    }
+
+    if (p <= bEnd - (N / 2)) {
+      const uint8_t* const limit = bEnd - (N / 2);
+
+      do {
+        v1 = detail::round<N>(v1, mem_ops::readLE<N>(p, endian));
+        p += (N / 8);
+        v2 = detail::round<N>(v2, mem_ops::readLE<N>(p, endian));
+        p += (N / 8);
+        v3 = detail::round<N>(v3, mem_ops::readLE<N>(p, endian));
+        p += (N / 8);
+        v4 = detail::round<N>(v4, mem_ops::readLE<N>(p, endian));
+        p += (N / 8);
+      } while (p <= limit);
+    }
+
+    if (p < bEnd) {
+      memcpy(mem.data(), p, static_cast<size_t>(bEnd - p));
+      memsize = static_cast<uint32_t>(bEnd - p);
+    }
+
+    return error_code::ok;
+  }
+
+  inline hash_t<N> _digest_impl(endianness endian) const
+  {
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(mem.data());
+    const uint8_t* const bEnd = reinterpret_cast<const uint8_t*>(mem.data()) + memsize;
+    hash_t<N> hash_ret;
+
+    if (total_len > (N / 2)) {
+      hash_ret =
+          bit_ops::rotl<N>(v1, 1) + bit_ops::rotl<N>(v2, 7) + bit_ops::rotl<N>(v3, 12) + bit_ops::rotl<N>(v4, 18);
+
+      detail::endian_align_sub_mergeround<N>(hash_ret, v1, v2, v3, v4);
+    } else {
+      hash_ret = v3 + detail::PRIME<N>(5);
+    }
+
+    hash_ret += static_cast<hash_t<N>>(total_len);
+
+    return detail::endian_align_sub_ending<N>(hash_ret, p, bEnd, endian, alignment::unaligned);
+  }
+
+public:
+  hash_state_t(hash_t<N> seed = 0)
+  {
+    static_assert(!(N != 32 && N != 64), "You can only stream hashing in 32 or 64 bit mode.");
+    v1 = seed + detail::PRIME<N>(1) + detail::PRIME<N>(2);
+    v2 = seed + detail::PRIME<N>(2);
+    v3 = seed + 0;
+    v4 = seed - detail::PRIME<N>(1);
+  };
+
+  hash_state_t operator=(hash_state_t& other) { memcpy(this, &other, sizeof(hash_state_t)); }
+
+  error_code reset(hash_t<N> seed = 0)
+  {
+    memset(this, 0, sizeof(hash_state_t));
+    v1 = seed + detail::PRIME<N>(1) + detail::PRIME<N>(2);
+    v2 = seed + detail::PRIME<N>(2);
+    v3 = seed + 0;
+    v4 = seed - detail::PRIME<N>(1);
+    return error_code::ok;
+  }
+
+  error_code update(const void* input, size_t length, endianness endian = endianness::unspecified)
+  {
+    return _update_impl(input, length, mem_ops::get_endian(endian));
+  }
+
+  template <typename T>
+  error_code update(const std::basic_string<T>& input, endianness endian = endianness::unspecified)
+  {
+    return _update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T),
+                        mem_ops::get_endian(endian));
+  }
+
+  template <typename ContiguousIterator>
+  error_code update(ContiguousIterator begin, ContiguousIterator end, endianness endian = endianness::unspecified)
+  {
+    using T = typename std::decay_t<decltype(*end)>;
+    return _update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), mem_ops::get_endian(endian));
+  }
+
+  template <typename T> error_code update(const std::vector<T>& input, endianness endian = endianness::unspecified)
+  {
+    return _update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T), mem_ops::get_endian(endian));
+  }
+
+  template <typename T, size_t AN>
+  error_code update(const std::array<T, AN>& input, endianness endian = endianness::unspecified)
+  {
+    return _update_impl(static_cast<const void*>(input.data()), AN * sizeof(T), mem_ops::get_endian(endian));
+  }
+
+  template <typename T>
+  error_code update(const std::initializer_list<T>& input, endianness endian = endianness::unspecified)
+  {
+    return _update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T), mem_ops::get_endian(endian));
+  }
+
+  hash_t<N> digest(endianness endian = endianness::unspecified) { return _digest_impl(mem_ops::get_endian(endian)); }
+};
+
+using hash_state32_t = hash_state_t<32>;
+using hash_state64_t = hash_state_t<64>;
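/*
 * Usage sketch for the restored streaming interface above; unlike the
 * removed variant, update() reports failure through error_code instead of
 * assuming valid input (names are illustrative):
 *
 *   xxh::hash_state64_t st(42);
 *   if (st.update(chunk, chunk_len) != xxh::error_code::ok)
 *     ...handle the error (e.g. a null input pointer)...
 *   auto h = st.digest();
 */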
+
+/* *******************************************************************
+ * Canonical
+ *********************************************************************/
+
+template <size_t N> struct canonical_t {
+  std::array<uint8_t, N / 8> digest;
+
+  canonical_t(hash_t<N> hash)
+  {
+    if (mem_ops::is_little_endian()) {
+      hash = bit_ops::swap<N>(hash);
+    }
+    memcpy(digest.data(), &hash, sizeof(canonical_t));
+  }
+
+  hash_t<N> get_hash() const { return mem_ops::readBE<N>(&digest); }
+};
+
+using canonical32_t = canonical_t<32>;
+using canonical64_t = canonical_t<64>;
+} // namespace xxh
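/*
 * Usage sketch for canonical_t above: it fixes the digest to big-endian byte
 * order so the same bytes can be stored or transmitted from any platform
 * (the POSIX write() call is illustrative):
 *
 *   auto h = xxh::xxhash<64>(data, size);
 *   xxh::canonical64_t canon(h);                       // big-endian layout
 *   write(fd, canon.digest.data(), canon.digest.size());
 *   assert(canon.get_hash() == h);                     // round-trips losslessly
 */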