From 4127fe802ff334591a6ba5b315190523fc2a660e Mon Sep 17 00:00:00 2001 From: Arnaud Giersch Date: Sun, 19 Feb 2023 15:48:56 +0100 Subject: [PATCH] Upgrade embedded xxhash (-> v0.8.1). Taken from https://github.com/RedSpah/xxhash_cpp/commit/d21e37c34c4ad4ad562e4ec01ec036e342c422e3 --- COPYING | 4 +- src/3rd-party/xxhash.hpp | 2858 +++++++++++++++++++++++++++++--------- 2 files changed, 2178 insertions(+), 684 deletions(-) diff --git a/COPYING b/COPYING index f4b1b6d7ef..8b87fe6838 100644 --- a/COPYING +++ b/COPYING @@ -193,8 +193,8 @@ License: GPL-3 Files: src/3rd-party/xxhash.hpp Copyright: - Copyright (C) 2012-2018, Yann Collet. - Copyright (C) 2017-2018, Piotr Pliszka. + Copyright (C) 2012-2022, Yann Collet. + Copyright (C) 2017-2022, Red Gavin. License: BSD 2-Clause License: BSL-1.0 diff --git a/src/3rd-party/xxhash.hpp b/src/3rd-party/xxhash.hpp index a3dbe98912..5917a50007 100644 --- a/src/3rd-party/xxhash.hpp +++ b/src/3rd-party/xxhash.hpp @@ -1,18 +1,16 @@ #pragma once -#include #include #include -#include +#include #include #include - -#include +#include /* xxHash - Extremely Fast Hash algorithm Header File -Copyright (C) 2012-2018, Yann Collet. -Copyright (C) 2017-2018, Piotr Pliszka. +Copyright (C) 2012-2022, Yann Collet. +Copyright (C) 2017-2022, Red Gavin. All rights reserved. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -41,717 +39,2213 @@ You can contact the author at : - xxHash C++ port repository : https://github.com/RedSpah/xxhash_cpp */ -/* ************************************* - * Tuning parameters - ***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -#if defined(__GNUC__) && (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)) -#define XXH_FORCE_MEMORY_ACCESS 2 -#elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__))) -#define XXH_FORCE_MEMORY_ACCESS 1 -#endif -#endif +/* Intrinsics +* Sadly has to be included in the global namespace or literally everything breaks +*/ +#include -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. 
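A minimal stand-alone sketch of the "Method 0" memcpy read described in the removed tuning notes above (illustrative only, not part of the patch): memcpy sidesteps unaligned-access undefined behavior, and mainstream compilers lower it to a single load on targets that allow unaligned access.

#include <cstdint>
#include <cstring>

// Safe unaligned little-endian read: memcpy is defined for any alignment,
// and the byte swap normalizes big-endian hosts to the little-endian
// convention the library hashes in.
static inline uint32_t read32_le(const void* p)
{
    uint32_t v;
    std::memcpy(&v, p, sizeof(v));
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    v = __builtin_bswap32(v);
#endif
    return v;
}
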
- * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#if !defined(XXH_FORCE_NATIVE_FORMAT) || (XXH_FORCE_NATIVE_FORMAT == 0) /* can be defined externally */ -#define XXH_FORCE_NATIVE_FORMAT 0 -#define XXH_CPU_LITTLE_ENDIAN 1 +namespace xxh +{ + /* ************************************* + * Versioning + ***************************************/ + + namespace version + { + constexpr int cpp_version_major = 0; + constexpr int cpp_version_minor = 8; + constexpr int cpp_version_release = 1; + } + + constexpr uint32_t version_number() + { + return version::cpp_version_major * 10000 + version::cpp_version_minor * 100 + version::cpp_version_release; + } + + + /* ************************************* + * Basic Types - Predefining uint128_t for intrin + ***************************************/ + + namespace typedefs + { + struct alignas(16) uint128_t + { + uint64_t low64 = 0; + uint64_t high64 = 0; + + bool operator==(const uint128_t & other) + { + return (low64 == other.low64 && high64 == other.high64); + } + + bool operator>(const uint128_t & other) + { + return (high64 > other.high64 || low64 > other.low64); + } + + bool operator>=(const uint128_t & other) + { + return (*this > other || *this == other); + } + + bool operator<(const uint128_t & other) + { + return !(*this >= other); + } + + bool operator<=(const uint128_t & other) + { + return !(*this > other); + } + + bool operator!=(const uint128_t & other) + { + return !(*this == other); + } + + uint128_t(uint64_t low, uint64_t high) : low64(low), high64(high) {} + + uint128_t() {} + }; + + } + + using uint128_t = typedefs::uint128_t; + + + /* ************************************* + * Compiler / Platform Specific Features + ***************************************/ + + namespace intrin + { + /*!XXH_CPU_LITTLE_ENDIAN : + * This is a CPU endian detection macro, will be + * automatically set to 1 (little endian) if it is left undefined. + * If compiling for a big endian system (why), XXH_CPU_LITTLE_ENDIAN has to be explicitly defined as 0. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +# define XXH_CPU_LITTLE_ENDIAN 1 #endif -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; - * set it to 0 when the input is guaranteed to be aligned, - * or when alignment doesn't matter for performance. - */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -#define XXH_FORCE_ALIGN_CHECK 0 -#else -#define XXH_FORCE_ALIGN_CHECK 1 -#endif -#endif -/*!XXH_CPU_LITTLE_ENDIAN : - * This is a CPU endian detection macro, will be - * automatically set to 1 (little endian) if XXH_FORCE_NATIVE_FORMAT - * is left undefined, XXH_FORCE_NATIVE_FORMAT is defined to 0, or if an x86/x86_64 compiler macro is defined. - * If left undefined, endianness will be determined at runtime, at the cost of a slight one-time overhead - * and a larger overhead due to get_endian() not being constexpr. 
- */ -#ifndef XXH_CPU_LITTLE_ENDIAN -#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -#define XXH_CPU_LITTLE_ENDIAN 1 -#endif + /* Vectorization Detection + * NOTE: XXH_NEON and XXH_VSX aren't supported in this C++ port. + * The primary reason is that I don't have access to an ARM and PowerPC + * machines to test them, and the secondary reason is that I even doubt anyone writing + * code for such machines would bother using a C++ port rather than the original C version. + */ +#ifndef XXH_VECTOR /* can be predefined on command line */ +# if defined(__AVX512F__) +# define XXH_VECTOR 3 /* AVX512 for Skylake and Icelake */ +# elif defined(__AVX2__) +# define XXH_VECTOR 2 /* AVX2 for Haswell and Bulldozer */ +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define XXH_VECTOR 1 /* SSE2 for Pentium 4 and all x86_64 */ +# else +# define XXH_VECTOR 0 /* Portable scalar version */ +# endif #endif -/* ************************************* - * Compiler Specific Options - ***************************************/ -#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -namespace xxh { -/* ************************************* - * Version - ***************************************/ -constexpr int cpp_version_major = 0; -constexpr int cpp_version_minor = 6; -constexpr int cpp_version_release = 5; -constexpr uint32_t version_number() -{ - return cpp_version_major * 10000 + cpp_version_minor * 100 + cpp_version_release; -} + constexpr int vector_mode = XXH_VECTOR; + +#if XXH_VECTOR == 3 /* AVX512 for Skylake and Icelake */ + constexpr int acc_align = 64; + using avx512_underlying = __m512i; + using avx2_underlying = __m256i; + using sse2_underlying = __m128i; +#elif XXH_VECTOR == 2 /* AVX2 for Haswell and Bulldozer */ + constexpr int acc_align = 32; + using avx512_underlying = void; + using avx2_underlying = __m256i; + using sse2_underlying = __m128i; +#elif XXH_VECTOR == 1 /* SSE2 for Pentium 4 and all x86_64 */ + using avx512_underlying = void; + using avx2_underlying = void; //std::array<__m128i, 2>; + using sse2_underlying = __m128i; + constexpr int acc_align = 16; +#else /* Portable scalar version */ + using avx512_underlying = void; + using avx2_underlying = void; //std::array; + using sse2_underlying = void; //std::array; + constexpr int acc_align = 8; +#endif -namespace hash_t_impl { -/* ************************************* - * Basic Types - Detail - ***************************************/ - -using _hash32_underlying = uint32_t; -using _hash64_underlying = uint64_t; - -template struct hash_type { - using type = void; -}; -template <> struct hash_type<32> { - using type = _hash32_underlying; -}; -template <> struct hash_type<64> { - using type = _hash64_underlying; -}; -} // namespace hash_t_impl - -/* ************************************* - * Basic Types - Public - ***************************************/ - -template using hash_t = typename hash_t_impl::hash_type::type; -using hash32_t = hash_t<32>; -using hash64_t = hash_t<64>; - -/* ************************************* - * Bit Functions - Public - ***************************************/ - -namespace bit_ops { -/* **************************************** - * Intrinsics and Bit Operations - ******************************************/ -#if defined(_MSC_VER) -inline uint32_t rotl32(uint32_t x, int32_t r) -{ - return _rotl(x, r); -} -inline uint64_t rotl64(uint64_t x, int32_t r) -{ - return _rotl64(x, r); -} -#else -inline uint32_t rotl32(uint32_t 
x, int32_t r) -{ - return ((x << r) | (x >> (32 - r))); -} -inline uint64_t rotl64(uint64_t x, int32_t r) -{ - return ((x << r) | (x >> (64 - r))); -} + /* Compiler Specifics + * Defines inline macros and includes specific compiler's instrinsics. + * */ +#ifdef XXH_FORCE_INLINE /* First undefining the symbols in case they're already defined */ +# undef XXH_FORCE_INLINE +#endif +#ifdef XXH_NO_INLINE +# undef XXH_NO_INLINE #endif -#if defined(_MSC_VER) /* Visual Studio */ -inline uint32_t swap32(uint32_t x) -{ - return _byteswap_ulong(x); -} -inline uint64_t swap64(uint64_t x) -{ - return _byteswap_uint64(x); -} -#elif XXH_GCC_VERSION >= 403 -inline uint32_t swap32(uint32_t x) -{ - return __builtin_bswap32(x); -} -inline uint64_t swap64(uint64_t x) -{ - return __builtin_bswap64(x); -} +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) +# define XXH_FORCE_INLINE static __forceinline +# define XXH_NO_INLINE static __declspec(noinline) +# include +#elif defined(__GNUC__) /* Clang / GCC */ +# define XXH_FORCE_INLINE static inline __attribute__((always_inline)) +# define XXH_NO_INLINE static __attribute__((noinline)) +# include #else -inline uint32_t swap32(uint32_t x) -{ - return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); -} -inline uint64_t swap64(uint64_t x) -{ - return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} +# define XXH_FORCE_INLINE static inline +# define XXH_NO_INLINE static #endif -template inline hash_t rotl(hash_t n, int32_t r){}; - -template <> inline hash_t<32> rotl<32>(hash_t<32> n, int32_t r) -{ - return rotl32(n, r); -}; - -template <> inline hash_t<64> rotl<64>(hash_t<64> n, int32_t r) -{ - return rotl64(n, r); -}; -template inline hash_t swap(hash_t n){}; -template <> inline hash_t<32> swap<32>(hash_t<32> n) -{ - return swap32(n); -}; - -template <> inline hash_t<64> swap<64>(hash_t<64> n) -{ - return swap64(n); -}; -} // namespace bit_ops - -/* ************************************* - * Memory Functions - Public - ***************************************/ - -enum class alignment : uint8_t { aligned, unaligned }; -enum class endianness : uint8_t { big_endian = 0, little_endian = 1, unspecified = 2 }; - -namespace mem_ops { -/* ************************************* - * Memory Access - ***************************************/ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) - -/* Force direct memory access. 
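A hedged aside on the rotate helpers above: the portable `(x << r) | (x >> (32 - r))` form is only defined for 0 < r < 32 (r == 0 would shift by the full width), which every rotation xxHash performs satisfies. The masked variant below is the shape compilers recognize and fold to a single ROL instruction; C++20 later standardized it as std::rotl.

#include <cstdint>

// Rotation well-defined for every r, including r == 0; GCC, Clang and MSVC
// all compile this pattern to one rotate instruction.
static inline uint32_t rotl32_any(uint32_t x, unsigned r)
{
    return (x << (r & 31)) | (x >> ((32 - r) & 31));
}
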
Only works on CPU which support unaligned memory access in hardware */ -template inline hash_t read_unaligned(const void* memPtr) -{ - return *(const hash_t*)memPtr; -} - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -template using unalign = union { - hash_t uval; -} __attribute((packed)); - -template inline hash_t read_unaligned(const void* memPtr) -{ - return ((const unalign*)memPtr)->uval; -} + /* Prefetch + * Can be disabled by defining XXH_NO_PREFETCH + */ +#if defined(XXH_NO_PREFETCH) + XXH_FORCE_INLINE void prefetch(const void* ptr) {} +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + XXH_FORCE_INLINE void prefetch(const void* ptr) { _mm_prefetch((const char*)(ptr), _MM_HINT_T0); } +#elif defined(__GNUC__) + XXH_FORCE_INLINE void prefetch(const void* ptr) { __builtin_prefetch((ptr), 0, 3); } #else + XXH_FORCE_INLINE void prefetch(const void* ptr) {} +#endif -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ -template inline hash_t read_unaligned(const void* memPtr) -{ - hash_t val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - -inline hash_t<32> read32(const void* memPtr) -{ - return read_unaligned<32>(memPtr); -} -inline hash_t<64> read64(const void* memPtr) -{ - return read_unaligned<64>(memPtr); -} - -/* ************************************* - * Architecture Macros - ***************************************/ - -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ - -#ifndef XXH_CPU_LITTLE_ENDIAN - -inline endianness get_endian(endianness endian) -{ - static struct _dummy_t { - std::array endian_lookup = {endianness::big_endian, endianness::little_endian, - endianness::unspecified}; - const int g_one = 1; - _dummy_t() { endian_lookup[2] = static_cast(*(const char*)(&g_one)); } - } _dummy; - - return _dummy.endian_lookup[(uint8_t)endian]; -} -inline bool is_little_endian() -{ - return get_endian(endianness::unspecified) == endianness::little_endian; -} + /* Restrict + * Defines macro for restrict, which in C++ is sadly just a compiler extension (for now). + * Can be disabled by defining XXH_NO_RESTRICT + */ +#ifdef XXH_RESTRICT +# undef XXH_RESTRICT +#endif +#if (defined(__GNUC__) || defined(_MSC_VER)) && defined(__cplusplus) && !defined(XXH_NO_RESTRICT) +# define XXH_RESTRICT __restrict #else -constexpr endianness get_endian(endianness endian) -{ - constexpr std::array endian_lookup = { - {endianness::big_endian, endianness::little_endian, - (XXH_CPU_LITTLE_ENDIAN) ? endianness::little_endian : endianness::big_endian}}; - return endian_lookup[static_cast(endian)]; -} - -constexpr bool is_little_endian() -{ - return get_endian(endianness::unspecified) == endianness::little_endian; -} - +# define XXH_RESTRICT #endif -/* *************************** - * Memory reads - *****************************/ - -template inline hash_t readLE_align(const void* ptr, endianness endian, alignment align) -{ - if (align == alignment::unaligned) { - return endian == endianness::little_endian ? read_unaligned(ptr) : bit_ops::swap(read_unaligned(ptr)); - } else { - return endian == endianness::little_endian ? 
*reinterpret_cast*>(ptr) - : bit_ops::swap(*reinterpret_cast*>(ptr)); - } -} - -template inline hash_t readLE(const void* ptr, endianness endian) -{ - return readLE_align(ptr, endian, alignment::unaligned); -} - -template inline hash_t readBE(const void* ptr) -{ - return is_little_endian() ? bit_ops::swap(read_unaligned(ptr)) : read_unaligned(ptr); -} - -template inline alignment get_alignment(const void* input) -{ - return ((XXH_FORCE_ALIGN_CHECK) && ((reinterpret_cast(input) & ((N / 8) - 1)) == 0)) - ? xxh::alignment::aligned - : xxh::alignment::unaligned; -} -} // namespace mem_ops - -/* ******************************************************************* - * Hash functions - *********************************************************************/ - -namespace detail { -/* ******************************************************************* - * Hash functions - Implementation - *********************************************************************/ - -constexpr static std::array primes32 = {{2654435761U, 2246822519U, 3266489917U, 668265263U, 374761393U}}; -constexpr static std::array primes64 = {{11400714785074694791ULL, 14029467366897019727ULL, - 1609587929392839161ULL, 9650029242287828579ULL, - 2870177450012600261ULL}}; - -template constexpr hash_t PRIME(int32_t n){}; - -template <> constexpr hash32_t PRIME<32>(int32_t n) -{ - return primes32[n - 1]; -} - -template <> constexpr hash64_t PRIME<64>(int32_t n) -{ - return primes64[n - 1]; -} -template inline hash_t round(hash_t seed, hash_t input) -{ - seed += input * PRIME(2); - seed = bit_ops::rotl(seed, ((N == 32) ? 13 : 31)); - seed *= PRIME(1); - return seed; -} + /* Likely / Unlikely + * Defines macros for Likely / Unlikely, which are official in C++20, but sadly this library aims the previous standard. + * Not present on MSVC. 
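+	* (Sketch of intended use, assuming the macros defined just below: wrapping
+	* the dominant outcome of a test, e.g.
+	*     if (XXH_likely(len > 8)) return len_9to16(...);
+	* expands to __builtin_expect(len > 8, 1) on GCC/Clang/ICC and decays to a
+	* plain (len > 8) elsewhere, so it is purely an optimization hint.)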
+ * Can be disabled by defining XXH_NO_BRANCH_HINTS + */ +#if ((defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)) && !defined(XXH_NO_BRANCH_HINTS) +# define XXH_likely(x) __builtin_expect(x, 1) +# define XXH_unlikely(x) __builtin_expect(x, 0) +#else +# define XXH_likely(x) (x) +# define XXH_unlikely(x) (x) +#endif -inline hash64_t mergeRound64(hash64_t acc, hash64_t val) -{ - val = round<64>(0, val); - acc ^= val; - acc = acc * PRIME<64>(1) + PRIME<64>(4); - return acc; -} -template -inline void endian_align_sub_mergeround( -#if __cplusplus < 201703L - __attribute__((unused)) + namespace bit_ops + { +#if defined(_MSC_VER) + static inline uint32_t rotl32(uint32_t x, int32_t r) { return _rotl(x, r); } + static inline uint64_t rotl64(uint64_t x, int32_t r) { return _rotl64(x, r); } + static inline uint32_t rotr32(uint32_t x, int32_t r) { return _rotr(x, r); } + static inline uint64_t rotr64(uint64_t x, int32_t r) { return _rotr64(x, r); } #else - [[maybe_unused]] + static inline uint32_t rotl32(uint32_t x, int32_t r) { return ((x << r) | (x >> (32 - r))); } + static inline uint64_t rotl64(uint64_t x, int32_t r) { return ((x << r) | (x >> (64 - r))); } + static inline uint32_t rotr32(uint32_t x, int32_t r) { return ((x >> r) | (x << (32 - r))); } + static inline uint64_t rotr64(uint64_t x, int32_t r) { return ((x >> r) | (x << (64 - r))); } #endif - hash_t& hash_ret, - hash_t v1, hash_t v2, hash_t v3, hash_t v4){}; -template <> -inline void endian_align_sub_mergeround<64>(hash_t<64>& hash_ret, hash_t<64> v1, hash_t<64> v2, hash_t<64> v3, - hash_t<64> v4) -{ - hash_ret = mergeRound64(hash_ret, v1); - hash_ret = mergeRound64(hash_ret, v2); - hash_ret = mergeRound64(hash_ret, v3); - hash_ret = mergeRound64(hash_ret, v4); -} -template -inline hash_t endian_align_sub_ending(hash_t hash_ret, const uint8_t* p, const uint8_t* bEnd, - xxh::endianness endian, xxh::alignment align){}; +#if defined(_MSC_VER) /* Visual Studio */ + static inline uint32_t swap32(uint32_t x) { return _byteswap_ulong(x); } + static inline uint64_t swap64(uint64_t x) { return _byteswap_uint64(x); } +#elif defined(__GNUC__) + static inline uint32_t swap32(uint32_t x) { return __builtin_bswap32(x); } + static inline uint64_t swap64(uint64_t x) { return __builtin_bswap64(x); } +#else + static inline uint32_t swap32(uint32_t x) { return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); } + static inline uint64_t swap64(uint64_t x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } +#endif -template <> -inline hash_t<32> endian_align_sub_ending<32>(hash_t<32> hash_ret, const uint8_t* p, const uint8_t* bEnd, - xxh::endianness endian, xxh::alignment align) -{ - while ((p + 4) <= bEnd) { - hash_ret += mem_ops::readLE_align<32>(p, endian, align) * PRIME<32>(3); - hash_ret = bit_ops::rotl<32>(hash_ret, 17) * PRIME<32>(4); - p += 4; - } - - while (p < bEnd) { - hash_ret += (*p) * PRIME<32>(5); - hash_ret = bit_ops::rotl<32>(hash_ret, 11) * PRIME<32>(1); - p++; - } - - hash_ret ^= hash_ret >> 15; - hash_ret *= PRIME<32>(2); - hash_ret ^= hash_ret >> 13; - hash_ret *= PRIME<32>(3); - hash_ret ^= hash_ret >> 16; - - return hash_ret; -} -template <> -inline 
hash_t<64> endian_align_sub_ending<64>(hash_t<64> hash_ret, const uint8_t* p, const uint8_t* bEnd, - xxh::endianness endian, xxh::alignment align) -{ - while (p + 8 <= bEnd) { - const hash64_t k1 = round<64>(0, mem_ops::readLE_align<64>(p, endian, align)); - hash_ret ^= k1; - hash_ret = bit_ops::rotl<64>(hash_ret, 27) * PRIME<64>(1) + PRIME<64>(4); - p += 8; - } - - if (p + 4 <= bEnd) { - hash_ret ^= static_cast(mem_ops::readLE_align<32>(p, endian, align)) * PRIME<64>(1); - hash_ret = bit_ops::rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3); - p += 4; - } - - while (p < bEnd) { - hash_ret ^= (*p) * PRIME<64>(5); - hash_ret = bit_ops::rotl<64>(hash_ret, 11) * PRIME<64>(1); - p++; - } - - hash_ret ^= hash_ret >> 33; - hash_ret *= PRIME<64>(2); - hash_ret ^= hash_ret >> 29; - hash_ret *= PRIME<64>(3); - hash_ret ^= hash_ret >> 32; - - return hash_ret; -} +#if defined(_MSC_VER) && defined(_M_IX86) // Only for 32-bit MSVC. + XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return __emulu(x, y); } +#else + XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return (uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y); } +#endif -template -inline hash_t endian_align(const void* input, size_t len, hash_t seed, xxh::endianness endian, - xxh::alignment align) -{ - static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode."); - - const uint8_t* p = static_cast(input); - const uint8_t* bEnd = p + len; - hash_t hash_ret; - - if (len >= (N / 2)) { - const uint8_t* const limit = bEnd - (N / 2); - hash_t v1 = seed + PRIME(1) + PRIME(2); - hash_t v2 = seed + PRIME(2); - hash_t v3 = seed + 0; - hash_t v4 = seed - PRIME(1); - - do { - v1 = round(v1, mem_ops::readLE_align(p, endian, align)); - p += (N / 8); - v2 = round(v2, mem_ops::readLE_align(p, endian, align)); - p += (N / 8); - v3 = round(v3, mem_ops::readLE_align(p, endian, align)); - p += (N / 8); - v4 = round(v4, mem_ops::readLE_align(p, endian, align)); - p += (N / 8); - } while (p <= limit); - - hash_ret = bit_ops::rotl(v1, 1) + bit_ops::rotl(v2, 7) + bit_ops::rotl(v3, 12) + bit_ops::rotl(v4, 18); - - endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); - } else { - hash_ret = seed + PRIME(5); - } - - hash_ret += static_cast>(len); - - return endian_align_sub_ending(hash_ret, p, bEnd, endian, align); -} -} // namespace detail -template -hash_t xxhash(const void* input, size_t len, hash_t seed = 0, endianness endian = endianness::unspecified) -{ - static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - return detail::endian_align(input, len, seed, mem_ops::get_endian(endian), mem_ops::get_alignment(input)); -} +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) + __attribute__((__target__("no-sse"))) +#endif + static inline uint128_t mult64to128(uint64_t lhs, uint64_t rhs) + { -template -hash_t xxhash(const std::basic_string& input, hash_t seed = 0, endianness endian = endianness::unspecified) -{ - static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - return detail::endian_align(static_cast(input.data()), input.length() * sizeof(T), seed, - mem_ops::get_endian(endian), - mem_ops::get_alignment(static_cast(input.data()))); -} +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) -template -hash_t xxhash(ContiguousIterator begin, ContiguousIterator end, hash_t seed = 0, - endianness endian = endianness::unspecified) -{ - 
static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - using T = typename std::decay_t; - return detail::endian_align(static_cast(&*begin), (end - begin) * sizeof(T), seed, - mem_ops::get_endian(endian), - mem_ops::get_alignment(static_cast(&*begin))); -} + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + uint128_t r128; + r128.low64 = (uint64_t)(product); + r128.high64 = (uint64_t)(product >> 64); + return r128; -template -hash_t xxhash(const std::vector& input, hash_t seed = 0, endianness endian = endianness::unspecified) -{ - static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - return detail::endian_align(static_cast(input.data()), input.size() * sizeof(T), seed, - mem_ops::get_endian(endian), - mem_ops::get_alignment(static_cast(input.data()))); -} +#elif defined(_M_X64) || defined(_M_IA64) -template -hash_t xxhash(const std::array& input, hash_t seed = 0, endianness endian = endianness::unspecified) -{ - static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - return detail::endian_align(static_cast(input.data()), AN * sizeof(T), seed, - mem_ops::get_endian(endian), - mem_ops::get_alignment(static_cast(input.data()))); -} +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + uint64_t product_high; + uint64_t const product_low = _umul128(lhs, rhs, &product_high); + uint128_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; -template -hash_t xxhash(const std::initializer_list& input, hash_t seed = 0, endianness endian = endianness::unspecified) -{ - static_assert(!(N != 32 && N != 64), "You can only call xxhash in 32 or 64 bit mode."); - return detail::endian_align(static_cast(input.begin()), input.size() * sizeof(T), seed, - mem_ops::get_endian(endian), - mem_ops::get_alignment(static_cast(input.begin()))); +#else + uint64_t const lo_lo = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + uint64_t const hi_lo = bit_ops::mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + uint64_t const lo_hi = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + uint64_t const hi_hi = bit_ops::mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. 
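A sketch of the bound: each 32x32 partial product is at most
(2^32 - 1)^2 = 2^64 - 2^33 + 1, and the two 32-bit halves carried into it
add at most 2 * (2^32 - 1) = 2^33 - 2 more, so
(lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi <= 2^64 - 2
still fits in a uint64_t; the same bound covers 'upper'.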
*/ + uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + uint128_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif + } + } + } + + + /* ************************************* + * Basic Types - Everything else + ***************************************/ + + namespace typedefs + { + /* ************************************* + * Basic Types - Detail + ***************************************/ + + template + struct hash_type + { + using type = void; + }; + + template <> + struct hash_type<32> + { + using type = uint32_t; + }; + + template <> + struct hash_type<64> + { + using type = uint64_t; + }; + + template <> + struct hash_type<128> + { + using type = uint128_t; + }; + + + template + struct vec_type + { + using type = void; + }; + + template <> + struct vec_type<64> + { + using type = uint64_t; + }; + + template <> + struct vec_type<128> + { + using type = intrin::sse2_underlying; + }; + + template <> + struct vec_type<256> + { + using type = intrin::avx2_underlying; + }; + + template <> + struct vec_type<512> + { + using type = intrin::avx512_underlying; + }; + + /* Rationale + * On the surface level uint_type appears to be pointless, + * as it is just a copy of hash_type. They do use the same types, + * that is true, but the reasoning for the difference is aimed at humans, + * not the compiler, as a difference between values that are 'just' numbers, + * and those that represent actual hash values. + */ + template + struct uint_type + { + using type = void; + }; + + template <> + struct uint_type<32> + { + using type = uint32_t; + }; + + template <> + struct uint_type<64> + { + using type = uint64_t; + }; + + template <> + struct uint_type<128> + { + using type = uint128_t; + }; + } + + template + using hash_t = typename typedefs::hash_type::type; + using hash32_t = hash_t<32>; + using hash64_t = hash_t<64>; + using hash128_t = hash_t<128>; + + template + using vec_t = typename typedefs::vec_type::type; + using vec64_t = vec_t<64>; + using vec128_t = vec_t<128>; + using vec256_t = vec_t<256>; + using vec512_t = vec_t<512>; + + template + using uint_t = typename typedefs::uint_type::type; + + + + /* ************************************* + * Bit Operations + ***************************************/ + + namespace bit_ops + { + /* **************************************** + * Bit Operations + ******************************************/ + + template + static inline uint_t rotl(uint_t n, int32_t r) + { + if constexpr (N == 32) + { + return intrin::bit_ops::rotl32(n, r); + } + + if constexpr (N == 64) + { + return intrin::bit_ops::rotl64(n, r); + } + } + + template + static inline uint_t rotr(uint_t n, int32_t r) + { + if constexpr (N == 32) + { + return intrin::bit_ops::rotr32(n, r); + } + + if constexpr (N == 64) + { + return intrin::bit_ops::rotr64(n, r); + } + } + + template + static inline uint_t swap(uint_t n) + { + if constexpr (N == 32) + { + return intrin::bit_ops::swap32(n); + } + + if constexpr (N == 64) + { + return intrin::bit_ops::swap64(n); + } + } + + template + static inline vec_t mul32to64(vec_t x, vec_t y) + { + if constexpr (N == 64) + { + return intrin::bit_ops::mult32to64(static_cast(x), static_cast(y)); + } + else + { + return 0; + } + } + + static inline uint128_t mul64to128(uint64_t x, uint64_t y) + { + return intrin::bit_ops::mult64to128(x, y); + } + + static inline uint64_t mul128fold64(uint64_t x, 
uint64_t y)
+		{
+			uint128_t product = mul64to128(x, y);
+
+			return (product.low64 ^ product.high64);
+		}
+	}
+
+
+	/* *************************************
+	*  Memory Functions
+	***************************************/
+
+	namespace mem_ops
+	{
+
+		/* *************************************
+		*  Endianness
+		***************************************/
+
+		constexpr bool is_little_endian()
+		{
+			return (XXH_CPU_LITTLE_ENDIAN == 1);
+		}
+
+
+		/* *************************************
+		*  Memory Access
+		***************************************/
+
+		template <size_t N>
+		static inline uint_t<N> read(const void* memPtr)
+		{
+			uint_t<N> val;
+
+			memcpy(&val, memPtr, sizeof(val));
+			return val;
+		}
+
+		template <size_t N>
+		static inline uint_t<N> readLE(const void* ptr)
+		{
+			if constexpr (is_little_endian())
+			{
+				return read<N>(ptr);
+			}
+			else
+			{
+				return bit_ops::swap<N>(read<N>(ptr));
+			}
+		}
+
+		template <size_t N>
+		static inline uint_t<N> readBE(const void* ptr)
+		{
+			if constexpr (is_little_endian())
+			{
+				return bit_ops::swap<N>(read<N>(ptr));
+			}
+			else
+			{
+				return read<N>(ptr);
+			}
+		}
+
+		template <size_t N>
+		static void writeLE(void* dst, uint_t<N> v)
+		{
+			if constexpr (!is_little_endian())
+			{
+				v = bit_ops::swap<N>(v);
+			}
+
+			memcpy(dst, &v, sizeof(v));
+		}
+	}
+
+
+	/* *************************************
+	*  Vector Functions
+	***************************************/
+
+	namespace vec_ops
+	{
+		template <size_t N>
+		XXH_FORCE_INLINE vec_t<N> loadu(const vec_t<N>* input)
+		{
+			static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid template argument passed to xxh::vec_ops::loadu");
+
+			if constexpr (N == 128)
+			{
+				return _mm_loadu_si128(input);
+			}
+
+			if constexpr (N == 256)
+			{
+				return _mm256_loadu_si256(input);
+			}
+
+			if constexpr (N == 512)
+			{
+				return _mm512_loadu_si512(input);
+			}
+
+			if constexpr (N == 64)
+			{
+				return mem_ops::readLE<64>(input);
+			}
+		}
+
+
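A quick stand-alone check of the mem_ops helpers above (an illustrative sketch, assuming the header is included as "xxhash.hpp"): readLE always interprets bytes as little-endian regardless of host order, so a fixed byte pattern reads to the same value on any platform, and writeLE/readLE round-trip.

#include <cassert>
#include <cstdint>
#include "xxhash.hpp"

int main()
{
    const uint8_t bytes[4] = { 0xef, 0xbe, 0xad, 0xde };
    assert(xxh::mem_ops::readLE<32>(bytes) == 0xdeadbeefU);  // same on LE and BE hosts

    uint8_t out[8] = {};
    xxh::mem_ops::writeLE<64>(out, 0x0123456789abcdefULL);
    assert(xxh::mem_ops::readLE<64>(out) == 0x0123456789abcdefULL);  // round-trip
    return 0;
}

+		// 'xorv' instead of 'xor' because 'xor' is a weird wacky alternate operator expression thing.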
+ template + XXH_FORCE_INLINE vec_t xorv(vec_t a, vec_t b) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::xorv"); + + if constexpr (N == 128) + { + return _mm_xor_si128(a, b); + } + + if constexpr (N == 256) + { + return _mm256_xor_si256(a, b); + } + + if constexpr (N == 512) + { + return _mm512_xor_si512(a, b); + } + + if constexpr (N == 64) + { + return a ^ b; + } + } + + + template + XXH_FORCE_INLINE vec_t mul(vec_t a, vec_t b) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::mul"); + + if constexpr (N == 128) + { + return _mm_mul_epu32(a, b); + } + + if constexpr (N == 256) + { + return _mm256_mul_epu32(a, b); + } + + if constexpr (N == 512) + { + return _mm512_mul_epu32(a, b); + } + + if constexpr (N == 64) + { + return a * b; + } + } + + + template + XXH_FORCE_INLINE vec_t add(vec_t a, vec_t b) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::add"); + + if constexpr (N == 128) + { + return _mm_add_epi64(a, b); + } + + if constexpr (N == 256) + { + return _mm256_add_epi64(a, b); + } + + if constexpr (N == 512) + { + return _mm512_add_epi64(a, b); + } + + if constexpr (N == 64) + { + return a + b; + } + } + + + template + XXH_FORCE_INLINE vec_t shuffle(vec_t a) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::shuffle"); + + if constexpr (N == 128) + { + return _mm_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); + } + + if constexpr (N == 256) + { + return _mm256_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); + } + + if constexpr (N == 512) + { + return _mm512_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); + } + + if constexpr (N == 64) + { + return a; + } + } + + + template + XXH_FORCE_INLINE vec_t set1(int64_t a) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::set1"); + + if constexpr (N == 128) + { + return _mm_set1_epi32(static_cast(a)); + } + + if constexpr (N == 256) + { + return _mm256_set1_epi32(static_cast(a)); + } + + if constexpr (N == 512) + { + return _mm512_set1_epi32(static_cast(a)); + } + + if constexpr (N == 64) + { + return a; + } + } + + + template + XXH_FORCE_INLINE vec_t srli(vec_t n, int a) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::srli"); + + if constexpr (N == 128) + { + return _mm_srli_epi64(n, a); + } + + if constexpr (N == 256) + { + return _mm256_srli_epi64(n, a); + } + + if constexpr (N == 512) + { + return _mm512_srli_epi64(n, a); + } + + if constexpr (N == 64) + { + return n >> a; + } + } + + + template + XXH_FORCE_INLINE vec_t slli(vec_t n, int a) + { + static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::slli"); + + if constexpr (N == 128) + { + return _mm_slli_epi64(n, a); + } + + if constexpr (N == 256) + { + return _mm256_slli_epi64(n, a); + } + + if constexpr (N == 512) + { + return _mm512_slli_epi64(n, a); + } + + if constexpr (N == 64) + { + return n << a; + } + } + } + + /* ************************************* + * Canonical represenation + ***************************************/ + + template + struct canonical_t + { + std::array digest{ 0 }; + + canonical_t(hash_t hash) + { + if constexpr (bit_mode < 128) + { + if (mem_ops::is_little_endian()) + { + hash = bit_ops::swap(hash); + } + + memcpy(digest.data(), &hash, 
sizeof(canonical_t)); + } + else + { + if (mem_ops::is_little_endian()) + { + hash.low64 = bit_ops::swap<64>(hash.low64); + hash.high64 = bit_ops::swap<64>(hash.high64); + } + + memcpy(digest.data(), &hash.high64, sizeof(hash.high64)); + memcpy(digest.data() + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); + } + } + + hash_t get_hash() const + { + if constexpr (bit_mode < 128) + { + return mem_ops::readBE(&digest); + } + else + { + return { mem_ops::readBE<64>(&digest[8]), mem_ops::readBE<64>(&digest) }; + } + } + }; + + using canonical32_t = canonical_t<32>; + using canonical64_t = canonical_t<64>; + using canonical128_t = canonical_t<128>; + + template + inline hash_t to_canonical(hash_t hash) + { + static_assert(!(bit_mode != 128 && bit_mode != 64 && bit_mode != 32), "Canonical form can only be obtained from 32, 64 and 128 bit hashes."); + canonical_t canon(hash); + hash_t res; + memcpy(&res, &canon, bit_mode / 4); + + return res; + } + + + /* ************************************* + * Algorithm Implementation - xxhash + ***************************************/ + + namespace detail + { + using namespace mem_ops; + using namespace bit_ops; + + + /* ************************************* + * Constants + ***************************************/ + + constexpr static std::array primes32 = { 2654435761U, 2246822519U, 3266489917U, 668265263U, 374761393U }; + constexpr static std::array primes64 = { 11400714785074694791ULL, 14029467366897019727ULL, 1609587929392839161ULL, 9650029242287828579ULL, 2870177450012600261ULL }; + + template + constexpr uint_t PRIME(uint64_t n) + { + if constexpr (N == 32) + { + return primes32[n - 1]; + } + else + { + return primes64[n - 1]; + } + } + + + /* ************************************* + * Functions + ***************************************/ + + template + XXH_FORCE_INLINE uint_t avalanche(uint_t hash) + { + if constexpr (N == 32) + { + hash ^= hash >> 15; + hash *= PRIME<32>(2); + hash ^= hash >> 13; + hash *= PRIME<32>(3); + hash ^= hash >> 16; + return hash; + } + else if constexpr (N == 64) + { + hash ^= hash >> 33; + hash *= PRIME<64>(2); + hash ^= hash >> 29; + hash *= PRIME<64>(3); + hash ^= hash >> 32; + return hash; + } + else return 0; + } + + template + XXH_FORCE_INLINE uint_t round(uint_t seed, uint_t input) + { + seed += input * PRIME(2); + + if constexpr (N == 32) + { + seed = rotl(seed, 13); + } + else + { + seed = rotl(seed, 31); + } + + seed *= PRIME(1); + return seed; + } + + XXH_FORCE_INLINE uint64_t mergeRound64(hash64_t acc, uint64_t val) + { + val = round<64>(0, val); + acc ^= val; + acc = acc * PRIME<64>(1) + PRIME<64>(4); + return acc; + } + + XXH_FORCE_INLINE void endian_align_sub_mergeround(hash64_t& hash_ret, uint64_t v1, uint64_t v2, uint64_t v3, uint64_t v4) + { + hash_ret = mergeRound64(hash_ret, v1); + hash_ret = mergeRound64(hash_ret, v2); + hash_ret = mergeRound64(hash_ret, v3); + hash_ret = mergeRound64(hash_ret, v4); + } + + template + static inline hash_t endian_align_sub_ending(hash_t hash_ret, const uint8_t* p, const uint8_t* bEnd) + { + if constexpr (N == 32) + { + while ((p + 4) <= bEnd) + { + hash_ret += readLE<32>(p) * PRIME<32>(3); + hash_ret = rotl<32>(hash_ret, 17) * PRIME<32>(4); + p += 4; + } + + while (p < bEnd) + { + hash_ret += (*p) * PRIME<32>(5); + hash_ret = rotl<32>(hash_ret, 11) * PRIME<32>(1); + p++; + } + + return avalanche<32>(hash_ret); + } + else + { + while (p + 8 <= bEnd) + { + const uint64_t k1 = round<64>(0, readLE<64>(p)); + + hash_ret ^= k1; + hash_ret = rotl<64>(hash_ret, 27) * 
PRIME<64>(1) + PRIME<64>(4); + p += 8; + } + + if (p + 4 <= bEnd) + { + hash_ret ^= static_cast(readLE<32>(p))* PRIME<64>(1); + hash_ret = rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3); + p += 4; + } + + while (p < bEnd) + { + hash_ret ^= (*p) * PRIME<64>(5); + hash_ret = rotl<64>(hash_ret, 11) * PRIME<64>(1); + p++; + } + + return avalanche<64>(hash_ret); + } + } + + template + static inline hash_t endian_align(const void* input, size_t len, uint_t seed) + { + static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode."); + + const uint8_t* p = static_cast(input); + const uint8_t* bEnd = p + len; + hash_t hash_ret; + + if (len >= (N / 2)) + { + const uint8_t* const limit = bEnd - (N / 2); + uint_t v1 = seed + PRIME(1) + PRIME(2); + uint_t v2 = seed + PRIME(2); + uint_t v3 = seed + 0; + uint_t v4 = seed - PRIME(1); + + do + { + v1 = round(v1, readLE(p)); + p += (N / 8); + v2 = round(v2, readLE(p)); + p += (N / 8); + v3 = round(v3, readLE(p)); + p += (N / 8); + v4 = round(v4, readLE(p)); + p += (N / 8); + } + while (p <= limit); + + hash_ret = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18); + + if constexpr (N == 64) + { + endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); + } + } + else + { + hash_ret = seed + PRIME(5); + } + + hash_ret += static_cast>(len); + + return endian_align_sub_ending(hash_ret, p, bEnd); + } + } + + + /* ************************************* + * Algorithm Implementation - xxhash3 + ***************************************/ + + namespace detail3 + { + using namespace vec_ops; + using namespace detail; + using namespace mem_ops; + using namespace bit_ops; + + + /* ************************************* + * Enums + ***************************************/ + + enum class vec_mode : uint8_t { scalar = 0, sse2 = 1, avx2 = 2, avx512 = 3 }; + + + /* ************************************* + * Constants + ***************************************/ + + constexpr uint64_t secret_default_size = 192; + constexpr uint64_t secret_size_min = 136; + constexpr uint64_t secret_consume_rate = 8; + constexpr uint64_t stripe_len = 64; + constexpr uint64_t acc_nb = 8; + constexpr uint64_t prefetch_distance = 384; + constexpr uint64_t secret_lastacc_start = 7; + constexpr uint64_t secret_mergeaccs_start = 11; + constexpr uint64_t midsize_max = 240; + constexpr uint64_t midsize_startoffset = 3; + constexpr uint64_t midsize_lastoffset = 17; + + constexpr vec_mode vector_mode = static_cast(intrin::vector_mode); + constexpr uint64_t acc_align = intrin::acc_align; + constexpr std::array vector_bit_width { 64, 128, 256, 512 }; + + + /* ************************************* + * Defaults + ***************************************/ + + alignas(64) constexpr uint8_t default_secret[secret_default_size] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 
0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, + }; + + constexpr std::array init_acc = { PRIME<32>(3), PRIME<64>(1), PRIME<64>(2), PRIME<64>(3), PRIME<64>(4), PRIME<32>(2), PRIME<64>(5), PRIME<32>(1) }; + + + /* ************************************* + * Functions + ***************************************/ + + XXH_FORCE_INLINE hash_t<64> avalanche(hash_t<64> h64) + { + constexpr uint64_t avalanche_mul_prime = 0x165667919E3779F9ULL; + + h64 ^= h64 >> 37; + h64 *= avalanche_mul_prime; + h64 ^= h64 >> 32; + return h64; + } + + XXH_FORCE_INLINE hash_t<64> rrmxmx(hash_t<64> h64, uint64_t len) + { + h64 ^= rotl<64>(h64, 49) ^ rotl<64>(h64, 24); + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 35) + len; + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 28); + return h64; + } + + XXH_FORCE_INLINE void combine_16(void* dest, hash128_t h128) + { + writeLE<64>(dest, readLE<64>(dest) ^ h128.low64); + writeLE<64>((uint8_t*)dest + 8, readLE<64>((uint8_t*)dest + 8) ^ h128.high64); + } + + XXH_FORCE_INLINE void accumulate_512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret) + { + constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)]; + + using vec_t = vec_t; + + alignas(sizeof(vec_t)) vec_t* const xacc = static_cast(acc); + const vec_t* const xinput = static_cast(input); + const vec_t* const xsecret = static_cast(secret); + + for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) + { + vec_t const data_vec = loadu(xinput + i); + vec_t const key_vec = loadu(xsecret + i); + vec_t const data_key = xorv(data_vec, key_vec); + vec_t product = set1(0); + + if constexpr (vector_mode == vec_mode::scalar) + { + product = mul32to64(srli(slli(data_key, 32),32), srli(data_key, 32)); + xacc[i ^ 1] = add(xacc[i ^ 1], data_vec); + xacc[i] = add(xacc[i], product); + } + else + { + vec_t const data_key_lo = shuffle(data_key); + product = mul(data_key, data_key_lo); + + vec_t const data_swap = shuffle(data_vec); + vec_t const sum = add(xacc[i], data_swap); + xacc[i] = add(sum, product); + } + } + } + + XXH_FORCE_INLINE void scramble_acc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) + { + constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)];; + + using vec_t = vec_t; + + alignas(sizeof(vec_t)) vec_t* const xacc = (vec_t*)acc; + const vec_t* const xsecret = (const vec_t*)secret; + + for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) + { + vec_t const acc_vec = xacc[i]; + vec_t const shifted = srli(acc_vec, 47); + vec_t const data_vec = xorv(acc_vec, shifted); + vec_t const key_vec = loadu(xsecret + i); + vec_t const data_key = xorv(data_vec, key_vec); + + if constexpr (vector_mode == vec_mode::scalar) + { + xacc[i] = mul(data_key, set1(PRIME<32>(1))); + } + else + { + vec_t const prime32 = set1(PRIME<32>(1)); + vec_t const data_key_hi = shuffle(data_key); + vec_t const prod_lo = mul(data_key, prime32); + vec_t const prod_hi = mul(data_key_hi, prime32); + + xacc[i] = add(prod_lo, vec_ops::slli(prod_hi, 32)); + } + } + } + + XXH_FORCE_INLINE void accumulate(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, size_t nbStripes) + { + 
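// Editor's note on the loop below: each iteration consumes one 64-byte
// stripe of input, prefetching prefetch_distance (384) bytes ahead and
// advancing the secret by secret_consume_rate (8) bytes, so consecutive
// stripes are keyed with overlapping but distinct secret material.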
for (size_t n = 0; n < nbStripes; n++) + { + const uint8_t* const in = input + n * stripe_len; + + intrin::prefetch(in + prefetch_distance); + accumulate_512(acc, in, secret + n * secret_consume_rate); + } + } + + XXH_FORCE_INLINE void hash_long_internal_loop(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, size_t secretSize) + { + size_t const nb_rounds = (secretSize - stripe_len) / secret_consume_rate; + size_t const block_len = stripe_len * nb_rounds; + size_t const nb_blocks = (len-1) / block_len; + + for (size_t n = 0; n < nb_blocks; n++) + { + accumulate(acc, input + n * block_len, secret, nb_rounds); + scramble_acc(acc, secret + secretSize - stripe_len); + } + + /* last partial block */ + size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / stripe_len; + + accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); + + /* last stripe */ + const uint8_t* const p = input + len - stripe_len; + + accumulate_512(acc, p, secret + secretSize - stripe_len - secret_lastacc_start); + } + + XXH_FORCE_INLINE uint64_t mix_2_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret) + { + return mul128fold64(acc[0] ^ readLE<64>(secret), acc[1] ^ readLE<64>(secret + 8)); + } + + XXH_FORCE_INLINE uint64_t merge_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret, uint64_t start) + { + uint64_t result64 = start; + + result64 += mix_2_accs(acc + 0, secret + 0); + result64 += mix_2_accs(acc + 2, secret + 16); + result64 += mix_2_accs(acc + 4, secret + 32); + result64 += mix_2_accs(acc + 6, secret + 48); + + return avalanche(result64); + } + + XXH_FORCE_INLINE void init_custom_secret(uint8_t* customSecret, uint64_t seed) + { + for (uint64_t i = 0; i < secret_default_size / 16; i++) + { + writeLE<64>(customSecret + i * 16, readLE<64>(default_secret + i * 16) + seed); + writeLE<64>(customSecret + i * 16 + 8, readLE<64>(default_secret + i * 16 + 8) - seed); + } + } + + template + XXH_FORCE_INLINE hash_t len_1to3(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) + { + if constexpr (N == 64) + { + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combined = ((uint32_t)c1 << 16) | (((uint32_t)c2) << 24) | (((uint32_t)c3) << 0) | (((uint32_t)len) << 8); + uint64_t const bitflip = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; + uint64_t const keyed = (uint64_t)combined ^ bitflip; + return detail::avalanche<64>(keyed); + } + else + { + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combinedl = ((uint32_t)c1 << 16) + (((uint32_t)c2) << 24) + (((uint32_t)c3) << 0) + (((uint32_t)len) << 8); + uint32_t const combinedh = rotl<32>(swap<32>(combinedl), 13); + uint64_t const bitflipl = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; + uint64_t const bitfliph = (readLE<32>(secret + 8) ^ readLE<32>(secret + 12)) - seed; + uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; + uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; + hash128_t const h128 = { detail::avalanche<64>(keyed_lo), detail::avalanche<64>(keyed_hi)}; + + return h128; + } + } + + template + XXH_FORCE_INLINE hash_t len_4to8(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) + { + constexpr uint64_t mix_constant = 0x9FB21C651E98DF25ULL; + + seed ^= (uint64_t)swap<32>((uint32_t)seed) << 32; + + if constexpr (N == 64) + { + 
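// Editor's note: a 4..8 byte input is read as two possibly overlapping
// 32-bit halves (from the start and the end), packed into one 64-bit
// word, xored with a seed-dependent "bitflip" drawn from the secret,
// then finalized with rrmxmx.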
uint32_t const input1 = readLE<32>(input); + uint32_t const input2 = readLE<32>(input + len - 4); + uint64_t const bitflip = (readLE<64>(secret + 8) ^ readLE<64>(secret + 16)) - seed; + uint64_t const input64 = input2 + ((uint64_t)input1 << 32); + uint64_t keyed = input64 ^ bitflip; + + return rrmxmx(keyed, len); + } + else + { + uint32_t const input_lo = readLE<32>(input); + uint32_t const input_hi = readLE<32>(input + len - 4); + uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32); + uint64_t const bitflip = (readLE<64>(secret + 16) ^ readLE<64>(secret + 24)) + seed; + uint64_t const keyed = input_64 ^ bitflip; + uint128_t m128 = mul64to128(keyed, PRIME<64>(1) + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + m128.low64 ^= (m128.low64 >> 35); + m128.low64 *= mix_constant; + m128.low64 ^= (m128.low64 >> 28); + m128.high64 = avalanche(m128.high64); + + return m128; + } + } + + template + XXH_FORCE_INLINE hash_t len_9to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) + { + if constexpr (N == 64) + { + uint64_t const bitflip1 = (readLE<64>(secret + 24) ^ readLE<64>(secret + 32)) + seed; + uint64_t const bitflip2 = (readLE<64>(secret + 40) ^ readLE<64>(secret + 48)) - seed; + uint64_t const input_lo = readLE<64>(input) ^ bitflip1; + uint64_t const input_hi = readLE<64>(input + len - 8) ^ bitflip2; + uint64_t const acc = len + swap<64>(input_lo) + input_hi + mul128fold64(input_lo, input_hi); + + return avalanche(acc); + } + else + { + uint64_t const bitflipl = (readLE<64>(secret + 32) ^ readLE<64>(secret + 40)) - seed; + uint64_t const bitfliph = (readLE<64>(secret + 48) ^ readLE<64>(secret + 56)) + seed; + uint64_t const input_lo = readLE<64>(input); + uint64_t input_hi = readLE<64>(input + len - 8); + uint128_t m128 = mul64to128(input_lo ^ input_hi ^ bitflipl, PRIME<64>(1)); + + m128.low64 += (uint64_t)(len - 1) << 54; + input_hi ^= bitfliph; + + if constexpr (sizeof(void*) < sizeof(uint64_t)) // 32-bit version + { + m128.high64 += (input_hi & 0xFFFFFFFF00000000) + mul32to64((uint32_t)input_hi, PRIME<32>(2)); + } + else + { + m128.high64 += input_hi + mul32to64((uint32_t)input_hi, PRIME<32>(2) - 1); + } + + m128.low64 ^= swap<64>(m128.high64); + + hash128_t h128 = mul64to128(m128.low64, PRIME<64>(2)); + + h128.high64 += m128.high64 * PRIME<64>(2); + h128.low64 = avalanche(h128.low64); + h128.high64 = avalanche(h128.high64); + + return h128; + } + } + + template + XXH_FORCE_INLINE hash_t len_0to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) + { + if (XXH_likely(len > 8)) + { + return len_9to16(input, len, secret, seed); + } + else if (XXH_likely(len >= 4)) + { + return len_4to8(input, len, secret, seed); + } + else if (len) + { + return len_1to3(input, len, secret, seed); + } + else + { + if constexpr (N == 64) + { + return detail::avalanche<64>((seed) ^ (readLE<64>(secret + 56) ^ readLE<64>(secret + 64))); + } + else + { + uint64_t const bitflipl = readLE<64>(secret + 64) ^ readLE<64>(secret + 72); + uint64_t const bitfliph = readLE<64>(secret + 80) ^ readLE<64>(secret + 88); + + return hash128_t(detail::avalanche<64>(( seed) ^ bitflipl), detail::avalanche<64>(( seed) ^ bitfliph)); + } + } + } + + template + XXH_FORCE_INLINE hash_t hash_long_internal(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret = default_secret, size_t secretSize = sizeof(default_secret)) + { + alignas(acc_align) std::array acc = init_acc; + + if constexpr (N == 64) + { + 
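// Editor's note: the long-input path below fills the 8-lane accumulator
// over all full stripes, then merge_accs folds the lanes together with
// secret material and a length-dependent start value to produce the
// final 64-bit digest.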
hash_long_internal_loop(acc.data(), input, len, secret, secretSize); + + /* converge into final hash */ + return merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); + } + else + { + hash_long_internal_loop(acc.data(), input, len, secret, secretSize); + + /* converge into final hash */ + uint64_t const low64 = merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); + uint64_t const high64 = merge_accs(acc.data(), secret + secretSize - sizeof(acc) - secret_mergeaccs_start, ~((uint64_t)len * PRIME<64>(2))); + + return hash128_t(low64, high64); + } + } + + XXH_FORCE_INLINE uint64_t mix_16b(const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, uint64_t seed) + { + uint64_t const input_lo = readLE<64>(input); + uint64_t const input_hi = readLE<64>(input + 8); + + return mul128fold64(input_lo ^ (readLE<64>(secret) + seed), input_hi ^ (readLE<64>(secret + 8) - seed)); + } + + XXH_FORCE_INLINE uint128_t mix_32b(uint128_t acc, const uint8_t* input1, const uint8_t* input2, const uint8_t* secret, uint64_t seed) + { + acc.low64 += mix_16b(input1, secret + 0, seed); + acc.low64 ^= readLE<64>(input2) + readLE<64>(input2 + 8); + acc.high64 += mix_16b(input2, secret + 16, seed); + acc.high64 ^= readLE<64>(input1) + readLE<64>(input1 + 8); + + return acc; + } + + template + XXH_FORCE_INLINE hash_t len_17to128(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) + { + if constexpr (N == 64) + { + hash64_t acc = len * PRIME<64>(1); + + if (len > 32) + { + if (len > 64) + { + if (len > 96) + { + acc += mix_16b(input + 48, secret + 96, seed); + acc += mix_16b(input + len - 64, secret + 112, seed); + } + + acc += mix_16b(input + 32, secret + 64, seed); + acc += mix_16b(input + len - 48, secret + 80, seed); + } + + acc += mix_16b(input + 16, secret + 32, seed); + acc += mix_16b(input + len - 32, secret + 48, seed); + } + + acc += mix_16b(input + 0, secret + 0, seed); + acc += mix_16b(input + len - 16, secret + 16, seed); + + return avalanche(acc); + } + else + { + hash128_t acc = { len * PRIME<64>(1), 0 }; + + if (len > 32) + { + if (len > 64) + { + if (len > 96) + { + acc = mix_32b(acc, input + 48, input + len - 64, secret + 96, seed); + } + + acc = mix_32b(acc, input + 32, input + len - 48, secret + 64, seed); + } + + acc = mix_32b(acc, input + 16, input + len - 32, secret + 32, seed); + } + + acc = mix_32b(acc, input, input + len - 16, secret, seed); + + uint64_t const low64 = acc.low64 + acc.high64; + uint64_t const high64 = (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); + + return { avalanche(low64), (uint64_t)0 - avalanche(high64) }; + } + } + + template + XXH_NO_INLINE hash_t len_129to240(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) + { + if constexpr (N == 64) + { + uint64_t acc = len * PRIME<64>(1); + size_t const nbRounds = len / 16; + + for (size_t i = 0; i < 8; i++) + { + acc += mix_16b(input + (i * 16), secret + (i * 16), seed); + } + + acc = avalanche(acc); + + for (size_t i = 8; i < nbRounds; i++) + { + acc += mix_16b(input + (i * 16), secret + ((i - 8) * 16) + midsize_startoffset, seed); + } + + /* last bytes */ + acc += mix_16b(input + len - 16, secret + secret_size_min - midsize_lastoffset, seed); + + return avalanche(acc); + } + else + { + hash128_t acc; + uint64_t const nbRounds = len / 32; + + acc.low64 = len * PRIME<64>(1); + acc.high64 = 0; + + for (size_t i = 0; i < 4; 
+
+        template <size_t N>
+        XXH_NO_INLINE hash_t<N> xxhash3_impl(const void* XXH_RESTRICT input, size_t len, hash64_t seed, const void* XXH_RESTRICT secret = default_secret, size_t secretSize = secret_default_size)
+        {
+
+            alignas(64) uint8_t custom_secret[secret_default_size];
+
+            const void* short_secret = secret;
+
+            if (seed != 0)
+            {
+                init_custom_secret(custom_secret, seed);
+                short_secret = default_secret;
+            }
+
+            if (len <= 16)
+            {
+                return len_0to16<N>(static_cast<const uint8_t*>(input), len, static_cast<const uint8_t*>(short_secret), seed);
+            }
+            else if (len <= 128)
+            {
+                return len_17to128<N>(static_cast<const uint8_t*>(input), len, static_cast<const uint8_t*>(short_secret), seed);
+            }
+            else if (len <= midsize_max)
+            {
+                return len_129to240<N>(static_cast<const uint8_t*>(input), len, static_cast<const uint8_t*>(short_secret), seed);
+            }
+            else
+            {
+                return hash_long_internal<N>(static_cast<const uint8_t*>(input), len, static_cast<const uint8_t*>(((seed == 0) ? secret : ((secret == default_secret) ? custom_secret : secret))), ((seed == 0) ? secretSize : ((secret == default_secret) ? secret_default_size : secretSize)));
+            }
+        }
+
+        XXH_NO_INLINE void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed, size_t seed_size)
+        {
+            if (seed_size == 0)
+            {
+                custom_seed = default_secret;
+                seed_size = secret_default_size;
+            }
+
+            size_t pos = 0;
+            while (pos < secret_size)
+            {
+                size_t const copy_len = std::min(secret_size - pos, seed_size);
+                memcpy((uint8_t*)secret_buffer + pos, custom_seed, copy_len);
+                pos += copy_len;
+            }
+
+            size_t const nbseg16 = secret_size / 16;
+            canonical128_t scrambled(xxhash3_impl<128>(custom_seed, seed_size, 0));
+            for (size_t n = 0; n < nbseg16; n++)
+            {
+                hash128_t const h128 = xxhash3_impl<128>(&scrambled, sizeof(scrambled), n);
+                combine_16((uint8_t*)secret_buffer + n * 16, h128);
+            }
+
+            combine_16((uint8_t*)secret_buffer + secret_size - 16, scrambled.get_hash());
+        }
+    }
+
+
+    /* *************************************
+    *  Public Access Point - xxhash
+    ***************************************/
+
+    template <size_t bit_mode>
+    inline hash_t<bit_mode> xxhash(const void* input, size_t len, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        return detail::endian_align<bit_mode>(input, len, seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash(const std::basic_string<T>& input, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        return detail::endian_align<bit_mode>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename ContiguousIterator>
+    inline hash_t<bit_mode> xxhash(ContiguousIterator begin, ContiguousIterator end, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        using T = typename std::decay_t<decltype(*end)>;
+        return detail::endian_align<bit_mode>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash(const std::vector<T>& input, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        return detail::endian_align<bit_mode>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T, size_t AN>
+    inline hash_t<bit_mode> xxhash(const std::array<T, AN>& input, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        return detail::endian_align<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash(const std::initializer_list<T>& input, uint_t<bit_mode> seed = 0)
+    {
+        static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes.");
+        return detail::endian_align<bit_mode>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed);
+    }
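+
+    /* Usage sketch (editor's illustration, not part of the upstream header):
+    *  one-shot hashing through the xxhash() overloads above, for raw buffers,
+    *  strings and contiguous ranges. Assumes C++17 and that this file is
+    *  included as "xxhash.hpp"; the local names (s, v, h32, h64, hv) are
+    *  arbitrary.
+    *
+    *      #include "xxhash.hpp"
+    *      #include <string>
+    *      #include <vector>
+    *
+    *      void example()
+    *      {
+    *          std::string s = "hello world";
+    *          auto h32 = xxh::xxhash<32>(s);                 // XXH32 of the string bytes
+    *          auto h64 = xxh::xxhash<64>(s, 42);             // XXH64 with seed 42
+    *
+    *          std::vector<int> v{1, 2, 3};
+    *          auto hv = xxh::xxhash<64>(v.begin(), v.end()); // contiguous-iterator overload
+    *      }
+    */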
+
+
+    /* *************************************
+    *  Public Access Point - xxhash3
+    ***************************************/
+
+    template <size_t bit_mode>
+    inline hash_t<bit_mode> xxhash3(const void* input, size_t len, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(input, len, seed);
+    }
+
+    template <size_t bit_mode>
+    inline hash_t<bit_mode> xxhash3(const void* input, size_t len, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(input, len, seed, secret, secretSize);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::basic_string<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.length() * sizeof(T), seed, secret, secretSize);
+    }
+
+    template <size_t bit_mode, typename ContiguousIterator>
+    inline hash_t<bit_mode> xxhash3(ContiguousIterator begin, ContiguousIterator end, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        using T = typename std::decay_t<decltype(*end)>;
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename ContiguousIterator>
+    inline hash_t<bit_mode> xxhash3(ContiguousIterator begin, ContiguousIterator end, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        using T = typename std::decay_t<decltype(*end)>;
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), seed, secret, secretSize);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::vector<T>& input, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::vector<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), input.size() * sizeof(T), seed, secret, secretSize);
+    }
+
+    template <size_t bit_mode, typename T, size_t AN>
+    inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T, size_t AN>
+    inline hash_t<bit_mode> xxhash3(const std::array<T, AN>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.data()), AN * sizeof(T), seed, secret, secretSize);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed);
+    }
+
+    template <size_t bit_mode, typename T>
+    inline hash_t<bit_mode> xxhash3(const std::initializer_list<T>& input, const void* secret, size_t secretSize, uint64_t seed = 0)
+    {
+        static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes.");
+        return detail3::xxhash3_impl<bit_mode>(static_cast<const void*>(input.begin()), input.size() * sizeof(T), seed, secret, secretSize);
+    }
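+
+    /* Usage sketch (editor's illustration, not part of the upstream header):
+    *  one-shot XXH3 hashing via the overloads above. The secret-based overloads
+    *  expect a secret of at least detail3::secret_size_min bytes (136 in
+    *  upstream xxHash -- treat that value as an assumption to check against
+    *  this header).
+    *
+    *      std::string s = "hello world";
+    *      auto h64  = xxh::xxhash3<64>(s);        // 64-bit XXH3
+    *      auto h128 = xxh::xxhash3<128>(s, 42);   // 128-bit XXH3, seed 42
+    *      // h128.low64 / h128.high64 hold the two halves of the 128-bit result
+    */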
+
+
+    /* *************************************
+    *  Secret Generation Functions
+    ***************************************/
+
+    inline void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed = detail3::default_secret, size_t seed_length = 0)
+    {
+        detail3::generate_secret(secret_buffer, secret_size, custom_seed, seed_length);
+    }
+
+    template <typename T, size_t AN>
+    inline void generate_secret(void* secret_buffer, size_t secret_size, const std::array<T, AN>& custom_seed)
+    {
+        detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), AN * sizeof(T));
+    }
+
+    template <typename T>
+    inline void generate_secret(void* secret_buffer, size_t secret_size, const std::initializer_list<T>& custom_seed)
+    {
+        detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.begin()), custom_seed.size() * sizeof(T));
+    }
+
+    template <typename T>
+    inline void generate_secret(void* secret_buffer, size_t secret_size, const std::vector<T>& custom_seed)
+    {
+        detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), custom_seed.size() * sizeof(T));
+    }
+
+    template <typename T>
+    inline void generate_secret(void* secret_buffer, size_t secret_size, const std::basic_string<T>& custom_seed)
+    {
+        detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(custom_seed.data()), custom_seed.length() * sizeof(T));
+    }
+
+    template <typename ContiguousIterator>
+    inline void generate_secret(void* secret_buffer, size_t secret_size, ContiguousIterator begin, ContiguousIterator end)
+    {
+        using T = typename std::decay_t<decltype(*end)>;
+        detail3::generate_secret(secret_buffer, secret_size, static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
+    }
+
+    inline void generate_secret_from_seed(void* secret_buffer, uint64_t seed = 0)
+    {
+        alignas(64) uint8_t custom_secret[detail3::secret_default_size];
+        detail3::init_custom_secret(custom_secret, seed);
+        memcpy(secret_buffer, custom_secret, detail3::secret_default_size);
+    }
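+
+    /* Usage sketch (editor's illustration, not part of the upstream header):
+    *  deriving a secret for the secret-based xxhash3() overloads. The buffer
+    *  sizes are assumptions: upstream XXH3 requires at least 136 bytes of
+    *  secret, and generate_secret_from_seed() writes exactly
+    *  detail3::secret_default_size (192 upstream) bytes. `data`/`len` stand
+    *  for any input buffer.
+    *
+    *      std::array<uint8_t, 192> secret;
+    *      xxh::generate_secret(secret.data(), secret.size(), "low entropy seed", 16);
+    *
+    *      std::array<uint8_t, 192> secret2;
+    *      xxh::generate_secret_from_seed(secret2.data(), 0xDEADBEEFULL);
+    *
+    *      auto h = xxh::xxhash3<64>(data, len, secret.data(), secret.size());
+    */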
+
+
+    /* *************************************
+    *  Hash streaming - xxhash
+    ***************************************/
+
+    template <size_t bit_mode>
+    class hash_state_t
+    {
+        uint64_t total_len = 0;
+        uint_t<bit_mode> v1 = 0, v2 = 0, v3 = 0, v4 = 0;
+        std::array<uint_t<bit_mode>, 4> mem = {0, 0, 0, 0};
+        uint32_t memsize = 0;
+
+        inline void update_impl(const void* input, size_t length)
+        {
+            const uint8_t* p = reinterpret_cast<const uint8_t*>(input);
+            const uint8_t* const bEnd = p + length;
+
+            total_len += length;
+
+            if (memsize + length < (bit_mode / 2))
+            { /* fill in tmp buffer */
+                memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, length);
+                memsize += static_cast<uint32_t>(length);
+                return;
+            }
+
+            if (memsize > 0)
+            { /* some data left from previous update */
+                memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, (bit_mode / 2) - memsize);
+
+                const uint_t<bit_mode>* ptr = mem.data();
+
+                v1 = detail::round(v1, mem_ops::readLE<bit_mode>(ptr));
+                ptr++;
+                v2 = detail::round(v2, mem_ops::readLE<bit_mode>(ptr));
+                ptr++;
+                v3 = detail::round(v3, mem_ops::readLE<bit_mode>(ptr));
+                ptr++;
+                v4 = detail::round(v4, mem_ops::readLE<bit_mode>(ptr));
+
+                p += (bit_mode / 2) - memsize;
+                memsize = 0;
+            }
+
+            if (p <= bEnd - (bit_mode / 2))
+            {
+                const uint8_t* const limit = bEnd - (bit_mode / 2);
+
+                do
+                {
+                    v1 = detail::round(v1, mem_ops::readLE<bit_mode>(p));
+                    p += (bit_mode / 8);
+                    v2 = detail::round(v2, mem_ops::readLE<bit_mode>(p));
+                    p += (bit_mode / 8);
+                    v3 = detail::round(v3, mem_ops::readLE<bit_mode>(p));
+                    p += (bit_mode / 8);
+                    v4 = detail::round(v4, mem_ops::readLE<bit_mode>(p));
+                    p += (bit_mode / 8);
+                }
+                while (p <= limit);
+            }
+
+            if (p < bEnd)
+            {
+                memcpy(mem.data(), p, static_cast<size_t>(bEnd - p));
+                memsize = static_cast<uint32_t>(bEnd - p);
+            }
+        }
+
+        inline hash_t<bit_mode> digest_impl() const
+        {
+            const uint8_t* p = reinterpret_cast<const uint8_t*>(mem.data());
+            const uint8_t* const bEnd = reinterpret_cast<const uint8_t*>(mem.data()) + memsize;
+            hash_t<bit_mode> hash_ret;
+
+            if (total_len >= (bit_mode / 2))
+            {
+                hash_ret = bit_ops::rotl(v1, 1) + bit_ops::rotl(v2, 7) + bit_ops::rotl(v3, 12) + bit_ops::rotl(v4, 18);
+
+                if constexpr (bit_mode == 64)
+                {
+                    detail::endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4);
+                }
+            }
+            else
+            {
+                hash_ret = v3 + detail::PRIME<bit_mode>(5);
+            }
+
+            hash_ret += static_cast<hash_t<bit_mode>>(total_len);
+
+            return detail::endian_align_sub_ending<bit_mode>(hash_ret, p, bEnd);
+        }
+
+    public:
+
+        hash_state_t(uint_t<bit_mode> seed = 0)
+        {
+            static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash streaming can only be used in 32 and 64 bit modes.");
+            v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
+            v2 = seed + detail::PRIME<bit_mode>(2);
+            v3 = seed + 0;
+            v4 = seed - detail::PRIME<bit_mode>(1);
+        };
+
+        hash_state_t operator=(hash_state_t& other)
+        {
+            memcpy(this, &other, sizeof(hash_state_t));
+            return *this;
+        }
+
+        void reset(uint_t<bit_mode> seed = 0)
+        {
+            memset(this, 0, sizeof(hash_state_t));
+            v1 = seed + detail::PRIME<bit_mode>(1) + detail::PRIME<bit_mode>(2);
+            v2 = seed + detail::PRIME<bit_mode>(2);
+            v3 = seed + 0;
+            v4 = seed - detail::PRIME<bit_mode>(1);
+        }
+
+        void update(const void* input, size_t length)
+        {
+            return update_impl(input, length);
+        }
+
+        template <typename T>
+        void update(const std::basic_string<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
+        }
+
+        template <typename ContiguousIterator>
+        void update(ContiguousIterator begin, ContiguousIterator end)
+        {
+            using T = typename std::decay_t<decltype(*end)>;
+            return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
+        }
+
+        template <typename T>
+        void update(const std::vector<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
+        }
+
+        template <typename T, size_t AN>
+        void update(const std::array<T, AN>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
+        }
+
+        template <typename T>
+        void update(const std::initializer_list<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
+        }
+
+        hash_t<bit_mode> digest() const
+        {
+            return digest_impl();
+        }
+    };
+
+    using hash_state32_t = hash_state_t<32>;
+    using hash_state64_t = hash_state_t<64>;
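+
+    /* Usage sketch (editor's illustration, not part of the upstream header):
+    *  incremental hashing with hash_state_t. digest() is non-destructive, so
+    *  a running hash can be read and the state extended with further update()
+    *  calls. `buf`/`buf_len` stand for any input buffer.
+    *
+    *      xxh::hash_state64_t state(42);            // seeded 64-bit state
+    *      state.update(buf, buf_len);
+    *      state.update(std::string("more data"));
+    *      auto h = state.digest();                  // equals xxhash<64> of the concatenated input
+    */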
+
+
+    /* *************************************
+    *  Hash streaming - xxhash3
+    ***************************************/
+
+    template <size_t bit_mode>
+    class alignas(64) hash3_state_t
+    {
+        constexpr static int internal_buffer_size = 256;
+        constexpr static int internal_buffer_stripes = (internal_buffer_size / detail3::stripe_len);
+
+        alignas(64) uint64_t acc[8];
+        alignas(64) uint8_t customSecret[detail3::secret_default_size]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
+        alignas(64) uint8_t buffer[internal_buffer_size];
+        uint32_t bufferedSize = 0;
+        uint32_t nbStripesPerBlock = 0;
+        uint32_t nbStripesSoFar = 0;
+        uint32_t secretLimit = 0;
+        uint32_t reserved32 = 0;
+        uint32_t reserved32_2 = 0;
+        uint64_t totalLen = 0;
+        uint64_t seed = 0;
+        bool useSeed = false;
+        uint64_t reserved64 = 0;
+        const uint8_t* secret = nullptr; /* note : there is some padding after, due to alignment on 64 bytes */
+
+
+        void consume_stripes(uint64_t* acc, uint32_t& nbStripesSoFar, size_t totalStripes, const uint8_t* input)
+        {
+            if (nbStripesPerBlock - nbStripesSoFar <= totalStripes) /* need a scrambling operation */
+            {
+                size_t const nbStripes = nbStripesPerBlock - nbStripesSoFar;
+
+                detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), nbStripes);
+                detail3::scramble_acc(acc, secret + secretLimit);
+                detail3::accumulate(acc, input + nbStripes * detail3::stripe_len, secret, totalStripes - nbStripes);
+                nbStripesSoFar = (uint32_t)(totalStripes - nbStripes);
+            }
+            else
+            {
+                detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), totalStripes);
+                nbStripesSoFar += (uint32_t)totalStripes;
+            }
+        }
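+
+        /* Editor's note on the block arithmetic above. Each stripe consumes
+        *  secret_consume_rate fresh bytes of secret; once a full block of
+        *  nbStripesPerBlock stripes has been accumulated, the accumulators are
+        *  scrambled with the last stripe_len bytes of the secret and secret
+        *  consumption restarts at offset 0. That is why the first branch splits
+        *  totalStripes around the block boundary. With the upstream defaults
+        *  (assumed here: 64-byte stripes, secret_consume_rate == 8, 192-byte
+        *  default secret) secretLimit == 128 and nbStripesPerBlock == 16. */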
+
+        void update_impl(const void* input_, size_t len)
+        {
+            const uint8_t* input = static_cast<const uint8_t*>(input_);
+            const uint8_t* const bEnd = input + len;
+
+            totalLen += len;
+
+            if (bufferedSize + len <= internal_buffer_size)
+            { /* fill in tmp buffer */
+                memcpy(buffer + bufferedSize, input, len);
+                bufferedSize += (uint32_t)len;
+                return;
+            }
+            /* input now > XXH3_INTERNALBUFFER_SIZE */
+
+            if (bufferedSize > 0)
+            { /* some input within internal buffer: fill then consume it */
+                size_t const loadSize = internal_buffer_size - bufferedSize;
+
+                memcpy(buffer + bufferedSize, input, loadSize);
+                input += loadSize;
+                consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, buffer);
+                bufferedSize = 0;
+            }
+
+            /* consume input by full buffer quantities */
+            if (input + internal_buffer_size <= bEnd)
+            {
+                const uint8_t* const limit = bEnd - internal_buffer_size;
+
+                do
+                {
+                    consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, input);
+                    input += internal_buffer_size;
+                }
+                while (input < limit);
+
+                memcpy(buffer + sizeof(buffer) - detail3::stripe_len, input - detail3::stripe_len, detail3::stripe_len);
+            }
+
+            if (input < bEnd)
+            { /* some remaining input: buffer it */
+                memcpy(buffer, input, (size_t)(bEnd - input));
+                bufferedSize = (uint32_t)(bEnd - input);
+            }
+        }
+
+        void digest_long(uint64_t* acc_)
+        {
+            memcpy(acc_, acc, sizeof(acc)); /* digest locally, state remains unaltered, and can continue ingesting more input afterwards */
+
+            if (bufferedSize >= detail3::stripe_len)
+            {
+                size_t const totalNbStripes = (bufferedSize - 1) / detail3::stripe_len;
+                uint32_t nbStripesSoFar = this->nbStripesSoFar;
+
+                consume_stripes(acc_, nbStripesSoFar, totalNbStripes, buffer);
+
+                /* one last partial stripe */
+                detail3::accumulate_512(acc_, buffer + bufferedSize - detail3::stripe_len, secret + secretLimit - detail3::secret_lastacc_start);
+            }
+            else
+            { /* bufferedSize < STRIPE_LEN */
+                /* one last stripe */
+                uint8_t lastStripe[detail3::stripe_len];
+                size_t const catchupSize = detail3::stripe_len - bufferedSize;
+                memcpy(lastStripe, buffer + sizeof(buffer) - catchupSize, catchupSize);
+                memcpy(lastStripe + catchupSize, buffer, bufferedSize);
+                detail3::accumulate_512(acc_, lastStripe, secret + secretLimit - detail3::secret_lastacc_start);
+            }
+        }
+
+        void reset_internal(uint64_t seed_reset, const void* secret_reset, size_t secret_size)
+        {
+            memset(this, 0, sizeof(*this));
+            memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
+            seed = seed_reset;
+            useSeed = (seed != 0);
+            secret = (const uint8_t*)secret_reset;
+            secretLimit = (uint32_t)(secret_size - detail3::stripe_len);
+            nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;
+        }
+
+    public:
+
+        hash3_state_t operator=(hash3_state_t& other)
+        {
+            memcpy(this, &other, sizeof(hash3_state_t));
+            return *this;
+        }
+
+        hash3_state_t(uint64_t seed = 0)
+        {
+            static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes.");
+            reset(seed);
+        }
+
+        hash3_state_t(const void* secret, size_t secretSize, uint64_t seed = 0)
+        {
+            static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes.");
+            reset(secret, secretSize, seed);
+        }
+
+        void reset(uint64_t seed = 0)
+        {
+            reset_internal(seed, detail3::default_secret, detail3::secret_default_size);
+            detail3::init_custom_secret(customSecret, seed);
+            secret = customSecret;
+            /*
+            memset(this, 0, sizeof(*this));
+            memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
+            (*this).seed = seed;
+
+            if (seed == 0)
+            {
+                secret = detail3::default_secret;
+            }
+            else
+            {
+                detail3::init_custom_secret(customSecret, seed);
+                secret = customSecret;
+            }
+
+            secretLimit = (uint32_t)(detail3::secret_default_size - detail3::stripe_len);
+            nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;*/
+        }
+
+        void reset(const void* secret, size_t secretSize, uint64_t seed = 0)
+        {
+            reset_internal(seed, secret, secretSize);
+            useSeed = true;
+            /*
+
+            memset(this, 0, sizeof(*this));
+            memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc));
+            seed = 0;
+
+            (*this).secret = (const uint8_t*)secret;
+            secretLimit = (uint32_t)(secretSize - detail3::stripe_len);
+            nbStripesPerBlock = secretLimit / detail3::secret_consume_rate;*/
+        }
+
+        void update(const void* input, size_t len)
+        {
+            return update_impl(static_cast<const void*>(input), len);
+        }
+
+        template <typename T>
+        void update(const std::basic_string<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T));
+        }
+
+        template <typename ContiguousIterator>
+        void update(ContiguousIterator begin, ContiguousIterator end)
+        {
+            using T = typename std::decay_t<decltype(*end)>;
+            return update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T));
+        }
+
+        template <typename T>
+        void update(const std::vector<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T));
+        }
+
+        template <typename T, size_t AN>
+        void update(const std::array<T, AN>& input)
+        {
+            return update_impl(static_cast<const void*>(input.data()), AN * sizeof(T));
+        }
+
+        template <typename T>
+        void update(const std::initializer_list<T>& input)
+        {
+            return update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T));
+        }
+
+        hash_t<bit_mode> digest()
+        {
+            if (totalLen > detail3::midsize_max)
+            {
+                alignas(128) hash64_t acc[detail3::acc_nb];
+
+                digest_long(acc);
+
+                if constexpr (bit_mode == 64)
+                {
+                    return detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1));
+                }
+                else
+                {
+                    uint64_t const low64 = detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1));
+                    uint64_t const high64 = detail3::merge_accs(acc, secret + secretLimit + detail3::stripe_len - sizeof(acc) - detail3::secret_mergeaccs_start, ~((uint64_t)totalLen * detail::PRIME<64>(2)));
+
+                    return { low64, high64 };
+                }
+            }
+            else
+            {
+                return detail3::xxhash3_impl<bit_mode>(buffer, totalLen, seed, secret, secretLimit + detail3::stripe_len);
+            }
+        }
+    };
+
+    using hash3_state64_t = hash3_state_t<64>;
+    using hash3_state128_t = hash3_state_t<128>;
+}
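+
+/* Usage sketch (editor's illustration, not part of the upstream header):
+*  streaming XXH3 with hash3_state_t, mirroring the one-shot xxhash3() results.
+*  `buf`/`buf_len` stand for any input buffer.
+*
+*      xxh::hash3_state64_t st;                  // default secret, unseeded
+*      st.update(buf, buf_len);
+*      auto h64 = st.digest();                   // non-destructive digest
+*
+*      xxh::hash3_state128_t st128(0x1234);      // seeded 128-bit variant
+*      st128.update(std::string("abc"));
+*      auto h128 = st128.digest();
+*/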
-
-/* *******************************************************************
- *  Hash streaming
- *********************************************************************/
-enum class error_code : uint8_t { ok = 0, error };
-
-template <size_t N> class hash_state_t {
-  uint64_t total_len = 0;
-  hash_t<N> v1 = 0, v2 = 0, v3 = 0, v4 = 0;
-  std::array<hash_t<N>, 4> mem = {{0, 0, 0, 0}};
-  uint32_t memsize = 0;
-
-  inline error_code _update_impl(const void* input, size_t length, endianness endian)
-  {
-    const uint8_t* p = reinterpret_cast<const uint8_t*>(input);
-    const uint8_t* const bEnd = p + length;
-
-    if (!input) {
-      return xxh::error_code::error;
-    }
-
-    total_len += length;
-
-    if (memsize + length < (N / 2)) { /* fill in tmp buffer */
-      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, length);
-      memsize += static_cast<uint32_t>(length);
-      return error_code::ok;
-    }
-
-    if (memsize) { /* some data left from previous update */
-      memcpy(reinterpret_cast<uint8_t*>(mem.data()) + memsize, input, (N / 2) - memsize);
-
-      const hash_t<N>* ptr = mem.data();
-      v1 = detail::round(v1, mem_ops::readLE<N>(ptr, endian));
-      ptr++;
-      v2 = detail::round(v2, mem_ops::readLE<N>(ptr, endian));
-      ptr++;
-      v3 = detail::round(v3, mem_ops::readLE<N>(ptr, endian));
-      ptr++;
-      v4 = detail::round(v4, mem_ops::readLE<N>(ptr, endian));
-
-      p += (N / 2) - memsize;
-      memsize = 0;
-    }
-
-    if (p <= bEnd - (N / 2)) {
-      const uint8_t* const limit = bEnd - (N / 2);
-
-      do {
-        v1 = detail::round(v1, mem_ops::readLE<N>(p, endian));
-        p += (N / 8);
-        v2 = detail::round(v2, mem_ops::readLE<N>(p, endian));
-        p += (N / 8);
-        v3 = detail::round(v3, mem_ops::readLE<N>(p, endian));
-        p += (N / 8);
-        v4 = detail::round(v4, mem_ops::readLE<N>(p, endian));
-        p += (N / 8);
-      } while (p <= limit);
-    }
-
-    if (p < bEnd) {
-      memcpy(mem.data(), p, static_cast<size_t>(bEnd - p));
-      memsize = static_cast<uint32_t>(bEnd - p);
-    }
-
-    return error_code::ok;
-  }
-
-  inline hash_t<N> _digest_impl(endianness endian) const
-  {
-    const uint8_t* p = reinterpret_cast<const uint8_t*>(mem.data());
-    const uint8_t* const bEnd = reinterpret_cast<const uint8_t*>(mem.data()) + memsize;
-    hash_t<N> hash_ret;
-
-    if (total_len > (N / 2)) {
-      hash_ret =
-          bit_ops::rotl(v1, 1) + bit_ops::rotl(v2, 7) + bit_ops::rotl(v3, 12) + bit_ops::rotl(v4, 18);
-
-      detail::endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4);
-    } else {
-      hash_ret = v3 + detail::PRIME<N>(5);
-    }
-
-    hash_ret += static_cast<hash_t<N>>(total_len);
-
-    return detail::endian_align_sub_ending(hash_ret, p, bEnd, endian, alignment::unaligned);
-  }
-
-public:
-  hash_state_t(hash_t<N> seed = 0)
-  {
-    static_assert(!(N != 32 && N != 64), "You can only stream hashing in 32 or 64 bit mode.");
-    v1 = seed + detail::PRIME<N>(1) + detail::PRIME<N>(2);
-    v2 = seed + detail::PRIME<N>(2);
-    v3 = seed + 0;
-    v4 = seed - detail::PRIME<N>(1);
-  };
-
-  hash_state_t operator=(hash_state_t& other) { memcpy(this, other, sizeof(hash_state_t)); }
-
-  error_code reset(hash_t<N> seed = 0)
-  {
-    memset(this, 0, sizeof(hash_state_t));
-    v1 = seed + detail::PRIME<N>(1) + detail::PRIME<N>(2);
-    v2 = seed + detail::PRIME<N>(2);
-    v3 = seed + 0;
-    v4 = seed - detail::PRIME<N>(1);
-    return error_code::ok;
-  }
-
-  error_code update(const void* input, size_t length, endianness endian = endianness::unspecified)
-  {
-    return _update_impl(input, length, mem_ops::get_endian(endian));
-  }
-
-  template <typename T>
-  error_code update(const std::basic_string<T>& input, endianness endian = endianness::unspecified)
-  {
-    return _update_impl(static_cast<const void*>(input.data()), input.length() * sizeof(T),
-                        mem_ops::get_endian(endian));
-  }
-
-  template <typename ContiguousIterator>
-  error_code update(ContiguousIterator begin, ContiguousIterator end, endianness endian = endianness::unspecified)
-  {
-    using T = typename std::decay_t<decltype(*end)>;
-    return _update_impl(static_cast<const void*>(&*begin), (end - begin) * sizeof(T), mem_ops::get_endian(endian));
-  }
-
-  template <typename T> error_code update(const std::vector<T>& input, endianness endian = endianness::unspecified)
-  {
-    return _update_impl(static_cast<const void*>(input.data()), input.size() * sizeof(T), mem_ops::get_endian(endian));
-  }
-
-  template <typename T, size_t AN>
-  error_code update(const std::array<T, AN>& input, endianness endian = endianness::unspecified)
-  {
-    return _update_impl(static_cast<const void*>(input.data()), AN * sizeof(T), mem_ops::get_endian(endian));
-  }
-
-  template <typename T>
-  error_code update(const std::initializer_list<T>& input, endianness endian = endianness::unspecified)
-  {
-    return _update_impl(static_cast<const void*>(input.begin()), input.size() * sizeof(T), mem_ops::get_endian(endian));
-  }
-
-  hash_t<N> digest(endianness endian = endianness::unspecified) { return _digest_impl(mem_ops::get_endian(endian)); }
-};
-
-using hash_state32_t = hash_state_t<32>;
-using hash_state64_t = hash_state_t<64>;
-
-/* *******************************************************************
- *  Canonical
- *********************************************************************/
-
-template <size_t N> struct canonical_t {
-  std::array<uint8_t, N / 8> digest;
-
-  canonical_t(hash_t<N> hash)
-  {
-    if (mem_ops::is_little_endian()) {
-      hash = bit_ops::swap(hash);
-    }
-    memcpy(digest.data(), &hash, sizeof(canonical_t));
-  }
-
-  hash_t<N> get_hash() const { return mem_ops::readBE<N>(&digest); }
-};
-
-using canonical32_t = canonical_t<32>;
-using canonical64_t = canonical_t<64>;
-} // namespace xxh
-- 
2.20.1