15#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
16 #define HAS_X86_SIMD 1
17 #if defined(__AVX512F__) || defined(HAS_AVX512)
24 #include <immintrin.h>
25 #elif defined(__AVX2__) || defined(HAS_AVX2)
29 #include <immintrin.h>
30 #elif defined(__SSE4_2__) || defined(HAS_SSE42)
34 #include <nmmintrin.h>
35 #include <smmintrin.h>
36 #include <tmmintrin.h>
37 #elif defined(__SSE2__)
39 #include <emmintrin.h>
42 #if defined(HAS_SSE42) || defined(HAS_SSE2)
43 #include <xmmintrin.h>
44 #include <emmintrin.h>
45 #include <pmmintrin.h>
46 #elif defined(__SSE2__)
48 #include <emmintrin.h>
50#elif defined(__ARM_NEON) || defined(__aarch64__)
51 #define HAS_ARM_NEON 1
62 #if defined(HAS_AVX512) && defined(__x86_64__)
65 constexpr size_t float_simd_width_512 = 16;
66 constexpr size_t double_simd_width_512 = 8;
67 constexpr size_t int32_simd_width_512 = 16;
68 using float_simd = __m512;
69 using double_simd = __m512d;
70 using int32_simd = __m512i;
71 using float_simd_512 = __m512;
72 using double_simd_512 = __m512d;
73 using int32_simd_512 = __m512i;
74 #elif defined(HAS_AVX2) && defined(__x86_64__)
77 using float_simd = __m256;
78 using double_simd = __m256d;
79 using int32_simd = __m256i;
80 #elif (defined(HAS_SSE42) || defined(HAS_SSE2)) && (defined(__x86_64__) || defined(__i386__))
83 using float_simd = __m128;
84 using double_simd = __m128d;
85 using int32_simd = __m128i;
86 #elif defined(HAS_ARM_NEON) || defined(__ARM_NEON) || defined(__ARM_NEON__)
89 using float_simd = float32x4_t;
90 using double_simd = float64x2_t;
91 using int32_simd = int32x4_t;
106 static float sum_floats(
const std::vector<ValueVariant>& values);
111 static double sum_doubles(
const std::vector<ValueVariant>& values);
116 static std::optional<float>
min_float(
const std::vector<ValueVariant>& values);
121 static std::optional<float>
max_float(
const std::vector<ValueVariant>& values);
127 static std::optional<double>
average(
const std::vector<ValueVariant>& values);
133 const std::vector<ValueVariant>& values,
140 const std::vector<ValueVariant>& values,
141 std::string_view pattern);
146 template<
typename T,
typename Func>
153 const std::vector<ValueVariant>& a,
154 const std::vector<ValueVariant>& b);
159 static void fast_copy(
const void* src,
void* dst,
size_t size);
164 static bool fast_compare(
const void* a,
const void* b,
size_t size);
170 const std::vector<ValueVariant>& values);
175 static uint64_t
simd_hash(
const void* data,
size_t size);
179 #if defined(HAS_AVX512)
180 static float sum_floats_avx512(
const float* data,
size_t count);
181 static float min_float_avx512(
const float* data,
size_t count);
182 static float max_float_avx512(
const float* data,
size_t count);
183 static double sum_doubles_avx512(
const double* data,
size_t count);
186 #if defined(HAS_AVX2)
187 static float sum_floats_avx2(
const float* data,
size_t count);
188 static float min_float_avx2(
const float* data,
size_t count);
189 static float max_float_avx2(
const float* data,
size_t count);
192 #if defined(HAS_SSE42) || defined(HAS_SSE2)
193 static float sum_floats_sse(
const float* data,
size_t count);
194 static float min_float_sse(
const float* data,
size_t count);
195 static float max_float_sse(
const float* data,
size_t count);
198 #if defined(HAS_ARM_NEON)
199 static float sum_floats_neon(
const float* data,
size_t count);
200 static float min_float_neon(
const float* data,
size_t count);
201 static float max_float_neon(
const float* data,
size_t count);
219 static std::vector<uint8_t>
compress(
const std::vector<uint8_t>& data);
224 static std::vector<uint8_t>
decompress(
const std::vector<uint8_t>& compressed);
273 #if defined(HAS_AVX512)
275 #elif defined(HAS_AVX2)
277 #elif defined(HAS_SSE42) || defined(HAS_SSE2) || defined(HAS_ARM_NEON)
294 #if defined(HAS_X86_SIMD) || defined(HAS_ARM_NEON)
302 struct simd_traits<double> {
308 struct simd_traits<int32_t> {
SIMD-accelerated data compressor.
static std::vector< uint8_t > decompress(const std::vector< uint8_t > &compressed)
Decompress data using a SIMD-accelerated algorithm.
static bool is_compressible(const std::vector< uint8_t > &data)
Check whether data is compressible (entropy estimation).
static std::vector< uint8_t > compress(const std::vector< uint8_t > &data)
Compress data using a SIMD-accelerated algorithm.
SIMD processor for vectorized operations on container values.
static bool fast_compare(const void *a, const void *b, size_t size)
Fast memory comparison using SIMD.
static std::vector< std::vector< uint8_t > > parallel_serialize(const std::vector< ValueVariant > &values)
Serialize multiple values in parallel.
static uint64_t simd_hash(const void *data, size_t size)
Compute a hash of the data using SIMD.
static std::optional< float > dot_product_floats(const std::vector< ValueVariant > &a, const std::vector< ValueVariant > &b)
Parallel dot product of two float arrays.
static double sum_doubles(const std::vector< ValueVariant > &values)
Sum all double values in a container using SIMD.
static void fast_copy(const void *src, void *dst, size_t size)
Fast memory copy using SIMD.
static float sum_floats_scalar(const float *data, size_t count)
static float sum_floats(const std::vector< ValueVariant > &values)
Sum all float values in a container using SIMD.
static void transform_numeric(std::vector< ValueVariant > &values, Func &&func)
Transform all numeric values by applying a function.
static std::optional< double > average(const std::vector< ValueVariant > &values)
Compute average of numeric values.
static float max_float_scalar(const float *data, size_t count)
static float min_float_scalar(const float *data, size_t count)
static std::vector< size_t > find_string_pattern(const std::vector< ValueVariant > &values, std::string_view pattern)
Vectorized string search using SIMD.
static std::optional< float > max_float(const std::vector< ValueVariant > &values)
Find maximum float value using SIMD.
static std::optional< float > min_float(const std::vector< ValueVariant > &values)
Find minimum float value using SIMD.
static std::vector< size_t > find_equal_floats(const std::vector< ValueVariant > &values, float target)
Vectorized comparison: find all values equal to target.
Utility to check SIMD support at runtime.
static bool has_avx512vl()
static bool has_avx512dq()
static bool has_avx512bw()
static simd_level get_best_simd_level()
Get the best available SIMD instruction set level.
static bool has_avx512f()
static std::string get_simd_info()
Get a string describing available SIMD features.
static size_t get_optimal_width()
Get the optimal SIMD width for the current platform.
simd_level
SIMD instruction set level enumeration.
constexpr size_t double_simd_width
constexpr size_t float_simd_width
SIMD width detection.
Template for SIMD operations on different types.
static constexpr bool supported
static constexpr size_t width