Container System 0.1.0
High-performance C++20 type-safe container framework with SIMD-accelerated serialization
Loading...
Searching...
No Matches
simd_processor.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
5#pragma once
6
7#include "value.h"
8#include <vector>
9#include <string>
10#include <numeric>
11#include <algorithm>
12#include <cstring>
13
14// Platform-specific SIMD headers
// Compile-time feature detection. Decides which HAS_* macros are defined and
// which intrinsic headers are included; everything below in this header keys
// off those HAS_* macros.
15#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
// Defined for every x86/x64 target, whether or not any vector extension
// is selected by the chain below.
16 #define HAS_X86_SIMD 1
// Select exactly one level, widest first. Each level is taken either from the
// compiler's predefined macro or from a HAS_* macro that is already defined
// when this header is processed (presumably set by the build system - confirm).
17 #if defined(__AVX512F__) || defined(HAS_AVX512)
18 #ifndef HAS_AVX512
19 #define HAS_AVX512 1
20 #endif
21 #ifndef HAS_AVX2
22 #define HAS_AVX2 1 // AVX-512 implies AVX2
23 #endif
24 #include <immintrin.h>
25 #elif defined(__AVX2__) || defined(HAS_AVX2)
26 #ifndef HAS_AVX2
27 #define HAS_AVX2 1
28 #endif
29 #include <immintrin.h>
30 #elif defined(__SSE4_2__) || defined(HAS_SSE42)
31 #ifndef HAS_SSE42
32 #define HAS_SSE42 1
33 #endif
34 #include <nmmintrin.h> // SSE4.2
35 #include <smmintrin.h> // SSE4.1
36 #include <tmmintrin.h> // SSSE3
37 #elif defined(__SSE2__)
38 #define HAS_SSE2 1
39 #include <emmintrin.h> // SSE2
40 #endif
41 // Basic SSE headers for all x86
// If the chain above chose SSE4.2 or SSE2, pull in the baseline SSE headers.
// Otherwise (an AVX branch was taken, or none) the #elif below still defines
// HAS_SSE2 whenever the compiler advertises __SSE2__, so AVX builds end up
// with HAS_SSE2 set as well.
42 #if defined(HAS_SSE42) || defined(HAS_SSE2)
43 #include <xmmintrin.h> // SSE
44 #include <emmintrin.h> // SSE2
45 #include <pmmintrin.h> // SSE3
46 #elif defined(__SSE2__)
47 #define HAS_SSE2 1
48 #include <emmintrin.h>
49 #endif
// Non-x86 targets: NEON is assumed whenever __ARM_NEON or __aarch64__ is defined.
50#elif defined(__ARM_NEON) || defined(__aarch64__)
51 #define HAS_ARM_NEON 1
52 #include <arm_neon.h>
53#endif
54
55namespace kcenon::container
56{
57namespace simd
58{
// SIMD width detection.
//
// float_simd_width / double_simd_width give the number of 32-bit / 64-bit
// lanes of the widest vector register selected by the HAS_* macros, and
// float_simd / double_simd / int32_simd alias the matching native register
// types. The *_512 names exist only in the AVX-512 branch. The scalar
// fallback defines widths of 1 and no register aliases.
//
// The architecture tests accept both the GCC/Clang spellings
// (__x86_64__, __i386__) and the MSVC spellings (_M_X64, _M_IX86), matching
// the test that defines HAS_X86_SIMD. Without the MSVC spellings an MSVC
// build with HAS_AVX2/HAS_AVX512 set would fall through to the scalar widths.
#if defined(HAS_AVX512) && (defined(__x86_64__) || defined(_M_X64))
    constexpr size_t float_simd_width = 16;     // 512-bit / 32-bit
    constexpr size_t double_simd_width = 8;     // 512-bit / 64-bit
    constexpr size_t float_simd_width_512 = 16;
    constexpr size_t double_simd_width_512 = 8;
    constexpr size_t int32_simd_width_512 = 16;
    using float_simd = __m512;
    using double_simd = __m512d;
    using int32_simd = __m512i;
    using float_simd_512 = __m512;
    using double_simd_512 = __m512d;
    using int32_simd_512 = __m512i;
#elif defined(HAS_AVX2) && (defined(__x86_64__) || defined(_M_X64))
    constexpr size_t float_simd_width = 8;      // 256-bit / 32-bit
    constexpr size_t double_simd_width = 4;     // 256-bit / 64-bit
    using float_simd = __m256;
    using double_simd = __m256d;
    using int32_simd = __m256i;
#elif (defined(HAS_SSE42) || defined(HAS_SSE2)) && \
      (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
    constexpr size_t float_simd_width = 4;      // 128-bit / 32-bit
    constexpr size_t double_simd_width = 2;     // 128-bit / 64-bit
    using float_simd = __m128;
    using double_simd = __m128d;
    using int32_simd = __m128i;
#elif defined(HAS_ARM_NEON) || defined(__ARM_NEON) || defined(__ARM_NEON__)
    constexpr size_t float_simd_width = 4;      // 128-bit / 32-bit
    constexpr size_t double_simd_width = 2;     // 128-bit / 64-bit
    using float_simd = float32x4_t;
    using double_simd = float64x2_t;
    using int32_simd = int32x4_t;
#else
    constexpr size_t float_simd_width = 1;      // No SIMD
    constexpr size_t double_simd_width = 1;
#endif
96
101 {
102 public:
// Public static API. Every function takes the container values as a
// std::vector<ValueVariant>; how non-matching variant alternatives are
// treated is decided in the implementation, which is not visible here.
// NOTE(review): std::optional, std::string_view and the fixed-width integer
// types are used below, but <optional>, <string_view> and <cstdint> are not
// in this header's visible include list - presumably supplied by "value.h";
// confirm.

/// Sum all float values in a container using SIMD.
106 static float sum_floats(const std::vector<ValueVariant>& values);
107
/// Sum all double values in a container using SIMD.
111 static double sum_doubles(const std::vector<ValueVariant>& values);
112
/// Find minimum float value using SIMD.
/// Returns an optional; presumably empty when there is no float to compare - confirm.
116 static std::optional<float> min_float(const std::vector<ValueVariant>& values);
117
/// Find maximum float value using SIMD.
/// Returns an optional; presumably empty when there is no float to compare - confirm.
121 static std::optional<float> max_float(const std::vector<ValueVariant>& values);
122
/// Compute average of numeric values.
/// T cannot be deduced from the arguments, so callers must name it explicitly;
/// the result is a double regardless of T.
126 template<typename T>
127 static std::optional<double> average(const std::vector<ValueVariant>& values);
128
/// Vectorized comparison - find all values equal to target.
/// Returns size_t entries, presumably indices into `values` - confirm.
132 static std::vector<size_t> find_equal_floats(
133 const std::vector<ValueVariant>& values,
134 float target);
135
/// Vectorized string search using SIMD.
/// Returns size_t entries, presumably indices of the matching values - confirm.
139 static std::vector<size_t> find_string_pattern(
140 const std::vector<ValueVariant>& values,
141 std::string_view pattern);
142
/// Transform all numeric values by applying a function.
/// `values` is taken by non-const reference, i.e. it is modified in place.
146 template<typename T, typename Func>
147 static void transform_numeric(std::vector<ValueVariant>& values, Func&& func);
148
/// Parallel dot product of two float arrays.
/// Returns an optional; presumably empty for mismatched inputs - confirm.
152 static std::optional<float> dot_product_floats(
153 const std::vector<ValueVariant>& a,
154 const std::vector<ValueVariant>& b);
155
/// Fast memory copy using SIMD.
/// Note the argument order is (src, dst, size) - the reverse of std::memcpy.
159 static void fast_copy(const void* src, void* dst, size_t size);
160
/// Fast memory comparison using SIMD.
164 static bool fast_compare(const void* a, const void* b, size_t size);
165
/// Serialize multiple values in parallel.
/// Returns a vector of byte buffers, presumably one per input value - confirm.
169 static std::vector<std::vector<uint8_t>> parallel_serialize(
170 const std::vector<ValueVariant>& values);
171
/// Compute hash of data using SIMD.
175 static uint64_t simd_hash(const void* data, size_t size);
176
177 private:
178 // Platform-specific implementations
// Per-instruction-set helpers working on raw arrays (pointer + element
// count). Each group is declared only when its HAS_* macro is defined; the
// groups are independent #if blocks, so several can be declared in the same
// build. How the public functions choose between them is not visible here.

// AVX-512 helpers. This is the only group that declares a double-precision
// sum (sum_doubles_avx512); no other group, including the scalar one, does.
179 #if defined(HAS_AVX512)
180 static float sum_floats_avx512(const float* data, size_t count);
181 static float min_float_avx512(const float* data, size_t count);
182 static float max_float_avx512(const float* data, size_t count);
183 static double sum_doubles_avx512(const double* data, size_t count);
184 #endif
185
// AVX2 helpers.
186 #if defined(HAS_AVX2)
187 static float sum_floats_avx2(const float* data, size_t count);
188 static float min_float_avx2(const float* data, size_t count);
189 static float max_float_avx2(const float* data, size_t count);
190 #endif
191
// SSE helpers, shared by the SSE4.2 and SSE2 configurations.
192 #if defined(HAS_SSE42) || defined(HAS_SSE2)
193 static float sum_floats_sse(const float* data, size_t count);
194 static float min_float_sse(const float* data, size_t count);
195 static float max_float_sse(const float* data, size_t count);
196 #endif
197
// ARM NEON helpers.
198 #if defined(HAS_ARM_NEON)
199 static float sum_floats_neon(const float* data, size_t count);
200 static float min_float_neon(const float* data, size_t count);
201 static float max_float_neon(const float* data, size_t count);
202 #endif
203
204 // Scalar fallbacks
// Declared unconditionally, so they are available in every configuration.
205 static float sum_floats_scalar(const float* data, size_t count);
206 static float min_float_scalar(const float* data, size_t count);
207 static float max_float_scalar(const float* data, size_t count);
208 };
209
214 {
215 public:
// Static interface of the SIMD-accelerated data compressor. The compressed
// format is defined by the implementation, which is not visible here.

/// Compress data using SIMD-accelerated algorithm.
/// Returns a new byte vector; the input is not modified (taken by const reference).
219 static std::vector<uint8_t> compress(const std::vector<uint8_t>& data);
220
/// Decompress data using SIMD-accelerated algorithm.
/// Presumably expects bytes produced by compress() - confirm.
224 static std::vector<uint8_t> decompress(const std::vector<uint8_t>& compressed);
225
/// Check if data is compressible (entropy estimation).
229 static bool is_compressible(const std::vector<uint8_t>& data);
230 };
231
/// SIMD instruction set level enumeration.
///
/// The ordinals are spelled out explicitly; they are the same values the
/// enumerators receive from implicit numbering starting at none = 0.
enum class simd_level
{
    none   = 0,  ///< no vector instruction set
    sse2   = 1,
    sse42  = 2,
    avx2   = 3,
    avx512 = 4,
    neon   = 5
};
243
248 {
249 public:
// Static interface of the utility that checks SIMD support at runtime.
// The has_*() queries take no arguments and return a bool; the detection
// mechanism lives in the implementation, which is not visible here.
250 static bool has_sse2();
251 static bool has_sse42();
252 static bool has_avx2();
253 static bool has_avx512f(); // AVX-512 Foundation
254 static bool has_avx512dq(); // AVX-512 Double/Quad word
255 static bool has_avx512bw(); // AVX-512 Byte/Word
256 static bool has_avx512vl(); // AVX-512 Vector Length extensions
257 static bool has_neon();
258
// NOTE(review): this file's symbol index lists
// `static simd_level get_best_simd_level()` ("Get the best available SIMD
// instruction set level"), but its declaration is not visible in this
// listing - confirm against the original header.
263
/// Get a string describing available SIMD features.
267 static std::string get_simd_info();
268
/// Get the optimal SIMD width for current platform.
///
/// The value is fixed at compile time by the HAS_* macros, not probed at
/// runtime: 16 with HAS_AVX512, 8 with HAS_AVX2, 4 with any of
/// HAS_SSE42 / HAS_SSE2 / HAS_ARM_NEON, and 1 otherwise.
static size_t get_optimal_width() {
#if defined(HAS_AVX512)
    constexpr size_t lanes = 16;
#elif defined(HAS_AVX2)
    constexpr size_t lanes = 8;
#elif defined(HAS_SSE42) || defined(HAS_SSE2) || defined(HAS_ARM_NEON)
    constexpr size_t lanes = 4;
#else
    constexpr size_t lanes = 1;
#endif
    return lanes;
}
283 };
284
/// Template for SIMD operations on different types.
///
/// Primary template: a type without a specialization is handled one element
/// at a time (width 1) and is reported as not SIMD-supported.
template<typename T>
struct simd_traits {
    static constexpr size_t width = 1;
    static constexpr bool supported = false;
};

// Specializations for the element types that have vector register aliases.
//
// HAS_X86_SIMD only states that the target is x86/x64; it does not guarantee
// that a vector instruction set was selected. In that case the lane counts
// are 1, so `supported` is derived from the width instead of being
// hard-coded to true. A width of 1 therefore always means "not supported".
#if defined(HAS_X86_SIMD) || defined(HAS_ARM_NEON)
template<>
struct simd_traits<float> {
    static constexpr size_t width = float_simd_width;
    static constexpr bool supported = (width > 1);
};

template<>
struct simd_traits<double> {
    static constexpr size_t width = double_simd_width;
    static constexpr bool supported = (width > 1);
};

template<>
struct simd_traits<int32_t> {
    static constexpr size_t width = float_simd_width; // 32-bit lanes, same count as float
    static constexpr bool supported = (width > 1);
};
#endif
313
314} // namespace simd
315} // namespace kcenon::container
SIMD-accelerated data compressor.
static std::vector< uint8_t > decompress(const std::vector< uint8_t > &compressed)
Decompress data using SIMD-accelerated algorithm.
static bool is_compressible(const std::vector< uint8_t > &data)
Check if data is compressible (entropy estimation)
static std::vector< uint8_t > compress(const std::vector< uint8_t > &data)
Compress data using SIMD-accelerated algorithm.
SIMD processor for vectorized operations on container values.
static bool fast_compare(const void *a, const void *b, size_t size)
Fast memory comparison using SIMD.
static std::vector< std::vector< uint8_t > > parallel_serialize(const std::vector< ValueVariant > &values)
Serialize multiple values in parallel.
static uint64_t simd_hash(const void *data, size_t size)
Compute hash of data using SIMD.
static std::optional< float > dot_product_floats(const std::vector< ValueVariant > &a, const std::vector< ValueVariant > &b)
Parallel dot product of two float arrays.
static double sum_doubles(const std::vector< ValueVariant > &values)
Sum all double values in a container using SIMD.
static void fast_copy(const void *src, void *dst, size_t size)
Fast memory copy using SIMD.
static float sum_floats_scalar(const float *data, size_t count)
static float sum_floats(const std::vector< ValueVariant > &values)
Sum all float values in a container using SIMD.
static void transform_numeric(std::vector< ValueVariant > &values, Func &&func)
Transform all numeric values by applying a function.
static std::optional< double > average(const std::vector< ValueVariant > &values)
Compute average of numeric values.
static float max_float_scalar(const float *data, size_t count)
static float min_float_scalar(const float *data, size_t count)
static std::vector< size_t > find_string_pattern(const std::vector< ValueVariant > &values, std::string_view pattern)
Vectorized string search using SIMD.
static std::optional< float > max_float(const std::vector< ValueVariant > &values)
Find maximum float value using SIMD.
static std::optional< float > min_float(const std::vector< ValueVariant > &values)
Find minimum float value using SIMD.
static std::vector< size_t > find_equal_floats(const std::vector< ValueVariant > &values, float target)
Vectorized comparison - find all values equal to target.
Utility to check SIMD support at runtime.
static simd_level get_best_simd_level()
Get the best available SIMD instruction set level.
static std::string get_simd_info()
Get a string describing available SIMD features.
static size_t get_optimal_width()
Get the optimal SIMD width for current platform.
simd_level
SIMD instruction set level enumeration.
constexpr size_t double_simd_width
constexpr size_t float_simd_width
SIMD width detection.
Template for SIMD operations on different types.