Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
simd_aggregator.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11#pragma once
12
13#include <algorithm>
14#include <atomic>
15#include <cmath>
16#include <cstddef>
17#include <limits>
18#include <memory>
19#include <numeric>
20#include <vector>
21
23
24// Platform-specific SIMD includes
25#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
26 #if defined(__AVX2__)
27 #include <immintrin.h>
28 #define SIMD_AVX2_AVAILABLE 1
29 #elif defined(__SSE4_1__)
30 #include <smmintrin.h>
31 #define SIMD_SSE4_AVAILABLE 1
32 #elif defined(__SSE2__)
33 #include <emmintrin.h>
34 #define SIMD_SSE2_AVAILABLE 1
35 #endif
36#elif defined(__aarch64__) || defined(_M_ARM64)
37 #include <arm_neon.h>
38 #define SIMD_NEON_AVAILABLE 1
39#endif
40
41namespace kcenon::monitoring {
42
47 bool sse2_available = false;
48 bool sse4_available = false;
49 bool avx_available = false;
50 bool avx2_available = false;
51 bool avx512_available = false;
52 bool neon_available = false;
53
59
60#if defined(SIMD_AVX2_AVAILABLE)
61 caps.avx2_available = true;
62 caps.avx_available = true;
63 caps.sse4_available = true;
64 caps.sse2_available = true;
65#elif defined(SIMD_SSE4_AVAILABLE)
66 caps.sse4_available = true;
67 caps.sse2_available = true;
68#elif defined(SIMD_SSE2_AVAILABLE)
69 caps.sse2_available = true;
70#elif defined(SIMD_NEON_AVAILABLE)
71 caps.neon_available = true;
72#endif
73
74 return caps;
75 }
76};
77
82 bool enable_simd = true;
83 size_t vector_size = 8;
84 size_t alignment = 32;
85 bool use_fma = true;
86
91 bool validate() const {
92 // Vector size must be power of 2
93 if (vector_size == 0 || (vector_size & (vector_size - 1)) != 0) {
94 return false;
95 }
96 // Alignment must be power of 2
97 if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
98 return false;
99 }
100 return true;
101 }
102};
103
108 size_t count = 0;
109 double sum = 0.0;
110 double mean = 0.0;
111 double variance = 0.0;
112 double std_dev = 0.0;
113 double min_val = 0.0;
114 double max_val = 0.0;
115};
116
121 std::atomic<size_t> total_operations{0};
122 std::atomic<size_t> simd_operations{0};
123 std::atomic<size_t> scalar_operations{0};
124 std::atomic<size_t> total_elements_processed{0};
125
132
134 if (this != &other) {
135 total_operations.store(other.total_operations.load());
136 simd_operations.store(other.simd_operations.load());
137 scalar_operations.store(other.scalar_operations.load());
138 total_elements_processed.store(other.total_elements_processed.load());
139 }
140 return *this;
141 }
142
144 : total_operations(other.total_operations.load())
145 , simd_operations(other.simd_operations.load())
146 , scalar_operations(other.scalar_operations.load())
147 , total_elements_processed(other.total_elements_processed.load()) {}
148
150 if (this != &other) {
151 total_operations.store(other.total_operations.load());
152 simd_operations.store(other.simd_operations.load());
153 scalar_operations.store(other.scalar_operations.load());
154 total_elements_processed.store(other.total_elements_processed.load());
155 }
156 return *this;
157 }
158
163 double get_simd_utilization() const {
164 auto total = total_operations.load();
165 if (total == 0) {
166 return 0.0;
167 }
168 return (static_cast<double>(simd_operations.load()) / static_cast<double>(total)) * 100.0;
169 }
170
174 void reset() {
175 total_operations.store(0);
176 simd_operations.store(0);
177 scalar_operations.store(0);
179 }
180};
181
190public:
195
200 explicit simd_aggregator(const simd_config& config)
201 : config_(config)
202 , capabilities_(simd_capabilities::detect()) {}
203
209 common::Result<double> sum(const std::vector<double>& data) {
210 if (data.empty()) {
211 return common::Result<double>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute sum of empty data").to_common_error());
212 }
213
215 stats_.total_elements_processed += data.size();
216
217 double result = 0.0;
218
219 if (should_use_simd(data.size())) {
220 result = sum_simd(data);
222 } else {
223 result = sum_scalar(data);
225 }
226
227 return common::ok(result);
228 }
229
235 common::Result<double> mean(const std::vector<double>& data) {
236 if (data.empty()) {
237 return common::Result<double>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute mean of empty data").to_common_error());
238 }
239
240 auto sum_result = sum(data);
241 if (sum_result.is_err()) {
242 return sum_result;
243 }
244
245 return common::ok(sum_result.value() / static_cast<double>(data.size()));
246 }
247
253 common::Result<double> min(const std::vector<double>& data) {
254 if (data.empty()) {
255 return common::Result<double>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute min of empty data").to_common_error());
256 }
257
259 stats_.total_elements_processed += data.size();
260
261 double result = 0.0;
262
263 if (should_use_simd(data.size())) {
264 result = min_simd(data);
266 } else {
267 result = min_scalar(data);
269 }
270
271 return common::ok(result);
272 }
273
279 common::Result<double> max(const std::vector<double>& data) {
280 if (data.empty()) {
281 return common::Result<double>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute max of empty data").to_common_error());
282 }
283
285 stats_.total_elements_processed += data.size();
286
287 double result = 0.0;
288
289 if (should_use_simd(data.size())) {
290 result = max_simd(data);
292 } else {
293 result = max_scalar(data);
295 }
296
297 return common::ok(result);
298 }
299
305 common::Result<double> variance(const std::vector<double>& data) {
306 if (data.empty()) {
307 return common::Result<double>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute variance of empty data").to_common_error());
308 }
309
310 if (data.size() == 1) {
311 return common::ok(0.0);
312 }
313
314 auto mean_result = mean(data);
315 if (mean_result.is_err()) {
316 return mean_result;
317 }
318
319 double data_mean = mean_result.value();
320 double sum_sq_diff = 0.0;
321
322 for (const auto& val : data) {
323 double diff = val - data_mean;
324 sum_sq_diff += diff * diff;
325 }
326
327 return common::ok(sum_sq_diff / static_cast<double>(data.size() - 1));
328 }
329
335 common::Result<statistical_summary> compute_summary(const std::vector<double>& data) {
336 if (data.empty()) {
337 return common::Result<statistical_summary>::err(error_info(monitoring_error_code::invalid_argument, "Cannot compute summary of empty data").to_common_error());
338 }
339
341 summary.count = data.size();
342
343 // Compute sum
344 auto sum_result = sum(data);
345 if (sum_result.is_err()) {
346 return common::Result<statistical_summary>::err(error_info(monitoring_error_code::operation_failed, "Failed to compute sum").to_common_error());
347 }
348 summary.sum = sum_result.value();
349 summary.mean = summary.sum / static_cast<double>(summary.count);
350
351 // Compute min/max
352 auto min_result = min(data);
353 auto max_result = max(data);
354
355 if (min_result.is_err() || max_result.is_err()) {
356 return common::Result<statistical_summary>::err(error_info(monitoring_error_code::operation_failed, "Failed to compute min/max").to_common_error());
357 }
358
359 summary.min_val = min_result.value();
360 summary.max_val = max_result.value();
361
362 // Compute variance
363 if (summary.count > 1) {
364 auto var_result = variance(data);
365 if (var_result.is_ok()) {
366 summary.variance = var_result.value();
367 summary.std_dev = std::sqrt(summary.variance);
368 }
369 }
370
371 return common::ok(summary);
372 }
373
379 return capabilities_;
380 }
381
386 common::Result<bool> test_simd() {
387 // Create test data
388 std::vector<double> test_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
389
390 // Test sum
391 auto sum_result = sum(test_data);
392 if (sum_result.is_err()) {
393 return common::ok(false);
394 }
395
396 if (std::abs(sum_result.value() - 36.0) > 1e-10) {
397 return common::ok(false);
398 }
399
400 // Test mean
401 auto mean_result = mean(test_data);
402 if (mean_result.is_err()) {
403 return common::ok(false);
404 }
405
406 if (std::abs(mean_result.value() - 4.5) > 1e-10) {
407 return common::ok(false);
408 }
409
410 // Test min/max
411 auto min_result = min(test_data);
412 auto max_result = max(test_data);
413
414 if (min_result.is_err() || max_result.is_err()) {
415 return common::ok(false);
416 }
417
418 if (std::abs(min_result.value() - 1.0) > 1e-10 ||
419 std::abs(max_result.value() - 8.0) > 1e-10) {
420 return common::ok(false);
421 }
422
423 return common::ok(true);
424 }
425
431 return stats_;
432 }
433
438 stats_.reset();
439 }
440
441private:
442 bool should_use_simd(size_t data_size) const {
443 if (!config_.enable_simd) {
444 return false;
445 }
446
447 // Use SIMD only for sufficiently large datasets
448 if (data_size < config_.vector_size * 2) {
449 return false;
450 }
451
452 // Check if any SIMD is available
456 }
457
458 double sum_scalar(const std::vector<double>& data) const {
459 return std::accumulate(data.begin(), data.end(), 0.0);
460 }
461
462 double sum_simd(const std::vector<double>& data) const {
463#if defined(SIMD_AVX2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && defined(__AVX2__)
464 const size_t simd_width = 4; // AVX processes 4 doubles at a time
465 size_t simd_count = data.size() / simd_width;
466
467 __m256d sum_vec = _mm256_setzero_pd();
468
469 for (size_t i = 0; i < simd_count; ++i) {
470 __m256d vec = _mm256_loadu_pd(&data[i * simd_width]);
471 sum_vec = _mm256_add_pd(sum_vec, vec);
472 }
473
474 // Horizontal sum
475 alignas(32) double temp[4];
476 _mm256_storeu_pd(temp, sum_vec);
477 double result = temp[0] + temp[1] + temp[2] + temp[3];
478
479 // Handle remaining elements
480 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
481 result += data[i];
482 }
483
484 return result;
485#elif defined(SIMD_SSE2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
486 const size_t simd_width = 2; // SSE processes 2 doubles at a time
487 size_t simd_count = data.size() / simd_width;
488
489 __m128d sum_vec = _mm_setzero_pd();
490
491 for (size_t i = 0; i < simd_count; ++i) {
492 __m128d vec = _mm_loadu_pd(&data[i * simd_width]);
493 sum_vec = _mm_add_pd(sum_vec, vec);
494 }
495
496 alignas(16) double temp[2];
497 _mm_storeu_pd(temp, sum_vec);
498 double result = temp[0] + temp[1];
499
500 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
501 result += data[i];
502 }
503
504 return result;
505#elif defined(SIMD_NEON_AVAILABLE) && (defined(__aarch64__) || defined(_M_ARM64))
506 const size_t simd_width = 2; // NEON processes 2 doubles at a time
507 size_t simd_count = data.size() / simd_width;
508
509 float64x2_t sum_vec = vdupq_n_f64(0.0);
510
511 for (size_t i = 0; i < simd_count; ++i) {
512 float64x2_t vec = vld1q_f64(&data[i * simd_width]);
513 sum_vec = vaddq_f64(sum_vec, vec);
514 }
515
516 double result = vgetq_lane_f64(sum_vec, 0) + vgetq_lane_f64(sum_vec, 1);
517
518 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
519 result += data[i];
520 }
521
522 return result;
523#else
524 return sum_scalar(data);
525#endif
526 }
527
528 double min_scalar(const std::vector<double>& data) const {
529 return *std::min_element(data.begin(), data.end());
530 }
531
532 double min_simd(const std::vector<double>& data) const {
533#if defined(SIMD_AVX2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && defined(__AVX2__)
534 const size_t simd_width = 4;
535 size_t simd_count = data.size() / simd_width;
536
537 __m256d min_vec = _mm256_set1_pd(std::numeric_limits<double>::max());
538
539 for (size_t i = 0; i < simd_count; ++i) {
540 __m256d vec = _mm256_loadu_pd(&data[i * simd_width]);
541 min_vec = _mm256_min_pd(min_vec, vec);
542 }
543
544 alignas(32) double temp[4];
545 _mm256_storeu_pd(temp, min_vec);
546 double result = std::min({temp[0], temp[1], temp[2], temp[3]});
547
548 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
549 result = std::min(result, data[i]);
550 }
551
552 return result;
553#elif defined(SIMD_SSE2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
554 const size_t simd_width = 2;
555 size_t simd_count = data.size() / simd_width;
556
557 __m128d min_vec = _mm_set1_pd(std::numeric_limits<double>::max());
558
559 for (size_t i = 0; i < simd_count; ++i) {
560 __m128d vec = _mm_loadu_pd(&data[i * simd_width]);
561 min_vec = _mm_min_pd(min_vec, vec);
562 }
563
564 alignas(16) double temp[2];
565 _mm_storeu_pd(temp, min_vec);
566 double result = std::min(temp[0], temp[1]);
567
568 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
569 result = std::min(result, data[i]);
570 }
571
572 return result;
573#elif defined(SIMD_NEON_AVAILABLE) && (defined(__aarch64__) || defined(_M_ARM64))
574 const size_t simd_width = 2;
575 size_t simd_count = data.size() / simd_width;
576
577 float64x2_t min_vec = vdupq_n_f64(std::numeric_limits<double>::max());
578
579 for (size_t i = 0; i < simd_count; ++i) {
580 float64x2_t vec = vld1q_f64(&data[i * simd_width]);
581 min_vec = vminq_f64(min_vec, vec);
582 }
583
584 double result = std::min(vgetq_lane_f64(min_vec, 0), vgetq_lane_f64(min_vec, 1));
585
586 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
587 result = std::min(result, data[i]);
588 }
589
590 return result;
591#else
592 return min_scalar(data);
593#endif
594 }
595
596 double max_scalar(const std::vector<double>& data) const {
597 return *std::max_element(data.begin(), data.end());
598 }
599
600 double max_simd(const std::vector<double>& data) const {
601#if defined(SIMD_AVX2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && defined(__AVX2__)
602 const size_t simd_width = 4;
603 size_t simd_count = data.size() / simd_width;
604
605 __m256d max_vec = _mm256_set1_pd(std::numeric_limits<double>::lowest());
606
607 for (size_t i = 0; i < simd_count; ++i) {
608 __m256d vec = _mm256_loadu_pd(&data[i * simd_width]);
609 max_vec = _mm256_max_pd(max_vec, vec);
610 }
611
612 alignas(32) double temp[4];
613 _mm256_storeu_pd(temp, max_vec);
614 double result = std::max({temp[0], temp[1], temp[2], temp[3]});
615
616 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
617 result = std::max(result, data[i]);
618 }
619
620 return result;
621#elif defined(SIMD_SSE2_AVAILABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
622 const size_t simd_width = 2;
623 size_t simd_count = data.size() / simd_width;
624
625 __m128d max_vec = _mm_set1_pd(std::numeric_limits<double>::lowest());
626
627 for (size_t i = 0; i < simd_count; ++i) {
628 __m128d vec = _mm_loadu_pd(&data[i * simd_width]);
629 max_vec = _mm_max_pd(max_vec, vec);
630 }
631
632 alignas(16) double temp[2];
633 _mm_storeu_pd(temp, max_vec);
634 double result = std::max(temp[0], temp[1]);
635
636 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
637 result = std::max(result, data[i]);
638 }
639
640 return result;
641#elif defined(SIMD_NEON_AVAILABLE) && (defined(__aarch64__) || defined(_M_ARM64))
642 const size_t simd_width = 2;
643 size_t simd_count = data.size() / simd_width;
644
645 float64x2_t max_vec = vdupq_n_f64(std::numeric_limits<double>::lowest());
646
647 for (size_t i = 0; i < simd_count; ++i) {
648 float64x2_t vec = vld1q_f64(&data[i * simd_width]);
649 max_vec = vmaxq_f64(max_vec, vec);
650 }
651
652 double result = std::max(vgetq_lane_f64(max_vec, 0), vgetq_lane_f64(max_vec, 1));
653
654 for (size_t i = simd_count * simd_width; i < data.size(); ++i) {
655 result = std::max(result, data[i]);
656 }
657
658 return result;
659#else
660 return max_scalar(data);
661#endif
662 }
663
667};
668
673inline std::unique_ptr<simd_aggregator> make_simd_aggregator() {
674 return std::make_unique<simd_aggregator>();
675}
676
682inline std::unique_ptr<simd_aggregator> make_simd_aggregator(const simd_config& config) {
683 return std::make_unique<simd_aggregator>(config);
684}
685
690inline std::vector<simd_config> create_default_simd_configs() {
691 return {
692 // SIMD enabled with default settings
693 {.enable_simd = true, .vector_size = 8, .alignment = 32, .use_fma = true},
694 // SIMD disabled for comparison
695 {.enable_simd = false, .vector_size = 8, .alignment = 32, .use_fma = false},
696 // Small vector size for smaller datasets
697 {.enable_simd = true, .vector_size = 4, .alignment = 16, .use_fma = true},
698 // Large vector size for AVX-512
699 {.enable_simd = true, .vector_size = 16, .alignment = 64, .use_fma = true}
700 };
701}
702
703} // namespace kcenon::monitoring
SIMD-accelerated statistical aggregator.
simd_aggregator()
Default constructor with default configuration.
void reset_statistics()
Reset statistics.
common::Result< statistical_summary > compute_summary(const std::vector< double > &data)
Compute full statistical summary.
double min_scalar(const std::vector< double > &data) const
bool should_use_simd(size_t data_size) const
const simd_aggregator_statistics & get_statistics() const
Get aggregator statistics.
double max_scalar(const std::vector< double > &data) const
common::Result< double > mean(const std::vector< double > &data)
Calculate mean of elements.
double max_simd(const std::vector< double > &data) const
common::Result< bool > test_simd()
Self-test SIMD functionality.
common::Result< double > sum(const std::vector< double > &data)
Calculate sum of elements.
common::Result< double > variance(const std::vector< double > &data)
Calculate variance of elements.
double min_simd(const std::vector< double > &data) const
simd_aggregator(const simd_config &config)
Construct with configuration.
simd_aggregator_statistics stats_
double sum_simd(const std::vector< double > &data) const
const simd_capabilities & get_capabilities() const
Get SIMD capabilities.
common::Result< double > min(const std::vector< double > &data)
Find minimum value.
double sum_scalar(const std::vector< double > &data) const
common::Result< double > max(const std::vector< double > &data)
Find maximum value.
@ summary
Pre-calculated quantiles and count/sum.
std::unique_ptr< simd_aggregator > make_simd_aggregator()
Create a SIMD aggregator with default configuration.
std::vector< simd_config > create_default_simd_configs()
Create default SIMD configurations for different use cases.
Result pattern type definitions for monitoring system.
Extended error information with context.
Statistics for SIMD aggregator operations.
simd_aggregator_statistics(simd_aggregator_statistics &&other) noexcept
simd_aggregator_statistics(const simd_aggregator_statistics &other)
simd_aggregator_statistics & operator=(simd_aggregator_statistics &&other) noexcept
simd_aggregator_statistics & operator=(const simd_aggregator_statistics &other)
double get_simd_utilization() const
Get SIMD utilization rate.
SIMD capabilities detection.
static simd_capabilities detect()
Report the SIMD features that were enabled at compile time (selected by preprocessor checks, not probed at runtime).
Configuration for SIMD aggregator.
bool validate() const
Validate configuration.
bool enable_simd
Enable SIMD acceleration.
size_t alignment
Memory alignment for SIMD operations.
size_t vector_size
Nominal SIMD vector width; inputs shorter than twice this value are processed by the scalar path.
bool use_fma
Use fused multiply-add if available.