5 #include <gtest/gtest.h>
34 std::random_device rd;
35 std::mt19937 gen(rd());
36 std::normal_distribution<> dis(mean, stddev);
38 std::vector<double> samples;
39 samples.reserve(count);
41 for (
size_t i = 0; i < count; ++i) {
42 samples.push_back(dis(gen));
52 std::random_device rd;
53 std::mt19937 gen(rd());
54 std::uniform_real_distribution<> dis(min, max);
56 std::vector<double> samples;
57 samples.reserve(count);
59 for (
size_t i = 0; i < count; ++i) {
60 samples.push_back(dis(gen));
71 EXPECT_EQ(stats.
count(), 0);
72 EXPECT_EQ(stats.
mean(), 0.0);
76 std::vector<double> values = {1.0, 2.0, 3.0, 4.0, 5.0};
77 for (
double value : values) {
81 EXPECT_EQ(stats.
count(), 5);
82 EXPECT_DOUBLE_EQ(stats.
mean(), 3.0);
85 EXPECT_EQ(full_stats.count, 5);
86 EXPECT_DOUBLE_EQ(full_stats.mean, 3.0);
87 EXPECT_DOUBLE_EQ(full_stats.min_value, 1.0);
88 EXPECT_DOUBLE_EQ(full_stats.max_value, 5.0);
89 EXPECT_GT(full_stats.variance, 0.0);
90 EXPECT_GT(full_stats.std_deviation, 0.0);
97 auto samples = generate_normal_samples(10000, 100.0, 15.0);
99 for (
double sample : samples) {
106 EXPECT_NEAR(full_stats.mean, 100.0, 1.0);
107 EXPECT_NEAR(full_stats.std_deviation, 15.0, 1.0);
108 EXPECT_EQ(full_stats.count, 10000);
116 for (
int i = 1; i <= 100; ++i) {
120 double estimated_median = median_estimator.
get_quantile();
123 EXPECT_NEAR(estimated_median, 50.5, 5.0);
130 auto samples = generate_uniform_samples(1000, 0.0, 100.0);
132 for (
double sample : samples) {
139 EXPECT_NEAR(p95, 95.0, 10.0);
146 auto now = std::chrono::system_clock::now();
149 for (
int i = 0; i < 10; ++i) {
150 window.
add_value(
static_cast<double>(i), now + std::chrono::milliseconds(i * 10));
153 EXPECT_EQ(window.
size(), 10);
156 EXPECT_EQ(values.size(), 10);
159 for (
size_t i = 0; i < values.size(); ++i) {
160 EXPECT_EQ(values[i],
static_cast<double>(i));
167 auto now = std::chrono::system_clock::now();
170 for (
int i = 0; i < 5; ++i) {
171 window.
add_value(
static_cast<double>(i), now - std::chrono::milliseconds(200));
175 for (
int i = 10; i < 15; ++i) {
176 window.
add_value(
static_cast<double>(i), now);
182 EXPECT_EQ(values.size(), 5);
183 for (
size_t i = 0; i < values.size(); ++i) {
184 EXPECT_EQ(values[i],
static_cast<double>(i + 10));
197 std::vector<double> values = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
198 for (
double value : values) {
200 EXPECT_TRUE(result.is_ok());
205 EXPECT_EQ(stats.count, 10);
206 EXPECT_DOUBLE_EQ(stats.mean, 5.5);
207 EXPECT_DOUBLE_EQ(stats.min_value, 1.0);
208 EXPECT_DOUBLE_EQ(stats.max_value, 10.0);
209 EXPECT_GT(stats.std_deviation, 0.0);
212 EXPECT_GT(stats.percentiles.size(), 0);
219 for (
int i = 1; i <= 100; ++i) {
228 EXPECT_TRUE(p50_result);
229 EXPECT_TRUE(p95_result);
230 EXPECT_TRUE(p99_result);
233 EXPECT_NEAR(p50_result.value(), 50.0, 10.0);
234 EXPECT_NEAR(p95_result.value(), 95.0, 10.0);
235 EXPECT_NEAR(p99_result.value(), 99.0, 10.0);
246 for (
int i = 45; i <= 55; ++i) {
256 EXPECT_GT(stats.outlier_count, 0);
257 EXPECT_GT(stats.outliers.size(), 0);
264 for (
int i = 1; i <= 10; ++i) {
268 EXPECT_EQ(aggregator.
count(), 10);
273 EXPECT_EQ(aggregator.
count(), 0);
274 EXPECT_EQ(aggregator.
mean(), 0.0);
275 EXPECT_EQ(aggregator.
variance(), 0.0);
280 auto storage = std::make_shared<metric_storage>();
293 EXPECT_TRUE(add_result.is_ok());
296 for (
int i = 1; i <= 100; ++i) {
298 EXPECT_TRUE(result.is_ok());
303 EXPECT_TRUE(stats_result.is_ok());
305 auto stats = stats_result.value();
306 EXPECT_EQ(stats.count, 100);
307 EXPECT_GT(stats.mean, 0.0);
314 std::vector<std::string> metric_names = {
"cpu_usage",
"memory_usage",
"network_io"};
316 for (
const auto& metric_name : metric_names) {
323 EXPECT_TRUE(result.is_ok());
327 for (
const auto& metric_name : metric_names) {
328 for (
int i = 1; i <= 50; ++i) {
330 EXPECT_TRUE(result.is_ok());
336 EXPECT_EQ(configured.size(), 3);
338 for (
const auto& metric_name : metric_names) {
339 EXPECT_NE(std::find(configured.begin(), configured.end(), metric_name), configured.end());
344 auto storage = std::make_shared<metric_storage>();
356 auto samples = generate_normal_samples(1000, 100.0, 20.0);
357 for (
double sample : samples) {
363 EXPECT_TRUE(result.is_ok());
365 auto agg_result = result.value();
366 EXPECT_EQ(agg_result.source_metric,
"response_time");
367 EXPECT_EQ(agg_result.samples_processed, 1000);
368 EXPECT_GE(agg_result.processing_duration.count(), 0);
372 auto latest =
storage->get_latest_value(
"response_time_agg.mean");
373 EXPECT_TRUE(latest.is_ok());
374 EXPECT_NEAR(latest.value(), 100.0, 10.0);
386 EXPECT_FALSE(result.is_ok());
394 EXPECT_TRUE(result1.is_ok());
397 EXPECT_FALSE(result2.is_ok());
403 std::vector<double> x1 = {1, 2, 3, 4, 5};
404 std::vector<double> y1 = {2, 4, 6, 8, 10};
407 EXPECT_NEAR(corr1, 1.0, 0.001);
410 std::vector<double> x2 = {1, 2, 3, 4, 5};
411 std::vector<double> y2 = {5, 4, 3, 2, 1};
414 EXPECT_NEAR(corr2, -1.0, 0.001);
417 std::vector<double> x3 = {1, 2, 3, 4, 5};
418 std::vector<double> y3 = {3, 3, 3, 3, 3};
421 EXPECT_NEAR(corr3, 0.0, 0.001);
424 std::vector<double> x4 = {1, 2, 3};
425 std::vector<double> y4 = {1, 2, 3, 4, 5};
428 EXPECT_EQ(corr4, 0.0);
434 EXPECT_GT(rules.size(), 0);
437 for (
const auto& rule : rules) {
438 auto validation = rule.validate();
439 EXPECT_TRUE(validation.is_ok()) <<
"Rule validation failed for: " << rule.source_metric;
443 std::vector<std::string> expected_metrics = {
"response_time",
"request_count",
"error_count"};
445 for (
const auto& expected : expected_metrics) {
447 for (
const auto& rule : rules) {
448 if (rule.source_metric == expected) {
453 EXPECT_TRUE(found) <<
"Expected metric not found: " << expected;
463 auto validation = invalid_config.
validate();
464 EXPECT_FALSE(validation.is_ok());
471 validation = valid_config.
validate();
472 EXPECT_TRUE(validation.is_ok());
479 validation = invalid_rule.
validate();
480 EXPECT_FALSE(validation.is_ok());
487 const int num_threads = 4;
488 const int observations_per_thread = 1000;
489 std::vector<std::thread> threads;
492 for (
int t = 0; t < num_threads; ++t) {
493 threads.emplace_back([&aggregator, t, observations_per_thread]() {
494 std::random_device rd;
495 std::mt19937 gen(rd());
496 std::uniform_real_distribution<> dis(0.0, 100.0);
498 for (
int i = 0; i < observations_per_thread; ++i) {
499 double value = dis(gen);
503 std::this_thread::sleep_for(std::chrono::microseconds(1));
509 for (
auto& thread : threads) {
514 EXPECT_EQ(aggregator.
count(), num_threads * observations_per_thread);
517 EXPECT_GT(stats.mean, 0.0);
518 EXPECT_GT(stats.std_deviation, 0.0);
Metric aggregation processing pipeline.
Test suite for Phase 3 P2: Statistical aggregation functions.
std::vector< double > generate_normal_samples(size_t count, double mean=0.0, double stddev=1.0)
Generate normal distribution samples.
std::vector< double > generate_uniform_samples(size_t count, double min=0.0, double max=1.0)
Generate uniform distribution samples.
Processes metric streams and generates aggregated statistics.
common::VoidResult add_aggregation_rule(const aggregation_rule &rule)
Add an aggregation rule.
common::Result< stream_aggregation_result > force_aggregation(const std::string &metric_name)
Force aggregation for a metric.
common::VoidResult process_observation(const std::string &metric_name, double value)
Process an observation for a metric.
common::Result< streaming_statistics > get_current_statistics(const std::string &metric_name) const
Get current statistics for a metric.
std::vector< std::string > get_configured_metrics() const
Get list of configured metrics.
Time-windowed value collection.
void add_value(const T &value, time_point timestamp)
Add a value with timestamp.
size_t size() const
Get current size.
std::vector< T > get_values() const
Get all values in the window.
Welford's algorithm for computing streaming statistics.
double variance() const
Get running variance (sample variance).
void add_value(double value)
Add a value to the statistics.
double mean() const
Get running mean.
streaming_statistics get_statistics() const
Get full statistics.
size_t count() const
Get sample count.
P² algorithm for streaming quantile estimation.
void add_observation(double x)
Add an observation.
double get_quantile() const
Get the estimated quantile.
Full-featured streaming aggregation.
double mean() const
Get mean.
streaming_statistics get_statistics() const
Get full statistics.
common::VoidResult add_observation(double value)
Add an observation.
double variance() const
Get variance.
std::optional< double > get_percentile(double p) const
Get specific percentile.
void reset()
Reset the aggregator.
size_t count() const
Get observation count.
Memory-efficient metric storage with ring buffer backend.
double pearson_correlation(const std::vector< double > &x, const std::vector< double > &y)
Calculate Pearson correlation coefficient.
std::vector< aggregation_rule > create_standard_aggregation_rules()
Create standard aggregation rules for common metrics.
@ storage
Storage device sensor.
Streaming statistical aggregation for real-time metrics.
Configuration for metric aggregation.
std::chrono::milliseconds aggregation_interval
Aggregation interval.
bool compute_rate
Compute rate of change.
std::string target_metric_prefix
Prefix for aggregated metrics.
std::string source_metric
Source metric name.
bool detect_outliers
Enable outlier detection.
common::VoidResult validate() const
Validate the aggregation rule.
std::vector< double > percentiles
Percentiles to compute.
Configuration for stream aggregator.
std::chrono::milliseconds window_duration
common::VoidResult validate() const
Validate configuration.
bool enable_outlier_detection
TEST_F(StreamAggregationTest, OnlineStatisticsBasic)