Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
test_performance_monitoring.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
10#include <gtest/gtest.h>
12
13#include <chrono>
14#include <random>
15#include <set>
16#include <thread>
17#include <vector>
18
19using namespace kcenon::monitoring;
20
// Shared fixture for all performance-monitoring tests.
// NOTE(review): rendered lines 23-24 and 27 are missing from this extraction —
// they presumably declare the `profiler` and `monitor` members that every test
// below references; confirm against the original source file.
21class PerformanceMonitoringTest : public ::testing::Test {
 22 protected:
 25
// Enable the monitor before each test so samples are actually recorded.
 26 void SetUp() override {
 28 monitor.set_enabled(true);
 29 }
 30
// Release collector resources after each test.
 31 void TearDown() override { monitor.cleanup(); }
 32
 33 // Helper function to simulate work
// Sleeps for `duration`; gives scoped timers a measurable interval.
 34 void simulate_work(std::chrono::milliseconds duration) {
 35 std::this_thread::sleep_for(duration);
 36 }
 37};
38
39TEST_F(PerformanceMonitoringTest, RecordSingleSample) {
40 auto result = profiler.record_sample("test_operation",
41 std::chrono::nanoseconds(1000000), // 1ms
42 true);
43
44 ASSERT_TRUE(result.is_ok());
45 EXPECT_TRUE(result.value());
46
47 auto metrics_result = profiler.get_metrics("test_operation");
48 ASSERT_TRUE(metrics_result.is_ok());
49
50 auto metrics = metrics_result.value();
51 EXPECT_EQ(metrics.operation_name, "test_operation");
52 EXPECT_EQ(metrics.call_count, 1);
53 EXPECT_EQ(metrics.error_count, 0);
54 EXPECT_EQ(metrics.mean_duration.count(), 1000000);
55}
56
57TEST_F(PerformanceMonitoringTest, RecordMultipleSamples) {
58 std::vector<std::chrono::nanoseconds> durations = {
59 std::chrono::nanoseconds(1000000), // 1ms
60 std::chrono::nanoseconds(2000000), // 2ms
61 std::chrono::nanoseconds(3000000), // 3ms
62 std::chrono::nanoseconds(4000000), // 4ms
63 std::chrono::nanoseconds(5000000) // 5ms
64 };
65
66 for (const auto& duration : durations) {
67 profiler.record_sample("multi_operation", duration, true);
68 }
69
70 auto metrics_result = profiler.get_metrics("multi_operation");
71 ASSERT_TRUE(metrics_result.is_ok());
72
73 auto metrics = metrics_result.value();
74 EXPECT_EQ(metrics.call_count, 5);
75 EXPECT_EQ(metrics.error_count, 0);
76 EXPECT_EQ(metrics.min_duration.count(), 1000000);
77 EXPECT_EQ(metrics.max_duration.count(), 5000000);
78 EXPECT_EQ(metrics.median_duration.count(), 3000000);
79 EXPECT_EQ(metrics.mean_duration.count(), 3000000);
80}
81
82TEST_F(PerformanceMonitoringTest, RecordErrorSamples) {
83 profiler.record_sample("error_operation", std::chrono::nanoseconds(1000000), true);
84 profiler.record_sample("error_operation", std::chrono::nanoseconds(2000000), false);
85 profiler.record_sample("error_operation", std::chrono::nanoseconds(3000000), false);
86 profiler.record_sample("error_operation", std::chrono::nanoseconds(4000000), true);
87
88 auto metrics_result = profiler.get_metrics("error_operation");
89 ASSERT_TRUE(metrics_result.is_ok());
90
91 auto metrics = metrics_result.value();
92 EXPECT_EQ(metrics.call_count, 4);
93 EXPECT_EQ(metrics.error_count, 2);
94}
95
// NOTE(review): the TEST_F header (rendered line 96) was dropped by the
// extraction; by its body this is the scoped-timer test. Confirm the exact
// test name against the original source file.
// A scoped_timer records one sample for "scoped_operation" when it leaves scope.
 97 {
 98 scoped_timer timer(&profiler, "scoped_operation");
 99 simulate_work(std::chrono::milliseconds(10));
 100 }
 101
 102 auto metrics_result = profiler.get_metrics("scoped_operation");
 103 ASSERT_TRUE(metrics_result.is_ok());
 104
 105 auto metrics = metrics_result.value();
 106 EXPECT_EQ(metrics.call_count, 1);
 107 EXPECT_GE(metrics.mean_duration.count(), 10000000); // At least 10ms
 108}
109
110TEST_F(PerformanceMonitoringTest, ScopedTimerWithError) {
111 {
112 scoped_timer timer(&profiler, "error_scoped_operation");
113 simulate_work(std::chrono::milliseconds(5));
114 timer.mark_failed();
115 }
116
117 auto metrics_result = profiler.get_metrics("error_scoped_operation");
118 ASSERT_TRUE(metrics_result.is_ok());
119
120 auto metrics = metrics_result.value();
121 EXPECT_EQ(metrics.call_count, 1);
122 EXPECT_EQ(metrics.error_count, 1);
123}
124
125TEST_F(PerformanceMonitoringTest, PercentileCalculation) {
126 // Generate 100 samples with known distribution
127 for (int i = 1; i <= 100; ++i) {
128 profiler.record_sample("percentile_test",
129 std::chrono::nanoseconds(i * 1000000), // i ms
130 true);
131 }
132
133 auto metrics_result = profiler.get_metrics("percentile_test");
134 ASSERT_TRUE(metrics_result.is_ok());
135
136 auto metrics = metrics_result.value();
137 EXPECT_EQ(metrics.call_count, 100);
138
139 // P50 should be around 50ms
140 EXPECT_GE(metrics.median_duration.count(), 49000000);
141 EXPECT_LE(metrics.median_duration.count(), 51000000);
142
143 // P95 should be around 95ms
144 EXPECT_GE(metrics.p95_duration.count(), 94000000);
145 EXPECT_LE(metrics.p95_duration.count(), 96000000);
146
147 // P99 should be around 99ms
148 EXPECT_GE(metrics.p99_duration.count(), 98000000);
149 EXPECT_LE(metrics.p99_duration.count(), 100000000);
150}
151
152TEST_F(PerformanceMonitoringTest, ThroughputCalculation) {
153 // Record 10 operations each taking 100ms
154 for (int i = 0; i < 10; ++i) {
155 profiler.record_sample("throughput_test",
156 std::chrono::nanoseconds(100000000), // 100ms
157 true);
158 }
159
160 auto metrics_result = profiler.get_metrics("throughput_test");
161 ASSERT_TRUE(metrics_result.is_ok());
162
163 auto metrics = metrics_result.value();
164 EXPECT_EQ(metrics.call_count, 10);
165 // Throughput calculation depends on implementation
166 EXPECT_GE(metrics.throughput, 0.0);
167}
168
// NOTE(review): the TEST_F header (rendered line 169) was dropped by the
// extraction; this body exercises clear_samples(). Confirm the test name
// against the original source file.
 170 profiler.record_sample("clear_test", std::chrono::nanoseconds(1000000), true);
 171
 172 auto before_result = profiler.get_metrics("clear_test");
 173 ASSERT_TRUE(before_result.is_ok());
 174 EXPECT_EQ(before_result.value().call_count, 1);
 175
 176 auto result = profiler.clear_samples("clear_test");
 177 ASSERT_TRUE(result.is_ok());
 178 EXPECT_TRUE(result.value());
 179
 180 // After clear, metrics may still exist but with reset values
 181 auto after_result = profiler.get_metrics("clear_test");
 182 if (after_result.is_ok()) {
 183 EXPECT_EQ(after_result.value().call_count, 0);
 184 }
 185}
186
// NOTE(review): the TEST_F header (rendered line 187) was dropped by the
// extraction; this body verifies get_all_metrics() returns every recorded
// operation. Confirm the test name against the original source file.
 188 profiler.record_sample("op1", std::chrono::nanoseconds(1000000), true);
 189 profiler.record_sample("op2", std::chrono::nanoseconds(2000000), true);
 190 profiler.record_sample("op3", std::chrono::nanoseconds(3000000), true);
 191
 192 auto all_metrics = profiler.get_all_metrics();
 193 EXPECT_EQ(all_metrics.size(), 3);
 194
// Collect names into a set so the presence checks are order-independent.
 195 std::set<std::string> operation_names;
 196 for (const auto& metrics : all_metrics) {
 197 operation_names.insert(metrics.operation_name);
 198 }
 199
 200 EXPECT_TRUE(operation_names.count("op1") > 0);
 201 EXPECT_TRUE(operation_names.count("op2") > 0);
 202 EXPECT_TRUE(operation_names.count("op3") > 0);
 203}
204
205TEST_F(PerformanceMonitoringTest, ProfilerEnableDisable) {
206 profiler.set_enabled(false);
207
208 auto result = profiler.record_sample("disabled_test", std::chrono::nanoseconds(1000000), true);
209 ASSERT_TRUE(result.is_ok());
210
211 // Sample should not be recorded when disabled
212 auto metrics_result = profiler.get_metrics("disabled_test");
213 ASSERT_FALSE(metrics_result.is_ok());
214
215 profiler.set_enabled(true);
216 profiler.record_sample("enabled_test", std::chrono::nanoseconds(1000000), true);
217
218 metrics_result = profiler.get_metrics("enabled_test");
219 ASSERT_TRUE(metrics_result.is_ok());
220}
221
// NOTE(review): the TEST_F header (rendered line 222) was dropped by the
// extraction; this body sanity-checks system_monitor::get_current_metrics().
// Confirm the test name against the original source file.
 223 system_monitor sys_monitor;
 224
 225 auto result = sys_monitor.get_current_metrics();
 226 ASSERT_TRUE(result.is_ok());
 227
 228 auto metrics = result.value();
 229
 230 // Basic sanity checks
// Usage percentages must lie in [0, 100]; absolute usage and thread count
// must be positive on a running process.
 231 EXPECT_GE(metrics.cpu_usage_percent, 0.0);
 232 EXPECT_LE(metrics.cpu_usage_percent, 100.0);
 233
 234 EXPECT_GE(metrics.memory_usage_percent, 0.0);
 235 EXPECT_LE(metrics.memory_usage_percent, 100.0);
 236
 237 EXPECT_GT(metrics.memory_usage_bytes, 0);
 238 EXPECT_GT(metrics.thread_count, 0);
 239}
240
241TEST_F(PerformanceMonitoringTest, SystemMonitoringHistory) {
242 system_monitor sys_monitor;
243
244 auto start_result = sys_monitor.start_monitoring(std::chrono::milliseconds(100));
245 ASSERT_TRUE(start_result.is_ok());
246
247 // Poll for samples instead of sleeping for fixed duration
248 auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(5);
249 std::vector<system_metrics> history;
250 while (std::chrono::steady_clock::now() < deadline) {
251 history = sys_monitor.get_history(std::chrono::seconds(1));
252 if (history.size() >= 2) {
253 break;
254 }
255 std::this_thread::yield();
256 }
257
258 EXPECT_GE(history.size(), 2); // Should have at least 2 samples (relaxed for CI)
259
260 // Check that timestamps are increasing
261 for (size_t i = 1; i < history.size(); ++i) {
262 EXPECT_GT(history[i].timestamp, history[i - 1].timestamp);
263 }
264
265 auto stop_result = sys_monitor.stop_monitoring();
266 ASSERT_TRUE(stop_result.is_ok());
267}
268
269TEST_F(PerformanceMonitoringTest, PerformanceMonitorCollect) {
270 // Record some performance samples
271 monitor.get_profiler().record_sample("collect_test", std::chrono::nanoseconds(5000000), true);
272
273 auto init_result = monitor.initialize();
274 ASSERT_TRUE(init_result.is_ok());
275
276 auto snapshot_result = monitor.collect();
277 ASSERT_TRUE(snapshot_result.is_ok());
278
279 auto snapshot = snapshot_result.value();
280 EXPECT_EQ(snapshot.source_id, "performance_monitor");
281 EXPECT_GT(snapshot.metrics.size(), 0);
282
283 // Check for expected metrics
284 bool found_perf_metric = false;
285
286 for (const auto& metric : snapshot.metrics) {
287 if (metric.name.find("collect_test") != std::string::npos) {
288 found_perf_metric = true;
289 }
290 }
291
292 EXPECT_TRUE(found_perf_metric);
293 // System metrics may or may not be present depending on platform
294}
295
// NOTE(review): the TEST_F header (rendered line 296) was dropped by the
// extraction; this body forces every threshold to zero so check_thresholds()
// must report a breach. Confirm the test name against the original source.
 297 monitor.set_cpu_threshold(0.0); // Set impossibly low threshold
 298 monitor.set_memory_threshold(0.0);
 299 monitor.set_latency_threshold(std::chrono::milliseconds(0));
 300
 301 // Record a sample to trigger latency threshold
 302 monitor.get_profiler().record_sample("threshold_test", std::chrono::nanoseconds(1000000), true);
 303
 304 auto init_result = monitor.initialize();
 305 ASSERT_TRUE(init_result.is_ok());
 306
 307 auto threshold_result = monitor.check_thresholds();
 308 ASSERT_TRUE(threshold_result.is_ok());
 309 EXPECT_TRUE(threshold_result.value()); // Should exceed thresholds
 310}
311
312TEST_F(PerformanceMonitoringTest, GlobalPerformanceMonitor) {
313 auto& global = global_performance_monitor();
314
315 {
316 PERF_TIMER("global_test_operation");
317 simulate_work(std::chrono::milliseconds(10));
318 }
319
320 auto metrics_result = global.get_profiler().get_metrics("global_test_operation");
321 ASSERT_TRUE(metrics_result.is_ok());
322
323 auto metrics = metrics_result.value();
324 EXPECT_EQ(metrics.call_count, 1);
325 EXPECT_GE(metrics.mean_duration.count(), 10000000);
326}
327
328TEST_F(PerformanceMonitoringTest, PerformanceBenchmark) {
329 performance_benchmark benchmark("test_benchmark");
330 benchmark.set_iterations(100);
331 benchmark.set_warmup_iterations(10);
332
333 auto result = benchmark.run("simple_operation", []() {
334 // Simulate some simple work
335 volatile int sum = 0;
336 for (int i = 0; i < 1000; ++i) {
337 sum += i;
338 }
339 });
340
341 ASSERT_TRUE(result.is_ok());
342
343 auto metrics = result.value();
344 EXPECT_EQ(metrics.call_count, 100);
345 EXPECT_GT(metrics.mean_duration.count(), 0);
346 EXPECT_GE(metrics.max_duration.count(), metrics.min_duration.count());
347}
348
349TEST_F(PerformanceMonitoringTest, BenchmarkComparison) {
350 performance_benchmark benchmark("comparison_benchmark");
351 benchmark.set_iterations(50);
352 benchmark.set_warmup_iterations(5);
353
354 auto result = benchmark.compare(
355 "fast_operation",
356 []() {
357 volatile int sum = 0;
358 for (int i = 0; i < 100; ++i) {
359 sum += i;
360 }
361 },
362 "slow_operation",
363 []() {
364 volatile int sum = 0;
365 for (int i = 0; i < 10000; ++i) {
366 sum += i;
367 }
368 });
369
370 ASSERT_TRUE(result.is_ok());
371
372 auto [fast_metrics, slow_metrics] = result.value();
373
374 EXPECT_EQ(fast_metrics.call_count, 50);
375 EXPECT_EQ(slow_metrics.call_count, 50);
376
377 // Fast operation should be faster than slow operation
378 EXPECT_LT(fast_metrics.mean_duration, slow_metrics.mean_duration);
379}
380
// NOTE(review): the TEST_F header (rendered line 381) was dropped by the
// extraction; this body verifies the rolling-sample cap set_max_samples().
// Confirm the test name against the original source file.
 382 profiler.set_max_samples(10);
 383
 384 // Record 20 samples
 385 for (int i = 0; i < 20; ++i) {
 386 profiler.record_sample("limit_test", std::chrono::nanoseconds(i * 1000000), true);
 387 }
 388
 389 auto metrics_result = profiler.get_metrics("limit_test");
 390 ASSERT_TRUE(metrics_result.is_ok());
 391
 392 auto metrics = metrics_result.value();
 393 // Call count should still be 20
 394 EXPECT_EQ(metrics.call_count, 20);
 395
 396 // But only last 10 samples should be in statistics
 397 // The minimum should be from sample 10 (10ms), not sample 0 (0ms)
 398 EXPECT_GE(metrics.min_duration.count(), 10000000);
 399}
400
401TEST_F(PerformanceMonitoringTest, ConcurrentRecording) {
402 const int num_threads = 10;
403 const int samples_per_thread = 100;
404
405 std::vector<std::thread> threads;
406
407 for (int t = 0; t < num_threads; ++t) {
408 threads.emplace_back([this, t, samples_per_thread]() {
409 for (int i = 0; i < samples_per_thread; ++i) {
410 profiler.record_sample("concurrent_test",
411 std::chrono::nanoseconds((t + 1) * 1000000), true);
412 }
413 });
414 }
415
416 for (auto& thread : threads) {
417 thread.join();
418 }
419
420 auto metrics_result = profiler.get_metrics("concurrent_test");
421 ASSERT_TRUE(metrics_result.is_ok());
422
423 auto metrics = metrics_result.value();
424 EXPECT_EQ(metrics.call_count, num_threads * samples_per_thread);
425}
426
427// =========================================================================
428// Tagged Metric Tests
429// =========================================================================
430
431TEST_F(PerformanceMonitoringTest, RecordCounterWithoutTags) {
432 auto result = monitor.record_counter("requests_total", 1.0);
433 ASSERT_TRUE(result.is_ok());
434
435 result = monitor.record_counter("requests_total", 2.0);
436 ASSERT_TRUE(result.is_ok());
437
438 auto tagged_metrics = monitor.get_all_tagged_metrics();
439 ASSERT_EQ(tagged_metrics.size(), 1);
440
441 EXPECT_EQ(tagged_metrics[0].name, "requests_total");
442 EXPECT_EQ(tagged_metrics[0].value, 3.0); // 1.0 + 2.0
443 EXPECT_EQ(tagged_metrics[0].type, recorded_metric_type::counter);
444 EXPECT_TRUE(tagged_metrics[0].tags.empty());
445}
446
447TEST_F(PerformanceMonitoringTest, RecordCounterWithTags) {
448 tag_map tags1 = {{"method", "GET"}, {"endpoint", "/api/users"}};
449 tag_map tags2 = {{"method", "POST"}, {"endpoint", "/api/users"}};
450
451 auto result = monitor.record_counter("http_requests", 1.0, tags1);
452 ASSERT_TRUE(result.is_ok());
453
454 result = monitor.record_counter("http_requests", 1.0, tags2);
455 ASSERT_TRUE(result.is_ok());
456
457 result = monitor.record_counter("http_requests", 1.0, tags1);
458 ASSERT_TRUE(result.is_ok());
459
460 auto tagged_metrics = monitor.get_all_tagged_metrics();
461 ASSERT_EQ(tagged_metrics.size(), 2); // Two different tag combinations
462
463 // Find GET metric
464 auto get_it = std::find_if(tagged_metrics.begin(), tagged_metrics.end(),
465 [](const tagged_metric& m) {
466 return m.tags.count("method") && m.tags.at("method") == "GET";
467 });
468 ASSERT_NE(get_it, tagged_metrics.end());
469 EXPECT_EQ(get_it->value, 2.0); // Two GET requests
470
471 // Find POST metric
472 auto post_it = std::find_if(tagged_metrics.begin(), tagged_metrics.end(),
473 [](const tagged_metric& m) {
474 return m.tags.count("method") && m.tags.at("method") == "POST";
475 });
476 ASSERT_NE(post_it, tagged_metrics.end());
477 EXPECT_EQ(post_it->value, 1.0); // One POST request
478}
479
480TEST_F(PerformanceMonitoringTest, RecordGaugeWithTags) {
481 tag_map tags = {{"pool", "database"}, {"host", "db-primary"}};
482
483 auto result = monitor.record_gauge("active_connections", 10.0, tags);
484 ASSERT_TRUE(result.is_ok());
485
486 result = monitor.record_gauge("active_connections", 15.0, tags);
487 ASSERT_TRUE(result.is_ok());
488
489 auto tagged_metrics = monitor.get_all_tagged_metrics();
490 ASSERT_EQ(tagged_metrics.size(), 1);
491
492 EXPECT_EQ(tagged_metrics[0].name, "active_connections");
493 EXPECT_EQ(tagged_metrics[0].value, 15.0); // Gauge replaces value
494 EXPECT_EQ(tagged_metrics[0].type, recorded_metric_type::gauge);
495 EXPECT_EQ(tagged_metrics[0].tags.size(), 2);
496 EXPECT_EQ(tagged_metrics[0].tags.at("pool"), "database");
497}
498
499TEST_F(PerformanceMonitoringTest, RecordHistogramWithTags) {
500 tag_map tags = {{"service", "auth"}, {"operation", "login"}};
501
502 for (double i = 1.0; i <= 5.0; i += 1.0) {
503 auto result = monitor.record_histogram("request_duration_ms", i * 100.0, tags);
504 ASSERT_TRUE(result.is_ok());
505 }
506
507 auto tagged_metrics = monitor.get_all_tagged_metrics();
508 ASSERT_EQ(tagged_metrics.size(), 1);
509
510 EXPECT_EQ(tagged_metrics[0].name, "request_duration_ms");
511 EXPECT_EQ(tagged_metrics[0].value, 500.0); // Last value
512 EXPECT_EQ(tagged_metrics[0].type, recorded_metric_type::histogram);
513}
514
515TEST_F(PerformanceMonitoringTest, TaggedMetricsInCollect) {
516 tag_map tags = {{"method", "GET"}, {"status", "200"}};
517 monitor.record_counter("http_requests", 5.0, tags);
518
519 auto init_result = monitor.initialize();
520 ASSERT_TRUE(init_result.is_ok());
521
522 auto snapshot_result = monitor.collect();
523 ASSERT_TRUE(snapshot_result.is_ok());
524
525 auto snapshot = snapshot_result.value();
526
527 // Find our tagged metric in the snapshot
528 bool found = false;
529 for (const auto& metric : snapshot.metrics) {
530 if (metric.name == "http_requests" && !metric.tags.empty()) {
531 found = true;
532 EXPECT_EQ(metric.value, 5.0);
533 EXPECT_EQ(metric.tags.size(), 2);
534 EXPECT_EQ(metric.tags.at("method"), "GET");
535 EXPECT_EQ(metric.tags.at("status"), "200");
536 break;
537 }
538 }
539 EXPECT_TRUE(found);
540}
541
// NOTE(review): the TEST_F header (rendered line 542) was dropped by the
// extraction; this body verifies clear_all_metrics() empties the tagged-metric
// store. Confirm the test name against the original source file.
 543 monitor.record_counter("counter1", 1.0);
 544 monitor.record_gauge("gauge1", 10.0);
 545
 546 EXPECT_EQ(monitor.get_all_tagged_metrics().size(), 2);
 547
 548 monitor.clear_all_metrics();
 549
 550 EXPECT_EQ(monitor.get_all_tagged_metrics().size(), 0);
 551}
552
553TEST_F(PerformanceMonitoringTest, ResetClearsTaggedMetrics) {
554 monitor.record_counter("test_counter", 1.0);
555
556 EXPECT_EQ(monitor.get_all_tagged_metrics().size(), 1);
557
558 monitor.reset();
559
560 EXPECT_EQ(monitor.get_all_tagged_metrics().size(), 0);
561}
562
563TEST_F(PerformanceMonitoringTest, EmptyMetricNameRejected) {
564 auto result = monitor.record_counter("", 1.0);
565 EXPECT_FALSE(result.is_ok());
566}
567
// NOTE(review): the TEST_F header (rendered line 568) was dropped by the
// extraction; this body verifies that metric identity is independent of the
// order tags are listed in. Confirm the test name against the original source.
 569 // Tags with same keys in different order should produce same metric
 570 tag_map tags1 = {{"a", "1"}, {"b", "2"}};
 571 tag_map tags2 = {{"b", "2"}, {"a", "1"}};
 572
 573 monitor.record_counter("test_metric", 1.0, tags1);
 574 monitor.record_counter("test_metric", 1.0, tags2);
 575
 576 auto tagged_metrics = monitor.get_all_tagged_metrics();
 577 ASSERT_EQ(tagged_metrics.size(), 1); // Should be same metric
 578 EXPECT_EQ(tagged_metrics[0].value, 2.0); // Both increments combined
 579}
void simulate_work(std::chrono::milliseconds duration)
void set_warmup_iterations(std::uint32_t warmup)
Set number of warmup iterations.
common::Result< performance_metrics > run(const std::string &operation_name, Func &&func)
Run a benchmark.
void set_iterations(std::uint32_t iterations)
Set number of benchmark iterations.
common::Result< std::pair< performance_metrics, performance_metrics > > compare(const std::string &operation1_name, Func1 &&func1, const std::string &operation2_name, Func2 &&func2)
Compare two operations.
Performance monitor combining profiling and system monitoring.
common::VoidResult cleanup() override
Cleanup collector resources.
common::VoidResult set_enabled(bool enable) override
Enable or disable the collector.
Performance profiler for code sections.
void clear_all_samples()
Clear all samples.
std::vector< system_metrics > get_history(std::chrono::seconds duration=std::chrono::seconds(60)) const
Get historical metrics.
common::Result< bool > start_monitoring(std::chrono::milliseconds interval=std::chrono::milliseconds(1000))
Start monitoring system resources.
common::Result< bool > stop_monitoring()
Stop monitoring.
common::Result< system_metrics > get_current_metrics() const
Get current system metrics.
@ timer
StatsD-specific timer metric.
performance_monitor & global_performance_monitor()
Global performance monitor instance.
std::unordered_map< std::string, std::string > tag_map
Type alias for metric tags/labels.
Performance monitoring and profiling implementation.
#define PERF_TIMER(operation_name)
Helper macro for timing code sections.
Basic metric structure for interface compatibility.
std::variant< double, int64_t, std::string > value
std::unordered_map< std::string, std::string > tags
Represents a metric value with associated tags.
TEST_F(PerformanceMonitoringTest, RecordSingleSample)