Database System 0.1.0
Advanced C++20 Database System with Multi-Backend Support
Loading...
Searching...
No Matches
performance_monitor.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
6#include <iostream>
7#include <sstream>
8#include <algorithm>
9#include <iomanip>
10#include <functional>
11
13{
14 // performance_alert implementation
15 performance_alert::performance_alert(alert_type type, const std::string& message,
16 std::chrono::steady_clock::time_point timestamp)
17 : type_(type), message_(message), timestamp_(timestamp)
18 {
19 }
20
21 // query_timer implementation
22
23 // New constructor with explicit performance_monitor (Sprint 3, Task 3.2)
24 query_timer::query_timer(const std::string& query, database_types db_type,
25 std::shared_ptr<performance_monitor> monitor)
26 : start_time_(std::chrono::steady_clock::now())
27 , monitor_(std::move(monitor))
28 {
29 metrics_.query_hash = std::to_string(std::hash<std::string>{}(query));
31 metrics_.db_type = db_type;
32 metrics_.success = true; // Assume success unless error is set
33 }
34
35
37 {
38 metrics_.end_time = std::chrono::steady_clock::now();
39 metrics_.execution_time = std::chrono::duration_cast<std::chrono::microseconds>(
41
42 // Use injected monitor if available
43 if (monitor_) {
44 monitor_->record_query_metrics(metrics_);
45 }
46 // Note: If monitor is not injected, metrics are not recorded.
47 // Always use query_timer with a performance_monitor instance.
48 }
49
50 void query_timer::set_error(const std::string& error)
51 {
52 metrics_.success = false;
53 metrics_.error_message = error;
54 }
55
56
58 {
59 // Disable background thread in sanitizer builds to avoid deadlocks
60 // TSan/ASan/UBSan heavily instrument condition_variable operations causing extreme slowdowns
61 // GCC defines __SANITIZE_THREAD__, __SANITIZE_ADDRESS__
62 // Clang defines __has_feature(thread_sanitizer), __has_feature(address_sanitizer)
63
64 // Check for sanitizers at compile time
65 bool is_sanitizer_build = false;
66#if defined(__SANITIZE_THREAD__) || defined(__SANITIZE_ADDRESS__)
67 // GCC sanitizer detected
68 is_sanitizer_build = true;
69#endif
70
71#ifdef __clang__
72 // Clang-specific feature detection
73# if __has_feature(thread_sanitizer) || __has_feature(address_sanitizer) || __has_feature(undefined_behavior_sanitizer)
74 is_sanitizer_build = true;
75# endif
76#endif
77
78 if (is_sanitizer_build) {
79 // Sanitizer build detected - disable background thread
80 cleanup_running_ = false;
81 std::cout << "performance_monitor: background cleanup thread disabled (sanitizer build)" << std::endl;
82 } else {
83 // Normal build - start background cleanup thread
85 }
86 }
87
89 {
90 cleanup_running_ = false;
91 cleanup_cv_.notify_all();
92 if (cleanup_thread_.joinable()) {
93 cleanup_thread_.join();
94 }
95 }
96
97 void performance_monitor::set_alert_thresholds(double error_rate_threshold,
98 std::chrono::microseconds latency_threshold)
99 {
100 error_rate_threshold_ = error_rate_threshold;
101 latency_threshold_ = latency_threshold;
102 }
103
105 {
106 if (!monitoring_enabled_) return;
107
108 bool slow_query = false;
109 {
110 std::lock_guard<std::mutex> lock(metrics_mutex_);
111 query_history_.push_back(metrics);
112
113 // Update query patterns
114 query_patterns_[metrics.query_hash]++;
115
116 auto& avg_time = query_avg_times_[metrics.query_hash];
117 auto count = query_patterns_[metrics.query_hash];
118 avg_time = std::chrono::microseconds(
119 (avg_time.count() * (count - 1) + metrics.execution_time.count()) / count);
120
121 // Check for slow queries (evaluate under lock, act outside)
122 slow_query = (metrics.execution_time > latency_threshold_);
123 }
124
125 // emit_alert and check_thresholds acquire metrics_mutex_ internally,
126 // so they must be called outside the lock scope to avoid deadlock
127 if (slow_query) {
129 "Slow query detected: " + std::to_string(metrics.execution_time.count()) + "μs");
130 }
131
133 }
134
136 {
137 if (!monitoring_enabled_) return;
138
139 bool pool_exhaustion = false;
140 size_t active = 0;
141 size_t total = 0;
142 {
143 std::lock_guard<std::mutex> lock(metrics_mutex_);
144 auto& stored_metrics = connection_metrics_[db_type];
145 stored_metrics.total_connections.store(metrics.total_connections.load());
146 stored_metrics.active_connections.store(metrics.active_connections.load());
147 stored_metrics.idle_connections.store(metrics.idle_connections.load());
148 stored_metrics.failed_connections.store(metrics.failed_connections.load());
149 stored_metrics.avg_acquisition_time.store(metrics.avg_acquisition_time.load());
150 stored_metrics.max_acquisition_time.store(metrics.max_acquisition_time.load());
151 stored_metrics.last_update = metrics.last_update;
152
153 // Check for connection pool exhaustion (evaluate under lock, act outside)
154 total = metrics.total_connections.load();
155 active = metrics.active_connections.load();
156 pool_exhaustion = (total > 0 && (double(active) / total) > 0.9);
157 }
158
159 if (pool_exhaustion) {
161 "Connection pool utilization high: " + std::to_string(active) + "/" + std::to_string(total));
162 }
163 }
164
165 void performance_monitor::record_slow_query(const std::string& query, std::chrono::microseconds execution_time)
166 {
167 if (!monitoring_enabled_) return;
168
170 "Slow query: " + query.substr(0, 100) + "... (" +
171 std::to_string(execution_time.count()) + "μs)");
172 }
173
175 {
176 std::lock_guard<std::mutex> lock(metrics_mutex_);
177
178 performance_summary summary;
179 summary.measurement_start = std::chrono::steady_clock::now() - retention_period_;
180 summary.measurement_end = std::chrono::steady_clock::now();
181
182 if (query_history_.empty()) {
183 return summary;
184 }
185
186 // Filter recent queries
187 auto recent_start = summary.measurement_start;
188 auto recent_queries = std::count_if(query_history_.begin(), query_history_.end(),
189 [recent_start](const query_metrics& m) {
190 return m.start_time >= recent_start;
191 });
192
193 summary.total_queries = recent_queries;
194
195 // Calculate metrics from recent queries
196 std::chrono::microseconds total_time{0};
197 std::chrono::microseconds min_time{std::chrono::microseconds::max()};
198 std::chrono::microseconds max_time{0};
199 size_t successful = 0;
200
201 for (const auto& metrics : query_history_) {
202 if (metrics.start_time < recent_start) continue;
203
204 total_time += metrics.execution_time;
205 min_time = std::min(min_time, metrics.execution_time);
206 max_time = std::max(max_time, metrics.execution_time);
207
208 if (metrics.success) {
209 successful++;
210 } else {
211 summary.error_counts[metrics.error_message]++;
212 }
213 }
214
215 summary.successful_queries = successful;
216 summary.failed_queries = summary.total_queries - successful;
217
218 if (summary.total_queries > 0) {
219 summary.avg_query_time = total_time / summary.total_queries;
220 summary.min_query_time = min_time;
221 summary.max_query_time = max_time;
222 summary.error_rate = double(summary.failed_queries) / summary.total_queries;
223 }
224
225 // Calculate QPS
226 auto duration_seconds = std::chrono::duration_cast<std::chrono::seconds>(
227 summary.measurement_end - summary.measurement_start).count();
228 if (duration_seconds > 0) {
229 summary.queries_per_second = double(summary.total_queries) / duration_seconds;
230 }
231
232 // Connection metrics summary
233 size_t total_connections = 0;
234 size_t active_connections = 0;
235 for (const auto& [db_type, conn_metrics] : connection_metrics_) {
236 total_connections += conn_metrics.total_connections.load();
237 active_connections += conn_metrics.active_connections.load();
238 }
239
240 summary.total_connections = total_connections;
241 summary.active_connections = active_connections;
242 if (total_connections > 0) {
243 summary.connection_utilization = double(active_connections) / total_connections;
244 }
245
246 return summary;
247 }
248
250 {
251 std::lock_guard<std::mutex> lock(metrics_mutex_);
252
253 performance_summary summary;
254 summary.measurement_start = std::chrono::steady_clock::now() - retention_period_;
255 summary.measurement_end = std::chrono::steady_clock::now();
256
257 // Filter by database type and time
258 auto recent_start = summary.measurement_start;
259 auto db_queries = std::count_if(query_history_.begin(), query_history_.end(),
260 [recent_start, db_type](const query_metrics& m) {
261 return m.start_time >= recent_start && m.db_type == db_type;
262 });
263
264 summary.total_queries = db_queries;
265
266 // Calculate metrics for specific database type
267 std::chrono::microseconds total_time{0};
268 size_t successful = 0;
269
270 for (const auto& metrics : query_history_) {
271 if (metrics.start_time < recent_start || metrics.db_type != db_type) continue;
272
273 total_time += metrics.execution_time;
274 if (metrics.success) {
275 successful++;
276 }
277 }
278
279 summary.successful_queries = successful;
280 summary.failed_queries = summary.total_queries - successful;
281
282 if (summary.total_queries > 0) {
283 summary.avg_query_time = total_time / summary.total_queries;
284 summary.error_rate = double(summary.failed_queries) / summary.total_queries;
285 }
286
287 // Connection metrics for specific database
288 auto it = connection_metrics_.find(db_type);
289 if (it != connection_metrics_.end()) {
290 summary.total_connections = it->second.total_connections.load();
291 summary.active_connections = it->second.active_connections.load();
292 if (summary.total_connections > 0) {
293 summary.connection_utilization = double(summary.active_connections) / summary.total_connections;
294 }
295 }
296
297 return summary;
298 }
299
300 std::vector<query_metrics> performance_monitor::get_recent_queries(std::chrono::minutes window) const
301 {
302 std::lock_guard<std::mutex> lock(metrics_mutex_);
303
304 auto cutoff = std::chrono::steady_clock::now() - window;
305 std::vector<query_metrics> recent;
306
307 std::copy_if(query_history_.begin(), query_history_.end(), std::back_inserter(recent),
308 [cutoff](const query_metrics& m) {
309 return m.start_time >= cutoff;
310 });
311
312 return recent;
313 }
314
315 std::vector<query_metrics> performance_monitor::get_slow_queries(std::chrono::microseconds threshold) const
316 {
317 std::lock_guard<std::mutex> lock(metrics_mutex_);
318
319 std::vector<query_metrics> slow_queries;
320 std::copy_if(query_history_.begin(), query_history_.end(), std::back_inserter(slow_queries),
321 [threshold](const query_metrics& m) {
322 return m.execution_time >= threshold;
323 });
324
325 return slow_queries;
326 }
327
328 void performance_monitor::update_connection_count(database_types db_type, size_t active, size_t total)
329 {
330 if (!monitoring_enabled_) return;
331
332 std::lock_guard<std::mutex> lock(metrics_mutex_);
333 auto& metrics = connection_metrics_[db_type];
334 metrics.active_connections = active;
335 metrics.total_connections = total;
336 metrics.last_update = std::chrono::steady_clock::now();
337 }
338
340 {
341 std::lock_guard<std::mutex> lock(metrics_mutex_);
342 auto it = connection_metrics_.find(db_type);
343 if (it != connection_metrics_.end()) {
344 connection_metrics result;
345 result.total_connections.store(it->second.total_connections.load());
346 result.active_connections.store(it->second.active_connections.load());
347 result.idle_connections.store(it->second.idle_connections.load());
348 result.failed_connections.store(it->second.failed_connections.load());
349 result.avg_acquisition_time.store(it->second.avg_acquisition_time.load());
350 result.max_acquisition_time.store(it->second.max_acquisition_time.load());
351 result.last_update = it->second.last_update;
352 return result;
353 }
354 return connection_metrics{};
355 }
356
357 void performance_monitor::register_alert_handler(std::function<void(const performance_alert&)> handler)
358 {
359 std::lock_guard<std::mutex> lock(handlers_mutex_);
360 alert_handlers_.push_back(handler);
361 }
362
363 std::vector<performance_alert> performance_monitor::get_recent_alerts(std::chrono::minutes window) const
364 {
365 std::lock_guard<std::mutex> lock(metrics_mutex_);
366
367 auto cutoff = std::chrono::steady_clock::now() - window;
368 std::vector<performance_alert> recent;
369
370 std::copy_if(alerts_.begin(), alerts_.end(), std::back_inserter(recent),
371 [cutoff](const performance_alert& alert) {
372 return alert.timestamp() >= cutoff;
373 });
374
375 return recent;
376 }
377
379 {
380 std::lock_guard<std::mutex> lock(metrics_mutex_);
381 query_history_.clear();
382 connection_metrics_.clear();
383 alerts_.clear();
384 query_patterns_.clear();
385 query_avg_times_.clear();
386 }
387
389 {
390 std::lock_guard<std::mutex> lock(metrics_mutex_);
391
392 auto cutoff = std::chrono::steady_clock::now() - retention_period_;
393
394 // Remove old query metrics
395 query_history_.erase(
396 std::remove_if(query_history_.begin(), query_history_.end(),
397 [cutoff](const query_metrics& m) {
398 return m.start_time < cutoff;
399 }),
400 query_history_.end());
401
402 // Remove old alerts
403 alerts_.erase(
404 std::remove_if(alerts_.begin(), alerts_.end(),
405 [cutoff](const performance_alert& alert) {
406 return alert.timestamp() < cutoff;
407 }),
408 alerts_.end());
409 }
410
412 {
413 auto summary = get_performance_summary();
414
415 std::ostringstream json;
416 json << "{\n";
417 json << " \"total_queries\": " << summary.total_queries << ",\n";
418 json << " \"successful_queries\": " << summary.successful_queries << ",\n";
419 json << " \"failed_queries\": " << summary.failed_queries << ",\n";
420 json << " \"avg_query_time_us\": " << summary.avg_query_time.count() << ",\n";
421 json << " \"queries_per_second\": " << summary.queries_per_second << ",\n";
422 json << " \"error_rate\": " << summary.error_rate << ",\n";
423 json << " \"total_connections\": " << summary.total_connections << ",\n";
424 json << " \"active_connections\": " << summary.active_connections << ",\n";
425 json << " \"connection_utilization\": " << summary.connection_utilization << "\n";
426 json << "}";
427
428 return json.str();
429 }
430
432 {
433 // Cleanup every 5 minutes, but check shutdown flag every second
434 const auto cleanup_interval = std::chrono::minutes(5);
435 auto last_cleanup = std::chrono::steady_clock::now();
436
437 while (cleanup_running_) {
438 std::unique_lock<std::mutex> lock(cleanup_mutex_);
439 // Wait for 1 second or until notified to stop
440 cleanup_cv_.wait_for(lock, std::chrono::seconds(1), [this] { return !cleanup_running_.load(); });
441
442 // Perform cleanup every 5 minutes
443 auto now = std::chrono::steady_clock::now();
444 if (cleanup_running_ && (now - last_cleanup) >= cleanup_interval) {
447 last_cleanup = now;
448 }
449 }
450 }
451
453 {
454 auto summary = get_performance_summary();
455
456 // Check error rate threshold
457 if (summary.error_rate > error_rate_threshold_) {
459 "High error rate: " + std::to_string(summary.error_rate * 100) + "%");
460 }
461
462 // Check latency threshold
463 if (summary.avg_query_time > latency_threshold_) {
465 "High average latency: " + std::to_string(summary.avg_query_time.count()) + "μs");
466 }
467 }
468
470 {
471 performance_alert alert(type, message, std::chrono::steady_clock::now());
472
473 {
474 std::lock_guard<std::mutex> lock(metrics_mutex_);
475 alerts_.push_back(alert);
476 }
477
478 // Notify alert handlers
479 std::lock_guard<std::mutex> lock(handlers_mutex_);
480 for (const auto& handler : alert_handlers_) {
481 try {
482 handler(alert);
483 } catch (const std::exception& e) {
484 std::cerr << "Alert handler exception: " << e.what() << std::endl;
485 }
486 }
487 }
488
489 std::string performance_monitor::calculate_query_hash(const std::string& query) const
490 {
491 return std::to_string(std::hash<std::string>{}(query));
492 }
493
494 // prometheus_exporter implementation
495 prometheus_exporter::prometheus_exporter(const std::string& endpoint, int port)
496 : endpoint_(endpoint), port_(port)
497 {
498 }
499
501 {
502 // In a real implementation, this would send HTTP POST to Prometheus push gateway
503 std::string metrics = format_prometheus_metrics(summary);
504 std::cout << "Prometheus metrics:\n" << metrics << std::endl;
505 return true;
506 }
507
508 bool prometheus_exporter::export_alerts(const std::vector<performance_alert>& alerts)
509 {
510 // Export alerts as metrics
511 for (const auto& alert : alerts) {
512 std::cout << "database_alert{type=\"" << static_cast<int>(alert.type())
513 << "\"} 1 " << std::chrono::duration_cast<std::chrono::milliseconds>(
514 alert.timestamp().time_since_epoch()).count() << std::endl;
515 }
516 return true;
517 }
518
520 {
521 std::ostringstream metrics;
522
523 metrics << "# HELP database_queries_total Total number of database queries\n";
524 metrics << "# TYPE database_queries_total counter\n";
525 metrics << "database_queries_total " << summary.total_queries << "\n";
526
527 metrics << "# HELP database_query_duration_microseconds Average query duration in microseconds\n";
528 metrics << "# TYPE database_query_duration_microseconds gauge\n";
529 metrics << "database_query_duration_microseconds " << summary.avg_query_time.count() << "\n";
530
531 metrics << "# HELP database_error_rate Query error rate\n";
532 metrics << "# TYPE database_error_rate gauge\n";
533 metrics << "database_error_rate " << summary.error_rate << "\n";
534
535 metrics << "# HELP database_connections_active Active database connections\n";
536 metrics << "# TYPE database_connections_active gauge\n";
537 metrics << "database_connections_active " << summary.active_connections << "\n";
538
539 return metrics.str();
540 }
541
542} // namespace database::monitoring
Alert system for performance thresholds.
performance_alert(alert_type type, const std::string &message, std::chrono::steady_clock::time_point timestamp)
std::vector< std::function< void(const performance_alert &)> > alert_handlers_
void record_slow_query(const std::string &query, std::chrono::microseconds execution_time)
void record_connection_metrics(database_types db_type, const connection_metrics &metrics)
void set_alert_thresholds(double error_rate_threshold, std::chrono::microseconds latency_threshold)
std::string calculate_query_hash(const std::string &query) const
std::unordered_map< std::string, size_t > query_patterns_
std::vector< performance_alert > get_recent_alerts(std::chrono::minutes window) const
std::vector< query_metrics > get_slow_queries(std::chrono::microseconds threshold) const
void register_alert_handler(std::function< void(const performance_alert &)> handler)
connection_metrics get_connection_metrics(database_types db_type) const
performance_summary get_performance_summary() const
performance_monitor()
Constructor - now public for dependency injection.
std::unordered_map< database_types, connection_metrics > connection_metrics_
std::vector< performance_alert > alerts_
std::vector< query_metrics > get_recent_queries(std::chrono::minutes window) const
void emit_alert(performance_alert::alert_type type, const std::string &message)
void update_connection_count(database_types db_type, size_t active, size_t total)
std::unordered_map< std::string, std::chrono::microseconds > query_avg_times_
void record_query_metrics(const query_metrics &metrics)
std::string format_prometheus_metrics(const performance_summary &summary) const
bool export_alerts(const std::vector< performance_alert > &alerts) override
bool export_metrics(const performance_summary &summary) override
prometheus_exporter(const std::string &endpoint, int port)
void set_error(const std::string &error)
std::shared_ptr< performance_monitor > monitor_
query_timer(const std::string &query, database_types db_type, std::shared_ptr< performance_monitor > monitor)
Constructor with explicit performance_monitor (recommended)
std::chrono::steady_clock::time_point start_time_
database_types
Represents various database backends or modes.
Metrics for database connection usage.
std::atomic< std::chrono::microseconds > max_acquisition_time
std::atomic< std::chrono::microseconds > avg_acquisition_time
std::chrono::steady_clock::time_point last_update
std::chrono::steady_clock::time_point measurement_end
std::unordered_map< std::string, size_t > error_counts
std::chrono::steady_clock::time_point measurement_start
Metrics for individual query execution.
std::chrono::steady_clock::time_point start_time
std::chrono::steady_clock::time_point end_time
std::chrono::microseconds execution_time