Database System 0.1.0
Advanced C++20 Database System with Multi-Backend Support
system_monitoring_backend.cpp
// BSD 3-Clause License
// Copyright (c) 2025, 🍀☀🌕🌥 🌊
// See the LICENSE file in the project root for full license information.

#include <kcenon/monitoring/core/performance_monitor.h>
#include <kcenon/monitoring/exporters/metric_exporters.h>

#include <algorithm>
#include <numeric>
#include <sstream>
namespace
{
    inline common::VoidResult make_error(const std::string& msg)
    {
        return common::VoidResult(common::error_info{ -1, msg, "" });
    }

    template<typename T>
    common::Result<T> make_error_result(const std::string& msg)
    {
        return common::Result<T>(common::error_info{ -1, msg, "" });
    }
}

namespace database
{
namespace integrated
{
namespace adapters
{
namespace backends
{

system_monitoring_backend::system_monitoring_backend(const db_monitoring_config& config)
    : config_(config)
    , initialized_(false)
    , current_metrics_{}
    , max_latency_samples_(10000)
    , active_connections_(0)
    , idle_connections_(0)
    , total_connections_(0)
    , active_transactions_(0)
{
}

common::VoidResult system_monitoring_backend::initialize()
{
    if (initialized_)
    {
        return common::ok();
    }

    try
    {
        // Create performance monitor
        monitor_ = std::make_unique<kcenon::monitoring::performance_monitor>("database_system");

        // Initialize the monitor
        auto init_result = monitor_->initialize();
        if (!init_result.is_ok())
        {
            return make_error("Failed to initialize monitoring_system: " +
                              std::string(init_result.error().message));
        }

        start_time_ = std::chrono::steady_clock::now();
        initialized_ = true;
        return common::ok();
    }
    catch (const std::exception& e)
    {
        return make_error("Exception during initialization: " + std::string(e.what()));
    }
}

common::VoidResult system_monitoring_backend::shutdown()
{
    if (!initialized_)
    {
        return common::ok();
    }

    try
    {
        std::lock_guard<std::mutex> lock(mutex_);

        if (monitor_)
        {
            auto cleanup_result = monitor_->cleanup();
            if (!cleanup_result.is_ok())
            {
                // Log error but continue shutdown
            }
            monitor_.reset();
        }

        initialized_ = false;

        return common::ok();
    }
    catch (const std::exception& e)
    {
        return make_error("Exception during shutdown: " + std::string(e.what()));
    }
}

bool system_monitoring_backend::is_initialized() const
{
    return initialized_;
}

common::VoidResult system_monitoring_backend::record_metric(const std::string& name, double value)
{
    if (!initialized_)
    {
        return make_error("Monitoring backend not initialized");
    }

    std::lock_guard<std::mutex> lock(mutex_);

    try
    {
        // Record as a duration sample for the performance profiler
        auto duration_ns = static_cast<std::int64_t>(value * 1000.0); // assume value is in microseconds
        monitor_->get_profiler().record_sample(
            name,
            std::chrono::nanoseconds(duration_ns),
            true);
        return common::ok();
    }
    catch (const std::exception& e)
    {
        return make_error("Failed to record metric: " + std::string(e.what()));
    }
}
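
// Illustrative use of the unit convention above (not from the original source):
// a call such as record_metric("db.query.exec_time", 1500.0) is treated as
// 1500 us and stored as a 1'500'000 ns profiler sample.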

common::VoidResult system_monitoring_backend::record_metric(
    const std::string& name, double value,
    const std::unordered_map<std::string, std::string>& /*tags*/)
{
    // monitoring_system's performance_profiler doesn't support tags directly;
    // record without tags for now.
    return record_metric(name, value);
}

common::Result<metrics_snapshot> system_monitoring_backend::get_metrics()
{
    if (!initialized_)
    {
        return make_error_result<metrics_snapshot>("Monitoring backend not initialized");
    }

    std::lock_guard<std::mutex> lock(mutex_);

    try
    {
        metrics_snapshot snapshot;
        snapshot.source_id = "database_system";

        // Connection metrics
        snapshot.gauges["db.connections.active"] = static_cast<double>(active_connections_);
        snapshot.gauges["db.connections.idle"] = static_cast<double>(idle_connections_);
        snapshot.gauges["db.connections.total"] = static_cast<double>(total_connections_);
        snapshot.gauges["db.connections.usage_percent"] = current_metrics_.connection_usage_percent;

        // Query metrics
        snapshot.counters["db.queries.total"] = current_metrics_.total_queries;
        snapshot.counters["db.queries.successful"] = current_metrics_.successful_queries;
        snapshot.counters["db.queries.failed"] = current_metrics_.failed_queries;

        snapshot.gauges["db.query.avg_latency_us"] = static_cast<double>(current_metrics_.avg_query_latency.count());
        snapshot.gauges["db.query.min_latency_us"] = static_cast<double>(current_metrics_.min_query_latency.count());
        snapshot.gauges["db.query.max_latency_us"] = static_cast<double>(current_metrics_.max_query_latency.count());
        snapshot.gauges["db.query.p95_latency_us"] = static_cast<double>(current_metrics_.p95_query_latency.count());
        snapshot.gauges["db.query.p99_latency_us"] = static_cast<double>(current_metrics_.p99_query_latency.count());
        snapshot.gauges["db.query.success_rate"] = current_metrics_.query_success_rate;

        // Transaction metrics
        snapshot.gauges["db.transactions.active"] = static_cast<double>(active_transactions_);
        snapshot.counters["db.transactions.committed"] = current_metrics_.committed_transactions;
        snapshot.counters["db.transactions.rolled_back"] = current_metrics_.rolled_back_transactions;
        snapshot.gauges["db.transaction.commit_rate"] = current_metrics_.transaction_commit_rate;

        // Throughput metrics
        snapshot.gauges["db.queries_per_second"] = current_metrics_.queries_per_second;
        snapshot.gauges["db.transactions_per_second"] = current_metrics_.transactions_per_second;

        return snapshot;
    }
    catch (const std::exception& e)
    {
        return make_error_result<metrics_snapshot>("Failed to get metrics: " + std::string(e.what()));
    }
}

common::Result<health_check_result> system_monitoring_backend::check_health()
{
    if (!initialized_)
    {
        return make_error_result<health_check_result>("Monitoring backend not initialized");
    }

    std::lock_guard<std::mutex> lock(mutex_);

    health_check_result result;
    result.message = "Database system healthy";

    // NOTE: the status assignments and the exact threshold expressions were
    // elided from the original listing; the conditions below are reconstructed
    // from the documented config fields and should be read as assumptions.

    // Connection pool health check (connection_usage_warning_threshold is a 0.0-1.0 fraction)
    if (current_metrics_.connection_usage_percent >
        config_.connection_usage_warning_threshold * 100.0)
    {
        result.message = "Connection pool usage critical";
        result.metadata["connection_usage"] =
            std::to_string(current_metrics_.connection_usage_percent) + "%";
    }

    // Query latency health check (query_latency_warning is in milliseconds)
    if (current_metrics_.avg_query_latency > config_.query_latency_warning)
    {
        result.message = "Query latency critical";
        result.metadata["avg_latency_us"] = std::to_string(current_metrics_.avg_query_latency.count());
    }

    // Query success rate health check (threshold value assumed; the original was elided)
    if (current_metrics_.query_success_rate < 0.95)
    {
        result.message = "Query success rate low";
        result.metadata["success_rate"] = std::to_string(current_metrics_.query_success_rate);
    }

    return result;
}

common::VoidResult system_monitoring_backend::reset_metrics()  // name assumed; the signature was elided from the original listing
{
    if (!initialized_)
    {
        return make_error("Monitoring backend not initialized");
    }

    std::lock_guard<std::mutex> lock(mutex_);

    try
    {
        // Several statements resetting the cached counters and latency samples
        // were elided from the original listing.

        if (monitor_)
        {
            monitor_->reset();
        }

        return common::ok();
    }
    catch (const std::exception& e)
    {
        return make_error("Failed to reset metrics: " + std::string(e.what()));
    }
}

void system_monitoring_backend::record_query_execution(std::chrono::microseconds duration, bool success)
{
    std::lock_guard<std::mutex> lock(mutex_);

    // Update counters
    current_metrics_.total_queries++;
    if (success)
    {
        current_metrics_.successful_queries++;
    }
    else
    {
        current_metrics_.failed_queries++;
    }

    // Calculate success rate
    if (current_metrics_.total_queries > 0)
    {
        current_metrics_.query_success_rate =
            static_cast<double>(current_metrics_.successful_queries) / current_metrics_.total_queries;
    }

    // Record in monitoring_system
    if (monitor_)
    {
        auto duration_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(duration);
        monitor_->get_profiler().record_sample("query_execution", duration_ns, success);
    }

    // Store latency sample, dropping the oldest once max_latency_samples_ is
    // exceeded (the exact trimming statement was elided from the original listing)
    recent_query_latencies_.push_back(duration);
    if (recent_query_latencies_.size() > max_latency_samples_)
    {
        recent_query_latencies_.erase(recent_query_latencies_.begin());
    }

    // Calculate derived metrics
    calculate_derived_metrics();
}

// NOTE: the names of the two connection hooks below were elided from the
// original listing; record_connection_opened/record_connection_closed are
// assumed here.
void system_monitoring_backend::record_connection_opened()
{
    std::lock_guard<std::mutex> lock(mutex_);
    // Connection tracking is handled by update_pool_stats
}

void system_monitoring_backend::record_connection_closed()
{
    std::lock_guard<std::mutex> lock(mutex_);
    // Connection tracking is handled by update_pool_stats
}

void system_monitoring_backend::update_pool_stats(std::size_t active, std::size_t idle, std::size_t total)
{
    std::lock_guard<std::mutex> lock(mutex_);

    active_connections_ = active;
    idle_connections_ = idle;
    total_connections_ = total;

    current_metrics_.active_connections = active;
    current_metrics_.idle_connections = idle;
    current_metrics_.total_connections = total;

    // Calculate usage percentage
    if (total > 0)
    {
        current_metrics_.connection_usage_percent = (static_cast<double>(active) / total) * 100.0;
    }
    else
    {
        current_metrics_.connection_usage_percent = 0.0;
    }
}

// NOTE: the names of the two transaction hooks below were elided from the
// original listing (record_transaction_commit/record_transaction_rollback are
// assumed), and several elided statements are reconstructed from the
// surrounding context.
void system_monitoring_backend::record_transaction_commit()
{
    std::lock_guard<std::mutex> lock(mutex_);

    if (active_transactions_ > 0)
    {
        active_transactions_--;
    }

    current_metrics_.committed_transactions++;
    current_metrics_.active_transactions = active_transactions_;

    // Calculate commit rate
    auto total_txns = current_metrics_.committed_transactions + current_metrics_.rolled_back_transactions;
    if (total_txns > 0)
    {
        current_metrics_.transaction_commit_rate =
            static_cast<double>(current_metrics_.committed_transactions) / total_txns;
    }
}

void system_monitoring_backend::record_transaction_rollback()
{
    std::lock_guard<std::mutex> lock(mutex_);

    if (active_transactions_ > 0)
    {
        active_transactions_--;
    }

    current_metrics_.rolled_back_transactions++;
    current_metrics_.active_transactions = active_transactions_;

    // Calculate commit rate
    auto total_txns = current_metrics_.committed_transactions + current_metrics_.rolled_back_transactions;
    if (total_txns > 0)
    {
        current_metrics_.transaction_commit_rate =
            static_cast<double>(current_metrics_.committed_transactions) / total_txns;
    }
}

common::Result<database_metrics> system_monitoring_backend::get_database_metrics()
{
    if (!initialized_)
    {
        return make_error_result<database_metrics>("Monitoring backend not initialized");
    }

    std::lock_guard<std::mutex> lock(mutex_);

    // Update timestamp
    current_metrics_.timestamp = std::chrono::system_clock::now();

    // Calculate throughput
    auto elapsed = std::chrono::steady_clock::now() - start_time_;
    auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds>(elapsed).count();

    if (elapsed_seconds > 0)
    {
        current_metrics_.queries_per_second =
            static_cast<double>(current_metrics_.total_queries) / elapsed_seconds;
        auto total_txns =
            current_metrics_.committed_transactions + current_metrics_.rolled_back_transactions;
        current_metrics_.transactions_per_second = static_cast<double>(total_txns) / elapsed_seconds;
    }

    return current_metrics_;
}

std::string system_monitoring_backend::export_prometheus_metrics()
{
    std::lock_guard<std::mutex> lock(mutex_);

    std::ostringstream ss;

    // Connection metrics
    ss << "# HELP db_connections_active Number of active database connections\n";
    ss << "# TYPE db_connections_active gauge\n";
    ss << "db_connections_active " << current_metrics_.active_connections << "\n";

    ss << "# HELP db_connections_idle Number of idle database connections\n";
    ss << "# TYPE db_connections_idle gauge\n";
    ss << "db_connections_idle " << current_metrics_.idle_connections << "\n";

    ss << "# HELP db_connections_total Total number of connections in pool\n";
    ss << "# TYPE db_connections_total gauge\n";
    ss << "db_connections_total " << current_metrics_.total_connections << "\n";

    ss << "# HELP db_connection_usage_percent Connection pool usage percentage\n";
    ss << "# TYPE db_connection_usage_percent gauge\n";
    ss << "db_connection_usage_percent " << current_metrics_.connection_usage_percent << "\n";

    // Query metrics
    ss << "# HELP db_queries_total Total number of queries executed\n";
    ss << "# TYPE db_queries_total counter\n";
    ss << "db_queries_total " << current_metrics_.total_queries << "\n";

    ss << "# HELP db_queries_successful Number of successful queries\n";
    ss << "# TYPE db_queries_successful counter\n";
    ss << "db_queries_successful " << current_metrics_.successful_queries << "\n";

    ss << "# HELP db_queries_failed Number of failed queries\n";
    ss << "# TYPE db_queries_failed counter\n";
    ss << "db_queries_failed " << current_metrics_.failed_queries << "\n";

    ss << "# HELP db_query_success_rate Query success rate (0-1)\n";
    ss << "# TYPE db_query_success_rate gauge\n";
    ss << "db_query_success_rate " << current_metrics_.query_success_rate << "\n";

    // Latency metrics
    ss << "# HELP db_query_latency_avg_us Average query latency in microseconds\n";
    ss << "# TYPE db_query_latency_avg_us gauge\n";
    ss << "db_query_latency_avg_us " << current_metrics_.avg_query_latency.count() << "\n";

    ss << "# HELP db_query_latency_min_us Minimum query latency in microseconds\n";
    ss << "# TYPE db_query_latency_min_us gauge\n";
    ss << "db_query_latency_min_us " << current_metrics_.min_query_latency.count() << "\n";

    ss << "# HELP db_query_latency_max_us Maximum query latency in microseconds\n";
    ss << "# TYPE db_query_latency_max_us gauge\n";
    ss << "db_query_latency_max_us " << current_metrics_.max_query_latency.count() << "\n";

    ss << "# HELP db_query_latency_p95_us 95th percentile query latency in microseconds\n";
    ss << "# TYPE db_query_latency_p95_us gauge\n";
    ss << "db_query_latency_p95_us " << current_metrics_.p95_query_latency.count() << "\n";

    ss << "# HELP db_query_latency_p99_us 99th percentile query latency in microseconds\n";
    ss << "# TYPE db_query_latency_p99_us gauge\n";
    ss << "db_query_latency_p99_us " << current_metrics_.p99_query_latency.count() << "\n";

    // Transaction metrics
    ss << "# HELP db_transactions_active Number of active transactions\n";
    ss << "# TYPE db_transactions_active gauge\n";
    ss << "db_transactions_active " << current_metrics_.active_transactions << "\n";

    ss << "# HELP db_transactions_committed Total number of committed transactions\n";
    ss << "# TYPE db_transactions_committed counter\n";
    ss << "db_transactions_committed " << current_metrics_.committed_transactions << "\n";

    ss << "# HELP db_transactions_rolled_back Total number of rolled back transactions\n";
    ss << "# TYPE db_transactions_rolled_back counter\n";
    ss << "db_transactions_rolled_back " << current_metrics_.rolled_back_transactions << "\n";

    ss << "# HELP db_transaction_commit_rate Transaction commit rate (0-1)\n";
    ss << "# TYPE db_transaction_commit_rate gauge\n";
    ss << "db_transaction_commit_rate " << current_metrics_.transaction_commit_rate << "\n";

    // Throughput metrics
    ss << "# HELP db_queries_per_second Query throughput (queries/second)\n";
    ss << "# TYPE db_queries_per_second gauge\n";
    ss << "db_queries_per_second " << current_metrics_.queries_per_second << "\n";

    ss << "# HELP db_transactions_per_second Transaction throughput (transactions/second)\n";
    ss << "# TYPE db_transactions_per_second gauge\n";
    ss << "db_transactions_per_second " << current_metrics_.transactions_per_second << "\n";

    return ss.str();
}
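
// Illustrative excerpt (not actual output) of the Prometheus exposition text
// assembled above, with made-up values:
//
//   # HELP db_connections_active Number of active database connections
//   # TYPE db_connections_active gauge
//   db_connections_active 12
//   # HELP db_query_latency_p95_us 95th percentile query latency in microseconds
//   # TYPE db_query_latency_p95_us gauge
//   db_query_latency_p95_us 8423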

void system_monitoring_backend::calculate_derived_metrics()
{
    if (recent_query_latencies_.empty())
    {
        return;
    }

    // Create sorted copy for percentile calculation
    auto sorted_latencies = recent_query_latencies_;
    std::sort(sorted_latencies.begin(), sorted_latencies.end());

    // Calculate min/max
    current_metrics_.min_query_latency = sorted_latencies.front();
    current_metrics_.max_query_latency = sorted_latencies.back();

    // Calculate average
    auto total_latency = std::accumulate(
        sorted_latencies.begin(),
        sorted_latencies.end(),
        std::chrono::microseconds(0));
    current_metrics_.avg_query_latency = total_latency / sorted_latencies.size();

    // Calculate percentiles
    auto calc_percentile = [&sorted_latencies](double percentile) -> std::chrono::microseconds {
        if (sorted_latencies.empty())
            return std::chrono::microseconds(0);

        std::size_t index =
            static_cast<std::size_t>((percentile / 100.0) * (sorted_latencies.size() - 1));
        return sorted_latencies[index];
    };

    current_metrics_.p95_query_latency = calc_percentile(95.0);
    current_metrics_.p99_query_latency = calc_percentile(99.0);
}
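
// Worked example of the nearest-rank index used above (illustrative, not from
// the original source): with 200 sorted samples, p95 reads index
// static_cast<std::size_t>(0.95 * 199) = 189 (the 190th smallest latency) and
// p99 reads index static_cast<std::size_t>(0.99 * 199) = 197.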

database_metrics system_monitoring_backend::convert_to_database_metrics(
    const kcenon::monitoring::metrics_snapshot& snapshot)
{
    database_metrics db_metrics;

    // Extract metrics from snapshot
    for (const auto& metric : snapshot.metrics)
    {
        const auto& name = metric.name;
        const auto value = metric.value;

        if (name == "query_execution_count")
        {
            db_metrics.total_queries = static_cast<std::uint64_t>(value);
        }
        // Add more metric mappings as needed
    }

    return db_metrics;
}

} // namespace backends
} // namespace adapters
} // namespace integrated
} // namespace database
bool is_initialized() const override
Check if backend is initialized.
common::VoidResult initialize() override
Initialize the monitoring backend.
common::Result< health_check_result > check_health() override
Perform health check.
void update_pool_stats(std::size_t active, std::size_t idle, std::size_t total) override
Update connection pool statistics.
std::string export_prometheus_metrics() override
Export metrics in Prometheus format.
std::unique_ptr< kcenon::monitoring::performance_monitor > monitor_
void record_query_execution(std::chrono::microseconds duration, bool success) override
Record query execution.
system_monitoring_backend(const db_monitoring_config &config)
Construct system monitoring backend.
common::VoidResult record_metric(const std::string &name, double value) override
Record a metric value.
database_metrics convert_to_database_metrics(const kcenon::monitoring::metrics_snapshot &snapshot)
Convert monitoring_system metrics to database_metrics format.
common::Result< database_metrics > get_database_metrics() override
Get database-specific metrics.
common::Result< metrics_snapshot > get_metrics() override
Get current metrics snapshot.
common::VoidResult shutdown() override
Shutdown the monitoring backend gracefully.
void calculate_derived_metrics()
Calculate derived metrics from collected data.
VoidResult ok()
Result< std::monostate > VoidResult
async_result< T > make_error_result(const std::exception &error)
std::unordered_map< std::string, std::string > metadata
std::unordered_map< std::string, uint64_t > counters
std::uint64_t active_transactions
Currently active transactions.
std::size_t idle_connections
Idle connections in pool.
double connection_usage_percent
Percentage of connections in use.
std::size_t active_connections
Currently active connections.
std::uint64_t rolled_back_transactions
Total rolled-back transactions.
std::uint64_t total_queries
Total queries executed.
std::chrono::microseconds p99_query_latency
99th percentile latency
std::uint64_t successful_queries
Successfully completed queries.
double transaction_commit_rate
Commit rate (0.0 to 1.0)
std::chrono::system_clock::time_point timestamp
When metrics were collected.
std::chrono::microseconds p95_query_latency
95th percentile latency
std::chrono::microseconds max_query_latency
Maximum query latency.
std::chrono::microseconds avg_query_latency
Average query latency.
std::uint64_t committed_transactions
Total committed transactions.
std::chrono::microseconds min_query_latency
Minimum query latency.
Monitoring and metrics configuration.
std::chrono::milliseconds query_latency_warning
Warn when query latency exceeds this threshold.
double connection_usage_warning_threshold
Warn when connection pool usage exceeds this percentage (0.0-1.0)
Monitoring backend using kcenon/monitoring_system.
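
A minimal usage sketch (not part of the original sources) follows. It assumes the backend's own header is already included, that db_monitoring_config is default-constructible and reachable from the backends namespace, and it omits result checking since the common Result API is not shown in this listing; only member functions shown above are called.

#include <chrono>
#include <iostream>

int main()
{
    using namespace database::integrated::adapters::backends;

    db_monitoring_config config{};                 // assumed default-constructible
    system_monitoring_backend backend(config);

    backend.initialize();                          // returns common::VoidResult; checking omitted here

    backend.update_pool_stats(/*active=*/8, /*idle=*/2, /*total=*/10);
    backend.record_query_execution(std::chrono::microseconds{1500}, /*success=*/true);

    std::cout << backend.export_prometheus_metrics();

    backend.shutdown();
    return 0;
}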