PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
database_metrics_service.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
14
15#ifdef PACS_WITH_DATABASE_SYSTEM
16
17#include <algorithm>
18#include <iomanip>
19#include <mutex>
20#include <sstream>
21
23
24// ============================================================================
25// database_metrics_service::impl
26// ============================================================================
27
28struct database_metrics_service::impl {
29 std::shared_ptr<storage::pacs_database_adapter> db;
30 std::chrono::microseconds slow_query_threshold{100000}; // 100ms
31 std::chrono::minutes metrics_retention{5};
32 std::vector<slow_query_callback> slow_query_callbacks;
33 std::vector<slow_query> slow_query_history;
34 std::mutex mutex; // Protects callbacks and history
35
36 explicit impl(std::shared_ptr<storage::pacs_database_adapter> database)
37 : db(std::move(database)) {}
38
39 void add_slow_query(slow_query sq) {
40 std::lock_guard<std::mutex> lock(mutex);
41
42 // Add to history
43 slow_query_history.push_back(sq);
44
45 // Notify callbacks
46 for (const auto& callback : slow_query_callbacks) {
47 try {
48 callback(sq);
49 } catch (const std::exception&) {
50 // Ignore callback exceptions
51 }
52 }
53
54 // Cleanup old entries (keep last 1000)
55 if (slow_query_history.size() > 1000) {
56 slow_query_history.erase(slow_query_history.begin(),
57 slow_query_history.begin() + 500);
58 }
59 }
60};
61
62// ============================================================================
63// database_metrics_service
64// ============================================================================
65
66database_metrics_service::database_metrics_service(
67 std::shared_ptr<storage::pacs_database_adapter> db)
68 : impl_(std::make_unique<impl>(std::move(db))) {}
69
70database_metrics_service::~database_metrics_service() = default;
71
72database_metrics_service::database_metrics_service(
73 database_metrics_service&&) noexcept = default;
74
75auto database_metrics_service::operator=(database_metrics_service&&) noexcept
76 -> database_metrics_service& = default;
77
78// ============================================================================
79// Configuration
80// ============================================================================
81
82void database_metrics_service::set_slow_query_threshold(
83 std::chrono::microseconds threshold) {
84 impl_->slow_query_threshold = threshold;
85}
86
87void database_metrics_service::set_metrics_retention(
88 std::chrono::minutes retention) {
89 impl_->metrics_retention = retention;
90}
91
92void database_metrics_service::register_slow_query_callback(
93 slow_query_callback callback) {
94 std::lock_guard<std::mutex> lock(impl_->mutex);
95 impl_->slow_query_callbacks.push_back(std::move(callback));
96}
97
98// ============================================================================
99// Health Checks
100// ============================================================================
101
102auto database_metrics_service::check_health() -> database_health {
103 database_health health;
104
105 auto start = std::chrono::steady_clock::now();
106
107 // Simple connectivity check
108 auto result = impl_->db->select("SELECT 1");
109
110 auto end = std::chrono::steady_clock::now();
111 health.response_time =
112 std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
113
114 if (result.is_err()) {
115 health.current_status = database_health::status::unhealthy;
116 health.message =
117 "Database connection failed: " + result.error().message;
118 return health;
119 }
120
121 // Get metrics for additional checks
122 auto metrics = get_current_metrics();
123 health.active_connections = metrics.active_connections;
124 health.error_rate = metrics.error_rate;
125
126 // Determine status based on metrics
127 if (metrics.error_rate > 0.1) { // >10% error rate
128 health.current_status = database_health::status::unhealthy;
129 health.message = "High error rate detected";
130 health.warnings.push_back(
131 "Error rate: " + std::to_string(metrics.error_rate * 100) + "%");
132 } else if (metrics.error_rate > 0.01 ||
133 metrics.connection_utilization > 0.8 ||
134 metrics.avg_latency_us > 50000) {
135 health.current_status = database_health::status::degraded;
136 health.message = "Database performance degraded";
137
138 if (metrics.error_rate > 0.01) {
139 health.warnings.push_back("Elevated error rate");
140 }
141 if (metrics.connection_utilization > 0.8) {
142 health.warnings.push_back("High connection utilization");
143 }
144 if (metrics.avg_latency_us > 50000) {
145 health.warnings.push_back("High average latency");
146 }
147 } else {
148 health.current_status = database_health::status::healthy;
149 health.message = "Database is healthy";
150 }
151
152 return health;
153}
154
155auto database_metrics_service::is_healthy() -> bool {
156 auto health = check_health();
157 return health.current_status == database_health::status::healthy;
158}
159
160// ============================================================================
161// Metrics Retrieval
162// ============================================================================
163
164auto database_metrics_service::get_current_metrics() -> database_metrics {
165 database_metrics metrics;
166
167 // TODO(Phase 1): Integrate with database_system's performance_monitor
168 // For now, return placeholder values
169 // This will be populated when database_system monitoring API is available
170
171 // Placeholder implementation - will be replaced with:
172 // auto context = impl_->db->context();
173 // auto monitor = context->get_performance_monitor();
174 // auto summary = monitor->get_performance_summary();
175
176 metrics.total_queries = 0;
177 metrics.successful_queries = 0;
178 metrics.failed_queries = 0;
179 metrics.queries_per_second = 0.0;
180 metrics.avg_latency_us = 0;
181 metrics.min_latency_us = 0;
182 metrics.max_latency_us = 0;
183 metrics.p95_latency_us = 0;
184 metrics.p99_latency_us = 0;
185 metrics.active_connections = 1; // Assume at least 1 connection
186 metrics.pool_size = 1;
187 metrics.connection_utilization = 0.0;
188 metrics.error_rate = 0.0;
189 metrics.slow_query_count = get_slow_queries().size();
190
191 return metrics;
192}
193
194auto database_metrics_service::get_slow_queries(
195 [[maybe_unused]] std::chrono::minutes since) -> std::vector<slow_query> {
196 std::lock_guard<std::mutex> lock(impl_->mutex);
197
198 // TODO: Filter by timestamp when slow query tracking is integrated
199 // For now, return recent slow queries from history
200
201 std::vector<slow_query> result;
202 result.reserve(impl_->slow_query_history.size());
203
204 // Return all queries for now
205 // Will be filtered by timestamp in future implementation
206 result = impl_->slow_query_history;
207
208 return result;
209}
210
211auto database_metrics_service::get_top_slow_queries(size_t limit)
212 -> std::vector<slow_query> {
213 std::lock_guard<std::mutex> lock(impl_->mutex);
214
215 std::vector<slow_query> sorted = impl_->slow_query_history;
216
217 // Sort by duration (descending)
218 std::sort(sorted.begin(), sorted.end(),
219 [](const slow_query& a, const slow_query& b) {
220 return a.duration_us > b.duration_us;
221 });
222
223 // Return top N
224 if (sorted.size() > limit) {
225 sorted.resize(limit);
226 }
227
228 return sorted;
229}
230
231// ============================================================================
232// Metrics Export
233// ============================================================================
234
235auto database_metrics_service::export_prometheus_metrics() -> std::string {
236 auto metrics = get_current_metrics();
237 std::ostringstream oss;
238
239 oss << "# HELP pacs_db_queries_total Total database queries\n"
240 << "# TYPE pacs_db_queries_total counter\n"
241 << "pacs_db_queries_total{status=\"success\"} "
242 << metrics.successful_queries << "\n"
243 << "pacs_db_queries_total{status=\"failure\"} " << metrics.failed_queries
244 << "\n\n";
245
246 oss << "# HELP pacs_db_query_duration_microseconds Query duration\n"
247 << "# TYPE pacs_db_query_duration_microseconds summary\n"
248 << "pacs_db_query_duration_microseconds{quantile=\"0.5\"} "
249 << metrics.avg_latency_us << "\n"
250 << "pacs_db_query_duration_microseconds{quantile=\"0.95\"} "
251 << metrics.p95_latency_us << "\n"
252 << "pacs_db_query_duration_microseconds{quantile=\"0.99\"} "
253 << metrics.p99_latency_us << "\n\n";
254
255 oss << "# HELP pacs_db_queries_per_second Query throughput\n"
256 << "# TYPE pacs_db_queries_per_second gauge\n"
257 << "pacs_db_queries_per_second " << std::fixed << std::setprecision(2)
258 << metrics.queries_per_second << "\n\n";
259
260 oss << "# HELP pacs_db_connections Active connections\n"
261 << "# TYPE pacs_db_connections gauge\n"
262 << "pacs_db_connections{state=\"active\"} " << metrics.active_connections
263 << "\n\n";
264
265 oss << "# HELP pacs_db_connection_utilization Connection pool utilization\n"
266 << "# TYPE pacs_db_connection_utilization gauge\n"
267 << "pacs_db_connection_utilization " << std::fixed
268 << std::setprecision(4) << metrics.connection_utilization << "\n\n";
269
270 oss << "# HELP pacs_db_error_rate Database error rate\n"
271 << "# TYPE pacs_db_error_rate gauge\n"
272 << "pacs_db_error_rate " << std::fixed << std::setprecision(4)
273 << metrics.error_rate << "\n\n";
274
275 oss << "# HELP pacs_db_slow_queries Slow queries count\n"
276 << "# TYPE pacs_db_slow_queries gauge\n"
277 << "pacs_db_slow_queries " << metrics.slow_query_count << "\n";
278
279 return oss.str();
280}
281
282// ============================================================================
283// Helper Functions
284// ============================================================================
285
286auto health_status_to_string(database_health::status status) -> std::string {
287 switch (status) {
288 case database_health::status::healthy:
289 return "healthy";
290 case database_health::status::degraded:
291 return "degraded";
292 case database_health::status::unhealthy:
293 return "unhealthy";
294 default:
295 return "unknown";
296 }
297}
298
299} // namespace kcenon::pacs::services::monitoring
300
301#endif // PACS_WITH_DATABASE_SYSTEM
Database monitoring and metrics service.
std::shared_mutex mutex
Mutex for thread-safe access.
@ move
C-MOVE move request/response.