Logger System 0.1.3
High-performance C++20 thread-safe logging system with asynchronous capabilities
basic_monitor.h
// BSD 3-Clause License
// Copyright (c) 2025, 🍀☀🌕🌥 🌊
// See the LICENSE file in the project root for full license information.

#pragma once

// NOTE: a block collapsed in the source listing presumably included the
// abstract monitoring-interface header and the common-library types used
// below (result<T>, monitoring_data, health_check_result, metric_type).

#include <mutex>
#include <memory>
#include <atomic>
#include <vector>
#include <unordered_map>
#include <chrono>
#include <algorithm>
#include <cstdint>
#include <string>  // added: std::string is used throughout this header

namespace kcenon::logger {

/**
 * @brief Basic monitoring implementation.
 *
 * Lock-light backend built on atomic counters plus a mutex-guarded set of
 * custom counters, gauges, and histograms.
 */
class basic_monitor : public monitor_interface {  // base-class name assumed; the
                                                  // source describes it only as the
                                                  // abstract interface for monitoring
                                                  // and metrics collection
private:
    mutable std::mutex mutex_;
    std::atomic<bool> enabled_{true};

    // Core metrics
    std::atomic<uint64_t> messages_logged_{0};
    std::atomic<uint64_t> messages_dropped_{0};
    std::atomic<uint64_t> errors_encountered_{0};
    std::atomic<uint64_t> writers_failed_{0};

    // Performance metrics
    std::atomic<uint64_t> total_processing_time_us_{0};
    std::atomic<uint64_t> max_processing_time_us_{0};
    std::atomic<uint64_t> min_processing_time_us_{UINT64_MAX};

    // Resource metrics
    std::atomic<size_t> buffer_usage_bytes_{0};
    std::atomic<size_t> max_buffer_usage_bytes_{0};
    std::atomic<size_t> queue_size_{0};
    std::atomic<size_t> max_queue_size_{0};

    // Health check state
    mutable std::vector<std::string> health_issues_;

    // Additional counters and gauges
    mutable std::unordered_map<std::string, std::atomic<double>> counters_;
    mutable std::unordered_map<std::string, std::atomic<double>> gauges_;
    mutable std::unordered_map<std::string, std::vector<double>> histograms_;

    // Start time for uptime calculation
    std::chrono::system_clock::time_point start_time_;

public:
    /// Constructor: records the start time used for uptime reporting.
    basic_monitor() : start_time_(std::chrono::system_clock::now()) {}

    /// Destructor.
    ~basic_monitor() override = default;

    /// Collect current metrics.
    result<monitoring_data> collect_metrics() const override {
        if (!enabled_.load()) {
            // The first argument of this error result is elided in the source
            // listing; the error-code name used here is assumed.
            return {error_code::monitoring_disabled,
                    "Monitoring is disabled"};
        }

        monitoring_data data;

        // Core metrics. The metric_type arguments in this function were elided
        // in the source listing; they are inferred from each metric's semantics
        // (counter = monotonically increasing, gauge = point-in-time value).
        data.add_metric("messages_logged",
                        static_cast<double>(messages_logged_.load()),
                        metric_type::counter);
        data.add_metric("messages_dropped",
                        static_cast<double>(messages_dropped_.load()),
                        metric_type::counter);
        data.add_metric("errors_encountered",
                        static_cast<double>(errors_encountered_.load()),
                        metric_type::counter);
        data.add_metric("writers_failed",
                        static_cast<double>(writers_failed_.load()),
                        metric_type::counter);

        // Performance metrics
        uint64_t total_time = total_processing_time_us_.load();
        uint64_t messages = messages_logged_.load();
        if (messages > 0) {
            data.add_metric("avg_processing_time_us",
                            static_cast<double>(total_time) / messages,
                            metric_type::gauge);
        }
        data.add_metric("max_processing_time_us",
                        static_cast<double>(max_processing_time_us_.load()),
                        metric_type::gauge);

        uint64_t min_time = min_processing_time_us_.load();
        if (min_time != UINT64_MAX) {
            data.add_metric("min_processing_time_us",
                            static_cast<double>(min_time),
                            metric_type::gauge);
        }

        // Resource metrics
        data.add_metric("buffer_usage_bytes",
                        static_cast<double>(buffer_usage_bytes_.load()),
                        metric_type::gauge);
        data.add_metric("max_buffer_usage_bytes",
                        static_cast<double>(max_buffer_usage_bytes_.load()),
                        metric_type::gauge);
        data.add_metric("queue_size",
                        static_cast<double>(queue_size_.load()),
                        metric_type::gauge);
        data.add_metric("max_queue_size",
                        static_cast<double>(max_queue_size_.load()),
                        metric_type::gauge);

        // Uptime
        auto now = std::chrono::system_clock::now();
        auto uptime = std::chrono::duration_cast<std::chrono::seconds>(
            now - start_time_).count();
        data.add_metric("uptime_seconds",
                        static_cast<double>(uptime),
                        metric_type::counter);

        // Add custom counters and gauges
        {
            std::lock_guard<std::mutex> lock(mutex_);

            for (const auto& [name, value] : counters_) {
                data.add_metric(name, value.load(), metric_type::counter);
            }

            for (const auto& [name, value] : gauges_) {
                data.add_metric(name, value.load(), metric_type::gauge);
            }

            // Calculate histogram summaries
            for (const auto& [name, values] : histograms_) {
                if (!values.empty()) {
                    double sum = 0;
                    double min_val = values[0];
                    double max_val = values[0];

                    for (double v : values) {
                        sum += v;
                        min_val = std::min(min_val, v);
                        max_val = std::max(max_val, v);
                    }

                    double avg = sum / values.size();
                    data.add_metric(name + "_avg", avg, metric_type::summary);
                    data.add_metric(name + "_min", min_val, metric_type::summary);
                    data.add_metric(name + "_max", max_val, metric_type::summary);
                    data.add_metric(name + "_count",
                                    static_cast<double>(values.size()),
                                    metric_type::summary);
                }
            }
        }

        return data;
    }

    /// Perform health check.
    result<health_check_result> check_health() const override {
        health_check_result result;  // declaration elided in the source listing;
                                     // assumed to default to a healthy status

        // Check error rate
        uint64_t errors = errors_encountered_.load();
        uint64_t messages = messages_logged_.load();
        if (messages > 0) {
            double error_rate = static_cast<double>(errors) / messages;
            if (error_rate > 0.1) {  // More than 10% error rate
                result.add_issue("High error rate: " +
                                 std::to_string(error_rate * 100) + "%");
                result.set_status(health_status::unhealthy);
            } else if (error_rate > 0.05) {  // More than 5% error rate
                result.add_issue("Elevated error rate: " +
                                 std::to_string(error_rate * 100) + "%");
                result.set_status(health_status::degraded);
            }
        }

        // Check dropped messages
        uint64_t dropped = messages_dropped_.load();
        if (dropped > 0 && messages > 0) {
            double drop_rate = static_cast<double>(dropped) / messages;
            if (drop_rate > 0.01) {  // More than 1% drop rate
                result.add_issue("Messages being dropped: " +
                                 std::to_string(dropped) + " total");
                if (result.get_status() == health_status::healthy) {
                    result.set_status(health_status::degraded);
                }
            }
        }

        // Check writer failures
        uint64_t writer_failures = writers_failed_.load();
        if (writer_failures > 0) {
            result.add_issue("Writer failures detected: " +
                             std::to_string(writer_failures));
            if (result.get_status() == health_status::healthy) {
                result.set_status(health_status::degraded);
            }
        }

        // Check queue size (flag when above 90% of the observed maximum)
        size_t queue_size = queue_size_.load();
        size_t max_queue = max_queue_size_.load();
        if (max_queue > 0 && queue_size > max_queue * 0.9) {
            result.add_issue("Queue near capacity: " +
                             std::to_string(queue_size) + "/" +
                             std::to_string(max_queue));
            if (result.get_status() == health_status::healthy) {
                result.set_status(health_status::degraded);
            }
        }

        // Add any custom health issues
        {
            std::lock_guard<std::mutex> lock(mutex_);
            for (const auto& issue : health_issues_) {
                result.add_issue(issue);
            }
        }

        // Set overall message
        if (result.is_healthy()) {
            result.set_message("All systems operational");
        } else {
            result.set_message("Issues detected - check details");
        }

        return result;
    }

    /// Reset all metrics.
    common::VoidResult reset_metrics() override {
        // Core metrics. Several leading resets were collapsed in the source
        // listing; they are reconstructed here to mirror the member grouping.
        messages_logged_ = 0;
        messages_dropped_ = 0;
        errors_encountered_ = 0;
        writers_failed_ = 0;

        // Performance metrics
        total_processing_time_us_ = 0;
        max_processing_time_us_ = 0;
        min_processing_time_us_ = UINT64_MAX;

        // Resource metrics
        buffer_usage_bytes_ = 0;
        max_buffer_usage_bytes_ = 0;
        queue_size_ = 0;
        max_queue_size_ = 0;

        {
            std::lock_guard<std::mutex> lock(mutex_);
            counters_.clear();
            gauges_.clear();
            histograms_.clear();
            health_issues_.clear();
        }

        start_time_ = std::chrono::system_clock::now();

        return common::ok();
    }

    /// Enable or disable monitoring.
    common::VoidResult set_enabled(bool enable) override {
        enabled_ = enable;
        return common::ok();
    }

    /// Check if monitoring is enabled.
    bool is_enabled() const override {
        return enabled_.load();
    }

    /// Get the backend name.
    std::string get_backend_name() const override {
        return "basic";
    }
    /// Increment a counter. Well-known names update the dedicated atomics;
    /// any other name falls through to the generic counters_ map.
    void increment_counter(const std::string& name, double value = 1.0) override {
        if (!enabled_.load()) return;

        if (name == "messages_logged") {
            messages_logged_ += static_cast<uint64_t>(value);
        } else if (name == "messages_dropped") {
            messages_dropped_ += static_cast<uint64_t>(value);
        } else if (name == "errors_encountered") {
            errors_encountered_ += static_cast<uint64_t>(value);
        } else if (name == "writers_failed") {
            writers_failed_ += static_cast<uint64_t>(value);
        } else {
            std::lock_guard<std::mutex> lock(mutex_);
            counters_[name] += value;  // atomic<double> += requires C++20
        }
    }

    /// Update a gauge. The two built-in gauges also maintain a high-water
    /// mark via a compare-and-swap loop.
    void update_gauge(const std::string& name, double value) override {
        if (!enabled_.load()) return;

        if (name == "buffer_usage_bytes") {
            buffer_usage_bytes_ = static_cast<size_t>(value);
            size_t max_val = max_buffer_usage_bytes_.load();
            while (value > max_val &&
                   !max_buffer_usage_bytes_.compare_exchange_weak(max_val,
                       static_cast<size_t>(value))) {
                // Retry: on CAS failure max_val is reloaded, so the loop also
                // exits once another thread has stored a larger value.
            }
        } else if (name == "queue_size") {
            queue_size_ = static_cast<size_t>(value);
            size_t max_val = max_queue_size_.load();
            while (value > max_val &&
                   !max_queue_size_.compare_exchange_weak(max_val,
                       static_cast<size_t>(value))) {
                // Retry as above
            }
        } else {
            std::lock_guard<std::mutex> lock(mutex_);
            gauges_[name] = value;
        }
    }

    /// Record a histogram value. "processing_time_us" feeds the dedicated
    /// total/max/min atomics; other names are buffered for summary statistics.
    void record_histogram(const std::string& name, double value) override {
        if (!enabled_.load()) return;

        if (name == "processing_time_us") {
            uint64_t us_value = static_cast<uint64_t>(value);
            total_processing_time_us_ += us_value;

            // Update max via a CAS loop
            uint64_t max_val = max_processing_time_us_.load();
            while (us_value > max_val &&
                   !max_processing_time_us_.compare_exchange_weak(max_val, us_value)) {
                // Retry until stored or superseded by a larger value
            }

            // Update min via a CAS loop
            uint64_t min_val = min_processing_time_us_.load();
            while (us_value < min_val &&
                   !min_processing_time_us_.compare_exchange_weak(min_val, us_value)) {
                // Retry until stored or superseded by a smaller value
            }
        } else {
            std::lock_guard<std::mutex> lock(mutex_);
            histograms_[name].push_back(value);

            // Limit histogram size to prevent unbounded growth: past 10000
            // samples, discard the oldest 5000.
            if (histograms_[name].size() > 10000) {
                histograms_[name].erase(histograms_[name].begin(),
                                        histograms_[name].begin() + 5000);
            }
        }
    }

    /// Add a health issue.
    void add_health_issue(const std::string& issue) {
        std::lock_guard<std::mutex> lock(mutex_);
        health_issues_.push_back(issue);
    }

    /// Clear health issues.
    void clear_health_issues() {
        std::lock_guard<std::mutex> lock(mutex_);
        health_issues_.clear();
    }
};

} // namespace kcenon::logger
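
For orientation, a minimal usage sketch follows. It is hypothetical, not part
of the library: it assumes the surrounding common library supplies the
result<T>, monitoring_data, and health_check_result types referenced above,
and it does not unwrap the result<T> snapshots, since those accessors are not
shown in this header.

    #include "basic_monitor.h"

    int main() {
        kcenon::logger::basic_monitor monitor;

        // Well-known counter names route to dedicated atomics; unknown
        // names fall through to the generic counters_ map.
        monitor.increment_counter("messages_logged");
        monitor.increment_counter("custom_events", 3.0);

        // Built-in gauges also track high-water marks via the CAS loops.
        monitor.update_gauge("queue_size", 128.0);

        // "processing_time_us" feeds the min/max/total atomics; other
        // names are buffered and summarized at collection time.
        monitor.record_histogram("processing_time_us", 42.0);
        monitor.record_histogram("batch_size", 16.0);

        // Take snapshots; unwrapping result<T> depends on the common library.
        auto metrics = monitor.collect_metrics();
        auto health  = monitor.check_health();
        (void)metrics;
        (void)health;
    }

The hot-path design choice is visible here: frequently updated core metrics
use lock-free atomics, while arbitrary user-named metrics pay for a mutex,
which keeps logging overhead low without losing extensibility.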