Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
monitoring_core.h
Go to the documentation of this file.
1#pragma once
2
3// BSD 3-Clause License
4// Copyright (c) 2025, 🍀☀🌕🌥 🌊
5// See the LICENSE file in the project root for full license information.
6
7
24#include "../core/error_codes.h"
25#include <memory>
26#include <string>
27#include <vector>
28#include <chrono>
29#include <functional>
30#include <unordered_map>
31#include <optional>
32#include <exception>
33
34namespace kcenon { namespace monitoring {
35
36// Forward declarations
37class metrics_collector;
38class storage_backend;
39class metrics_analyzer;
40struct metrics_snapshot;
41struct monitoring_config;
42struct health_check_result;
43
/**
 * @brief Represents a single metric value with metadata.
 *
 * Bundles a metric name, its numeric value, the system-clock time at which
 * the object was constructed, and a map of string tags.
 */
struct metric_value {
    std::string name;                                   ///< Metric name.
    double value;                                       ///< Metric value.
    std::chrono::system_clock::time_point timestamp;    ///< Set to now() by the constructor.
    std::unordered_map<std::string, std::string> tags;  ///< Key/value tags; empty after construction.

    /**
     * @brief Construct a metric stamped with the current system-clock time.
     * @param n Metric name; defaults to the empty string.
     * @param v Metric value; defaults to 0.0.
     */
    metric_value(const std::string& n = "", double v = 0.0)
        : name(n)
        , value(v)
        , timestamp(std::chrono::system_clock::now()) {}
};
59
65 std::vector<metric_value> metrics;
66 std::chrono::system_clock::time_point capture_time;
67 std::string source_id;
68
70 : capture_time(std::chrono::system_clock::now()) {}
71
77 void add_metric(const std::string& name, double value) {
78 metrics.emplace_back(name, value);
79 }
80
87 void add_metric(const std::string& name, double value,
88 const std::unordered_map<std::string, std::string>& tags) {
89 metric_value mv(name, value);
90 mv.tags = tags;
91 metrics.push_back(std::move(mv));
92 }
93
99 std::optional<double> get_metric(const std::string& name) const {
100 for (const auto& m : metrics) {
101 if (m.name == name) {
102 return m.value;
103 }
104 }
105 return std::nullopt;
106 }
107};
108
/**
 * @brief System health status levels.
 *
 * Enumerator order (and therefore the implicit underlying values 0..3) is
 * kept exactly as declared.
 */
enum class health_status {
    healthy,
    degraded,
    unhealthy,
    unknown
};
119
126 std::string message;
127 std::vector<std::string> issues;
128 std::chrono::system_clock::time_point check_time;
129 std::chrono::system_clock::time_point timestamp;
130 std::chrono::milliseconds check_duration{0};
131 std::unordered_map<std::string, std::string> metadata;
132 std::optional<std::exception_ptr> error;
133
135 : check_time(std::chrono::system_clock::now()),
136 timestamp(std::chrono::system_clock::now()) {}
137
138 bool is_healthy() const {
140 }
141
142 bool is_operational() const {
143 return status == health_status::healthy ||
145 }
146
147 static health_check_result healthy(const std::string& msg = "OK") {
148 health_check_result result;
150 result.message = msg;
151 result.timestamp = std::chrono::system_clock::now();
152 result.check_time = result.timestamp;
153 return result;
154 }
155
156 static health_check_result unhealthy(const std::string& msg) {
157 health_check_result result;
159 result.message = msg;
160 result.timestamp = std::chrono::system_clock::now();
161 result.check_time = result.timestamp;
162 return result;
163 }
164
165 static health_check_result degraded(const std::string& msg) {
166 health_check_result result;
168 result.message = msg;
169 result.timestamp = std::chrono::system_clock::now();
170 result.check_time = result.timestamp;
171 return result;
172 }
173};
174
// Configuration for the monitoring system.
// NOTE(review): this is a numbered source-listing extract. The struct header
// and listing lines 193, 198, 202 and 203 are absent, so the block is
// incomplete as shown; recover them from the real header before compiling.
// Defaults below are the in-class initializers.
180 std::size_t history_size = 1000;
181 std::chrono::milliseconds collection_interval{1000};
182 bool enable_compression = false;
183 bool enable_persistence = false;
184 std::size_t max_collectors = 100;
185 std::size_t buffer_size = 10000;
186
// Validate configuration parameters.
// Returns common::VoidResult::err(...) for the first failed check and
// common::ok() when every check passes.
191 common::VoidResult validate() const {
// Check 1: history_size must not be zero.
192 if (history_size == 0) {
// (listing line 193, which starts the `err` construction, is missing)
194 "History size must be greater than 0");
195 return common::VoidResult::err(err.to_common_error());
196 }
// Check 2: collection_interval must be at least 10 milliseconds.
197 if (collection_interval.count() < 10) {
// (listing line 198, which starts the `err` construction, is missing)
199 "Collection interval must be at least 10ms");
200 return common::VoidResult::err(err.to_common_error());
201 }
// Check 3: listing lines 202-203 (the condition and start of the `err`
// construction) are missing. Presumably this compares buffer_size against
// history_size, judging by the message; confirm against the real header.
204 "Buffer size must be at least as large as history size");
205 return common::VoidResult::err(err.to_common_error());
206 }
207 return common::ok();
208 }
209};
210
260public:
261 virtual ~monitoring_interface() = default;
262
263 // Configuration
264 virtual common::VoidResult configure(const monitoring_config& config) = 0;
265 virtual common::Result<monitoring_config> get_configuration() const = 0;
266
267 // Collector management
268 virtual common::VoidResult add_collector(std::unique_ptr<metrics_collector> collector) = 0;
269 virtual common::VoidResult remove_collector(const std::string& name) = 0;
270 virtual common::Result<std::vector<std::string>> list_collectors() const = 0;
271
272 // Metrics operations
273 virtual common::VoidResult start() = 0;
274 virtual common::VoidResult stop() = 0;
275 virtual common::Result<metrics_snapshot> collect_now() = 0;
276 virtual common::Result<metrics_snapshot> get_latest_snapshot() const = 0;
277 virtual common::Result<std::vector<metrics_snapshot>> get_history(std::size_t count) const = 0;
278
279 // Health checks
280 virtual common::Result<health_check_result> check_health() const = 0;
281 virtual common::VoidResult register_health_check(
282 const std::string& name,
283 std::function<health_check_result()> checker) = 0;
284
285 // Storage management
286 virtual common::VoidResult set_storage_backend(std::unique_ptr<storage_backend> storage) = 0;
287 virtual common::VoidResult flush_storage() = 0;
288
289 // Analysis
290 virtual common::VoidResult add_analyzer(std::unique_ptr<metrics_analyzer> analyzer) = 0;
291 virtual common::Result<std::vector<std::string>> get_analysis_results() const = 0;
292
293 // Status
294 virtual bool is_running() const = 0;
295 virtual common::Result<std::string> get_status_summary() const = 0;
296};
297
338public:
339 virtual ~metrics_collector() = default;
340
345 virtual common::Result<metrics_snapshot> collect() = 0;
346
351 virtual std::string get_name() const = 0;
352
357 virtual bool is_enabled() const = 0;
358
364 virtual common::VoidResult set_enabled(bool enable) = 0;
365
370 virtual common::VoidResult initialize() = 0;
371
376 virtual common::VoidResult cleanup() = 0;
377};
378
421public:
422 virtual ~storage_backend() = default;
423
429 virtual common::VoidResult store(const metrics_snapshot& snapshot) = 0;
430
436 virtual common::Result<metrics_snapshot> retrieve(std::size_t index) const = 0;
437
444 virtual common::Result<std::vector<metrics_snapshot>> retrieve_range(
445 std::size_t start_index, std::size_t count) const = 0;
446
451 virtual std::size_t capacity() const = 0;
452
457 virtual std::size_t size() const = 0;
458
463 virtual common::VoidResult clear() = 0;
464
469 virtual common::VoidResult flush() = 0;
470};
471
517public:
518 virtual ~metrics_analyzer() = default;
519
525 virtual common::Result<std::string> analyze(const metrics_snapshot& snapshot) = 0;
526
532 virtual common::Result<std::string> analyze_trend(
533 const std::vector<metrics_snapshot>& snapshots) = 0;
534
539 virtual std::string get_name() const = 0;
540
545 virtual common::VoidResult reset() = 0;
546};
547
548} } // namespace kcenon::monitoring
Abstract interface for metrics analysis.
virtual std::string get_name() const =0
Get analyzer name.
virtual common::Result< std::string > analyze(const metrics_snapshot &snapshot)=0
Analyze a metrics snapshot.
virtual common::Result< std::string > analyze_trend(const std::vector< metrics_snapshot > &snapshots)=0
Analyze multiple snapshots for trends.
virtual common::VoidResult reset()=0
Reset analyzer state.
Abstract base class for metric collectors.
virtual std::string get_name() const =0
Get collector name.
virtual bool is_enabled() const =0
Check if collector is enabled.
virtual common::VoidResult initialize()=0
Initialize the collector.
virtual common::VoidResult cleanup()=0
Cleanup collector resources.
virtual common::Result< metrics_snapshot > collect()=0
Collect metrics.
virtual common::VoidResult set_enabled(bool enable)=0
Enable or disable the collector.
Abstract interface for monitoring operations.
virtual common::VoidResult set_storage_backend(std::unique_ptr< storage_backend > storage)=0
virtual common::Result< monitoring_config > get_configuration() const =0
virtual common::VoidResult flush_storage()=0
virtual common::VoidResult stop()=0
virtual common::Result< health_check_result > check_health() const =0
virtual common::Result< metrics_snapshot > collect_now()=0
virtual common::Result< std::vector< std::string > > get_analysis_results() const =0
virtual common::VoidResult start()=0
virtual common::Result< std::vector< metrics_snapshot > > get_history(std::size_t count) const =0
virtual common::VoidResult register_health_check(const std::string &name, std::function< health_check_result()> checker)=0
virtual common::Result< std::string > get_status_summary() const =0
virtual common::Result< std::vector< std::string > > list_collectors() const =0
virtual common::Result< metrics_snapshot > get_latest_snapshot() const =0
virtual common::VoidResult remove_collector(const std::string &name)=0
virtual common::VoidResult add_analyzer(std::unique_ptr< metrics_analyzer > analyzer)=0
virtual common::VoidResult configure(const monitoring_config &config)=0
virtual common::VoidResult add_collector(std::unique_ptr< metrics_collector > collector)=0
Abstract interface for metrics storage.
virtual common::Result< metrics_snapshot > retrieve(std::size_t index) const =0
Retrieve a stored snapshot by index.
virtual common::VoidResult store(const metrics_snapshot &snapshot)=0
Store a metrics snapshot.
virtual common::VoidResult flush()=0
Flush any buffered data to persistent storage.
virtual std::size_t capacity() const =0
Get storage capacity.
virtual common::VoidResult clear()=0
Clear all stored data.
virtual std::size_t size() const =0
Get current storage usage.
virtual common::Result< std::vector< metrics_snapshot > > retrieve_range(std::size_t start_index, std::size_t count) const =0
Retrieve multiple snapshots.
Monitoring system specific error codes.
@ storage
Storage device sensor.
health_status
System health status levels.
Result pattern type definitions for monitoring system.
Extended error information with context.
common::error_info to_common_error() const
Convert to common_system error_info.
Result of a health check operation.
static health_check_result unhealthy(const std::string &msg)
std::chrono::system_clock::time_point timestamp
static health_check_result healthy(const std::string &msg="OK")
std::unordered_map< std::string, std::string > metadata
static health_check_result degraded(const std::string &msg)
std::chrono::milliseconds check_duration
std::chrono::system_clock::time_point check_time
std::optional< std::exception_ptr > error
Represents a single metric value with metadata.
std::chrono::system_clock::time_point timestamp
metric_value(const std::string &n="", double v=0.0)
std::unordered_map< std::string, std::string > tags
Complete snapshot of metrics at a point in time.
std::chrono::system_clock::time_point capture_time
std::optional< double > get_metric(const std::string &name) const
Get a specific metric value.
std::vector< metric_value > metrics
void add_metric(const std::string &name, double value)
Add a metric to the snapshot.
void add_metric(const std::string &name, double value, const std::unordered_map< std::string, std::string > &tags)
Add a metric to the snapshot with tags.
Configuration for the monitoring system.
std::chrono::milliseconds collection_interval
common::VoidResult validate() const
Validate configuration parameters.