Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
production_monitoring_example.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
19#include <atomic>
20#include <chrono>
21#include <csignal>
22#include <iostream>
23#include <thread>
24
31
32using namespace kcenon::monitoring;
33using namespace std::chrono_literals;
34
// Global shutdown flag for graceful shutdown.
// Set by signal_handler() and polled by the monitoring loop in main().
std::atomic<bool> shutdown_requested{false};

// An asynchronous signal handler may only operate on lock-free atomics, so
// refuse to build on a platform where std::atomic<bool> would need a lock.
static_assert(std::atomic<bool>::is_always_lock_free,
              "shutdown_requested must be lock-free to be set from a signal handler");

// Signal handler for graceful shutdown.
//
// Only records that a shutdown was requested. Nothing is printed from here:
// calling into iostreams (std::cout / std::endl) from an asynchronous signal
// handler is undefined behaviour, so any user-facing message has to be
// emitted by the code that observes the flag.
//
// @param signal  Number of the delivered signal; intentionally unused.
void signal_handler(int signal) {
    (void)signal;
    shutdown_requested.store(true);
}
43
44// Custom health check for database simulation
46private:
47 std::string name_;
48
49public:
50 explicit database_health_check(const std::string& name) : name_(name) {}
51
52 std::string get_name() const override {
53 return name_;
54 }
55
56 health_check_type get_type() const override {
57 return health_check_type::readiness;
58 }
59
61 // Simulate database connectivity check
62 return health_check_result::healthy("Database connection pool active");
63 }
64
65 bool is_critical() const override {
66 return true;
67 }
68};
69
70// Custom health check for external API
72private:
73 std::string name_;
74
75public:
76 explicit external_api_health_check(const std::string& name) : name_(name) {}
77
78 std::string get_name() const override {
79 return name_;
80 }
81
82 health_check_type get_type() const override {
83 return health_check_type::readiness;
84 }
85
87 // Simulate external API health check
88 return health_check_result::healthy("External API responding");
89 }
90
91 bool is_critical() const override {
92 return false;
93 }
94};
95
96int main() {
97 std::cout << "=== Production Monitoring Stack Example ===" << std::endl;
98 std::cout << std::endl;
99
100 try {
101 // =====================================================================
102 // Section 1: Configuration Management
103 // =====================================================================
104 std::cout << "1. Configuring Production Monitoring Stack" << std::endl;
105 std::cout << " =========================================" << std::endl;
106 std::cout << std::endl;
107
108 // Configure performance monitoring
109 monitoring_config perf_config;
110 perf_config.history_size = 10000;
111 perf_config.collection_interval = 5000ms;
112 perf_config.enable_compression = true;
113
114 std::cout << " Performance Monitor:" << std::endl;
115 std::cout << " - History size: " << perf_config.history_size << std::endl;
116 std::cout << " - Collection interval: 5s" << std::endl;
117 std::cout << std::endl;
118
119 // Configure alert manager
120 alert_manager_config alert_config;
121 alert_config.default_evaluation_interval = 10000ms;
122 alert_config.default_repeat_interval = 300000ms;
123 alert_config.enable_grouping = true;
124
125 std::cout << " Alert Manager:" << std::endl;
126 std::cout << " - Evaluation interval: 10s" << std::endl;
127 std::cout << " - Grouping: enabled" << std::endl;
128 std::cout << std::endl;
129
130 // Configure health monitoring
131 health_monitor_config health_config;
132 health_config.check_interval = 5000ms;
133 health_config.enable_auto_recovery = true;
134
135 std::cout << " Health Monitor:" << std::endl;
136 std::cout << " - Check interval: 5s" << std::endl;
137 std::cout << " - Auto-recovery: enabled" << std::endl;
138 std::cout << std::endl;
139
140 // =====================================================================
141 // Section 2: Initialize Monitoring Components
142 // =====================================================================
143 std::cout << "2. Initializing Components" << std::endl;
144 std::cout << " ======================= " << std::endl;
145 std::cout << std::endl;
146
147 // Initialize performance monitor
148 performance_monitor perf_monitor("production_monitor");
149 if (auto result = perf_monitor.initialize(); result.is_err()) {
150 std::cerr << "Failed to initialize performance monitor" << std::endl;
151 return 1;
152 }
153 std::cout << " [OK] Performance monitor" << std::endl;
154
155 // Initialize health monitor
156 health_monitor health_mon(health_config);
157 std::cout << " [OK] Health monitor" << std::endl;
158
159 // Initialize alert manager
160 alert_manager alert_mgr(alert_config);
161 std::cout << " [OK] Alert manager" << std::endl;
162 std::cout << std::endl;
163
164 // =====================================================================
165 // Section 3: Configure Storage Backend
166 // =====================================================================
167 std::cout << "3. Configuring Storage" << std::endl;
168 std::cout << " ====================" << std::endl;
169 std::cout << std::endl;
170
171 storage_config storage_cfg;
172 storage_cfg.type = storage_backend_type::file_json;
173 storage_cfg.path = "production_metrics.json";
174
175 auto storage = std::make_unique<file_storage_backend>(storage_cfg);
176 std::cout << " [OK] JSON file storage configured" << std::endl;
177 std::cout << std::endl;
178
179 // =====================================================================
180 // Section 4: Register Health Checks
181 // =====================================================================
182 std::cout << "4. Registering Health Checks" << std::endl;
183 std::cout << " ===========================" << std::endl;
184 std::cout << std::endl;
185
186 auto db_check = std::make_shared<database_health_check>("database");
187 health_mon.register_check("database", db_check);
188 std::cout << " [OK] Database health check" << std::endl;
189
190 auto api_check = std::make_shared<external_api_health_check>("external_api");
191 health_mon.register_check("external_api", api_check);
192 std::cout << " [OK] External API health check" << std::endl;
193 std::cout << std::endl;
194
195 // =====================================================================
196 // Section 5: Configure Alert Rules
197 // =====================================================================
198 std::cout << "5. Configuring Alert Rules" << std::endl;
199 std::cout << " ========================" << std::endl;
200 std::cout << std::endl;
201
202 auto cpu_rule = std::make_shared<alert_rule>("high_cpu_usage");
203 cpu_rule->set_metric_name("cpu_usage")
204 .set_severity(alert_severity::warning)
205 .set_summary("CPU usage exceeds 80%")
206 .set_trigger(threshold_trigger::above(80.0));
207
208 alert_mgr.add_rule(cpu_rule);
209 std::cout << " [OK] CPU usage alert rule" << std::endl;
210
211 auto log_notifier_ptr = std::make_shared<log_notifier>("console_logger");
212 alert_mgr.add_notifier(log_notifier_ptr);
213 std::cout << " [OK] Console notifier" << std::endl;
214 std::cout << std::endl;
215
216 // =====================================================================
217 // Section 6: Start Monitoring
218 // =====================================================================
219 std::cout << "6. Starting Monitoring" << std::endl;
220 std::cout << " ====================" << std::endl;
221 std::cout << std::endl;
222
223 health_mon.start();
224 std::cout << " [OK] Health monitor started" << std::endl;
225
226 alert_mgr.start();
227 std::cout << " [OK] Alert manager started" << std::endl;
228 std::cout << std::endl;
229
230 // Install signal handlers
231 std::signal(SIGINT, signal_handler);
232 std::signal(SIGTERM, signal_handler);
233
234 std::cout << "7. Monitoring Active (Ctrl+C to shutdown)" << std::endl;
235 std::cout << " ========================================" << std::endl;
236 std::cout << std::endl;
237
238 // =====================================================================
239 // Section 7: Workload Simulation
240 // =====================================================================
241 int iteration = 0;
242 while (!shutdown_requested && iteration < 10) {
243 std::cout << " Iteration " << (iteration + 1) << "/10" << std::endl;
244
245 // Collect performance metrics
246 {
247 auto timer = perf_monitor.time_operation("iteration_" + std::to_string(iteration));
248 std::this_thread::sleep_for(200ms);
249 }
250
251 // Check health
252 auto health_result = health_mon.check_health();
253 if (health_result.status == health_status::healthy) {
254 std::cout << " Health: Healthy" << std::endl;
255 }
256
257 // Get system metrics
259 if (system_metrics.is_ok()) {
260 const auto& metrics = system_metrics.value();
261 std::cout << " CPU: " << metrics.cpu_usage_percent
262 << "%, Memory: " << (metrics.memory_usage_bytes / (1024.0 * 1024.0))
263 << " MB" << std::endl;
264
265 // Process metrics for alerting
266 alert_mgr.process_metric("cpu_usage", metrics.cpu_usage_percent);
267 }
268
269 std::cout << std::endl;
270 std::this_thread::sleep_for(2s);
271 iteration++;
272 }
273
274 // =====================================================================
275 // Section 8: Graceful Shutdown
276 // =====================================================================
277 std::cout << "8. Graceful Shutdown" << std::endl;
278 std::cout << " ==================" << std::endl;
279 std::cout << std::endl;
280
281 alert_mgr.stop();
282 std::cout << " [OK] Alert manager stopped" << std::endl;
283
284 health_mon.stop();
285 std::cout << " [OK] Health monitor stopped" << std::endl;
286
287 perf_monitor.cleanup();
288 std::cout << " [OK] Performance monitor cleaned up" << std::endl;
289
290 storage->flush();
291 std::cout << " [OK] Storage flushed" << std::endl;
292 std::cout << std::endl;
293
294 std::cout << "=== Production Monitoring Completed Successfully ===" << std::endl;
295
296 } catch (const std::exception& e) {
297 std::cerr << "Exception: " << e.what() << std::endl;
298 return 1;
299 }
300
301 return 0;
302}
Central coordinator for alert lifecycle management.
Alert notification implementations.
Alert trigger implementations for various condition types.
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
database_health_check(const std::string &name)
health_check_result check() override
Execute the health check and return the result.
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_result check() override
Execute the health check and return the result.
external_api_health_check(const std::string &name)
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
Central coordinator for the alert pipeline.
common::VoidResult stop()
Stop the alert manager.
common::VoidResult add_rule(std::shared_ptr< alert_rule > rule)
Add an alert rule.
common::VoidResult process_metric(const std::string &metric_name, double value)
Process a metric value.
common::VoidResult start()
Start the alert manager.
common::VoidResult add_notifier(std::shared_ptr< alert_notifier > notifier)
Add a notifier.
Abstract base class for health checks.
Health monitor with dependency management, auto-recovery, and statistics.
common::VoidResult stop()
Stop the periodic health monitoring background thread.
health_check_result check_health() const
Quick self-check of the health monitor itself.
common::VoidResult start()
Start the periodic health monitoring background thread.
common::Result< bool > register_check(const std::string &name, std::shared_ptr< health_check > check)
Register a named health check.
Performance monitor combining profiling and system monitoring.
common::VoidResult initialize() override
Initialize the collector.
system_monitor & get_system_monitor()
Get system monitor.
common::VoidResult cleanup() override
Cleanup collector resources.
scoped_timer time_operation(const std::string &operation_name)
Create a scoped timer for an operation.
common::Result< system_metrics > get_current_metrics() const
Get current system metrics.
static std::shared_ptr< threshold_trigger > above(double threshold)
Create trigger for value > threshold.
Health monitoring with dependency graphs, auto-recovery, and statistics.
@ timer
StatsD-specific timer metric.
@ storage
Storage device sensor.
health_check_type
Types of health checks following Kubernetes probe conventions.
Performance monitoring and profiling implementation.
std::atomic< bool > shutdown_requested
void signal_handler(int signal)
Storage backend type definitions for metric persistence.
Configuration for the alert manager.
std::chrono::milliseconds default_evaluation_interval
Default eval interval.
bool enable_grouping
Enable alert grouping.
std::chrono::milliseconds default_repeat_interval
Default repeat interval.
Result of a health check operation.
static health_check_result healthy(const std::string &msg="OK")
Configuration for the health_monitor.
std::chrono::milliseconds check_interval
Interval between automatic health check cycles.
bool enable_auto_recovery
Whether to invoke recovery handlers on failure.
Configuration for the monitoring system.
std::chrono::milliseconds collection_interval