33using namespace std::chrono_literals;
40 std::cout <<
"\nReceived signal " << signal <<
", initiating graceful shutdown..." << std::endl;
57 return health_check_type::readiness;
83 return health_check_type::readiness;
97 std::cout <<
"=== Production Monitoring Stack Example ===" << std::endl;
98 std::cout << std::endl;
104 std::cout <<
"1. Configuring Production Monitoring Stack" << std::endl;
105 std::cout <<
" =========================================" << std::endl;
106 std::cout << std::endl;
114 std::cout <<
" Performance Monitor:" << std::endl;
115 std::cout <<
" - History size: " << perf_config.
history_size << std::endl;
116 std::cout <<
" - Collection interval: 5s" << std::endl;
117 std::cout << std::endl;
125 std::cout <<
" Alert Manager:" << std::endl;
126 std::cout <<
" - Evaluation interval: 10s" << std::endl;
127 std::cout <<
" - Grouping: enabled" << std::endl;
128 std::cout << std::endl;
135 std::cout <<
" Health Monitor:" << std::endl;
136 std::cout <<
" - Check interval: 5s" << std::endl;
137 std::cout <<
" - Auto-recovery: enabled" << std::endl;
138 std::cout << std::endl;
143 std::cout <<
"2. Initializing Components" << std::endl;
144 std::cout <<
" ======================= " << std::endl;
145 std::cout << std::endl;
149 if (
auto result = perf_monitor.
initialize(); result.is_err()) {
150 std::cerr <<
"Failed to initialize performance monitor" << std::endl;
153 std::cout <<
" [OK] Performance monitor" << std::endl;
157 std::cout <<
" [OK] Health monitor" << std::endl;
161 std::cout <<
" [OK] Alert manager" << std::endl;
162 std::cout << std::endl;
167 std::cout <<
"3. Configuring Storage" << std::endl;
168 std::cout <<
" ====================" << std::endl;
169 std::cout << std::endl;
172 storage_cfg.
type = storage_backend_type::file_json;
173 storage_cfg.
path =
"production_metrics.json";
175 auto storage = std::make_unique<file_storage_backend>(storage_cfg);
176 std::cout <<
" [OK] JSON file storage configured" << std::endl;
177 std::cout << std::endl;
182 std::cout <<
"4. Registering Health Checks" << std::endl;
183 std::cout <<
" ===========================" << std::endl;
184 std::cout << std::endl;
186 auto db_check = std::make_shared<database_health_check>(
"database");
188 std::cout <<
" [OK] Database health check" << std::endl;
190 auto api_check = std::make_shared<external_api_health_check>(
"external_api");
192 std::cout <<
" [OK] External API health check" << std::endl;
193 std::cout << std::endl;
198 std::cout <<
"5. Configuring Alert Rules" << std::endl;
199 std::cout <<
" ========================" << std::endl;
200 std::cout << std::endl;
202 auto cpu_rule = std::make_shared<alert_rule>(
"high_cpu_usage");
203 cpu_rule->set_metric_name(
"cpu_usage")
204 .set_severity(alert_severity::warning)
205 .set_summary(
"CPU usage exceeds 80%")
209 std::cout <<
" [OK] CPU usage alert rule" << std::endl;
211 auto log_notifier_ptr = std::make_shared<log_notifier>(
"console_logger");
213 std::cout <<
" [OK] Console notifier" << std::endl;
214 std::cout << std::endl;
219 std::cout <<
"6. Starting Monitoring" << std::endl;
220 std::cout <<
" ====================" << std::endl;
221 std::cout << std::endl;
224 std::cout <<
" [OK] Health monitor started" << std::endl;
227 std::cout <<
" [OK] Alert manager started" << std::endl;
228 std::cout << std::endl;
234 std::cout <<
"7. Monitoring Active (Ctrl+C to shutdown)" << std::endl;
235 std::cout <<
" ========================================" << std::endl;
236 std::cout << std::endl;
243 std::cout <<
" Iteration " << (iteration + 1) <<
"/10" << std::endl;
248 std::this_thread::sleep_for(200ms);
253 if (health_result.status == health_status::healthy) {
254 std::cout <<
" Health: Healthy" << std::endl;
262 <<
"%, Memory: " << (metrics.memory_usage_bytes / (1024.0 * 1024.0))
263 <<
" MB" << std::endl;
269 std::cout << std::endl;
270 std::this_thread::sleep_for(2s);
277 std::cout <<
"8. Graceful Shutdown" << std::endl;
278 std::cout <<
" ==================" << std::endl;
279 std::cout << std::endl;
282 std::cout <<
" [OK] Alert manager stopped" << std::endl;
285 std::cout <<
" [OK] Health monitor stopped" << std::endl;
288 std::cout <<
" [OK] Performance monitor cleaned up" << std::endl;
291 std::cout <<
" [OK] Storage flushed" << std::endl;
292 std::cout << std::endl;
294 std::cout <<
"=== Production Monitoring Completed Successfully ===" << std::endl;
296 }
catch (
const std::exception& e) {
297 std::cerr <<
"Exception: " << e.what() << std::endl;
Central coordinator for alert lifecycle management.
Alert notification implementations.
Alert trigger implementations for various condition types.
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
database_health_check(const std::string &name)
health_check_result check() override
Execute the health check and return the result.
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_result check() override
Execute the health check and return the result.
external_api_health_check(const std::string &name)
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
Central coordinator for the alert pipeline.
common::VoidResult stop()
Stop the alert manager.
common::VoidResult add_rule(std::shared_ptr< alert_rule > rule)
Add an alert rule.
common::VoidResult process_metric(const std::string &metric_name, double value)
Process a metric value.
common::VoidResult start()
Start the alert manager.
common::VoidResult add_notifier(std::shared_ptr< alert_notifier > notifier)
Add a notifier.
Abstract base class for health checks.
Health monitor with dependency management, auto-recovery, and statistics.
common::VoidResult stop()
Stop the periodic health monitoring background thread.
health_check_result check_health() const
Quick self-check of the health monitor itself.
common::VoidResult start()
Start the periodic health monitoring background thread.
common::Result< bool > register_check(const std::string &name, std::shared_ptr< health_check > check)
Register a named health check.
common::Result< system_metrics > get_current_metrics() const
Get current system metrics.
static std::shared_ptr< threshold_trigger > above(double threshold)
Create trigger for value > threshold.
Health monitoring with dependency graphs, auto-recovery, and statistics.
@ timer
StatsD-specific timer metric.
@ storage
Storage device sensor.
health_check_type
Types of health checks following Kubernetes probe conventions.
std::atomic< bool > shutdown_requested
void signal_handler(int signal)
Storage backend type definitions for metric persistence.
Configuration for the alert manager.
std::chrono::milliseconds default_evaluation_interval
Default evaluation interval.
bool enable_grouping
Enable alert grouping.
std::chrono::milliseconds default_repeat_interval
Default repeat interval.
Result of a health check operation.
static health_check_result healthy(const std::string &msg="OK")
Configuration for the health_monitor.
std::chrono::milliseconds check_interval
Interval between automatic health check cycles.
bool enable_auto_recovery
Whether to invoke recovery handlers on failure.
Configuration for the monitoring system.
std::chrono::milliseconds collection_interval
storage_backend_type type