Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
production_monitoring_example.cpp

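This example demonstrates a production monitoring stack: configuration, component initialization, a JSON file storage backend, custom health checks, alert rules with a console notifier, a simulated workload loop, and graceful shutdown on SIGINT/SIGTERM.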

// BSD 3-Clause License
// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
// See the LICENSE file in the project root for full license information.
#include <atomic>
#include <chrono>
#include <csignal>
#include <iostream>
#include <thread>
using namespace kcenon::monitoring;
using namespace std::chrono_literals;
// Global shutdown flag for graceful shutdown.
// Written by signal_handler() and polled by the workload loop in main().
std::atomic<bool> shutdown_requested{false};

// Signal handler for graceful shutdown.
//
// Records the shutdown request in `shutdown_requested` and prints a notice
// containing the received signal number.
//
// @param signal  Signal number passed in by the runtime.
//
// NOTE(review): iostream output is not async-signal-safe. It is kept here to
// preserve the example's console output; move the message to the main loop
// if strict signal safety is required.
void signal_handler(int signal) {
    // Set the flag first: this is the part the main loop depends on.
    shutdown_requested.store(true);
    std::cout << "\nReceived signal " << signal << ", initiating graceful shutdown..." << std::endl;
}
// Custom health check for database simulation
private:
std::string name_;
public:
explicit database_health_check(const std::string& name) : name_(name) {}
std::string get_name() const override {
return name_;
}
health_check_type get_type() const override {
return health_check_type::readiness;
}
// Simulate database connectivity check
return health_check_result::healthy("Database connection pool active");
}
bool is_critical() const override {
return true;
}
};
// Custom health check for external API
private:
std::string name_;
public:
explicit external_api_health_check(const std::string& name) : name_(name) {}
std::string get_name() const override {
return name_;
}
health_check_type get_type() const override {
return health_check_type::readiness;
}
// Simulate external API health check
return health_check_result::healthy("External API responding");
}
bool is_critical() const override {
return false;
}
};
int main() {
std::cout << "=== Production Monitoring Stack Example ===" << std::endl;
std::cout << std::endl;
try {
// =====================================================================
// Section 1: Configuration Management
// =====================================================================
std::cout << "1. Configuring Production Monitoring Stack" << std::endl;
std::cout << " =========================================" << std::endl;
std::cout << std::endl;
// Configure performance monitoring
monitoring_config perf_config;
perf_config.history_size = 10000;
perf_config.collection_interval = 5000ms;
perf_config.enable_compression = true;
std::cout << " Performance Monitor:" << std::endl;
std::cout << " - History size: " << perf_config.history_size << std::endl;
std::cout << " - Collection interval: 5s" << std::endl;
std::cout << std::endl;
// Configure alert manager
alert_manager_config alert_config;
alert_config.default_evaluation_interval = 10000ms;
alert_config.default_repeat_interval = 300000ms;
alert_config.enable_grouping = true;
std::cout << " Alert Manager:" << std::endl;
std::cout << " - Evaluation interval: 10s" << std::endl;
std::cout << " - Grouping: enabled" << std::endl;
std::cout << std::endl;
// Configure health monitoring
health_monitor_config health_config;
health_config.check_interval = 5000ms;
health_config.enable_auto_recovery = true;
std::cout << " Health Monitor:" << std::endl;
std::cout << " - Check interval: 5s" << std::endl;
std::cout << " - Auto-recovery: enabled" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 2: Initialize Monitoring Components
// =====================================================================
std::cout << "2. Initializing Components" << std::endl;
std::cout << " ======================= " << std::endl;
std::cout << std::endl;
// Initialize performance monitor
performance_monitor perf_monitor("production_monitor");
if (auto result = perf_monitor.initialize(); result.is_err()) {
std::cerr << "Failed to initialize performance monitor" << std::endl;
return 1;
}
std::cout << " [OK] Performance monitor" << std::endl;
// Initialize health monitor
health_monitor health_mon(health_config);
std::cout << " [OK] Health monitor" << std::endl;
// Initialize alert manager
alert_manager alert_mgr(alert_config);
std::cout << " [OK] Alert manager" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 3: Configure Storage Backend
// =====================================================================
std::cout << "3. Configuring Storage" << std::endl;
std::cout << " ====================" << std::endl;
std::cout << std::endl;
storage_config storage_cfg;
storage_cfg.type = storage_backend_type::file_json;
storage_cfg.path = "production_metrics.json";
auto storage = std::make_unique<file_storage_backend>(storage_cfg);
std::cout << " [OK] JSON file storage configured" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 4: Register Health Checks
// =====================================================================
std::cout << "4. Registering Health Checks" << std::endl;
std::cout << " ===========================" << std::endl;
std::cout << std::endl;
auto db_check = std::make_shared<database_health_check>("database");
health_mon.register_check("database", db_check);
std::cout << " [OK] Database health check" << std::endl;
auto api_check = std::make_shared<external_api_health_check>("external_api");
health_mon.register_check("external_api", api_check);
std::cout << " [OK] External API health check" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 5: Configure Alert Rules
// =====================================================================
std::cout << "5. Configuring Alert Rules" << std::endl;
std::cout << " ========================" << std::endl;
std::cout << std::endl;
auto cpu_rule = std::make_shared<alert_rule>("high_cpu_usage");
cpu_rule->set_metric_name("cpu_usage")
.set_severity(alert_severity::warning)
.set_summary("CPU usage exceeds 80%")
.set_trigger(threshold_trigger::above(80.0));
alert_mgr.add_rule(cpu_rule);
std::cout << " [OK] CPU usage alert rule" << std::endl;
auto log_notifier_ptr = std::make_shared<log_notifier>("console_logger");
alert_mgr.add_notifier(log_notifier_ptr);
std::cout << " [OK] Console notifier" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 6: Start Monitoring
// =====================================================================
std::cout << "6. Starting Monitoring" << std::endl;
std::cout << " ====================" << std::endl;
std::cout << std::endl;
health_mon.start();
std::cout << " [OK] Health monitor started" << std::endl;
alert_mgr.start();
std::cout << " [OK] Alert manager started" << std::endl;
std::cout << std::endl;
// Install signal handlers
std::signal(SIGINT, signal_handler);
std::signal(SIGTERM, signal_handler);
std::cout << "7. Monitoring Active (Ctrl+C to shutdown)" << std::endl;
std::cout << " ========================================" << std::endl;
std::cout << std::endl;
// =====================================================================
// Section 7: Workload Simulation
// =====================================================================
int iteration = 0;
while (!shutdown_requested && iteration < 10) {
std::cout << " Iteration " << (iteration + 1) << "/10" << std::endl;
// Collect performance metrics
{
auto timer = perf_monitor.time_operation("iteration_" + std::to_string(iteration));
std::this_thread::sleep_for(200ms);
}
// Check health
auto health_result = health_mon.check_health();
if (health_result.status == health_status::healthy) {
std::cout << " Health: Healthy" << std::endl;
}
// Get system metrics
auto system_metrics = perf_monitor.get_system_monitor().get_current_metrics();
if (system_metrics.is_ok()) {
const auto& metrics = system_metrics.value();
std::cout << " CPU: " << metrics.cpu_usage_percent
<< "%, Memory: " << (metrics.memory_usage_bytes / (1024.0 * 1024.0))
<< " MB" << std::endl;
// Process metrics for alerting
alert_mgr.process_metric("cpu_usage", metrics.cpu_usage_percent);
}
std::cout << std::endl;
std::this_thread::sleep_for(2s);
iteration++;
}
// =====================================================================
// Section 8: Graceful Shutdown
// =====================================================================
std::cout << "8. Graceful Shutdown" << std::endl;
std::cout << " ==================" << std::endl;
std::cout << std::endl;
alert_mgr.stop();
std::cout << " [OK] Alert manager stopped" << std::endl;
health_mon.stop();
std::cout << " [OK] Health monitor stopped" << std::endl;
perf_monitor.cleanup();
std::cout << " [OK] Performance monitor cleaned up" << std::endl;
storage->flush();
std::cout << " [OK] Storage flushed" << std::endl;
std::cout << std::endl;
std::cout << "=== Production Monitoring Completed Successfully ===" << std::endl;
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}
Central coordinator for alert lifecycle management.
Alert notification implementations.
Alert trigger implementations for various condition types.
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
database_health_check(const std::string &name)
health_check_result check() override
Execute the health check and return the result.
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
health_check_result check() override
Execute the health check and return the result.
external_api_health_check(const std::string &name)
bool is_critical() const override
Whether this check is critical for overall system health.
std::string get_name() const override
Get the human-readable name of this health check.
Central coordinator for the alert pipeline.
Abstract base class for health checks.
Health monitor with dependency management, auto-recovery, and statistics.
Performance monitor combining profiling and system monitoring.
Health monitoring with dependency graphs, auto-recovery, and statistics.
@ timer
StatsD-specific timer metric.
@ storage
Storage device sensor.
health_check_type
Types of health checks following Kubernetes probe conventions.
Performance monitoring and profiling implementation.
std::atomic< bool > shutdown_requested
void signal_handler(int signal)
Storage backend type definitions for metric persistence.
Configuration for the alert manager.
std::chrono::milliseconds default_evaluation_interval
Default eval interval.
bool enable_grouping
Enable alert grouping.
std::chrono::milliseconds default_repeat_interval
Default repeat interval.
Result of a health check operation.
Configuration for the health_monitor.
std::chrono::milliseconds check_interval
Interval between automatic health check cycles.
bool enable_auto_recovery
Whether to invoke recovery handlers on failure.
Configuration for the monitoring system.
std::chrono::milliseconds collection_interval