31using namespace std::chrono_literals;
38 std::mt19937
rng_{std::random_device{}()};
45 kcenon::common::Result<std::string>
execute_query(
const std::string& query) {
49 std::this_thread::sleep_for(10ms);
53 return kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::service_unavailable,
"Database connection lost").to_common_error());
57 std::uniform_int_distribution<> dist(1, 10);
58 if (dist(
rng_) == 1) {
59 return kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::operation_timeout,
"Query timeout").to_common_error());
62 return kcenon::common::ok(
"Query result for: " + query);
75 kcenon::common::Result<std::string>
call_api(
const std::string& endpoint) {
81 return kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::service_unavailable,
"Service unavailable").to_common_error());
87 return kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::operation_failed,
"Internal server error").to_common_error());
91 return kcenon::common::ok(
"API response from: " + endpoint);
104 std::cout <<
"\n=== Health Monitoring Demo ===" << std::endl;
114 auto database = std::make_shared<DatabaseConnection>();
118 std::make_shared<functional_health_check>(
120 health_check_type::liveness,
123 auto result = database->execute_query(
"SELECT 1");
124 if (result.is_ok()) {
128 "Database unreachable: " + result.error().message
139 std::make_shared<functional_health_check>(
140 "database_readiness",
141 health_check_type::readiness,
144 auto result = database->execute_query(
"SELECT COUNT(*) FROM users");
145 if (result.is_ok()) {
146 int query_count = database->get_query_count();
147 if (query_count > 100) {
149 "High query count: " + std::to_string(query_count)
155 "Database not ready: " + result.error().message
166 std::make_shared<functional_health_check>(
168 health_check_type::startup,
171 static bool initialized =
false;
173 std::this_thread::sleep_for(100ms);
184 std::cout <<
"Health monitoring started" << std::endl;
187 std::cout <<
"\n1. Initial health check:" << std::endl;
189 for (
const auto& [name, result] : all_checks) {
190 std::cout <<
" " << name <<
": "
191 << (result.status == health_status::healthy ?
"HEALTHY" :
192 result.status == health_status::degraded ?
"DEGRADED" :
"UNHEALTHY")
193 <<
" - " << result.message << std::endl;
198 std::cout <<
" Overall status: "
199 << (overall == health_status::healthy ?
"HEALTHY" :
200 overall == health_status::degraded ?
"DEGRADED" :
"UNHEALTHY")
204 std::cout <<
"\n2. Simulating database failure..." << std::endl;
205 database->set_healthy(
false);
206 std::this_thread::sleep_for(1s);
209 for (
const auto& [name, result] : all_checks) {
210 if (name.find(
"database") != std::string::npos) {
211 std::cout <<
" " << name <<
": "
212 << (result.status == health_status::healthy ?
"HEALTHY" :
"UNHEALTHY")
213 <<
" - " << result.message << std::endl;
219 [database]() ->
bool {
220 std::cout <<
" Attempting database recovery..." << std::endl;
221 database->set_healthy(
true);
227 std::cout <<
"\n3. Triggering recovery..." << std::endl;
229 std::this_thread::sleep_for(2s);
232 std::cout <<
" Database status after recovery: "
233 << (all_checks[
"database_liveness"].status == health_status::healthy ?
234 "HEALTHY" :
"UNHEALTHY") << std::endl;
237 std::cout <<
"\n4. Health Report:" << std::endl;
245 std::cout <<
"\n=== Circuit Breaker Demo ===" << std::endl;
248 auto api_client = std::make_shared<ExternalApiClient>();
252 cb_config.failure_threshold = 3;
253 cb_config.timeout = 2s;
254 cb_config.success_threshold = 2;
258 std::cout <<
"Circuit breaker configured:" << std::endl;
259 std::cout <<
" Failure threshold: " << cb_config.failure_threshold << std::endl;
260 std::cout <<
" Reset timeout: 2s" << std::endl;
263 auto api_operation = [api_client]() -> kcenon::common::Result<std::string> {
264 return api_client->call_api(
"/users");
268 auto fallback = []() -> kcenon::common::Result<std::string> {
269 return kcenon::common::ok(std::string(
"Cached response (fallback)"));
273 std::cout <<
"\n1. Making API calls through circuit breaker:" << std::endl;
275 for (
int i = 1; i <= 10; ++i) {
276 kcenon::common::Result<std::string> result = kcenon::common::make_error<std::string>(0,
"");
277 if (breaker.allow_request()) {
278 result = api_operation();
279 if (result.is_ok()) {
280 breaker.record_success();
282 breaker.record_failure();
289 std::cout <<
" Call " << i <<
": ";
290 if (result.is_ok()) {
291 std::cout <<
"SUCCESS - " << result.value() << std::endl;
293 std::cout <<
"FAILED - " << result.error().message << std::endl;
297 auto state = breaker.get_state();
298 if (state == circuit_state::OPEN) {
299 std::cout <<
" [Circuit OPEN - using fallback]" << std::endl;
300 }
else if (state == circuit_state::HALF_OPEN) {
301 std::cout <<
" [Circuit HALF-OPEN - testing]" << std::endl;
304 std::this_thread::sleep_for(300ms);
308 auto stats = breaker.get_stats();
309 std::cout <<
"\n2. Circuit Breaker Stats:" << std::endl;
310 for (
const auto& [key, val] : stats) {
311 std::visit([&key](
const auto& v) {
312 std::cout <<
" " << key <<
": " << v << std::endl;
317 std::cout <<
"\n3. Waiting for circuit reset..." << std::endl;
319 std::this_thread::sleep_for(3s);
322 std::cout <<
"\n4. Trying after reset:" << std::endl;
323 for (
int i = 1; i <= 3; ++i) {
325 std::cout <<
" Call " << i <<
": ";
326 if (result.is_ok()) {
327 std::cout <<
"SUCCESS" << std::endl;
329 std::cout <<
"FAILED" << std::endl;
336 std::cout <<
"\n=== Retry Policy Demo ===" << std::endl;
341 config.
strategy = retry_strategy::exponential_backoff;
346 std::cout <<
"Retry policy configured:" << std::endl;
347 std::cout <<
" Max attempts: " << config.
max_attempts << std::endl;
348 std::cout <<
" Strategy: exponential backoff" << std::endl;
349 std::cout <<
" Initial delay: 100ms" << std::endl;
352 std::cout <<
"\n1. Executing flaky operation with manual retry:" << std::endl;
354 std::atomic<int> attempt_count{0};
355 auto flaky_operation = [&attempt_count]() -> kcenon::common::Result<std::string> {
357 std::cout <<
" Attempt " << attempt_count <<
"..." << std::endl;
360 if (attempt_count <= 2) {
361 return kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::operation_timeout,
"Operation timed out").to_common_error());
364 return kcenon::common::ok(std::string(
"Operation succeeded!"));
367 kcenon::common::Result<std::string> final_result = kcenon::common::Result<std::string>::err(
error_info(monitoring_error_code::operation_failed,
"Initialization pending").to_common_error());
368 for (
int i = 0; i < static_cast<int>(config.
max_attempts); ++i) {
369 final_result = flaky_operation();
370 if (final_result.is_ok()) {
377 std::this_thread::sleep_for(
delay);
381 if (final_result.is_ok()) {
382 std::cout <<
" Final result: SUCCESS - " << final_result.value() << std::endl;
384 std::cout <<
" Final result: FAILED - " << final_result.error().message << std::endl;
387 std::cout <<
" Total attempts: " << attempt_count << std::endl;
392 std::cout <<
"\n=== Error Boundaries Demo ===" << std::endl;
404 std::cout <<
" Error handler called: " <<
error.message
405 <<
" (degradation level: " <<
static_cast<int>(level) <<
")" << std::endl;
408 std::cout <<
"Error boundary configured:" << std::endl;
410 std::cout <<
" Error window: 60s" << std::endl;
413 std::cout <<
"\n1. Executing operations within error boundary:" << std::endl;
415 for (
int i = 1; i <= 7; ++i) {
416 auto result = boundary.
execute([i]() -> ::kcenon::common::Result<std::string> {
417 std::cout <<
" Operation " << i <<
": ";
421 std::cout <<
"FAILED" << std::endl;
422 error_info err(monitoring_error_code::operation_failed,
423 "Operation " + std::to_string(i) +
" failed");
424 return ::kcenon::common::Result<std::string>::err(err.
to_common_error());
427 std::cout <<
"SUCCESS" << std::endl;
428 return kcenon::common::ok(
"Result " + std::to_string(i));
431 if (result.is_err() && result.error().code ==
static_cast<int>(monitoring_error_code::circuit_breaker_open)) {
432 std::cout <<
" [Error boundary triggered - too many errors]" << std::endl;
439 std::cout <<
"\n2. Error Boundary Statistics:" << std::endl;
441 std::cout <<
" Failed operations: " << stats.failed_operations << std::endl;
442 std::cout <<
" Success rate: "
443 << (stats.total_operations > 0 ?
444 100.0 * (stats.total_operations - stats.failed_operations) / stats.total_operations : 0)
449 std::cout <<
"=== Health Monitoring & Reliability Example ===" << std::endl;
464 }
catch (
const std::exception& e) {
465 std::cerr <<
"Exception: " << e.what() << std::endl;
469 std::cout <<
"\n=== Example completed successfully ===" << std::endl;
Circuit breaker integration for monitoring_system.
std::atomic< int > query_count_
int get_query_count() const
void set_healthy(bool healthy)
std::atomic< bool > is_healthy_
kcenon::common::Result< std::string > execute_query(const std::string &query)
std::atomic< int > call_count_
int get_call_count() const
std::atomic< int > failure_count_
kcenon::common::Result< std::string > call_api(const std::string &endpoint)
Error boundary implementation for resilient operations.
void set_error_handler(std::function< void(const error_info &, degradation_level)> handler)
Set error handler callback.
auto execute(Func &&func) -> common::Result< T >
Execute a function within the error boundary.
error_boundary_metrics get_metrics() const
Get metrics.
Health monitor with dependency management, auto-recovery, and statistics.
void refresh()
Manually refresh all health checks and trigger recovery if needed.
common::VoidResult stop()
Stop the periodic health monitoring background thread.
void register_recovery_handler(const std::string &check_name, std::function< bool()> handler)
Register a recovery handler for a named health check.
std::unordered_map< std::string, health_check_result > check_all()
Execute all registered health checks.
health_status get_overall_status()
Get the aggregate health status across all cached results.
common::VoidResult start()
Start the periodic health monitoring background thread.
std::string get_health_report()
Generate a human-readable health report.
common::Result< bool > register_check(const std::string &name, std::shared_ptr< health_check > check)
Register a named health check.
Error boundary with degradation levels for fault isolation.
Monitoring system specific error codes.
Fault tolerance manager coordinating circuit breakers and retries.
Health monitoring with dependency graphs, auto-recovery, and statistics.
void demonstrate_health_monitoring()
void demonstrate_retry_policy()
void demonstrate_circuit_breaker()
void demonstrate_error_boundaries()
@ delay
Delay requests until resources are available.
common::Result< T > execute_with_circuit_breaker(circuit_breaker &cb, const std::string &name, Func &&func)
Execute an operation through a circuit breaker.
common::resilience::circuit_breaker circuit_breaker
degradation_level
Degradation levels for error boundary.
common::resilience::circuit_breaker_config circuit_breaker_config
Result pattern type definitions for monitoring system.
Retry strategies with backoff for monitoring operations.
Error boundary configuration.
bool enable_fallback_logging
std::chrono::seconds error_window
std::atomic< size_t > total_operations
Extended error information with context.
common::error_info to_common_error() const
Convert to common_system error_info.
Result of a health check operation.
static health_check_result unhealthy(const std::string &msg)
static health_check_result healthy(const std::string &msg="OK")
static health_check_result degraded(const std::string &msg)
Configuration for the health_monitor.
std::chrono::seconds cache_duration
Duration to cache health check results.
std::chrono::milliseconds check_interval
Interval between automatic health check cycles.
double backoff_multiplier
std::chrono::milliseconds initial_delay
std::chrono::milliseconds max_delay