Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
error_boundary.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
12#pragma once
13
14#include <atomic>
15#include <chrono>
16#include <functional>
17#include <memory>
18#include <mutex>
19#include <string>
20
22
23namespace kcenon::monitoring {
24
29 normal = 0,
30 limited = 1,
31 minimal = 2,
32 emergency = 3
33};
34
40 isolate,
41 degrade,
43};
44
49 std::atomic<size_t> total_operations{0};
50 std::atomic<size_t> successful_operations{0};
51 std::atomic<size_t> failed_operations{0};
52 std::atomic<size_t> recovered_operations{0};
53 std::atomic<size_t> recovery_attempts{0};
54
56
63
65 if (this != &other) {
66 total_operations = other.total_operations.load();
67 successful_operations = other.successful_operations.load();
68 failed_operations = other.failed_operations.load();
69 recovered_operations = other.recovered_operations.load();
70 recovery_attempts = other.recovery_attempts.load();
71 }
72 return *this;
73 }
74
75 double get_success_rate() const {
76 size_t total = total_operations.load();
77 if (total == 0) {
78 return 1.0;
79 }
80 return static_cast<double>(successful_operations.load()) / static_cast<double>(total);
81 }
82};
83
88 std::string name;
89 size_t error_threshold = 5;
90 std::chrono::seconds error_window = std::chrono::seconds(60);
95 std::chrono::milliseconds recovery_timeout = std::chrono::milliseconds(5000);
96
97 bool validate() const {
98 if (name.empty()) {
99 return false;
100 }
101 if (error_threshold == 0) {
102 return false;
103 }
104 return true;
105 }
106};
107
111template<typename T>
113public:
114 virtual ~fallback_strategy_interface() = default;
115 virtual common::Result<T> get_fallback(const error_info& err, degradation_level level) = 0;
116};
117
121template<typename T>
123public:
124 explicit default_value_strategy(T default_val) : default_value_(std::move(default_val)) {}
125
126 common::Result<T> get_fallback(const error_info& /*err*/, degradation_level /*level*/) override {
127 return common::ok(default_value_);
128 }
129
130private:
132};
133
137template<typename T>
139public:
140 explicit cached_value_strategy(std::chrono::seconds ttl = std::chrono::seconds(60))
141 : ttl_(ttl), has_value_(false) {}
142
143 void update_cache(const T& value) {
144 std::lock_guard<std::mutex> lock(mutex_);
145 cached_value_ = value;
146 cache_time_ = std::chrono::steady_clock::now();
147 has_value_ = true;
148 }
149
150 common::Result<T> get_fallback(const error_info& /*err*/, degradation_level /*level*/) override {
151 std::lock_guard<std::mutex> lock(mutex_);
152 if (!has_value_) {
153 return common::Result<T>::err(error_info(monitoring_error_code::operation_failed, "No cached value available").to_common_error());
154 }
155
156 auto now = std::chrono::steady_clock::now();
157 auto age = std::chrono::duration_cast<std::chrono::seconds>(now - cache_time_);
158 if (age > ttl_) {
159 return common::Result<T>::err(error_info(monitoring_error_code::operation_failed, "Cached value expired").to_common_error());
160 }
161
162 return common::ok(cached_value_);
163 }
164
165private:
166 std::chrono::seconds ttl_;
168 std::chrono::steady_clock::time_point cache_time_;
170 mutable std::mutex mutex_;
171};
172
176template<typename T>
178public:
179 using alternative_func = std::function<common::Result<T>()>;
180
182
183 common::Result<T> get_fallback(const error_info& /*err*/, degradation_level /*level*/) override {
184 if (alternative_func_) {
185 return alternative_func_();
186 }
187 return common::Result<T>::err(error_info(monitoring_error_code::operation_failed, "No alternative service available").to_common_error());
188 }
189
190private:
192};
193
197template<typename T>
199public:
201
202 error_boundary() : name_("default"), config_() {}
203
204 explicit error_boundary(const std::string& name) : name_(name), config_() {
205 config_.name = name;
206 }
207
208 explicit error_boundary(const std::string& name, const config& cfg)
209 : name_(name), config_(cfg) {
210 config_.name = name;
211 }
212
216 template<typename Func>
217 auto execute(Func&& func) -> common::Result<T> {
219
220 try {
221 auto op_result = func();
222
223 if (op_result.is_ok()) {
226 return op_result;
227 }
228
230 return handle_failure(op_result.error());
231
232 } catch (const std::exception& e) {
235 return handle_failure(err.to_common_error());
236 } catch (...) {
238 error_info err(monitoring_error_code::operation_failed, "Unknown exception");
239 return handle_failure(err.to_common_error());
240 }
241 }
242
246 template<typename Func, typename FallbackFunc>
247 auto execute(Func&& func, FallbackFunc&& fallback) -> common::Result<T> {
249
250 try {
251 auto op_result = func();
252
253 if (op_result.is_ok()) {
256 return op_result;
257 }
258
260 error_info err = error_info::from_common_error(op_result.error());
262
263 } catch (const std::exception& e) {
267 }
268 }
269
273 void set_error_handler(std::function<void(const error_info&, degradation_level)> handler) {
274 error_handler_ = std::move(handler);
275 }
276
281 fallback_strategy_ = std::move(strategy);
282 }
283
290
295 if (level <= config_.max_degradation) {
297 }
298 }
299
303 common::Result<bool> is_healthy() const {
305 return common::ok(healthy);
306 }
307
312 return metrics_;
313 }
314
318 const std::string& get_name() const {
319 return name_;
320 }
321
322private:
323 common::Result<T> handle_failure(const common::error_info& err) {
325
326 // Check if we should degrade
330 }
331
332 // Call error handler if set
333 if (error_handler_) {
334 error_info monitoring_err = error_info::from_common_error(err);
336 }
337
338 // Apply policy
339 switch (config_.policy) {
341 return common::Result<T>::err(err);
342
344 return common::Result<T>::err(error_info(monitoring_error_code::service_degraded, "Service isolated due to error").to_common_error());
345
347 if (fallback_strategy_) {
348 error_info monitoring_err = error_info::from_common_error(err);
349 return fallback_strategy_->get_fallback(monitoring_err, current_degradation_level_);
350 }
351 return common::Result<T>::err(err);
352
354 default:
355 return common::Result<T>::err(err);
356 }
357 }
358
369
371 auto current = static_cast<int>(current_degradation_level_);
372 auto max_level = static_cast<int>(config_.max_degradation);
373 if (current < max_level) {
374 current_degradation_level_ = static_cast<degradation_level>(current + 1);
375 }
376 }
377
379 auto current = static_cast<int>(current_degradation_level_);
380 if (current > 0) {
381 current_degradation_level_ = static_cast<degradation_level>(current - 1);
382 }
383 }
384
385 std::string name_;
387 std::function<void(const error_info&, degradation_level)> error_handler_;
388 std::shared_ptr<fallback_strategy_interface<T>> fallback_strategy_;
392};
393
394} // namespace kcenon::monitoring
Alternative service fallback strategy.
common::Result< T > get_fallback(const error_info &, degradation_level) override
std::function< common::Result< T >()> alternative_func
Cached value fallback strategy.
std::chrono::steady_clock::time_point cache_time_
cached_value_strategy(std::chrono::seconds ttl=std::chrono::seconds(60))
common::Result< T > get_fallback(const error_info &, degradation_level) override
Default value fallback strategy.
common::Result< T > get_fallback(const error_info &, degradation_level) override
Error boundary implementation for resilient operations.
void force_degradation(degradation_level level)
Force degradation to a specific level.
degradation_level current_degradation_level_
auto execute(Func &&func, FallbackFunc &&fallback) -> common::Result< T >
Execute with custom fallback function.
const std::string & get_name() const
Get boundary name.
void set_error_handler(std::function< void(const error_info &, degradation_level)> handler)
Set error handler callback.
auto execute(Func &&func) -> common::Result< T >
Execute a function within the error boundary.
degradation_level get_degradation_level() const
Get current degradation level.
error_boundary(const std::string &name, const config &cfg)
std::function< void(const error_info &, degradation_level)> error_handler_
error_boundary_metrics get_metrics() const
Get metrics.
std::shared_ptr< fallback_strategy_interface< T > > fallback_strategy_
void set_fallback_strategy(std::shared_ptr< fallback_strategy_interface< T > > strategy)
Set fallback strategy.
error_boundary(const std::string &name)
common::Result< T > handle_failure(const common::error_info &err)
common::Result< bool > is_healthy() const
Check if the boundary is healthy.
Base interface for fallback strategies.
virtual common::Result< T > get_fallback(const error_info &err, degradation_level level)=0
error_boundary_policy
Error boundary policies.
degradation_level
Degradation levels for error boundary.
@ emergency
Emergency condition, system-wide impact.
Result pattern type definitions for monitoring system.
Error boundary configuration.
std::chrono::milliseconds recovery_timeout
Error boundary metrics with atomic counters.
error_boundary_metrics(const error_boundary_metrics &other)
error_boundary_metrics & operator=(const error_boundary_metrics &other)
Extended error information with context.
static error_info from_common_error(const common::error_info &common_err)
Create from common_system error_info.
common::error_info to_common_error() const
Convert to common_system error_info.