Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
fault_tolerance_manager.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
13#pragma once
14
15#include "circuit_breaker.h"
16#include "retry_policy.h"
17#include "error_boundary.h"
18
19#include <any>
20#include <future>
21#include <memory>
22#include <mutex>
23#include <string>
24#include <unordered_map>
25#include <vector>
26
27namespace kcenon::monitoring {
28
33 size_t total_operations = 0;
37 size_t timeouts = 0;
38
39 double get_overall_success_rate() const {
40 if (total_operations == 0) {
41 return 1.0;
42 }
43 return static_cast<double>(successful_operations) / static_cast<double>(total_operations);
44 }
45};
46
52 bool enable_retry = true;
56
57 bool validate() const {
59 return false;
60 }
62 return false;
63 }
64 return true;
65 }
66};
67
81template<typename T, typename Func>
82common::Result<T> execute_with_circuit_breaker(circuit_breaker& cb, const std::string& name, Func&& func) {
83 if (!cb.allow_request()) {
84 return common::make_error<T>(static_cast<int>(monitoring_error_code::circuit_breaker_open),
85 "Circuit breaker '" + name + "' is open");
86 }
87
88 auto op_result = func();
89 if (op_result.is_ok()) {
90 cb.record_success();
91 } else {
92 cb.record_failure();
93 }
94 return op_result;
95}
96
104template<typename T>
106public:
108 initialize();
109 }
110
111 explicit fault_tolerance_manager(const std::string& name)
112 : name_(name), config_() {
113 initialize();
114 }
115
116 explicit fault_tolerance_manager(const std::string& name, const fault_tolerance_config& cfg)
117 : name_(name), config_(cfg) {
118 initialize();
119 }
120
128 template<typename Func>
129 common::Result<T> execute(Func&& func) {
131
132 common::Result<T> op_result = common::Result<T>::err(error_info(monitoring_error_code::operation_failed, "Not executed").to_common_error());
133
135 op_result = execute_circuit_breaker_first(std::forward<Func>(func));
136 } else {
137 op_result = execute_retry_first(std::forward<Func>(func));
138 }
139
140 if (op_result.is_ok()) {
142 } else {
144 }
145
146 return op_result;
147 }
148
157 template<typename Func>
158 common::Result<T> execute_with_timeout(Func&& func, std::chrono::milliseconds timeout) {
160
161 auto future_result = std::async(std::launch::async, [this, func = std::forward<Func>(func)]() mutable {
162 return this->execute_internal(std::move(func));
163 });
164
165 if (future_result.wait_for(timeout) == std::future_status::timeout) {
168 return common::make_error<T>(static_cast<int>(monitoring_error_code::operation_timeout),
169 "Operation timed out after " + std::to_string(timeout.count()) + "ms");
170 }
171
172 auto op_result = future_result.get();
173 if (op_result.is_ok()) {
175 } else {
177 }
178
179 return op_result;
180 }
181
186 common::Result<bool> is_healthy() {
188 auto state = circuit_breaker_->get_state();
189 if (state == circuit_state::OPEN) {
190 return common::ok(false);
191 }
192 }
193 return common::ok(true);
194 }
195
200 return metrics_;
201 }
202
206 const std::string& get_name() const {
207 return name_;
208 }
209
210private:
211 void initialize() {
213 circuit_breaker_ = std::make_unique<circuit_breaker>(config_.circuit_config);
214 }
215 if (config_.enable_retry) {
216 retry_executor_ = std::make_unique<retry_executor<T>>(name_ + "_retry", config_.retry_cfg);
217 }
218 }
219
220 template<typename Func>
221 common::Result<T> execute_internal(Func&& func) {
223 return execute_circuit_breaker_first(std::forward<Func>(func));
224 }
225 return execute_retry_first(std::forward<Func>(func));
226 }
227
228 template<typename Func>
229 common::Result<T> execute_circuit_breaker_first(Func&& func) {
232 return execute_with_circuit_breaker<T>(*circuit_breaker_, name_, [this, &func]() {
233 return retry_executor_->execute(func);
234 });
235 }
236 return execute_with_circuit_breaker<T>(*circuit_breaker_, name_, std::forward<Func>(func));
237 }
239 return retry_executor_->execute(std::forward<Func>(func));
240 }
241 return func();
242 }
243
244 template<typename Func>
245 common::Result<T> execute_retry_first(Func&& func) {
248 return retry_executor_->execute([this, &func]() {
250 });
251 }
252 return retry_executor_->execute(std::forward<Func>(func));
253 }
255 return execute_with_circuit_breaker<T>(*circuit_breaker_, name_, std::forward<Func>(func));
256 }
257 return func();
258 }
259
260 std::string name_;
262 std::unique_ptr<circuit_breaker> circuit_breaker_;
263 std::unique_ptr<retry_executor<T>> retry_executor_;
265};
266
271public:
272 void register_circuit_breaker(const std::string& name, std::shared_ptr<circuit_breaker> breaker) {
273 std::lock_guard<std::mutex> lock(mutex_);
274 registry_[name] = std::move(breaker);
275 }
276
277 std::shared_ptr<circuit_breaker> get_circuit_breaker(const std::string& name) {
278 std::lock_guard<std::mutex> lock(mutex_);
279 auto it = registry_.find(name);
280 if (it != registry_.end()) {
281 return it->second;
282 }
283 return nullptr;
284 }
285
286 void remove_circuit_breaker(const std::string& name) {
287 std::lock_guard<std::mutex> lock(mutex_);
288 registry_.erase(name);
289 }
290
291 std::vector<std::string> get_all_names() const {
292 std::lock_guard<std::mutex> lock(mutex_);
293 std::vector<std::string> names;
294 names.reserve(registry_.size());
295 for (const auto& pair : registry_) {
296 names.push_back(pair.first);
297 }
298 return names;
299 }
300
301 void clear() {
302 std::lock_guard<std::mutex> lock(mutex_);
303 registry_.clear();
304 }
305
306private:
307 mutable std::mutex mutex_;
308 std::unordered_map<std::string, std::shared_ptr<circuit_breaker>> registry_;
309};
310
315public:
316 template<typename T>
317 void register_executor(const std::string& name, std::shared_ptr<retry_executor<T>> executor) {
318 std::lock_guard<std::mutex> lock(mutex_);
319 registry_[name] = std::move(executor);
320 }
321
322 template<typename T>
323 std::shared_ptr<retry_executor<T>> get_executor(const std::string& name) {
324 std::lock_guard<std::mutex> lock(mutex_);
325 auto it = registry_.find(name);
326 if (it != registry_.end()) {
327 return std::any_cast<std::shared_ptr<retry_executor<T>>>(it->second);
328 }
329 return nullptr;
330 }
331
332 void remove_executor(const std::string& name) {
333 std::lock_guard<std::mutex> lock(mutex_);
334 registry_.erase(name);
335 }
336
337 std::vector<std::string> get_all_names() const {
338 std::lock_guard<std::mutex> lock(mutex_);
339 std::vector<std::string> names;
340 names.reserve(registry_.size());
341 for (const auto& pair : registry_) {
342 names.push_back(pair.first);
343 }
344 return names;
345 }
346
347 void clear() {
348 std::lock_guard<std::mutex> lock(mutex_);
349 registry_.clear();
350 }
351
352private:
353 mutable std::mutex mutex_;
354 std::unordered_map<std::string, std::any> registry_;
355};
356
361public:
362 template<typename T>
363 void register_manager(const std::string& name, std::shared_ptr<fault_tolerance_manager<T>> manager) {
364 std::lock_guard<std::mutex> lock(mutex_);
365 registry_[name] = std::move(manager);
366 }
367
368 template<typename T>
369 std::shared_ptr<fault_tolerance_manager<T>> get_manager(const std::string& name) {
370 std::lock_guard<std::mutex> lock(mutex_);
371 auto it = registry_.find(name);
372 if (it != registry_.end()) {
373 return std::any_cast<std::shared_ptr<fault_tolerance_manager<T>>>(it->second);
374 }
375 return nullptr;
376 }
377
378 void remove_manager(const std::string& name) {
379 std::lock_guard<std::mutex> lock(mutex_);
380 registry_.erase(name);
381 }
382
383 std::vector<std::string> get_all_names() const {
384 std::lock_guard<std::mutex> lock(mutex_);
385 std::vector<std::string> names;
386 names.reserve(registry_.size());
387 for (const auto& pair : registry_) {
388 names.push_back(pair.first);
389 }
390 return names;
391 }
392
393 void clear() {
394 std::lock_guard<std::mutex> lock(mutex_);
395 registry_.clear();
396 }
397
398private:
399 mutable std::mutex mutex_;
400 std::unordered_map<std::string, std::any> registry_;
401};
402
407 static circuit_breaker_registry instance;
408 return instance;
409}
410
415 static retry_executor_registry instance;
416 return instance;
417}
418
423 static fault_tolerance_registry instance;
424 return instance;
425}
426
427} // namespace kcenon::monitoring
Circuit breaker integration for monitoring_system.
void register_circuit_breaker(const std::string &name, std::shared_ptr< circuit_breaker > breaker)
std::vector< std::string > get_all_names() const
std::shared_ptr< circuit_breaker > get_circuit_breaker(const std::string &name)
std::unordered_map< std::string, std::shared_ptr< circuit_breaker > > registry_
Fault tolerance manager template class.
fault_tolerance_manager(const std::string &name, const fault_tolerance_config &cfg)
common::Result< T > execute_with_timeout(Func &&func, std::chrono::milliseconds timeout)
Execute a function with timeout.
common::Result< T > execute_retry_first(Func &&func)
common::Result< bool > is_healthy()
Check if fault tolerance manager is healthy.
fault_tolerance_metrics get_metrics() const
Get fault tolerance metrics.
common::Result< T > execute_circuit_breaker_first(Func &&func)
common::Result< T > execute(Func &&func)
Execute a function with fault tolerance.
const std::string & get_name() const
Get manager name.
std::unique_ptr< circuit_breaker > circuit_breaker_
std::unique_ptr< retry_executor< T > > retry_executor_
common::Result< T > execute_internal(Func &&func)
std::vector< std::string > get_all_names() const
void register_manager(const std::string &name, std::shared_ptr< fault_tolerance_manager< T > > manager)
std::unordered_map< std::string, std::any > registry_
std::shared_ptr< fault_tolerance_manager< T > > get_manager(const std::string &name)
std::unordered_map< std::string, std::any > registry_
std::shared_ptr< retry_executor< T > > get_executor(const std::string &name)
std::vector< std::string > get_all_names() const
void register_executor(const std::string &name, std::shared_ptr< retry_executor< T > > executor)
Retry executor template class.
Error boundary with degradation levels for fault isolation.
common::Result< T > execute_with_circuit_breaker(circuit_breaker &cb, const std::string &name, Func &&func)
Execute an operation through a circuit breaker.
common::resilience::circuit_breaker circuit_breaker
circuit_breaker_registry & global_circuit_breaker_registry()
Get global circuit breaker registry.
fault_tolerance_registry & global_fault_tolerance_registry()
Get global fault tolerance manager registry.
common::resilience::circuit_breaker_config circuit_breaker_config
retry_executor_registry & global_retry_executor_registry()
Get global retry executor registry.
Retry strategies with backoff for monitoring operations.
Extended error information with context.
bool validate() const
Validate configuration.