Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
graceful_degradation.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11#pragma once
12
13#include "error_boundary.h"
14
15#include <any>
16#include <atomic>
17#include <chrono>
18#include <functional>
19#include <memory>
20#include <mutex>
21#include <string>
22#include <unordered_map>
23#include <vector>
24
25namespace kcenon::monitoring {
26
/**
 * @brief Service priority levels.
 *
 * Enumerators are declared in ascending order; their underlying values are
 * the consecutive integers 0 through 3.
 */
enum class service_priority {
    optional,   ///< underlying value 0
    normal,     ///< underlying value 1
    important,  ///< underlying value 2
    critical    ///< underlying value 3
};
36
/**
 * @brief Counters for degradation and recovery activity.
 *
 * std::atomic members are neither copy-constructible nor copy-assignable, so
 * the copy operations are written out by hand and transfer each counter with
 * load(). The copy is taken counter by counter; it is not a single atomic
 * snapshot of all five values.
 */
struct graceful_degradation_metrics {
    std::atomic<size_t> total_degradations{0};
    std::atomic<size_t> successful_degradations{0};
    std::atomic<size_t> failed_degradations{0};
    std::atomic<size_t> recovery_attempts{0};
    std::atomic<size_t> successful_recoveries{0};

    /// All counters start at zero (see the in-class initializers).
    graceful_degradation_metrics() = default;

    /**
     * @brief Copy-constructs by loading every counter from @p other.
     * @param other Metrics object to read from.
     */
    graceful_degradation_metrics(const graceful_degradation_metrics& other)
        : total_degradations(other.total_degradations.load())
        , successful_degradations(other.successful_degradations.load())
        , failed_degradations(other.failed_degradations.load())
        , recovery_attempts(other.recovery_attempts.load())
        , successful_recoveries(other.successful_recoveries.load()) {}

    /**
     * @brief Copy-assigns by loading every counter from @p other.
     * @param other Metrics object to read from.
     * @return Reference to this object.
     */
    graceful_degradation_metrics& operator=(const graceful_degradation_metrics& other) {
        if (this != &other) {
            total_degradations = other.total_degradations.load();
            successful_degradations = other.successful_degradations.load();
            failed_degradations = other.failed_degradations.load();
            recovery_attempts = other.recovery_attempts.load();
            successful_recoveries = other.successful_recoveries.load();
        }
        return *this;
    }
};
67
72 std::string name;
75 std::chrono::milliseconds health_check_interval = std::chrono::milliseconds(5000);
76 bool auto_recover = true;
77
78 bool validate() const {
79 if (name.empty()) {
80 return false;
81 }
83 return false;
84 }
85 return true;
86 }
87};
88
93 std::string name;
94 std::vector<std::string> services_to_maintain;
95 std::vector<std::string> services_to_disable;
97
98 bool validate() const {
99 return !name.empty();
100 }
101};
102
112
119public:
121
122 explicit graceful_degradation_manager(const std::string& name) : name_(name) {}
123
127 common::VoidResult register_service(const service_config& config) {
128 if (!config.validate()) {
129 return common::VoidResult::err(error_info(monitoring_error_code::invalid_configuration, "Invalid service configuration").to_common_error());
130 }
131
132 std::lock_guard<std::mutex> lock(mutex_);
133 if (services_.find(config.name) != services_.end()) {
134 return common::VoidResult::err(static_cast<int>(monitoring_error_code::already_exists),
135 "Service already registered: " + config.name);
136 }
137
138 service_state state;
139 state.config = config;
141 state.last_state_change = std::chrono::steady_clock::now();
142 services_[config.name] = state;
143
144 return common::ok();
145 }
146
150 common::VoidResult unregister_service(const std::string& name) {
151 std::lock_guard<std::mutex> lock(mutex_);
152 auto it = services_.find(name);
153 if (it == services_.end()) {
154 return common::VoidResult::err(static_cast<int>(monitoring_error_code::not_found),
155 "Service not found: " + name);
156 }
157 services_.erase(it);
158 return common::ok();
159 }
160
164 common::VoidResult degrade_service(const std::string& name, degradation_level level,
165 const std::string& reason) {
166 std::lock_guard<std::mutex> lock(mutex_);
167 auto it = services_.find(name);
168 if (it == services_.end()) {
170 return common::VoidResult::err(static_cast<int>(monitoring_error_code::not_found),
171 "Service not found: " + name);
172 }
173
175 it->second.current_level = level;
176 it->second.last_degradation_reason = reason;
177 it->second.last_state_change = std::chrono::steady_clock::now();
179
180 return common::ok();
181 }
182
186 common::VoidResult recover_service(const std::string& name) {
187 std::lock_guard<std::mutex> lock(mutex_);
188 auto it = services_.find(name);
189 if (it == services_.end()) {
190 return common::VoidResult::err(static_cast<int>(monitoring_error_code::not_found),
191 "Service not found: " + name);
192 }
193
195 it->second.current_level = degradation_level::normal;
196 it->second.last_degradation_reason.clear();
197 it->second.last_state_change = std::chrono::steady_clock::now();
199
200 return common::ok();
201 }
202
206 common::VoidResult recover_all_services() {
207 std::lock_guard<std::mutex> lock(mutex_);
208 for (auto& [name, state] : services_) {
210 state.current_level = degradation_level::normal;
211 state.last_degradation_reason.clear();
212 state.last_state_change = std::chrono::steady_clock::now();
214 }
215 return common::ok();
216 }
217
221 degradation_level get_service_level(const std::string& name) const {
222 std::lock_guard<std::mutex> lock(mutex_);
223 auto it = services_.find(name);
224 if (it == services_.end()) {
226 }
227 return it->second.current_level;
228 }
229
233 common::VoidResult add_degradation_plan(const degradation_plan& plan) {
234 if (!plan.validate()) {
235 return common::VoidResult::err(error_info(monitoring_error_code::invalid_configuration, "Invalid degradation plan").to_common_error());
236 }
237
238 std::lock_guard<std::mutex> lock(mutex_);
239 plans_[plan.name] = plan;
240 return common::ok();
241 }
242
246 common::VoidResult execute_plan(const std::string& plan_name, const std::string& reason) {
247 std::lock_guard<std::mutex> lock(mutex_);
248 auto it = plans_.find(plan_name);
249 if (it == plans_.end()) {
250 return common::VoidResult::err(static_cast<int>(monitoring_error_code::not_found),
251 "Plan not found: " + plan_name);
252 }
253
254 const auto& plan = it->second;
255
256 // Degrade services to maintain to target level
257 for (const auto& service_name : plan.services_to_maintain) {
258 auto service_it = services_.find(service_name);
259 if (service_it != services_.end()) {
261 service_it->second.current_level = plan.target_level;
262 service_it->second.last_degradation_reason = reason;
263 service_it->second.last_state_change = std::chrono::steady_clock::now();
265 }
266 }
267
268 // Disable (set to emergency) services to disable
269 for (const auto& service_name : plan.services_to_disable) {
270 auto service_it = services_.find(service_name);
271 if (service_it != services_.end()) {
273 service_it->second.current_level = degradation_level::emergency;
274 service_it->second.last_degradation_reason = reason;
275 service_it->second.last_state_change = std::chrono::steady_clock::now();
277 }
278 }
279
280 return common::ok();
281 }
282
286 common::Result<bool> is_healthy() const {
287 std::lock_guard<std::mutex> lock(mutex_);
288 if (services_.empty()) {
289 return common::ok(true);
290 }
291
292 size_t normal_count = 0;
293 for (const auto& [name, state] : services_) {
294 if (state.current_level == degradation_level::normal) {
295 normal_count++;
296 }
297 }
298
299 double healthy_ratio = static_cast<double>(normal_count) / static_cast<double>(services_.size());
300 return common::ok(healthy_ratio > 0.5);
301 }
302
309
313 const std::string& get_name() const {
314 return name_;
315 }
316
320 std::vector<std::string> get_service_names() const {
321 std::lock_guard<std::mutex> lock(mutex_);
322 std::vector<std::string> names;
323 names.reserve(services_.size());
324 for (const auto& [name, state] : services_) {
325 names.push_back(name);
326 }
327 return names;
328 }
329
330private:
331 std::string name_;
332 mutable std::mutex mutex_;
333 std::unordered_map<std::string, service_state> services_;
334 std::unordered_map<std::string, degradation_plan> plans_;
336};
337
343template<typename T>
345public:
346 using normal_operation = std::function<common::Result<T>()>;
347 using degraded_operation = std::function<common::Result<T>(degradation_level)>;
348
349 degradable_service(const std::string& name,
350 std::shared_ptr<graceful_degradation_manager> manager,
351 normal_operation normal_op,
352 degraded_operation degraded_op)
353 : name_(name)
354 , manager_(std::move(manager))
355 , normal_op_(std::move(normal_op))
356 , degraded_op_(std::move(degraded_op)) {}
357
361 common::Result<T> execute() {
362 if (!manager_) {
363 return normal_op_();
364 }
365
366 auto level = manager_->get_service_level(name_);
367 if (level == degradation_level::normal) {
368 return normal_op_();
369 }
370
371 if (degraded_op_) {
372 return degraded_op_(level);
373 }
374
375 return common::Result<T>::err(error_info(monitoring_error_code::service_degraded, "Service is degraded and no fallback available").to_common_error());
376 }
377
381 const std::string& get_name() const {
382 return name_;
383 }
384
385private:
386 std::string name_;
387 std::shared_ptr<graceful_degradation_manager> manager_;
390};
391
396public:
397 template<typename T>
398 void register_boundary(const std::string& name, std::shared_ptr<error_boundary<T>> boundary) {
399 std::lock_guard<std::mutex> lock(mutex_);
400 registry_[name] = std::move(boundary);
401 }
402
403 template<typename T>
404 std::shared_ptr<error_boundary<T>> get_boundary(const std::string& name) {
405 std::lock_guard<std::mutex> lock(mutex_);
406 auto it = registry_.find(name);
407 if (it != registry_.end()) {
408 try {
409 return std::any_cast<std::shared_ptr<error_boundary<T>>>(it->second);
410 } catch (const std::bad_any_cast&) {
411 return nullptr;
412 }
413 }
414 return nullptr;
415 }
416
417 void remove_boundary(const std::string& name) {
418 std::lock_guard<std::mutex> lock(mutex_);
419 registry_.erase(name);
420 }
421
422 std::vector<std::string> get_all_names() const {
423 std::lock_guard<std::mutex> lock(mutex_);
424 std::vector<std::string> names;
425 names.reserve(registry_.size());
426 for (const auto& [name, boundary] : registry_) {
427 names.push_back(name);
428 }
429 return names;
430 }
431
432 void clear() {
433 std::lock_guard<std::mutex> lock(mutex_);
434 registry_.clear();
435 }
436
437private:
438 mutable std::mutex mutex_;
439 std::unordered_map<std::string, std::any> registry_;
440};
441
442// Factory functions
443
448 static error_boundary_registry instance;
449 return instance;
450}
451
455inline std::shared_ptr<graceful_degradation_manager> create_degradation_manager(const std::string& name) {
456 return std::make_shared<graceful_degradation_manager>(name);
457}
458
462inline service_config create_service_config(const std::string& name, service_priority priority) {
463 service_config config;
464 config.name = name;
465 config.priority = priority;
466 return config;
467}
468
472inline degradation_plan create_degradation_plan(const std::string& name,
473 const std::vector<std::string>& maintain,
474 const std::vector<std::string>& disable,
475 degradation_level level) {
476 degradation_plan plan;
477 plan.name = name;
478 plan.services_to_maintain = maintain;
479 plan.services_to_disable = disable;
480 plan.target_level = level;
481 return plan;
482}
483
487template<typename T>
488std::shared_ptr<degradable_service<T>> create_degradable_service(
489 const std::string& name,
490 std::shared_ptr<graceful_degradation_manager> manager,
492 typename degradable_service<T>::degraded_operation degraded_op) {
493 return std::make_shared<degradable_service<T>>(name, std::move(manager),
494 std::move(normal_op), std::move(degraded_op));
495}
496
497} // namespace kcenon::monitoring
std::function< common::Result< T >()> normal_operation
const std::string & get_name() const
Get service name.
common::Result< T > execute()
Execute the service operation.
std::shared_ptr< graceful_degradation_manager > manager_
std::function< common::Result< T >(degradation_level)> degraded_operation
degradable_service(const std::string &name, std::shared_ptr< graceful_degradation_manager > manager, normal_operation normal_op, degraded_operation degraded_op)
Error boundary registry for managing multiple boundaries.
std::unordered_map< std::string, std::any > registry_
std::vector< std::string > get_all_names() const
std::shared_ptr< error_boundary< T > > get_boundary(const std::string &name)
void register_boundary(const std::string &name, std::shared_ptr< error_boundary< T > > boundary)
Error boundary implementation for resilient operations.
degradation_level get_service_level(const std::string &name) const
Get current degradation level for a service.
const std::string & get_name() const
Get manager name.
common::VoidResult unregister_service(const std::string &name)
Unregister a service.
common::VoidResult register_service(const service_config &config)
Register a service for management.
common::VoidResult add_degradation_plan(const degradation_plan &plan)
Add a degradation plan.
std::unordered_map< std::string, service_state > services_
std::unordered_map< std::string, degradation_plan > plans_
common::VoidResult recover_service(const std::string &name)
Recover a specific service to normal operation.
common::VoidResult execute_plan(const std::string &plan_name, const std::string &reason)
Execute a degradation plan.
common::VoidResult recover_all_services()
Recover all services to normal operation.
common::Result< bool > is_healthy() const
Check if the manager is healthy (more than 50% of services at normal level)
graceful_degradation_metrics get_metrics() const
Get metrics.
common::VoidResult degrade_service(const std::string &name, degradation_level level, const std::string &reason)
Degrade a specific service.
std::vector< std::string > get_service_names() const
Get all registered service names.
Error boundary with degradation levels for fault isolation.
std::shared_ptr< degradable_service< T > > create_degradable_service(const std::string &name, std::shared_ptr< graceful_degradation_manager > manager, typename degradable_service< T >::normal_operation normal_op, typename degradable_service< T >::degraded_operation degraded_op)
Create a degradable service.
service_config create_service_config(const std::string &name, service_priority priority)
Create a service configuration.
service_priority
Service priority levels.
degradation_plan create_degradation_plan(const std::string &name, const std::vector< std::string > &maintain, const std::vector< std::string > &disable, degradation_level level)
Create a degradation plan.
degradation_level
Degradation levels for error boundary.
std::shared_ptr< graceful_degradation_manager > create_degradation_manager(const std::string &name)
Create a graceful degradation manager.
error_boundary_registry & global_error_boundary_registry()
Get global error boundary registry.
Degradation plan for coordinated service degradation.
std::vector< std::string > services_to_disable
std::vector< std::string > services_to_maintain
Extended error information with context.
graceful_degradation_metrics(const graceful_degradation_metrics &other)
graceful_degradation_metrics & operator=(const graceful_degradation_metrics &other)
Service configuration for graceful degradation.
std::chrono::milliseconds health_check_interval
std::chrono::steady_clock::time_point last_state_change