Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
kcenon::monitoring::alert_manager Class Reference

Central coordinator for the alert pipeline. More...

#include <alert_manager.h>

Collaboration diagram for kcenon::monitoring::alert_manager:
Collaboration graph

Public Types

using metric_provider_func = std::function<std::optional<double>(const std::string&)>
 

Public Member Functions

 alert_manager ()
 Default constructor.
 
 alert_manager (const alert_manager_config &config)
 Construct with configuration.
 
 ~alert_manager ()
 Destructor.
 
 alert_manager (const alert_manager &)=delete
 
alert_manager & operator= (const alert_manager &)=delete
 
 alert_manager (alert_manager &&)=delete
 
alert_manager & operator= (alert_manager &&)=delete
 
common::VoidResult start ()
 Start the alert manager.
 
common::VoidResult stop ()
 Stop the alert manager.
 
bool is_running () const
 Check if manager is running.
 
common::VoidResult add_rule (std::shared_ptr< alert_rule > rule)
 Add an alert rule.
 
common::VoidResult remove_rule (const std::string &rule_name)
 Remove an alert rule.
 
std::shared_ptr< alert_rule > get_rule (const std::string &rule_name) const
 Get a rule by name.
 
std::vector< std::shared_ptr< alert_rule > > get_rules () const
 Get all rules.
 
common::VoidResult add_rule_group (std::shared_ptr< alert_rule_group > group)
 Add a rule group.
 
common::VoidResult process_metric (const std::string &metric_name, double value)
 Process a metric value.
 
common::VoidResult process_metrics (const std::unordered_map< std::string, double > &metrics)
 Process a batch of metrics.
 
std::vector< alert > get_active_alerts () const
 Get all active alerts.
 
std::optional< alert > get_alert (const std::string &fingerprint) const
 Get alert by fingerprint.
 
common::VoidResult resolve_alert (const std::string &fingerprint)
 Resolve an alert manually.
 
common::Result< uint64_t > create_silence (const alert_silence &silence)
 Create a silence.
 
common::VoidResult delete_silence (uint64_t silence_id)
 Delete a silence.
 
std::vector< alert_silence > get_silences () const
 Get all active silences.
 
bool is_silenced (const alert &a) const
 Check if an alert is silenced.
 
common::VoidResult add_notifier (std::shared_ptr< alert_notifier > notifier)
 Add a notifier.
 
common::VoidResult remove_notifier (const std::string &notifier_name)
 Remove a notifier.
 
std::vector< std::shared_ptr< alert_notifier > > get_notifiers () const
 Get all notifiers.
 
void set_metric_provider (metric_provider_func provider)
 Set the metric provider function.
 
void set_event_bus (std::shared_ptr< interface_event_bus > event_bus)
 Set event bus for publishing alert events.
 
alert_manager_metrics get_metrics () const
 Get manager metrics.
 
const alert_manager_config & config () const
 Get configuration.
 

Private Member Functions

void evaluation_loop ()
 Main evaluation loop.
 
void evaluate_rule (const std::shared_ptr< alert_rule > &rule, double value)
 Evaluate a single rule.
 
void update_alert_state (const std::string &fingerprint, bool condition_met, double value, const std::shared_ptr< alert_rule > &rule)
 Update alert state.
 
void send_notifications (const alert &a)
 Send notifications for an alert.
 
void cleanup_silences ()
 Clean up expired silences.
 
void cleanup_resolved_alerts ()
 Clean up resolved alerts.
 

Private Attributes

alert_manager_config config_
 
std::mutex rules_mutex_
 
std::unordered_map< std::string, std::shared_ptr< alert_rule > > rules_
 
std::vector< std::shared_ptr< alert_rule_group > > rule_groups_
 
std::mutex alerts_mutex_
 
std::unordered_map< std::string, alert > alerts_
 
std::mutex silences_mutex_
 
std::unordered_map< uint64_t, alert_silence > silences_
 
std::mutex notifiers_mutex_
 
std::vector< std::shared_ptr< alert_notifier > > notifiers_
 
std::mutex provider_mutex_
 
metric_provider_func metric_provider_
 
std::shared_ptr< interface_event_bus > event_bus_
 
alert_manager_metrics metrics_
 
std::atomic< bool > running_ {false}
 
std::thread evaluation_thread_
 
std::condition_variable cv_
 
std::mutex cv_mutex_
 
std::unordered_map< std::string, std::chrono::steady_clock::time_point > last_notification_times_
 

Detailed Description

Central coordinator for the alert pipeline.

The alert manager is responsible for:

  • Managing alert rules and their lifecycle
  • Evaluating rules against incoming metrics
  • Managing alert state transitions
  • Routing notifications to configured notifiers
  • Handling alert silencing and grouping
Thread Safety:
This class is thread-safe. All public methods can be called from multiple threads simultaneously.
Examples
alert_pipeline_example.cpp, and production_monitoring_example.cpp.

Definition at line 122 of file alert_manager.h.

Member Typedef Documentation

◆ metric_provider_func

Constructor & Destructor Documentation

◆ alert_manager() [1/4]

kcenon::monitoring::alert_manager::alert_manager ( )

◆ alert_manager() [2/4]

kcenon::monitoring::alert_manager::alert_manager ( const alert_manager_config & config)
explicit

Construct with configuration.

Parameters
config: Manager configuration

◆ ~alert_manager()

kcenon::monitoring::alert_manager::~alert_manager ( )

◆ alert_manager() [3/4]

kcenon::monitoring::alert_manager::alert_manager ( const alert_manager & )
delete

◆ alert_manager() [4/4]

kcenon::monitoring::alert_manager::alert_manager ( alert_manager && )
delete

Member Function Documentation

◆ add_notifier()

common::VoidResult kcenon::monitoring::alert_manager::add_notifier ( std::shared_ptr< alert_notifier > notifier)

Add a notifier.

Parameters
notifier: Notifier to add
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main().

Here is the caller graph for this function:

◆ add_rule()

common::VoidResult kcenon::monitoring::alert_manager::add_rule ( std::shared_ptr< alert_rule > rule)

Add an alert rule.

Parameters
rule: Rule to add
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), AlertManagerProcessingTest::SetUp(), and TEST().

Here is the caller graph for this function:

◆ add_rule_group()

common::VoidResult kcenon::monitoring::alert_manager::add_rule_group ( std::shared_ptr< alert_rule_group > group)

Add a rule group.

Parameters
group: Rule group to add
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), and TEST().

Here is the caller graph for this function:

◆ cleanup_resolved_alerts()

void kcenon::monitoring::alert_manager::cleanup_resolved_alerts ( )
private

◆ cleanup_silences()

void kcenon::monitoring::alert_manager::cleanup_silences ( )
private

◆ config()

const alert_manager_config & kcenon::monitoring::alert_manager::config ( ) const

Get configuration.

Returns
Current configuration
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Referenced by TEST(), TEST(), and TEST_F().

Here is the caller graph for this function:

◆ create_silence()

common::Result< uint64_t > kcenon::monitoring::alert_manager::create_silence ( const alert_silence & silence)

Create a silence.

Parameters
silence: Silence configuration
Returns
Result containing silence ID
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ delete_silence()

common::VoidResult kcenon::monitoring::alert_manager::delete_silence ( uint64_t silence_id)

Delete a silence.

Parameters
silence_id: Silence ID
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ evaluate_rule()

void kcenon::monitoring::alert_manager::evaluate_rule ( const std::shared_ptr< alert_rule > & rule,
double value )
private

Evaluate a single rule.

Parameters
rule: Rule to evaluate
value: Metric value
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ evaluation_loop()

void kcenon::monitoring::alert_manager::evaluation_loop ( )
private

◆ get_active_alerts()

std::vector< alert > kcenon::monitoring::alert_manager::get_active_alerts ( ) const

Get all active alerts.

Returns
Vector of active alerts
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), and print_active_alerts().

Here is the caller graph for this function:

◆ get_alert()

std::optional< alert > kcenon::monitoring::alert_manager::get_alert ( const std::string & fingerprint) const

Get alert by fingerprint.

Parameters
fingerprint: Alert fingerprint
Returns
Alert if found
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ get_metrics()

alert_manager_metrics kcenon::monitoring::alert_manager::get_metrics ( ) const

Get manager metrics.

Returns
Current metrics
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), TEST(), and TEST().

Here is the caller graph for this function:

◆ get_notifiers()

std::vector< std::shared_ptr< alert_notifier > > kcenon::monitoring::alert_manager::get_notifiers ( ) const

◆ get_rule()

std::shared_ptr< alert_rule > kcenon::monitoring::alert_manager::get_rule ( const std::string & rule_name) const

Get a rule by name.

Parameters
rule_name: Name of rule
Returns
Rule if found
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ get_rules()

std::vector< std::shared_ptr< alert_rule > > kcenon::monitoring::alert_manager::get_rules ( ) const

Get all rules.

Returns
Vector of all rules
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main().

Here is the caller graph for this function:

◆ get_silences()

std::vector< alert_silence > kcenon::monitoring::alert_manager::get_silences ( ) const

Get all active silences.

Returns
Vector of active silences
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ is_running()

bool kcenon::monitoring::alert_manager::is_running ( ) const

Check if manager is running.

Returns
True if running
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Referenced by TEST_F(), TEST_F(), TEST_F(), and TEST_F().

Here is the caller graph for this function:

◆ is_silenced()

bool kcenon::monitoring::alert_manager::is_silenced ( const alert & a) const

Check if an alert is silenced.

Parameters
a: Alert to check
Returns
True if silenced
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ operator=() [1/2]

alert_manager & kcenon::monitoring::alert_manager::operator= ( alert_manager && )
delete

◆ operator=() [2/2]

alert_manager & kcenon::monitoring::alert_manager::operator= ( const alert_manager & )
delete

◆ process_metric()

common::VoidResult kcenon::monitoring::alert_manager::process_metric ( const std::string & metric_name,
double value )

Process a metric value.

Parameters
metric_name: Name of the metric
value: Metric value
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), and TEST().

Here is the caller graph for this function:

◆ process_metrics()

common::VoidResult kcenon::monitoring::alert_manager::process_metrics ( const std::unordered_map< std::string, double > & metrics)

Process a batch of metrics.

Parameters
metrics: Map of metric names to values
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ remove_notifier()

common::VoidResult kcenon::monitoring::alert_manager::remove_notifier ( const std::string & notifier_name)

Remove a notifier.

Parameters
notifier_name: Name of notifier
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ remove_rule()

common::VoidResult kcenon::monitoring::alert_manager::remove_rule ( const std::string & rule_name)

Remove an alert rule.

Parameters
rule_name: Name of rule to remove
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ resolve_alert()

common::VoidResult kcenon::monitoring::alert_manager::resolve_alert ( const std::string & fingerprint)

Resolve an alert manually.

Parameters
fingerprint: Alert fingerprint
Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Referenced by TEST_F().

Here is the caller graph for this function:

◆ send_notifications()

void kcenon::monitoring::alert_manager::send_notifications ( const alert & a)
private

Send notifications for an alert.

Parameters
a: Alert to notify about
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

◆ set_event_bus()

void kcenon::monitoring::alert_manager::set_event_bus ( std::shared_ptr< interface_event_bus > event_bus)

Set event bus for publishing alert events.

Parameters
event_bus: Event bus instance
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Referenced by TEST().

Here is the caller graph for this function:

◆ set_metric_provider()

void kcenon::monitoring::alert_manager::set_metric_provider ( metric_provider_func provider)

Set the metric provider function.

Parameters
provider: Function that returns metric values by name
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Referenced by TEST().

Here is the caller graph for this function:

◆ start()

common::VoidResult kcenon::monitoring::alert_manager::start ( )

Start the alert manager.

Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), TEST_F(), TEST_F(), and TEST_F().

Here is the caller graph for this function:

◆ stop()

common::VoidResult kcenon::monitoring::alert_manager::stop ( )

Stop the alert manager.

Returns
Result indicating success or failure
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h, and alert_pipeline_example.cpp.

Referenced by main(), TEST_F(), TEST_F(), and TEST_F().

Here is the caller graph for this function:

◆ update_alert_state()

void kcenon::monitoring::alert_manager::update_alert_state ( const std::string & fingerprint,
bool condition_met,
double value,
const std::shared_ptr< alert_rule > & rule )
private

Update alert state.

Parameters
fingerprint: Alert fingerprint
condition_met: Whether condition is met
value: Current metric value
Examples
/home/runner/work/monitoring_system/monitoring_system/include/kcenon/monitoring/alert/alert_manager.h.

Member Data Documentation

◆ alerts_

std::unordered_map<std::string, alert> kcenon::monitoring::alert_manager::alerts_
private

◆ alerts_mutex_

std::mutex kcenon::monitoring::alert_manager::alerts_mutex_
mutable, private

◆ config_

alert_manager_config kcenon::monitoring::alert_manager::config_
private

◆ cv_

std::condition_variable kcenon::monitoring::alert_manager::cv_
private

◆ cv_mutex_

std::mutex kcenon::monitoring::alert_manager::cv_mutex_
private

◆ evaluation_thread_

std::thread kcenon::monitoring::alert_manager::evaluation_thread_
private

◆ event_bus_

std::shared_ptr<interface_event_bus> kcenon::monitoring::alert_manager::event_bus_
private

◆ last_notification_times_

std::unordered_map<std::string, std::chrono::steady_clock::time_point> kcenon::monitoring::alert_manager::last_notification_times_
private

◆ metric_provider_

metric_provider_func kcenon::monitoring::alert_manager::metric_provider_
private

◆ metrics_

alert_manager_metrics kcenon::monitoring::alert_manager::metrics_
mutable, private

◆ notifiers_

std::vector<std::shared_ptr<alert_notifier> > kcenon::monitoring::alert_manager::notifiers_
private

◆ notifiers_mutex_

std::mutex kcenon::monitoring::alert_manager::notifiers_mutex_
mutable, private

◆ provider_mutex_

std::mutex kcenon::monitoring::alert_manager::provider_mutex_
private

◆ rule_groups_

std::vector<std::shared_ptr<alert_rule_group> > kcenon::monitoring::alert_manager::rule_groups_
private

◆ rules_

std::unordered_map<std::string, std::shared_ptr<alert_rule> > kcenon::monitoring::alert_manager::rules_
private

◆ rules_mutex_

std::mutex kcenon::monitoring::alert_manager::rules_mutex_
mutable, private

◆ running_

std::atomic<bool> kcenon::monitoring::alert_manager::running_ {false}
private

◆ silences_

std::unordered_map<uint64_t, alert_silence> kcenon::monitoring::alert_manager::silences_
private

◆ silences_mutex_

std::mutex kcenon::monitoring::alert_manager::silences_mutex_
mutable, private

The documentation for this class was generated from the following file: