Monitoring System 0.1.0
System resource monitoring with pluggable collectors and alerting
Loading...
Searching...
No Matches
alert_config.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
5#pragma once
6
15#include <chrono>
16#include <functional>
17#include <iomanip>
18#include <memory>
19#include <regex>
20#include <sstream>
21#include <string>
22#include <unordered_map>
23#include <vector>
24
25#include "alert_rule.h"
26#include "alert_triggers.h"
27#include "alert_types.h"
29
30namespace kcenon::monitoring {
31
59public:
63 explicit alert_template(std::string template_str)
64 : template_str_(std::move(template_str)) {}
65
69 void set(const std::string& key, const std::string& value) {
70 custom_vars_[key] = value;
71 }
72
78 std::string render(const alert& a) const {
79 std::string result = template_str_;
80
81 // Build variable map
82 std::unordered_map<std::string, std::string> vars;
83
84 // Alert properties
85 vars["name"] = a.name;
86 vars["state"] = alert_state_to_string(a.state);
87 vars["severity"] = alert_severity_to_string(a.severity);
88 vars["value"] = format_value(a.value);
89 vars["fingerprint"] = a.fingerprint();
90 vars["rule_name"] = a.rule_name;
91 vars["group_key"] = a.group_key;
92
93 // Labels
94 for (const auto& [key, value] : a.labels.labels) {
95 vars["labels." + key] = value;
96 }
97
98 // Annotations
99 vars["annotations.summary"] = a.annotations.summary;
100 vars["annotations.description"] = a.annotations.description;
101 if (a.annotations.runbook_url) {
102 vars["annotations.runbook_url"] = *a.annotations.runbook_url;
103 }
104 for (const auto& [key, value] : a.annotations.custom) {
105 vars["annotations." + key] = value;
106 }
107
108 // Custom variables
109 for (const auto& [key, value] : custom_vars_) {
110 vars[key] = value;
111 }
112
113 // Substitute variables
114 result = substitute_variables(result, vars);
115
116 return result;
117 }
118
122 const std::string& template_string() const { return template_str_; }
123
128 common::VoidResult validate() const {
129 // Check for unclosed variable references
130 std::regex pattern(R"(\$\{[^}]*$)");
131 if (std::regex_search(template_str_, pattern)) {
132 return common::VoidResult::err(error_info(monitoring_error_code::validation_failed, "Unclosed variable reference in template").to_common_error());
133 }
134 return common::ok();
135 }
136
137private:
/// Format a metric value as fixed-point text with two decimal places.
///
/// @param value Number to format.
/// @return Text such as "3.14" or "0.00".
static std::string format_value(double value) {
    std::ostringstream formatted;
    formatted.setf(std::ios_base::fixed, std::ios_base::floatfield);
    formatted.precision(2);
    formatted << value;
    return formatted.str();
}
143
/// Expand `${name}` placeholders in a string.
///
/// A placeholder is `${`, one or more characters other than `}`, then `}`.
/// Known names are replaced by their value; unknown names are copied through
/// unchanged. `${}` (empty name) and an unterminated `${...` are not
/// placeholders and are copied verbatim. Substituted values are not scanned
/// again for further placeholders.
///
/// The scan is a single linear pass using std::string::find, which avoids
/// compiling a std::regex on every call and copying the input first.
///
/// @param input Text that may contain placeholders.
/// @param vars  Variable name to replacement text.
/// @return The expanded text.
static std::string substitute_variables(
    const std::string& input,
    const std::unordered_map<std::string, std::string>& vars) {
    std::string output;
    output.reserve(input.size());

    std::string::size_type cursor = 0;
    while (cursor < input.size()) {
        const std::string::size_type open = input.find("${", cursor);
        if (open == std::string::npos) {
            break;
        }

        const std::string::size_type name_begin = open + 2;
        const std::string::size_type close = input.find('}', name_begin);
        if (close == std::string::npos) {
            // No '}' anywhere after this point, so no later placeholder can
            // be complete either; the remainder is copied after the loop.
            break;
        }

        if (close == name_begin) {
            // "${}" has an empty name: keep the "${" literally and resume the
            // search at the '}'.
            output.append(input, cursor, name_begin - cursor);
            cursor = name_begin;
            continue;
        }

        // Literal text preceding the placeholder.
        output.append(input, cursor, open - cursor);

        const std::string var_name = input.substr(name_begin, close - name_begin);
        const auto it = vars.find(var_name);
        if (it != vars.end()) {
            output += it->second;
        } else {
            // Keep the original placeholder text if the variable is unknown.
            output.append(input, open, close - open + 1);
        }

        cursor = close + 1;
    }

    // Trailing literal text (or the whole input when nothing matched).
    output.append(input, cursor, std::string::npos);
    return output;
}
171
/// Raw template text supplied at construction; returned by template_string().
172 std::string template_str_;
/// User-defined variables added via set(); applied last in render().
173 std::unordered_map<std::string, std::string> custom_vars_;
174};
175
// Fields of the rule definition consumed by rule_builder::build().
//
// NOTE(review): this listing omits the enclosing struct header, the nested
// trigger-configuration struct's opening/closing lines, and some members.
// rule_builder also reads `trigger.window_seconds`,
// `evaluation_interval_seconds`, `for_duration_seconds` and
// `repeat_interval_seconds`; confirm their types and defaults in the real
// header.

// Rule name; build() rejects an empty value.
184 std::string name;
// Optional group; only applied to the rule when non-empty.
185 std::string group;
// Metric the rule evaluates; build() rejects an empty value.
186 std::string metric_name;
// Severity keyword; an empty string is treated as "warning" by the builder.
187 std::string severity; // "info", "warning", "critical", "emergency"
188 bool enabled = true;
189
190 // Trigger configuration
// Trigger kind; an empty string is treated as "threshold" by the builder.
192 std::string type; // "threshold", "rate", "anomaly", "absent"
// Comparison for threshold triggers; empty means ">". The builder also
// accepts "=" for "==" and "<>" for "!=".
193 std::string operator_str; // ">", ">=", "<", "<=", "==", "!="
// Used by the "threshold" trigger.
194 double threshold = 0.0;
// Used by the "rate" trigger.
195 double rate_threshold = 0.0;
// Used by the "anomaly" trigger.
196 double sensitivity = 3.0;
// Used by the "absent" trigger, converted to std::chrono::seconds.
198 int absent_seconds = 300;
200
201 // Timing configuration
205
206 // Labels and annotations
// Each entry is added to the rule via add_label().
207 std::unordered_map<std::string, std::string> labels;
// The three annotation strings below are only applied when non-empty.
208 std::string summary;
209 std::string description;
210 std::string runbook_url;
211};
212
221public:
/// Build an alert_rule from a rule_definition.
///
/// Rejects definitions whose name or metric name is empty, then applies, in
/// order: group (if non-empty), metric name, severity, enabled flag, trigger,
/// the three timing intervals (converted to std::chrono::seconds), labels,
/// and the non-empty annotations. A severity or trigger parsing failure
/// aborts the build and forwards that step's error message.
///
/// @param def Definition to convert.
/// @return The constructed rule on success, otherwise an error Result.
///
/// NOTE(review): each make_error call below is missing one argument line in
/// this listing (presumably the error code) — confirm against the real header.
227 static common::Result<std::shared_ptr<alert_rule>> build(const rule_definition& def) {
228 // Validate required fields
229 if (def.name.empty()) {
230 return common::make_error<std::shared_ptr<alert_rule>>(
232 "Rule name is required");
233 }
234 if (def.metric_name.empty()) {
235 return common::make_error<std::shared_ptr<alert_rule>>(
237 "Metric name is required");
238 }
239
240 auto rule = std::make_shared<alert_rule>(def.name);
241
242 // Set group
243 if (!def.group.empty()) {
244 rule->set_group(def.group);
245 }
246
247 // Set metric name
248 rule->set_metric_name(def.metric_name);
249
250 // Set severity
251 auto severity = parse_severity(def.severity);
252 if (!severity.is_ok()) {
253 return common::make_error<std::shared_ptr<alert_rule>>(
255 severity.error().message);
256 }
257 rule->set_severity(severity.value());
258
259 // Set enabled
260 rule->set_enabled(def.enabled);
261
262 // Build trigger
263 auto trigger = build_trigger(def.trigger);
264 if (!trigger.is_ok()) {
265 return common::make_error<std::shared_ptr<alert_rule>>(
267 trigger.error().message);
268 }
269 rule->set_trigger(trigger.value());
270
271 // Set timing
272 rule->set_evaluation_interval(
273 std::chrono::seconds(def.evaluation_interval_seconds));
274 rule->set_for_duration(
275 std::chrono::seconds(def.for_duration_seconds));
276 rule->set_repeat_interval(
277 std::chrono::seconds(def.repeat_interval_seconds));
278
279 // Set labels
280 for (const auto& [key, value] : def.labels) {
281 rule->add_label(key, value);
282 }
283
284 // Set annotations
285 if (!def.summary.empty()) {
286 rule->set_summary(def.summary);
287 }
288 if (!def.description.empty()) {
289 rule->set_description(def.description);
290 }
291 if (!def.runbook_url.empty()) {
292 rule->set_runbook_url(def.runbook_url);
293 }
294
295 return common::ok(std::move(rule));
296 }
297
298private:
/// Map a severity keyword to alert_severity.
///
/// Matching is exact (case-sensitive). An empty string is treated the same as
/// "warning".
///
/// @param str One of "", "warning", "info", "critical", "emergency".
/// @return The matching severity, or an error whose message is
///         "Unknown severity: <str>".
///
/// NOTE(review): the make_error call is missing one argument line in this
/// listing (presumably the error code) — confirm against the real header.
299 static common::Result<alert_severity> parse_severity(const std::string& str) {
300 if (str.empty() || str == "warning") {
301 return common::ok(alert_severity::warning);
302 }
303 if (str == "info") {
304 return common::ok(alert_severity::info);
305 }
306 if (str == "critical") {
307 return common::ok(alert_severity::critical);
308 }
309 if (str == "emergency") {
310 return common::ok(alert_severity::emergency);
311 }
312 return common::make_error<alert_severity>(
314 "Unknown severity: " + str);
315 }
316
/// Create the trigger object selected by `cfg.type`.
///
/// - "" or "threshold": threshold_trigger from `cfg.threshold` and the parsed
///   `cfg.operator_str`; an unknown operator is reported as an error.
/// - "rate": rate_of_change_trigger from `cfg.rate_threshold` and
///   `cfg.window_seconds` as std::chrono::seconds.
/// - "anomaly": anomaly_trigger from `cfg.sensitivity` and
///   `cfg.window_seconds` cast to size_t.
/// - "absent": absent_trigger from `cfg.absent_seconds` as
///   std::chrono::seconds.
/// - anything else: error "Unknown trigger type: <type>".
///
/// NOTE(review): the line carrying the parameter list is missing from this
/// listing (the Doxygen index shows `const rule_definition::trigger_config&
/// cfg`), as is one argument line of each make_error call — confirm against
/// the real header.
/// NOTE(review): if `window_seconds` is a signed type, a negative value wraps
/// to a very large size_t in the "anomaly" branch — verify and validate
/// upstream if so.
317 static common::Result<std::shared_ptr<alert_trigger>> build_trigger(
319 if (cfg.type.empty() || cfg.type == "threshold") {
320 auto op = parse_operator(cfg.operator_str);
321 if (!op.is_ok()) {
322 return common::make_error<std::shared_ptr<alert_trigger>>(
324 op.error().message);
325 }
326 return common::ok(std::shared_ptr<alert_trigger>(
327 std::make_shared<threshold_trigger>(cfg.threshold, op.value())));
328 }
329
330 if (cfg.type == "rate") {
331 return common::ok(std::shared_ptr<alert_trigger>(
332 std::make_shared<rate_of_change_trigger>(
333 cfg.rate_threshold,
334 std::chrono::seconds(cfg.window_seconds))));
335 }
336
337 if (cfg.type == "anomaly") {
338 return common::ok(std::shared_ptr<alert_trigger>(
339 std::make_shared<anomaly_trigger>(
340 cfg.sensitivity,
341 static_cast<size_t>(cfg.window_seconds))));
342 }
343
344 if (cfg.type == "absent") {
345 return common::ok(std::shared_ptr<alert_trigger>(
346 std::make_shared<absent_trigger>(
347 std::chrono::seconds(cfg.absent_seconds))));
348 }
349
350 return common::make_error<std::shared_ptr<alert_trigger>>(
352 "Unknown trigger type: " + cfg.type);
353 }
354
/// Map a comparison operator string to comparison_operator.
///
/// Matching is exact. An empty string is treated the same as ">". "=" is
/// accepted as an alias for "==" and "<>" as an alias for "!=".
///
/// @param str Operator text.
/// @return The matching operator, or an error whose message is
///         "Unknown operator: <str>".
///
/// NOTE(review): the make_error call is missing one argument line in this
/// listing (presumably the error code) — confirm against the real header.
355 static common::Result<comparison_operator> parse_operator(const std::string& str) {
356 if (str.empty() || str == ">") {
357 return common::ok(comparison_operator::greater_than);
358 }
359 if (str == ">=") {
360 return common::ok(comparison_operator::greater_or_equal);
361 }
362 if (str == "<") {
363 return common::ok(comparison_operator::less_than);
364 }
365 if (str == "<=") {
366 return common::ok(comparison_operator::less_or_equal);
367 }
368 if (str == "==" || str == "=") {
369 return common::ok(comparison_operator::equal);
370 }
371 if (str == "!=" || str == "<>") {
372 return common::ok(comparison_operator::not_equal);
373 }
374 return common::make_error<comparison_operator>(
376 "Unknown operator: " + str);
377 }
378};
379
388public:
/// Listener signature for rule changes.
///
/// Invoked with the rule's name, the affected rule, and `is_removal`, which is
/// false when the rule was registered and true when it was unregistered or
/// removed by clear().
389 using rule_change_callback = std::function<void(
390 const std::string& rule_name,
391 const std::shared_ptr<alert_rule>& rule,
392 bool is_removal)>;
393
399 common::VoidResult register_rule(std::shared_ptr<alert_rule> rule) {
400 if (!rule) {
401 return common::VoidResult::err(error_info(monitoring_error_code::invalid_argument, "Rule cannot be null").to_common_error());
402 }
403
404 std::lock_guard<std::mutex> lock(mutex_);
405
406 std::string name = rule->name();
407 rules_[name] = rule;
408
409 // Notify listeners
410 for (const auto& callback : change_callbacks_) {
411 callback(name, rule, false);
412 }
413
414 return common::ok();
415 }
416
422 common::VoidResult unregister_rule(const std::string& name) {
423 std::lock_guard<std::mutex> lock(mutex_);
424
425 auto it = rules_.find(name);
426 if (it == rules_.end()) {
427 return common::VoidResult::err(static_cast<int>(monitoring_error_code::not_found),
428 "Rule not found: " + name);
429 }
430
431 auto rule = it->second;
432 rules_.erase(it);
433
434 // Notify listeners
435 for (const auto& callback : change_callbacks_) {
436 callback(name, rule, true);
437 }
438
439 return common::ok();
440 }
441
445 std::shared_ptr<alert_rule> get_rule(const std::string& name) const {
446 std::lock_guard<std::mutex> lock(mutex_);
447 auto it = rules_.find(name);
448 return it != rules_.end() ? it->second : nullptr;
449 }
450
454 std::vector<std::shared_ptr<alert_rule>> get_all_rules() const {
455 std::lock_guard<std::mutex> lock(mutex_);
456 std::vector<std::shared_ptr<alert_rule>> result;
457 result.reserve(rules_.size());
458 for (const auto& [name, rule] : rules_) {
459 result.push_back(rule);
460 }
461 return result;
462 }
463
467 std::vector<std::shared_ptr<alert_rule>> get_rules_by_group(
468 const std::string& group) const {
469 std::lock_guard<std::mutex> lock(mutex_);
470 std::vector<std::shared_ptr<alert_rule>> result;
471 for (const auto& [name, rule] : rules_) {
472 if (rule->group() == group) {
473 result.push_back(rule);
474 }
475 }
476 return result;
477 }
478
482 size_t rule_count() const {
483 std::lock_guard<std::mutex> lock(mutex_);
484 return rules_.size();
485 }
486
// Body of on_rule_change: appends the listener to change_callbacks_ while
// holding the registry mutex.
// NOTE(review): the signature line is missing from this listing (the Doxygen
// index shows `void on_rule_change(rule_change_callback callback)`).
491 std::lock_guard<std::mutex> lock(mutex_);
492 change_callbacks_.push_back(std::move(callback));
493 }
494
/// Build and register a rule for each definition.
///
/// Each definition is processed independently: a build or registration
/// failure is recorded as "<name>: <message>" and the loop continues with the
/// next definition. The registry mutex is not held across the loop; each
/// register_rule() call locks on its own.
///
/// @param definitions Definitions to load.
/// @return The number of rules registered. An error is returned only when at
///         least one definition failed and none succeeded; its message
///         carries the first recorded failure. Partial failures are therefore
///         visible to the caller only as a lower count, and an empty input
///         yields ok(0).
///
/// NOTE(review): the make_error call is missing one argument line in this
/// listing (presumably the error code) — confirm against the real header.
500 common::Result<size_t> load_definitions(const std::vector<rule_definition>& definitions) {
501 size_t loaded = 0;
502 std::vector<std::string> errors;
503
504 for (const auto& def : definitions) {
505 auto rule_result = rule_builder::build(def);
506 if (rule_result.is_ok()) {
507 auto reg_result = register_rule(rule_result.value());
508 if (reg_result.is_ok()) {
509 ++loaded;
510 } else {
511 errors.push_back(def.name + ": " + reg_result.error().message);
512 }
513 } else {
514 errors.push_back(def.name + ": " + rule_result.error().message);
515 }
516 }
517
518 if (!errors.empty() && loaded == 0) {
519 return common::make_error<size_t>(
521 "Failed to load any rules: " + errors.front());
522 }
523
524 return common::ok(loaded);
525 }
526
530 void clear() {
531 std::lock_guard<std::mutex> lock(mutex_);
532
533 for (const auto& [name, rule] : rules_) {
534 for (const auto& callback : change_callbacks_) {
535 callback(name, rule, true);
536 }
537 }
538
539 rules_.clear();
540 }
541
542private:
/// Guards rules_ and change_callbacks_; mutable so const accessors can lock it.
543 mutable std::mutex mutex_;
/// Registered rules keyed by the rule's name().
544 std::unordered_map<std::string, std::shared_ptr<alert_rule>> rules_;
/// Listeners invoked on register, unregister and clear.
545 std::vector<rule_change_callback> change_callbacks_;
546};
547
592} // namespace kcenon::monitoring
Alert rule configuration and evaluation.
Alert trigger implementations for various condition types.
Core alert data structures for the monitoring system.
Template engine for alert messages.
static std::string format_value(double value)
const std::string & template_string() const
Get template string.
static std::string substitute_variables(const std::string &input, const std::unordered_map< std::string, std::string > &vars)
common::VoidResult validate() const
Validate template syntax.
void set(const std::string &key, const std::string &value)
Set a custom variable value.
std::string render(const alert &a) const
Render template with alert data.
std::unordered_map< std::string, std::string > custom_vars_
alert_template(std::string template_str)
Construct with template string.
Builds alert_rule from rule_definition.
static common::Result< std::shared_ptr< alert_rule > > build(const rule_definition &def)
Build alert_rule from definition.
static common::Result< comparison_operator > parse_operator(const std::string &str)
static common::Result< std::shared_ptr< alert_trigger > > build_trigger(const rule_definition::trigger_config &cfg)
static common::Result< alert_severity > parse_severity(const std::string &str)
Dynamic registry for alert rules with hot-reload support.
std::function< void( const std::string &rule_name, const std::shared_ptr< alert_rule > &rule, bool is_removal)> rule_change_callback
common::VoidResult unregister_rule(const std::string &name)
Unregister a rule.
void on_rule_change(rule_change_callback callback)
Register callback for rule changes.
size_t rule_count() const
Get rule count.
common::VoidResult register_rule(std::shared_ptr< alert_rule > rule)
Register a rule.
std::unordered_map< std::string, std::shared_ptr< alert_rule > > rules_
std::vector< std::shared_ptr< alert_rule > > get_rules_by_group(const std::string &group) const
Get rules in a group.
std::vector< std::shared_ptr< alert_rule > > get_all_rules() const
Get all registered rules.
common::Result< size_t > load_definitions(const std::vector< rule_definition > &definitions)
Load rules from definitions.
void clear()
Clear all rules.
std::vector< rule_change_callback > change_callbacks_
std::shared_ptr< alert_rule > get_rule(const std::string &name) const
Get a rule by name.
@ equal
value == threshold (with epsilon)
@ not_equal
value != threshold (with epsilon)
@ warning
Warning condition, may require attention.
@ critical
Critical condition, immediate attention required.
@ emergency
Emergency condition, system-wide impact.
@ info
Informational, no action required.
constexpr const char * alert_state_to_string(alert_state state) noexcept
Convert alert state to string.
Definition alert_types.h:82
constexpr const char * alert_severity_to_string(alert_severity severity) noexcept
Convert alert severity to string.
Definition alert_types.h:47
Result pattern type definitions for monitoring system.
std::string description
Detailed description.
std::unordered_map< std::string, std::string > custom
Custom annotations.
std::string summary
Brief description.
std::optional< std::string > runbook_url
Link to runbook.
std::unordered_map< std::string, std::string > labels
Core alert data structure.
alert_state state
Current state.
double value
Current metric value.
alert_severity severity
Alert severity level.
std::string rule_name
Name of triggering rule.
std::string group_key
Grouping key for dedup.
std::string name
Alert name/identifier.
alert_labels labels
Identifying labels.
alert_annotations annotations
Descriptive annotations.
std::string fingerprint() const
Get alert fingerprint for deduplication.
Extended error information with context.
Structured definition for alert rule configuration.
struct kcenon::monitoring::rule_definition::trigger_config trigger
std::unordered_map< std::string, std::string > labels