10#include <gtest/gtest.h>
31 const std::string& name,
34 const std::string& message =
"OK"
41 std::lock_guard<std::mutex> lock(
mutex_);
45 result.
timestamp = std::chrono::system_clock::now();
52 std::lock_guard<std::mutex> lock(
mutex_);
73 EXPECT_EQ(
healthy.status, health_status::healthy);
74 EXPECT_EQ(
healthy.message,
"Service is running");
75 EXPECT_TRUE(
healthy.is_healthy());
76 EXPECT_TRUE(
healthy.is_operational());
79 EXPECT_EQ(
unhealthy.status, health_status::unhealthy);
80 EXPECT_EQ(
unhealthy.message,
"Database connection failed");
85 EXPECT_EQ(
degraded.status, health_status::degraded);
86 EXPECT_EQ(
degraded.message,
"High latency detected");
88 EXPECT_TRUE(
degraded.is_operational());
92 auto check_func = []() {
98 health_check_type::liveness,
102 EXPECT_EQ(func_check.
get_name(),
"lambda_check");
103 EXPECT_EQ(func_check.
get_type(), health_check_type::liveness);
105 auto result = func_check.
check();
106 EXPECT_EQ(result.status, health_status::healthy);
107 EXPECT_EQ(result.message,
"Lambda check passed");
113 auto check1 = std::make_shared<test_health_check>(
"check1", health_check_type::readiness);
114 auto check2 = std::make_shared<test_health_check>(
"check2", health_check_type::readiness);
115 auto check3 = std::make_shared<test_health_check>(
"check3", health_check_type::readiness);
122 auto result = composite.
check();
123 EXPECT_EQ(result.status, health_status::healthy);
126 check2->set_status(health_status::degraded);
127 result = composite.
check();
128 EXPECT_EQ(result.status, health_status::degraded);
131 check3->set_status(health_status::unhealthy);
132 result = composite.
check();
133 EXPECT_EQ(result.status, health_status::unhealthy);
139 auto check1 = std::make_shared<test_health_check>(
"check1", health_check_type::readiness);
140 auto check2 = std::make_shared<test_health_check>(
"check2", health_check_type::readiness);
146 auto result = composite.
check();
147 EXPECT_EQ(result.status, health_status::healthy);
150 check1->set_status(health_status::unhealthy);
151 result = composite.
check();
152 EXPECT_EQ(result.status, health_status::healthy);
155 check2->set_status(health_status::unhealthy);
156 result = composite.
check();
157 EXPECT_EQ(result.status, health_status::unhealthy);
163 auto check = std::make_shared<test_health_check>(
"database", health_check_type::liveness);
164 auto result = graph.
add_node(
"database", check);
166 ASSERT_TRUE(result.is_ok());
167 EXPECT_TRUE(result.value());
170 result = graph.
add_node(
"database", check);
171 ASSERT_FALSE(result.is_ok());
172 EXPECT_EQ(result.error().code,
static_cast<int>(monitoring_error_code::already_exists));
178 auto db_check = std::make_shared<test_health_check>(
"database", health_check_type::liveness);
179 auto api_check = std::make_shared<test_health_check>(
"api", health_check_type::liveness);
181 graph.
add_node(
"database", db_check);
186 ASSERT_TRUE(result.is_ok());
187 EXPECT_TRUE(result.value());
190 EXPECT_EQ(deps.size(), 1);
191 EXPECT_EQ(deps[0],
"database");
194 EXPECT_EQ(dependents.size(), 1);
195 EXPECT_EQ(dependents[0],
"api");
201 auto check_a = std::make_shared<test_health_check>(
"A", health_check_type::liveness);
202 auto check_b = std::make_shared<test_health_check>(
"B", health_check_type::liveness);
203 auto check_c = std::make_shared<test_health_check>(
"C", health_check_type::liveness);
218 ASSERT_FALSE(result.is_ok());
219 EXPECT_EQ(result.error().code,
static_cast<int>(monitoring_error_code::invalid_state));
226 auto check_a = std::make_shared<test_health_check>(
"A", health_check_type::liveness);
227 auto check_b = std::make_shared<test_health_check>(
"B", health_check_type::liveness);
228 auto check_c = std::make_shared<test_health_check>(
"C", health_check_type::liveness);
229 auto check_d = std::make_shared<test_health_check>(
"D", health_check_type::liveness);
245 auto pos_d = std::find(sorted.begin(), sorted.end(),
"D") - sorted.begin();
246 auto pos_b = std::find(sorted.begin(), sorted.end(),
"B") - sorted.begin();
247 auto pos_c = std::find(sorted.begin(), sorted.end(),
"C") - sorted.begin();
248 auto pos_a = std::find(sorted.begin(), sorted.end(),
"A") - sorted.begin();
250 EXPECT_LT(pos_d, pos_b);
251 EXPECT_LT(pos_d, pos_c);
252 EXPECT_LT(pos_b, pos_a);
253 EXPECT_LT(pos_c, pos_a);
259 auto db_check = std::make_shared<test_health_check>(
"database", health_check_type::liveness);
260 auto cache_check = std::make_shared<test_health_check>(
"cache", health_check_type::liveness);
261 auto api_check = std::make_shared<test_health_check>(
"api", health_check_type::liveness);
263 graph.
add_node(
"database", db_check);
264 graph.
add_node(
"cache", cache_check);
273 EXPECT_EQ(result.status, health_status::healthy);
276 db_check->set_status(health_status::unhealthy);
278 EXPECT_EQ(result.status, health_status::unhealthy);
281 db_check->set_status(health_status::healthy);
282 cache_check->set_status(health_status::degraded);
285 EXPECT_TRUE(result.status == health_status::degraded || result.status == health_status::healthy);
293 auto db_check = std::make_shared<test_health_check>(
"database", health_check_type::liveness);
294 auto api_check = std::make_shared<test_health_check>(
"api", health_check_type::liveness);
295 auto frontend_check = std::make_shared<test_health_check>(
"frontend", health_check_type::liveness);
296 auto worker_check = std::make_shared<test_health_check>(
"worker", health_check_type::liveness);
298 graph.
add_node(
"database", db_check);
300 graph.
add_node(
"frontend", frontend_check);
301 graph.
add_node(
"worker", worker_check);
309 EXPECT_EQ(impact.size(), 3);
310 EXPECT_TRUE(std::find(impact.begin(), impact.end(),
"api") != impact.end());
311 EXPECT_TRUE(std::find(impact.begin(), impact.end(),
"frontend") != impact.end());
312 EXPECT_TRUE(std::find(impact.begin(), impact.end(),
"worker") != impact.end());
316 EXPECT_EQ(impact.size(), 1);
317 EXPECT_EQ(impact[0],
"frontend");
321 auto check = std::make_shared<test_health_check>(
"test_check", health_check_type::liveness);
324 auto result = monitor.register_check(
"test", check);
325 ASSERT_TRUE(result.is_ok());
326 EXPECT_TRUE(result.value());
329 result = monitor.register_check(
"test", check);
330 ASSERT_FALSE(result.is_ok());
331 EXPECT_EQ(result.error().code,
static_cast<int>(monitoring_error_code::already_exists));
334 result = monitor.unregister_check(
"test");
335 ASSERT_TRUE(result.is_ok());
336 EXPECT_TRUE(result.value());
339 result = monitor.unregister_check(
"test");
340 ASSERT_FALSE(result.is_ok());
341 EXPECT_EQ(result.error().code,
static_cast<int>(monitoring_error_code::not_found));
345 auto check = std::make_shared<test_health_check>(
"test", health_check_type::liveness);
346 monitor.register_check(
"test", check);
348 EXPECT_FALSE(monitor.is_running());
350 auto result = monitor.start();
351 ASSERT_TRUE(result.is_ok());
352 EXPECT_TRUE(monitor.is_running());
355 result = monitor.start();
356 ASSERT_TRUE(result.is_ok());
358 result = monitor.stop();
359 ASSERT_TRUE(result.is_ok());
360 EXPECT_FALSE(monitor.is_running());
364 auto check = std::make_shared<test_health_check>(
366 health_check_type::readiness,
367 health_status::healthy,
371 monitor.register_check(
"specific", check);
373 auto result = monitor.check(
"specific");
374 ASSERT_TRUE(result.is_ok());
375 EXPECT_EQ(result.value().status, health_status::healthy);
376 EXPECT_EQ(result.value().message,
"Ready to serve");
379 result = monitor.check(
"non_existent");
380 ASSERT_FALSE(result.is_ok());
381 EXPECT_EQ(result.error().code,
static_cast<int>(monitoring_error_code::not_found));
385 auto check1 = std::make_shared<test_health_check>(
"check1", health_check_type::liveness);
386 auto check2 = std::make_shared<test_health_check>(
"check2", health_check_type::readiness);
387 auto check3 = std::make_shared<test_health_check>(
"check3", health_check_type::startup);
389 monitor.register_check(
"check1", check1);
390 monitor.register_check(
"check2", check2);
391 monitor.register_check(
"check3", check3);
393 auto results = monitor.check_all();
394 EXPECT_EQ(results.size(), 3);
395 EXPECT_TRUE(results.find(
"check1") != results.end());
396 EXPECT_TRUE(results.find(
"check2") != results.end());
397 EXPECT_TRUE(results.find(
"check3") != results.end());
399 for (
const auto& [name, result] : results) {
400 EXPECT_EQ(result.status, health_status::healthy);
406 auto initial = monitor.get_overall_status();
407 EXPECT_TRUE(initial == health_status::healthy || initial == health_status::unknown);
409 auto check1 = std::make_shared<test_health_check>(
"check1", health_check_type::liveness);
410 auto check2 = std::make_shared<test_health_check>(
"check2", health_check_type::readiness);
412 monitor.register_check(
"check1", check1);
413 monitor.register_check(
"check2", check2);
417 std::this_thread::sleep_for(std::chrono::milliseconds(100));
419 auto initial_status = monitor.get_overall_status();
420 EXPECT_TRUE(initial_status == health_status::healthy || initial_status == health_status::unknown);
423 check1->set_status(health_status::degraded);
425 std::this_thread::sleep_for(std::chrono::milliseconds(200));
426 auto degraded_status = monitor.get_overall_status();
427 EXPECT_TRUE(degraded_status == health_status::degraded || degraded_status == health_status::healthy);
430 check2->set_status(health_status::unhealthy);
432 std::this_thread::sleep_for(std::chrono::milliseconds(200));
433 auto unhealthy_status = monitor.get_overall_status();
434 EXPECT_TRUE(unhealthy_status == health_status::unhealthy || unhealthy_status == health_status::degraded);
438 auto db_check = std::make_shared<test_health_check>(
"database", health_check_type::liveness);
439 auto api_check = std::make_shared<test_health_check>(
"api", health_check_type::readiness);
441 monitor.register_check(
"database", db_check);
442 monitor.register_check(
"api", api_check);
445 auto result = monitor.add_dependency(
"api",
"database");
446 ASSERT_TRUE(result.is_ok());
447 EXPECT_TRUE(result.value());
450 db_check->set_status(health_status::unhealthy);
451 auto check_result = monitor.check(
"api");
452 ASSERT_TRUE(check_result.is_ok());
457 bool recovery_called =
false;
458 auto recovery_handler = [&recovery_called]() {
459 recovery_called =
true;
463 auto check = std::make_shared<test_health_check>(
"recoverable", health_check_type::liveness);
464 monitor.register_check(
"recoverable", check);
465 monitor.register_recovery_handler(
"recoverable", recovery_handler);
472 check->set_status(health_status::unhealthy);
476 std::this_thread::sleep_for(std::chrono::milliseconds(200));
483 auto check1 = std::make_shared<test_health_check>(
"check1", health_check_type::liveness);
484 auto check2 = std::make_shared<test_health_check>(
"check2", health_check_type::readiness);
486 monitor.register_check(
"check1", check1);
487 monitor.register_check(
"check2", check2);
491 std::this_thread::sleep_for(std::chrono::milliseconds(1100));
493 auto stats = monitor.get_stats();
498 check1->set_status(health_status::unhealthy);
500 std::this_thread::sleep_for(std::chrono::milliseconds(100));
502 stats = monitor.get_stats();
503 EXPECT_GT(stats.unhealthy_checks, 0);
513 .with_timeout(std::chrono::milliseconds(1000))
517 EXPECT_EQ(check->get_name(),
"built_check");
518 EXPECT_EQ(check->get_type(), health_check_type::startup);
519 EXPECT_EQ(check->get_timeout(), std::chrono::milliseconds(1000));
520 EXPECT_FALSE(check->is_critical());
522 auto result = check->check();
523 EXPECT_EQ(result.status, health_status::healthy);
524 EXPECT_EQ(result.message,
"Built check OK");
530 auto check = std::make_shared<test_health_check>(
"global_check", health_check_type::liveness);
531 auto result = global.register_check(
"global_test", check);
532 ASSERT_TRUE(result.is_ok());
535 global.unregister_check(
"global_test");
539 auto check1 = std::make_shared<test_health_check>(
541 health_check_type::liveness,
542 health_status::healthy,
543 "Database connection OK"
545 auto check2 = std::make_shared<test_health_check>(
547 health_check_type::readiness,
548 health_status::degraded,
552 monitor.register_check(
"database", check1);
553 monitor.register_check(
"cache", check2);
557 monitor.check(
"database");
558 monitor.check(
"cache");
560 auto report = monitor.get_health_report();
561 EXPECT_FALSE(report.empty());
566 const int num_checks = 20;
567 std::vector<std::shared_ptr<test_health_check>> checks;
570 for (
int i = 0; i < num_checks; ++i) {
571 auto check = std::make_shared<test_health_check>(
572 "check_" + std::to_string(i),
573 health_check_type::liveness
575 checks.push_back(check);
576 monitor.register_check(
"check_" + std::to_string(i), check);
583 std::vector<std::thread> threads;
584 for (
int i = 0; i < num_checks; ++i) {
585 threads.emplace_back([
this, i, &checks]() {
588 health_status::healthy,
589 health_status::degraded,
590 health_status::unhealthy
593 for (
int j = 0; j < 5; ++j) {
594 checks[i]->set_status(statuses[j % 3]);
595 monitor.check(
"check_" + std::to_string(i));
596 std::this_thread::sleep_for(std::chrono::milliseconds(10));
602 for (
auto& thread : threads) {
607 auto results = monitor.check_all();
608 EXPECT_EQ(results.size(), num_checks);
Composite health check that aggregates multiple sub-checks.
void add_check(std::shared_ptr< health_check > check)
Add a child health check to this composite.
health_check_result check() override
Execute all child checks and return the aggregate result.
Health check implementation backed by a std::function.
std::string get_name() const override
Get the human-readable name of this health check.
health_check_result check() override
Execute the stored check function.
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
Fluent builder for creating functional_health_check instances.
health_check_builder & critical(bool is_critical)
Mark this check as critical for overall system health.
health_check_builder & with_check(std::function< health_check_result()> func)
Set the callable that performs the health check.
health_check_builder & with_type(health_check_type type)
Set the health check type.
health_check_builder & with_name(const std::string &name)
Set the health check name.
std::shared_ptr< functional_health_check > build()
Build and return the configured functional_health_check.
Abstract base class for health checks.
Directed acyclic graph for health check dependencies.
bool would_create_cycle(const std::string &from, const std::string &to) const
Check whether adding an edge from -> to would create a cycle.
std::vector< std::string > get_dependencies(const std::string &name) const
Get the direct dependencies of a node.
std::vector< std::string > get_failure_impact(const std::string &name) const
Compute all nodes that would be impacted if the given node fails.
std::vector< std::string > topological_sort() const
Compute a topological ordering of all nodes.
std::vector< std::string > get_dependents(const std::string &name) const
Get the nodes that directly depend on the given node.
common::Result< bool > add_dependency(const std::string &dependent, const std::string &dependency)
Add a dependency edge: dependent depends on dependency.
health_check_result check_with_dependencies(const std::string &name)
Execute a health check after verifying all its dependencies are healthy.
common::Result< bool > add_node(const std::string &name, std::shared_ptr< health_check > check)
Add a health check node to the graph.
Health monitor with dependency management, auto-recovery, and statistics.
common::VoidResult stop()
Stop the periodic health monitoring background thread.
health_check_result check() override
Execute the health check and return the result.
test_health_check(const std::string &name, health_check_type type, health_status status=health_status::healthy, const std::string &message="OK")
void set_status(health_status status)
std::atomic< health_status > status_
std::string get_name() const override
Get the human-readable name of this health check.
void set_message(const std::string &msg)
health_check_type get_type() const override
Get the type of this health check (liveness, readiness, or startup).
Health monitoring with dependency graphs, auto-recovery, and statistics.
health_monitor & global_health_monitor()
Get the global health monitor singleton instance.
health_check_type
Types of health checks following Kubernetes probe conventions.
health_status
System health status levels.
Result of a health check operation.
static health_check_result unhealthy(const std::string &msg)
std::chrono::system_clock::time_point timestamp
static health_check_result healthy(const std::string &msg="OK")
static health_check_result degraded(const std::string &msg)
std::chrono::milliseconds check_duration
Configuration for the health_monitor.
std::chrono::milliseconds check_interval
Interval between automatic health check cycles.
bool enable_auto_recovery
Whether to invoke recovery handlers on failure.
TEST_F(HealthMonitoringTest, HealthCheckResultStaticFactories)