Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
crash_protection/main.cpp

Demonstrates comprehensive crash protection mechanisms including signal handling, graceful shutdown, resource cleanup, scoped crash callbacks, and automatic recovery capabilities via crash_handler.

See also
crash_handler, crash_context, scoped_crash_callback, thread_pool_crash_safety
// BSD 3-Clause License
// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
// See the LICENSE file in the project root for full license information.
#include <iostream>
#include <thread>
#include <chrono>
#include <random>
#include <vector>
#include <memory>
#include <atomic>
// Include crash protection headers
using namespace kcenon::thread;
using namespace kcenon::thread;
// Global state for demonstration
// Run flag; on_system_crash() stores false into it once a crash is reported.
std::atomic<bool> system_running{true};
// Count of tasks that reached their success path.
std::atomic<int> tasks_completed{0};
// Count of tasks that took a (real or simulated) failure path.
std::atomic<int> tasks_failed{0};
/**
 * @brief Simulated resource that needs cleanup.
 *
 * Stands in for something like a connection or file handle: construction
 * logs an allocation, cleanup() logs the release exactly once, and the
 * destructor calls cleanup() so a resource that was never released
 * explicitly is still released when the object dies.
 */
class critical_resource {
public:
    /**
     * @brief Marks the resource as allocated and logs the allocation.
     * @param name Label used in the log messages.
     */
    critical_resource(const std::string& name) : name_(name), allocated_(true) {
        std::cout << "[ALLOC] Allocated critical resource: " << name_ << std::endl;
    }

    /// Releases the resource if cleanup() has not already done so.
    ~critical_resource() {
        cleanup();
    }

    /**
     * @brief Releases the resource and logs the release.
     *
     * Calling it again after the resource has been released does nothing.
     */
    void cleanup() {
        if (allocated_) {
            std::cout << "[CLEANUP] Cleaning up critical resource: " << name_ << std::endl;
            allocated_ = false;
        }
    }

    /// @return The label given at construction.
    const std::string& name() const { return name_; }

    /// @return true until cleanup() has run.
    bool is_allocated() const { return allocated_; }

private:
    std::string name_;
    bool allocated_;
};
// Global critical resources
// Filled by main() and walked wherever resources are cleaned up or reported.
std::vector<std::shared_ptr<critical_resource>> global_resources;
// Crash simulation functions
/**
 * @brief Deliberately crashes the process by writing through a null pointer.
 *
 * Does not return normally on platforms where the null store faults.
 */
void simulate_segmentation_fault() {
    std::cout << "[CRASH] Simulating segmentation fault..." << std::endl;
    // volatile: the store is undefined behaviour, so without it the optimizer
    // is free to drop the write and the demo would not crash at all.
    volatile int* null_ptr = nullptr;
    *null_ptr = 42; // This will cause SIGSEGV
}
/**
 * @brief Deliberately performs an integer division by zero.
 *
 * NOTE(review): this raises SIGFPE only where integer division by zero
 * traps (e.g. x86); other targets may simply continue - confirm for the
 * platforms the demo is meant to run on.
 */
void simulate_division_by_zero() {
    std::cout << "[CRASH] Simulating division by zero..." << std::endl;
    // volatile operands keep the division from being folded away at compile time.
    volatile int zero = 0;
    volatile int result = 100 / zero; // This will cause SIGFPE
    (void)result;
}
/**
 * @brief Deliberately terminates the process through std::abort().
 *
 * Never returns.
 */
void simulate_abort() {
    std::cout << "[CRASH] Simulating abort..." << std::endl;
    std::abort(); // This will cause SIGABRT
}
// Task functions for thread pool
/**
 * @brief Well-behaved job body: logs, sleeps for a while, then counts itself
 *        as completed.
 * @param task_id Identifier echoed in the log lines; it also varies the
 *                sleep between 100 and 299 ms for non-negative ids.
 */
void normal_task(int task_id) {
    // Both log lines share the same prefix; only the tail differs.
    const auto report = [task_id](const char* tail) {
        std::cout << "[TASK] Task " << task_id << tail << std::endl;
    };

    report(" starting normally");

    // Pretend to do some work.
    const std::chrono::milliseconds work_time(100 + (task_id % 200));
    std::this_thread::sleep_for(work_time);

    ++tasks_completed;
    report(" completed successfully");
}
void potentially_crashing_task(int task_id) {
std::cout << "[WARN] Task " << task_id << " starting (potentially dangerous)" << std::endl;
// Random chance of crash
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> crash_dist(1, 10);
int outcome = crash_dist(gen);
if (outcome <= 7) {
// Normal execution
std::this_thread::sleep_for(std::chrono::milliseconds(50));
tasks_completed.fetch_add(1);
std::cout << "[TASK] Task " << task_id << " completed safely" << std::endl;
} else if (outcome == 8) {
// Segmentation fault
tasks_failed.fetch_add(1);
} else if (outcome == 9) {
// Division by zero
tasks_failed.fetch_add(1);
} else {
// Abort
tasks_failed.fetch_add(1);
}
}
// Crash callback functions
/**
 * @brief Crash callback: prints what is known about the crash and clears the
 *        global run flag.
 * @param context Crash description; its signal name/number, crashing thread,
 *                crash time and stack trace are read here.
 */
void on_system_crash(const crash_context& context) {
    // Work out the derived values first so the printing below reads top to bottom.
    const auto since_epoch = context.crash_time.time_since_epoch();
    const auto crash_seconds =
        std::chrono::duration_cast<std::chrono::seconds>(since_epoch).count();
    const bool has_stack_trace = !context.stack_trace.empty();

    std::cout << "\n[ALERT] CRASH DETECTED!" << std::endl;
    std::cout << "Signal: " << context.signal_name
              << " (" << context.signal_number << ")" << std::endl;
    std::cout << "Thread: " << context.crashing_thread << std::endl;
    std::cout << "Time: " << crash_seconds << std::endl;
    if (has_stack_trace) {
        std::cout << "Stack trace available" << std::endl;
    }

    // The system is no longer considered to be running.
    system_running = false;
}
std::cout << "[CLEANUP] Cleaning up global resources..." << std::endl;
for (auto& resource : global_resources) {
if (resource && resource->is_allocated()) {
resource->cleanup();
}
}
}
std::cout << "[SAVE] Saving emergency state..." << std::endl;
std::cout << "Tasks completed: " << tasks_completed.load() << std::endl;
std::cout << "Tasks failed: " << tasks_failed.load() << std::endl;
std::cout << "Resources allocated: " << global_resources.size() << std::endl;
}
/**
 * @brief Entry point of the crash protection demo.
 *
 * Walks through eight steps: initialise the crash handler and register a
 * crash callback, allocate the global resources, build and start a four-worker
 * thread pool, run ordinary jobs, run jobs that occasionally report a
 * simulated crash, exercise manual and scoped crash callbacks, print the
 * statistics, and shut everything down.
 *
 * @return 0 on success, 1 if the workers cannot be added or the pool cannot
 *         be started.
 */
int main() {
    std::cout << "=== Thread System Crash Protection Demo ===" << std::endl;
    std::cout << "This demo shows comprehensive crash protection mechanisms\n" << std::endl;

    // Step 1: Initialize crash protection
    std::cout << "--- Step 1: Initialize Crash Protection ---" << std::endl;
    auto& crash_handler = crash_handler::instance();
    crash_handler.initialize(crash_safety_level::standard, true);

    // Register crash callbacks. The id is kept so the callback can be
    // unregistered again in Step 8.
    const auto crash_callback_id = crash_handler.register_crash_callback(
        "SystemCrashHandler", on_system_crash, 10);

    // NOTE(review): no cleanup functions are registered with the handler here,
    // so resources are only released by the explicit cleanup in Step 8.
    std::cout << "[OK] Crash protection initialized" << std::endl;

    // Step 2: Allocate critical resources
    std::cout << "\n--- Step 2: Allocate Critical Resources ---" << std::endl;
    global_resources.push_back(std::make_shared<critical_resource>("DatabaseConnection"));
    global_resources.push_back(std::make_shared<critical_resource>("NetworkSocket"));
    global_resources.push_back(std::make_shared<critical_resource>("FileHandle"));
    global_resources.push_back(std::make_shared<critical_resource>("SharedMemory"));

    // Step 3: Create and configure thread pool with crash protection
    std::cout << "\n--- Step 3: Create Thread Pool ---" << std::endl;
    using thread_pool_t = kcenon::thread::thread_pool;
    using thread_worker_t = kcenon::thread::thread_worker;
    auto thread_pool = std::make_shared<thread_pool_t>("MainPool");

    // Enable crash protection for thread pool (conceptual)
    thread_pool_crash_safety::enable_for_pool("MainPool", *thread_pool);
    thread_pool_crash_safety::set_job_crash_handler(
        [](const std::string& pool_name, const crash_context& context) {
            std::cout << "[CRASH] Job crashed in pool: " << pool_name << std::endl;
            std::cout << "Signal: " << context.signal_name << std::endl;
        });

    // Add workers
    {
        std::vector<std::unique_ptr<thread_worker_t>> workers;
        for (int i = 0; i < 4; ++i) {
            workers.push_back(std::make_unique<thread_worker_t>());
        }
        auto r = thread_pool->enqueue_batch(std::move(workers));
        if (r.is_err()) {
            std::cerr << "Failed to add workers: " << r.error().message << std::endl;
            return 1;
        }
    }

    // Start pool
    {
        auto r = thread_pool->start();
        if (r.is_err()) {
            std::cerr << "Failed to start thread pool: " << r.error().message << std::endl;
            return 1;
        }
    }
    std::cout << "[OK] Thread pool started with crash protection" << std::endl;

    // Step 4: Submit normal tasks
    std::cout << "\n--- Step 4: Submit Normal Tasks ---" << std::endl;
    for (int i = 0; i < 10; ++i) {
        auto job = std::make_unique<callback_job>(
            [i]() -> kcenon::common::VoidResult {
                // Run the actual work; without this the job is a no-op and
                // the completion counter never moves.
                normal_task(i);
                return kcenon::common::ok();
            }
        );
        auto r = thread_pool->enqueue(std::move(job));
        if (r.is_err()) {
            std::cerr << "enqueue normal task failed: " << r.error().message << std::endl;
        }
    }

    // Give some time for tasks
    std::this_thread::sleep_for(std::chrono::seconds(1));
    std::cout << "[OK] All normal tasks completed" << std::endl;

    // Step 5: Submit potentially crashing tasks
    std::cout << "\n--- Step 5: Submit Potentially Crashing Tasks ---" << std::endl;
    std::cout << "[WARN] Some of these tasks may crash - crash protection will handle them" << std::endl;
    for (int i = 10; i < 25; ++i) {
        auto job = std::make_unique<callback_job>(
            [&crash_handler]() -> kcenon::common::VoidResult {
                // Simulate occasional crash handling without real crash
                std::random_device rd;
                std::mt19937 gen(rd());
                std::uniform_int_distribution<> dist(1, 10);
                const int outcome = dist(gen);
                if (outcome >= 9) {
                    // Describe the pretend crash and hand it to the handler,
                    // which runs the registered crash callbacks.
                    crash_context ctx;
                    ctx.signal_number = SIGUSR1;
                    ctx.signal_name = "SIGUSR1";
                    ctx.fault_address = nullptr;
                    ctx.stack_trace = "Simulated crash";
                    ctx.crash_time = std::chrono::system_clock::now();
                    ctx.crashing_thread = std::this_thread::get_id();
                    tasks_failed.fetch_add(1);
                    crash_handler.trigger_crash_handling(ctx);
                } else {
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));
                    tasks_completed.fetch_add(1);
                }
                return kcenon::common::ok();
            }
        );
        auto r = thread_pool->enqueue(std::move(job));
        if (r.is_err()) {
            std::cerr << "enqueue risky task failed: " << r.error().message << std::endl;
        }
    }
    std::this_thread::sleep_for(std::chrono::seconds(2));

    // Step 6: Test manual crash scenarios
    std::cout << "\n--- Step 6: Manual Crash Tests ---" << std::endl;

    // Test 1: Manual crash trigger (for testing)
    std::cout << "\nTest 1: Manual crash trigger" << std::endl;
    crash_context test_context;
    test_context.signal_number = SIGUSR1;
    test_context.signal_name = "SIGUSR1";
    test_context.fault_address = nullptr;
    test_context.stack_trace = "Manual test stack trace";
    test_context.crash_time = std::chrono::system_clock::now();
    test_context.crashing_thread = std::this_thread::get_id();
    crash_handler.trigger_crash_handling(test_context);

    // Test 2: Scoped crash protection
    std::cout << "\nTest 2: Scoped crash protection" << std::endl;
    {
        scoped_crash_callback scoped_protection("ScopedTest",
            [](const crash_context& /*ctx*/) {
                std::cout << "[PROTECT] Scoped crash handler activated" << std::endl;
            }, 50);
        std::cout << "[OK] Scoped protection active" << std::endl;
        // Scoped protection will be automatically removed when leaving this block
    }
    std::cout << "[OK] Scoped protection removed" << std::endl;

    // Step 7: Display final statistics
    std::cout << "\n--- Step 7: Final Statistics ---" << std::endl;
    auto stats = crash_handler.get_stats();
    std::cout << "Crash Statistics:" << std::endl;
    std::cout << " Total crashes handled: " << stats.total_crashes_handled << std::endl;
    std::cout << " Successful cleanups: " << stats.successful_cleanups << std::endl;
    std::cout << " Failed cleanups: " << stats.failed_cleanups << std::endl;
    std::cout << "\nTask Statistics:" << std::endl;
    std::cout << " Tasks completed: " << tasks_completed.load() << std::endl;
    std::cout << " Tasks failed: " << tasks_failed.load() << std::endl;
    std::cout << "\nResource Status:" << std::endl;
    for (const auto& resource : global_resources) {
        std::cout << " " << resource->name() << ": "
                  << (resource->is_allocated() ? "allocated" : "cleaned up") << std::endl;
    }

    // Step 8: Graceful shutdown
    std::cout << "\n--- Step 8: Graceful Shutdown ---" << std::endl;
    std::cout << "Stopping thread pool..." << std::endl;
    {
        auto r = thread_pool->stop();
        if (r.is_err()) {
            std::cerr << "Failed to stop thread pool: " << r.error().message << std::endl;
        }
    }

    std::cout << "Cleaning up resources..." << std::endl;
    // Release whatever is still allocated now that no worker can touch it.
    for (const auto& resource : global_resources) {
        if (resource && resource->is_allocated()) {
            resource->cleanup();
        }
    }

    std::cout << "Unregistering crash callbacks..." << std::endl;
    crash_handler.unregister_crash_callback(crash_callback_id);

    std::cout << "\n=== Demo Completed Successfully ===" << std::endl;
    std::cout << "Key features demonstrated:" << std::endl;
    std::cout << "[OK] Signal handling and crash detection" << std::endl;
    std::cout << "[OK] Stack trace generation" << std::endl;
    std::cout << "[OK] Resource cleanup on crash" << std::endl;
    std::cout << "[OK] Thread pool crash protection" << std::endl;
    std::cout << "[OK] Scoped crash protection" << std::endl;
    std::cout << "[OK] Graceful shutdown coordination" << std::endl;
    std::cout << "[OK] Crash statistics and monitoring" << std::endl;
    return 0;
}
Specialized job class that encapsulates user-defined callbacks.
std::string name_
Definition main.cpp:61
bool is_allocated() const
Definition main.cpp:58
const std::string & name() const
Definition main.cpp:57
void cleanup()
Definition main.cpp:50
critical_resource(const std::string &name)
Definition main.cpp:42
A specialized job class that encapsulates user-defined callbacks.
Thread-safe crash handler for the entire thread system.
void unregister_crash_callback(size_t registration_id)
Unregister a crash callback.
void initialize(crash_safety_level level=crash_safety_level::standard, bool enable_core_dumps=false)
Initialize crash handling with specified safety level.
void register_cleanup(const std::string &name, std::function< void()> cleanup, uint32_t timeout_ms=1000)
Register a resource cleanup function.
crash_stats get_stats() const
size_t register_crash_callback(const std::string &name, crash_callback callback, int priority=100)
Register a callback to be called during crash handling.
void set_crash_log_directory(const std::string &directory)
Set custom crash log directory.
void trigger_crash_handling(const crash_context &context)
Manually trigger crash handling (for testing)
Represents a unit of work (task) to be executed, typically by a job queue.
Definition job.h:136
A template class representing either a value or an error.
RAII helper for automatic crash callback registration.
A thread pool for concurrent execution of jobs using multiple worker threads.
auto enqueue(std::unique_ptr< job > &&job) -> common::VoidResult
Enqueues a new job into the shared job_queue.
auto enqueue_batch(std::vector< std::unique_ptr< job > > &&jobs) -> common::VoidResult
Enqueues a batch of jobs into the shared job_queue.
auto stop(const bool &immediately_stop=false) -> common::VoidResult
Stops the thread pool and all worker threads.
auto start(void) -> common::VoidResult
Starts the thread pool and all associated workers.
A specialized worker thread that processes jobs from a job_queue.
Core thread pool implementation with work stealing and auto-scaling.
Crash safety levels and handler interface for thread failure recovery.
void potentially_crashing_task(int task_id)
Definition main.cpp:98
void normal_task(int task_id)
Definition main.cpp:88
std::atomic< int > tasks_completed
Definition main.cpp:36
void simulate_division_by_zero()
Definition main.cpp:75
std::vector< std::shared_ptr< critical_resource > > global_resources
Definition main.cpp:66
void on_system_crash(const crash_context &context)
Definition main.cpp:129
void simulate_abort()
Definition main.cpp:82
void emergency_state_save()
Definition main.cpp:154
void cleanup_global_resources()
Definition main.cpp:144
std::atomic< int > tasks_failed
Definition main.cpp:37
void simulate_segmentation_fault()
Definition main.cpp:69
std::atomic< bool > system_running
Definition main.cpp:35
Core threading foundation of the thread system library.
Definition thread_impl.h:17
Crash context information.
std::chrono::system_clock::time_point crash_time
Specialized worker thread that processes jobs from a job_queue.