Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
thread_pool_diagnostics.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
12#pragma once
13
19 // end of diagnostics
20
21#include "thread_info.h"
22#include "job_info.h"
23#include "bottleneck_report.h"
24#include "health_status.h"
25#include "execution_event.h"
26
27#include <atomic>
28#include <chrono>
29#include <cstddef>
30#include <deque>
31#include <memory>
32#include <mutex>
33#include <string>
34#include <vector>
35
36namespace kcenon::thread
37{
38 // Forward declarations
39 class thread_pool;
40 class thread_worker;
41
42namespace diagnostics
43{
// Configuration options for thread pool diagnostics.
// NOTE(review): the `struct diagnostics_config` header line that opens this
// body is not present in this listing; only the brace and some fields are.
51 {
// Maximum number of recent jobs to track (defaults to 1000).
55 std::size_t recent_jobs_capacity{1000};
56
// Maximum number of events to retain in history (defaults to 1000).
60 std::size_t event_history_size{1000};
61
// Enable automatic event tracing (off by default).
65 bool enable_tracing{false};
66
// NOTE(review): the member index for this page also lists
// health_thresholds_config, queue_saturation_high, wait_time_threshold_ms and
// utilization_high_threshold for this struct. Their declarations are not
// visible in this listing, so their types' defaults cannot be confirmed here.
71
76
81
86
// Diagnostics event sampling rate: record every Nth job. The default of 1
// presumably means every job is recorded -- confirm in the implementation.
// NOTE(review): std::uint32_t is used here but <cstdint> is not among the
// visible standard includes; confirm it arrives transitively or include it.
94 std::uint32_t event_sample_rate{1};
95 };
96
// Comprehensive diagnostics API for thread pool monitoring.
// NOTE(review): the `class thread_pool_diagnostics` header line is not present
// in this listing; several member declarations are missing as well and are
// called out where the gaps occur.
143 {
144 public:
// Constructs diagnostics for a thread pool. Only the trailing parameter is
// visible here; the member index gives the full signature as
// thread_pool_diagnostics(thread_pool& pool, const diagnostics_config& config = {}).
151 const diagnostics_config& config = {});
152
157
// The member index lists the copy/move constructors and assignment operators
// as `= delete`; those declarations are not visible in this listing.
158 // Non-copyable, non-movable
163
164 // =========================================================================
165 // Thread Dump
166 // =========================================================================
167
// Gets the current state of all worker threads, one thread_info per entry.
174 [[nodiscard]] auto dump_thread_states() const -> std::vector<thread_info>;
175
// Gets a formatted, human-readable thread dump.
193 [[nodiscard]] auto format_thread_dump() const -> std::string;
194
195 // =========================================================================
196 // Job Inspection
197 // =========================================================================
198
// Gets the currently executing jobs.
203 [[nodiscard]] auto get_active_jobs() const -> std::vector<job_info>;
204
// Gets pending jobs in the queue.
// `limit` defaults to 100; presumably the maximum number of entries returned
// -- confirm in the implementation.
210 [[nodiscard]] auto get_pending_jobs(std::size_t limit = 100) const
211 -> std::vector<job_info>;
212
// Gets recent completed/failed jobs.
// `limit` defaults to 100; presumably the maximum number of entries returned
// -- confirm in the implementation.
218 [[nodiscard]] auto get_recent_jobs(std::size_t limit = 100) const
219 -> std::vector<job_info>;
220
// NOTE(review): the member index lists
// `void record_job_completion(const job_info& info)` ("Records a job
// completion for history tracking"); its declaration is not visible here.
228
229 // =========================================================================
230 // Bottleneck Detection
231 // =========================================================================
232
// Analyzes the pool for bottlenecks and returns the resulting report.
237 [[nodiscard]] auto detect_bottlenecks() const -> bottleneck_report;
238
239 // =========================================================================
240 // Health Checks
241 // =========================================================================
242
// Performs a comprehensive health check and returns the overall status.
247 [[nodiscard]] auto health_check() const -> health_status;
248
// Quick check of whether the pool is healthy.
253 [[nodiscard]] auto is_healthy() const -> bool;
254
255 // =========================================================================
256 // Event Tracing
257 // =========================================================================
258
// Enables or disables job execution tracing.
// `history_size` defaults to 1000; presumably the event-history capacity
// (compare diagnostics_config::event_history_size) -- confirm.
264 void enable_tracing(bool enable, std::size_t history_size = 1000);
265
// Checks whether tracing is enabled.
270 [[nodiscard]] auto is_tracing_enabled() const -> bool;
271
// Adds an event listener.
276 void add_event_listener(std::shared_ptr<execution_event_listener> listener);
277
// Removes an event listener.
282 void remove_event_listener(std::shared_ptr<execution_event_listener> listener);
283
// Records a job execution event.
290 void record_event(const job_execution_event& event);
291
// Gets recent execution events.
// `limit` defaults to 100; presumably the maximum number of entries returned
// -- confirm in the implementation.
297 [[nodiscard]] auto get_recent_events(std::size_t limit = 100) const
298 -> std::vector<job_execution_event>;
299
300 // =========================================================================
301 // Export
302 // =========================================================================
303
// Exports diagnostics as JSON.
308 [[nodiscard]] auto to_json() const -> std::string;
309
// Exports diagnostics as a formatted string.
314 [[nodiscard]] auto to_string() const -> std::string;
315
// Exports diagnostics as Prometheus-compatible metrics.
324 [[nodiscard]] auto to_prometheus() const -> std::string;
325
326 // =========================================================================
327 // Configuration
328 // =========================================================================
329
// Gets the current configuration (returned by value).
334 [[nodiscard]] auto get_config() const -> diagnostics_config;
335
// Updates the configuration.
340 void set_config(const diagnostics_config& config);
341
342 private:
// NOTE(review): the member index lists `thread_pool& pool_` (reference to the
// monitored thread pool) and `diagnostics_config config_` (configuration for
// diagnostics); neither declaration is visible in this listing.
347
352
// Whether event tracing is enabled; starts out false.
356 std::atomic<bool> tracing_enabled_{false};
357
// Mutex for event history access; `mutable` so const members can lock it.
361 mutable std::mutex events_mutex_;
362
// Event history, described by the project docs as a ring buffer.
366 std::deque<job_execution_event> event_history_;
367
// Not described on this page; presumably guards recent_jobs_ -- confirm.
371 mutable std::mutex jobs_mutex_;
372
// Recent job completions, described by the project docs as a ring buffer.
376 std::deque<job_info> recent_jobs_;
377
// Not described on this page; presumably guards listeners_ -- confirm.
381 mutable std::mutex listeners_mutex_;
382
// Registered event listeners.
386 std::vector<std::shared_ptr<execution_event_listener>> listeners_;
387
// Counter for event IDs; starts at 0.
// NOTE(review): std::uint64_t is used but <cstdint> is not among the visible
// standard includes; confirm it arrives transitively or include it.
391 std::atomic<std::uint64_t> next_event_id_{0};
392
// Documented as the time when the pool was started.
396 std::chrono::steady_clock::time_point start_time_;
397
398 // =========================================================================
399 // Internal Helpers
400 // =========================================================================
401
// Gets thread info for a single worker; `index` is presumably the worker's
// position within the pool -- confirm.
408 [[nodiscard]] auto get_worker_info(const thread_worker& worker,
409 std::size_t index) const -> thread_info;
410
// Notifies all event listeners of `event`.
415 void notify_listeners(const job_execution_event& event);
416
// NOTE(review): the member index lists
// `void generate_recommendations(bottleneck_report& report) const`; its
// declaration is not visible in this listing.
422
// Checks worker component health.
427 [[nodiscard]] auto check_worker_health() const -> component_health;
428
// Checks queue component health.
433 [[nodiscard]] auto check_queue_health() const -> component_health;
434
// Checks metrics component health from the supplied average latency (in
// milliseconds, per the parameter name) and success rate.
// NOTE(review): the scale of `success_rate` (0-1 vs 0-100) is not visible.
441 [[nodiscard]] auto check_metrics_health(double avg_latency_ms,
442 double success_rate) const -> component_health;
443 };
444
445} // namespace diagnostics
446} // namespace kcenon::thread
Bottleneck detection and reporting for thread pool performance analysis.
Interface for receiving job execution events.
Comprehensive diagnostics API for thread pool monitoring.
auto check_queue_health() const -> component_health
Checks queue component health.
void generate_recommendations(bottleneck_report &report) const
Generates recommendations for a bottleneck.
auto get_active_jobs() const -> std::vector< job_info >
Gets currently executing jobs.
void add_event_listener(std::shared_ptr< execution_event_listener > listener)
Adds an event listener.
auto get_worker_info(const thread_worker &worker, std::size_t index) const -> thread_info
Gets thread info for a single worker.
auto to_json() const -> std::string
Exports diagnostics as JSON.
thread_pool_diagnostics & operator=(thread_pool_diagnostics &&)=delete
auto get_recent_jobs(std::size_t limit=100) const -> std::vector< job_info >
Gets recent completed/failed jobs.
diagnostics_config config_
Configuration for diagnostics.
auto to_prometheus() const -> std::string
Exports diagnostics as Prometheus-compatible metrics.
std::vector< std::shared_ptr< execution_event_listener > > listeners_
Event listeners.
void record_job_completion(const job_info &info)
Records a job completion for history tracking.
thread_pool & pool_
Reference to the monitored thread pool.
auto detect_bottlenecks() const -> bottleneck_report
Analyzes the thread pool for bottlenecks.
auto is_tracing_enabled() const -> bool
Checks if tracing is enabled.
auto check_metrics_health(double avg_latency_ms, double success_rate) const -> component_health
Checks metrics component health.
auto get_pending_jobs(std::size_t limit=100) const -> std::vector< job_info >
Gets pending jobs in queue.
auto get_config() const -> diagnostics_config
Gets the current configuration.
auto get_recent_events(std::size_t limit=100) const -> std::vector< job_execution_event >
Gets recent execution events.
thread_pool_diagnostics(const thread_pool_diagnostics &)=delete
std::chrono::steady_clock::time_point start_time_
Time when the pool was started.
auto format_thread_dump() const -> std::string
Gets formatted thread dump (human-readable).
thread_pool_diagnostics(thread_pool &pool, const diagnostics_config &config={})
Constructs diagnostics for a thread pool.
void notify_listeners(const job_execution_event &event)
Notifies all event listeners.
auto to_string() const -> std::string
Exports diagnostics as a formatted string.
void remove_event_listener(std::shared_ptr< execution_event_listener > listener)
Removes an event listener.
void enable_tracing(bool enable, std::size_t history_size=1000)
Enables or disables job execution tracing.
auto health_check() const -> health_status
Performs comprehensive health check.
std::atomic< bool > tracing_enabled_
Whether event tracing is enabled.
void record_event(const job_execution_event &event)
Records a job execution event.
thread_pool_diagnostics & operator=(const thread_pool_diagnostics &)=delete
auto is_healthy() const -> bool
Quickly checks whether the pool is healthy.
std::atomic< std::uint64_t > next_event_id_
Counter for event IDs.
std::deque< job_execution_event > event_history_
Ring buffer for event history.
std::mutex events_mutex_
Mutex for event history access.
auto dump_thread_states() const -> std::vector< thread_info >
Gets current state of all worker threads.
void set_config(const diagnostics_config &config)
Updates the configuration.
auto check_worker_health() const -> component_health
Checks worker component health.
thread_pool_diagnostics(thread_pool_diagnostics &&)=delete
std::deque< job_info > recent_jobs_
Ring buffer for recent job completions.
A thread pool for concurrent execution of jobs using multiple worker threads.
A specialized worker thread that processes jobs from a job_queue.
Job execution event types and listener interface for tracing.
Health status thresholds and monitoring for thread pools.
Job information snapshot for diagnostics and monitoring.
Core threading foundation of the thread system library.
Definition thread_impl.h:17
@ info
Informational messages highlighting progress.
STL namespace.
Analysis report of bottlenecks in the thread pool.
Health status of a single component.
Configuration options for thread pool diagnostics.
health_thresholds health_thresholds_config
Configurable thresholds for health status determination.
double queue_saturation_high
High watermark threshold for queue saturation (0.0 to 1.0).
std::size_t recent_jobs_capacity
Maximum number of recent jobs to track.
std::uint32_t event_sample_rate
Diagnostics event sampling rate (record every Nth job).
double wait_time_threshold_ms
Wait time threshold (ms) for slow consumer detection.
bool enable_tracing
Enable automatic event tracing.
std::size_t event_history_size
Maximum number of events to retain in history.
double utilization_high_threshold
Worker utilization threshold for bottleneck detection.
Comprehensive health status of the thread pool.
Configurable thresholds for health status determination.
Event data for job execution tracing.
Information about a job in the thread pool.
Definition job_info.h:90
Information about a worker thread in the pool.
Definition thread_info.h:88
Worker thread state information for diagnostics.