Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
enhanced_metrics.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
12#pragma once
13
17
18#include <atomic>
19#include <chrono>
20#include <cstdint>
21#include <mutex>
22#include <optional>
23#include <string>
24#include <vector>
25
27
36 // =========================================================================
37 // Basic Counters (from ThreadPoolMetrics)
38 // =========================================================================
39
43 std::uint64_t tasks_submitted{0};
44
48 std::uint64_t tasks_executed{0};
49
53 std::uint64_t tasks_failed{0};
54
55 // =========================================================================
56 // Latency Percentiles (microseconds for readability)
57 // =========================================================================
58
63
68
73
78
83
88
92 double wait_time_p50_us{0.0};
93
97 double wait_time_p90_us{0.0};
98
102 double wait_time_p99_us{0.0};
103
104 // =========================================================================
105 // Throughput
106 // =========================================================================
107
111 double throughput_1s{0.0};
112
116 double throughput_1m{0.0};
117
118 // =========================================================================
119 // Queue Health
120 // =========================================================================
121
125 std::size_t current_queue_depth{0};
126
130 std::size_t peak_queue_depth{0};
131
135 double avg_queue_depth{0.0};
136
137 // =========================================================================
138 // Worker Utilization
139 // =========================================================================
140
147
151 std::vector<double> per_worker_utilization;
152
156 std::size_t active_workers{0};
157
158 // =========================================================================
159 // Timing Information
160 // =========================================================================
161
165 std::uint64_t total_busy_time_ns{0};
166
170 std::uint64_t total_idle_time_ns{0};
171
175 std::chrono::steady_clock::time_point snapshot_time;
176};
177
185 std::size_t worker_id{0};
186
190 std::uint64_t tasks_executed{0};
191
195 std::uint64_t busy_time_ns{0};
196
200 std::uint64_t idle_time_ns{0};
201
205 bool is_busy{false};
206};
207
252public:
257 explicit EnhancedThreadPoolMetrics(std::size_t worker_count = 0);
258
262 ~EnhancedThreadPoolMetrics() override = default;
263
264 // Non-copyable, non-movable for thread safety
269
270 // =========================================================================
271 // Recording Methods (called by thread_pool)
272 // =========================================================================
273
277 void record_submission();
278
283 void record_enqueue(std::chrono::nanoseconds latency);
284
290 void record_execution(std::chrono::nanoseconds latency, bool success);
291
296 void record_wait_time(std::chrono::nanoseconds wait);
297
302 void record_queue_depth(std::size_t depth);
303
311 std::size_t worker_id,
312 bool busy,
313 std::uint64_t duration_ns = 0);
314
319 void set_active_workers(std::size_t count);
320
321 // =========================================================================
322 // Query Methods
323 // =========================================================================
324
331 [[nodiscard]] EnhancedSnapshot snapshot() const;
332
337 [[nodiscard]] const LatencyHistogram& enqueue_latency() const;
338
343 [[nodiscard]] const LatencyHistogram& execution_latency() const;
344
349 [[nodiscard]] const LatencyHistogram& wait_time() const;
350
355 [[nodiscard]] std::vector<WorkerMetrics> worker_metrics() const;
356
361 [[nodiscard]] const SlidingWindowCounter& throughput_1s() const;
362
367 [[nodiscard]] const SlidingWindowCounter& throughput_1m() const;
368
369 // =========================================================================
370 // Management Methods
371 // =========================================================================
372
378 void reset() override;
379
386 void update_worker_count(std::size_t count);
387
388 // =========================================================================
389 // Export Methods
390 // =========================================================================
391
396 [[nodiscard]] std::string to_json() const;
397
403 [[nodiscard]] std::string to_prometheus(
404 const std::string& prefix = "thread_pool") const;
405
406private:
407 // Latency histograms
411
412 // Throughput counters
415
416 // Note: Basic counters (tasks_submitted_, tasks_executed_, tasks_failed_,
417 // total_busy_time_ns_, total_idle_time_ns_) are inherited from MetricsBase
418
419 // Queue depth tracking
420 std::atomic<std::size_t> current_queue_depth_{0};
421 std::atomic<std::size_t> peak_queue_depth_{0};
422 std::atomic<std::uint64_t> queue_depth_sum_{0};
423 std::atomic<std::uint64_t> queue_depth_samples_{0};
424
425 // Worker tracking
426 std::atomic<std::size_t> active_workers_{0};
427
428 // Per-worker metrics
429 mutable std::mutex workers_mutex_;
430 std::vector<WorkerMetrics> per_worker_metrics_;
431
432 // Helper: converts a nanosecond quantity to microseconds by dividing by 1000.0 (fractional part is kept)
433 [[nodiscard]] static double ns_to_us(double ns) { return ns / 1000.0; }
434};
435
436} // namespace kcenon::thread::metrics
Enhanced thread pool metrics with histograms and percentiles.
const SlidingWindowCounter & throughput_1s() const
Get the 1-second throughput counter (read-only).
const LatencyHistogram & execution_latency() const
Get the execution latency histogram (read-only).
void set_active_workers(std::size_t count)
Set the number of active workers.
void record_execution(std::chrono::nanoseconds latency, bool success)
Record task execution completion.
EnhancedSnapshot snapshot() const
Get a comprehensive snapshot of all metrics.
const SlidingWindowCounter & throughput_1m() const
Get the 1-minute throughput counter (read-only).
EnhancedThreadPoolMetrics & operator=(const EnhancedThreadPoolMetrics &)=delete
std::string to_prometheus(const std::string &prefix="thread_pool") const
Export metrics in Prometheus/OpenMetrics format.
~EnhancedThreadPoolMetrics() override=default
Destructor.
std::vector< WorkerMetrics > worker_metrics() const
Get per-worker metrics.
void record_worker_state(std::size_t worker_id, bool busy, std::uint64_t duration_ns=0)
Update worker state.
void record_wait_time(std::chrono::nanoseconds wait)
Record wait time (time spent in queue).
std::string to_json() const
Export metrics as JSON string.
void reset() override
Reset all metrics to initial state.
const LatencyHistogram & wait_time() const
Get the wait time histogram (read-only).
const LatencyHistogram & enqueue_latency() const
Get the enqueue latency histogram (read-only).
EnhancedThreadPoolMetrics & operator=(EnhancedThreadPoolMetrics &&)=delete
EnhancedThreadPoolMetrics(EnhancedThreadPoolMetrics &&)=delete
void record_enqueue(std::chrono::nanoseconds latency)
Record enqueue operation latency.
EnhancedThreadPoolMetrics(std::size_t worker_count=0)
Constructs enhanced metrics with the specified worker count.
void update_worker_count(std::size_t count)
Update worker count.
void record_queue_depth(std::size_t depth)
Record current queue depth.
EnhancedThreadPoolMetrics(const EnhancedThreadPoolMetrics &)=delete
HDR-style histogram for latency distribution with logarithmic buckets.
Abstract base class for thread pool metrics.
Sliding window counter for throughput measurement.
HDR-style histogram for latency distribution with logarithmic buckets.
Thread pool metrics collection, histograms, and observability.
@ latency
Latency threshold exceeded.
Sliding window counter for throughput measurement.
Enhanced snapshot with latency percentiles and throughput.
double throughput_1s
Tasks completed per second (1-second window).
std::size_t peak_queue_depth
Peak queue depth since last reset.
double worker_utilization
Overall worker utilization (0.0 - 1.0).
std::size_t active_workers
Number of active workers.
double enqueue_latency_p50_us
Median (P50) enqueue latency in microseconds.
std::uint64_t tasks_failed
Total tasks that failed during execution.
std::uint64_t total_busy_time_ns
Total busy time across all workers in nanoseconds.
std::chrono::steady_clock::time_point snapshot_time
Timestamp when this snapshot was taken.
double enqueue_latency_p90_us
90th percentile enqueue latency in microseconds.
double wait_time_p90_us
90th percentile wait time in microseconds.
double execution_latency_p99_us
99th percentile execution latency in microseconds.
std::uint64_t tasks_executed
Total tasks successfully executed.
std::uint64_t tasks_submitted
Total tasks submitted to the pool.
std::uint64_t total_idle_time_ns
Total idle time across all workers in nanoseconds.
double avg_queue_depth
Average queue depth over the sampling period.
double execution_latency_p50_us
Median (P50) execution latency in microseconds.
double wait_time_p50_us
Median (P50) wait time (time spent in queue) in microseconds.
double throughput_1m
Tasks completed per second (1-minute window average).
double execution_latency_p90_us
90th percentile execution latency in microseconds.
std::size_t current_queue_depth
Current queue depth.
double wait_time_p99_us
99th percentile wait time in microseconds.
std::vector< double > per_worker_utilization
Per-worker utilization (0.0 - 1.0 each).
double enqueue_latency_p99_us
99th percentile enqueue latency in microseconds.
Per-worker metrics for detailed analysis.
std::uint64_t idle_time_ns
Total idle time in nanoseconds.
std::size_t worker_id
Worker identifier.
bool is_busy
Current state (true = busy, false = idle).
std::uint64_t busy_time_ns
Total busy time in nanoseconds.
std::uint64_t tasks_executed
Total tasks executed by this worker.