Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
thread_pool.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11
12using namespace utility_module;
13
namespace kcenon::thread {
// Support both old (namespace common) and new (namespace kcenon::common) versions
// When inside namespace kcenon::thread, 'common' resolves to kcenon::common
#if KCENON_HAS_COMMON_EXECUTOR
namespace common_ns = common;
#endif

// Initialize static member.
// Monotonic counter handing out a unique id per pool instance; each
// constructor takes the next value via fetch_add, so ids are unique even
// when pools are constructed concurrently from multiple threads.
std::atomic<std::uint32_t> thread_pool::next_pool_instance_id_{0};
51thread_pool::thread_pool(const std::string& thread_title, const thread_context& context)
52 : thread_title_(thread_title)
53 , pool_instance_id_(next_pool_instance_id_.fetch_add(1))
54 , start_pool_(false)
55 , job_queue_(std::make_shared<kcenon::thread::job_queue>())
56 , context_(context)
57 , pool_cancellation_token_(cancellation_token::create())
58 , metrics_service_(std::make_shared<metrics::metrics_service>()) {
59 // Report initial pool registration if monitoring is available
60 if (context_.monitoring()) {
61 common::interfaces::thread_pool_metrics initial_metrics(thread_title_, pool_instance_id_);
62 initial_metrics.worker_threads.value = 0;
63 context_.update_thread_pool_metrics(thread_title_, pool_instance_id_, initial_metrics);
64 }
65}
66
82thread_pool::thread_pool(const std::string& thread_title,
83 std::shared_ptr<job_queue> custom_queue,
84 const thread_context& context)
85 : thread_title_(thread_title)
86 , pool_instance_id_(next_pool_instance_id_.fetch_add(1))
87 , start_pool_(false)
88 , job_queue_(std::move(custom_queue))
89 , context_(context)
90 , pool_cancellation_token_(cancellation_token::create())
91 , metrics_service_(std::make_shared<metrics::metrics_service>()) {
92 // Report initial pool registration if monitoring is available
93 if (context_.monitoring()) {
94 common::interfaces::thread_pool_metrics initial_metrics(thread_title_, pool_instance_id_);
95 initial_metrics.worker_threads.value = 0;
96 context_.update_thread_pool_metrics(thread_title_, pool_instance_id_, initial_metrics);
97 }
98}
99
116thread_pool::thread_pool(const std::string& thread_title,
117 std::unique_ptr<pool_queue_adapter_interface> queue_adapter,
118 const thread_context& context)
119 : thread_title_(thread_title)
120 , pool_instance_id_(next_pool_instance_id_.fetch_add(1))
121 , start_pool_(false)
122 , job_queue_(queue_adapter ? queue_adapter->get_job_queue() : nullptr)
123 , queue_adapter_(std::move(queue_adapter))
124 , context_(context)
125 , pool_cancellation_token_(cancellation_token::create())
126 , metrics_service_(std::make_shared<metrics::metrics_service>()) {
127 // Report initial pool registration if monitoring is available
128 if (context_.monitoring()) {
129 common::interfaces::thread_pool_metrics initial_metrics(thread_title_, pool_instance_id_);
130 initial_metrics.worker_threads.value = 0;
131 context_.update_thread_pool_metrics(thread_title_, pool_instance_id_, initial_metrics);
132 }
133}
134
    // NOTE(review): incomplete fragment — the destructor's signature line and
    // the opening `if (...)` condition are missing from this extract. The
    // surviving comments suggest the condition checks an "is shutting down" /
    // static-destruction flag — TODO confirm against the repository source.
    // Check if we're in static destruction phase
    // During static destruction, logger/monitoring singletons may already be destroyed
        // Minimal cleanup without logging to avoid SDOF (static destruction order fiasco)
        stop_unsafe();
    } else {
        // Normal lifetime: full shutdown with logging enabled.
        stop();
    }
}
158
169auto thread_pool::get_ptr(void) -> std::shared_ptr<thread_pool> {
170 return this->shared_from_this();
171}
172
196auto thread_pool::start(void) -> common::VoidResult {
197 // Acquire lock to check workers_ safely
198 std::scoped_lock<std::mutex> lock(workers_mutex_);
199
200 // Check if pool is already running
201 // Use acquire to ensure we see all previous modifications to pool state
202 if (start_pool_.load(std::memory_order_acquire)) {
203 return common::error_info{static_cast<int>(error_code::thread_already_running), "thread pool is already running", "thread_system"};
204 }
205
206 // Validate that workers have been added
207 if (workers_.empty()) {
208 return common::error_info{static_cast<int>(error_code::invalid_argument), "no workers to start", "thread_system"};
209 }
210
211 // Handle queue initialization for restart scenarios
212 // The approach differs based on whether we're using job_queue or queue_adapter
213 if (queue_adapter_) {
214 // Using queue_adapter (policy_queue or wrapped job_queue)
215 if (queue_adapter_->is_stopped()) {
216 // For policy_queue adapters, we can't easily recreate,
217 // so we return an error suggesting pool recreation
218 return common::error_info{
219 static_cast<int>(error_code::queue_stopped),
220 "queue is stopped; create a new thread_pool instance for restart",
221 "thread_system"};
222 }
223 // Update job_queue_ reference if adapter wraps a job_queue
224 job_queue_ = queue_adapter_->get_job_queue();
225 } else if (job_queue_ == nullptr || job_queue_->is_stopped()) {
226 // Create fresh job queue for restart scenarios
227 // Stopped queues cannot accept new jobs, so we must create a new instance
228 job_queue_ = std::make_shared<kcenon::thread::job_queue>();
229
230 // Update all workers with the new queue reference
231 for (auto& worker : workers_) {
232 worker->set_job_queue(job_queue_);
233 }
234 }
235
236 // Create fresh pool cancellation token for restart scenarios
237 // This ensures workers start with a non-cancelled token
238 pool_cancellation_token_ = cancellation_token::create();
239 metrics_service_->reset();
240
241 // Attempt to start each worker
242 for (auto& worker : workers_) {
243 auto start_result = worker->start();
244 if (start_result.is_err()) {
245 // If any worker fails, stop all and return error
246 stop();
247 return start_result.error();
248 }
249 }
250
251 // Mark pool as successfully started
252 // Use release to ensure all previous modifications (worker starts, queue setup)
253 // are visible to other threads before they see start_pool_ == true
254 start_pool_.store(true, std::memory_order_release);
255
256 return common::ok();
257}
258
269auto thread_pool::get_job_queue(void) -> std::shared_ptr<job_queue> {
270 return job_queue_;
271}
272
274 return metrics_service_->basic_metrics();
275}
276
280
282 std::size_t worker_count = 0;
283 {
284 std::scoped_lock<std::mutex> lock(workers_mutex_);
285 worker_count = workers_.size();
286 }
287 metrics_service_->set_enhanced_metrics_enabled(enabled, worker_count);
288}
289
291 return metrics_service_->is_enhanced_metrics_enabled();
292}
293
295 return metrics_service_->enhanced_metrics();
296}
297
301
323auto thread_pool::enqueue(std::unique_ptr<job>&& job) -> common::VoidResult {
324 // Validate inputs
325 if (job == nullptr) {
326 return common::error_info{static_cast<int>(error_code::invalid_argument), "job is null", "thread_system"};
327 }
328
329 // Check queue availability and stopped state
330 // Supports both queue_adapter_ (policy_queue) and job_queue_ (legacy)
331 if (queue_adapter_) {
332 if (queue_adapter_->is_stopped()) {
333 return common::error_info{static_cast<int>(error_code::queue_stopped), "thread pool is stopped", "thread_system"};
334 }
335 } else if (job_queue_ == nullptr) {
336 return common::error_info{static_cast<int>(error_code::resource_allocation_failed), "job queue is null", "thread_system"};
337 } else if (job_queue_->is_stopped()) {
338 return common::error_info{static_cast<int>(error_code::queue_stopped), "thread pool is stopped", "thread_system"};
339 }
340
341 // Record metrics and enqueue job
342 metrics_service_->record_submission();
343
344 auto start_time = std::chrono::steady_clock::now();
345
346 if (queue_adapter_) {
347 auto enqueue_result = queue_adapter_->enqueue(std::move(job));
348 if (enqueue_result.is_err()) {
349 return enqueue_result.error();
350 }
351 } else {
352 auto enqueue_result = job_queue_->enqueue(std::move(job));
353 if (enqueue_result.is_err()) {
354 return enqueue_result.error();
355 }
356 }
357
358 auto end_time = std::chrono::steady_clock::now();
359 auto latency = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
360 metrics_service_->record_enqueue_with_latency(latency);
361
362 auto queue_size = queue_adapter_ ? queue_adapter_->size() : job_queue_->size();
363 metrics_service_->record_queue_depth(queue_size);
364
365 return common::ok();
366}
367
368auto thread_pool::enqueue_batch(std::vector<std::unique_ptr<job>>&& jobs) -> common::VoidResult {
369 if (jobs.empty()) {
370 return common::error_info{static_cast<int>(error_code::invalid_argument), "jobs are empty", "thread_system"};
371 }
372
373 // Check queue availability and stopped state
374 // Supports both queue_adapter_ (policy_queue) and job_queue_ (legacy)
375 if (queue_adapter_) {
376 if (queue_adapter_->is_stopped()) {
377 return common::error_info{static_cast<int>(error_code::queue_stopped), "thread pool is stopped", "thread_system"};
378 }
379 } else if (job_queue_ == nullptr) {
380 return common::error_info{static_cast<int>(error_code::resource_allocation_failed), "job queue is null", "thread_system"};
381 } else if (job_queue_->is_stopped()) {
382 return common::error_info{static_cast<int>(error_code::queue_stopped), "thread pool is stopped", "thread_system"};
383 }
384
385 const auto batch_size = jobs.size();
386 metrics_service_->record_submission(batch_size);
387
388 auto start_time = std::chrono::steady_clock::now();
389
390 if (queue_adapter_) {
391 auto enqueue_result = queue_adapter_->enqueue_batch(std::move(jobs));
392 if (enqueue_result.is_err()) {
393 return enqueue_result.error();
394 }
395 } else {
396 auto enqueue_result = job_queue_->enqueue_batch(std::move(jobs));
397 if (enqueue_result.is_err()) {
398 return enqueue_result.error();
399 }
400 }
401
402 auto end_time = std::chrono::steady_clock::now();
403 auto latency = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
404 metrics_service_->record_enqueue_with_latency(latency, batch_size);
405
406 auto queue_size = queue_adapter_ ? queue_adapter_->size() : job_queue_->size();
407 metrics_service_->record_queue_depth(queue_size);
408
409 return common::ok();
410}
411
// Registers one worker with the pool; if the pool is already running the
// worker is started immediately. The worker is wired to the pool's queue,
// context, metrics, and diagnostics before being added.
auto thread_pool::enqueue(std::unique_ptr<thread_worker>&& worker) -> common::VoidResult {
    if (worker == nullptr) {
        return common::error_info{static_cast<int>(error_code::invalid_argument), "worker is null", "thread_system"};
    }

    // Get job_queue from adapter if available, otherwise use direct job_queue_
    std::shared_ptr<job_queue> worker_queue;
    if (queue_adapter_) {
        worker_queue = queue_adapter_->get_job_queue();
        if (!worker_queue) {
            // policy_queue adapter without job_queue backend
            // Workers currently require job_queue; this limitation may be lifted in future versions
            return common::error_info{
                // NOTE(review): the error-code argument (internal line 425) is
                // missing from this extract and could not be recovered —
                // confirm the code used here against the repository source.
                "policy_queue adapter without job_queue backend not yet supported for workers",
                "thread_system"};
        }
    } else if (job_queue_ == nullptr) {
        return common::error_info{static_cast<int>(error_code::resource_allocation_failed), "job queue is null", "thread_system"};
    } else {
        worker_queue = job_queue_;
    }

    // Wire the worker into the pool's services before it can run.
    worker->set_job_queue(worker_queue);
    worker->set_context(context_);
    worker->set_metrics(metrics_service_->get_basic_metrics());
    worker->set_diagnostics(&diagnostics());
    worker->set_diagnostics_sample_rate(diagnostics().get_config().event_sample_rate);

    // Acquire lock before checking start_pool_ and adding worker
    // This prevents race condition with stop():
    // - stop() acquires workers_mutex_ after atomically setting start_pool_ to false
    // - If we check start_pool_ while holding the lock, we ensure consistent state
    std::scoped_lock<std::mutex> lock(workers_mutex_);

    // Use memory_order_acquire to ensure we see all previous modifications
    // made by the thread that set start_pool_ to true (in start())
    bool is_running = start_pool_.load(std::memory_order_acquire);

    // Add worker to vector first, before starting
    // This ensures stop() will see and stop this worker if called concurrently
    workers_.emplace_back(std::move(worker));

    // Only start the worker if pool is running
    // Since we hold workers_mutex_, stop() cannot proceed until we release it
    if (is_running) {
        auto start_result = workers_.back()->start();
        if (start_result.is_err()) {
            // Remove the worker we just added since it failed to start
            workers_.pop_back();
            return start_result.error();
        }
    }

    return common::ok();
}
468
469auto thread_pool::enqueue_batch(std::vector<std::unique_ptr<thread_worker>>&& workers)
470 -> common::VoidResult {
471 if (workers.empty()) {
472 return common::error_info{static_cast<int>(error_code::invalid_argument), "workers are empty", "thread_system"};
473 }
474
475 if (job_queue_ == nullptr) {
476 return common::error_info{static_cast<int>(error_code::resource_allocation_failed), "job queue is null", "thread_system"};
477 }
478
479 // Acquire lock before processing workers
480 // This ensures atomic check-and-add operation with respect to stop()
481 std::scoped_lock<std::mutex> lock(workers_mutex_);
482
483 // Check pool running state once with acquire semantics
484 bool is_running = start_pool_.load(std::memory_order_acquire);
485
486 // Track the starting index for rollback in case of error
487 std::size_t start_index = workers_.size();
488
489 // Get diagnostics pointer and config once outside the loop for efficiency
490 auto* diag = &diagnostics();
491 const auto sample_rate = diag->get_config().event_sample_rate;
492
493 for (auto& worker : workers) {
494 worker->set_job_queue(job_queue_);
495 worker->set_context(context_);
496 worker->set_metrics(metrics_service_->get_basic_metrics());
497 worker->set_diagnostics(diag);
498 worker->set_diagnostics_sample_rate(sample_rate);
499
500 // Add worker to vector first
501 workers_.emplace_back(std::move(worker));
502
503 // Only start if pool is running
504 if (is_running) {
505 auto start_result = workers_.back()->start();
506 if (start_result.is_err()) {
507 // Rollback: remove all workers added in this batch
508 workers_.erase(workers_.begin() + static_cast<std::ptrdiff_t>(start_index),
509 workers_.end());
510 return start_result.error();
511 }
512 }
513 }
514
515 return common::ok();
516}
517
518auto thread_pool::stop(const bool& immediately_stop) -> common::VoidResult {
519 // Use compare_exchange_strong to atomically check and set state
520 // This prevents TOCTOU (Time-Of-Check-Time-Of-Use) race conditions
521 // where multiple threads might call stop() simultaneously
522 bool expected = true;
523 if (!start_pool_.compare_exchange_strong(expected, false, std::memory_order_acq_rel,
524 std::memory_order_acquire)) {
525 // Pool is already stopped or being stopped by another thread
526 return common::ok();
527 }
528
529 // At this point, we've atomically transitioned from running to stopped
530 // and only this thread will execute the shutdown sequence
531
532 // Cancel pool-level token to propagate cancellation to all workers and jobs
533 // This triggers hierarchical cancellation:
534 // 1. Pool token cancelled → linked worker tokens cancelled
535 // 2. Worker tokens cancelled → running jobs receive cancellation signal
536 pool_cancellation_token_.cancel();
537
538 // Stop the queue (supports both queue_adapter_ and job_queue_)
539 if (queue_adapter_) {
540 queue_adapter_->stop();
541 if (immediately_stop) {
542 queue_adapter_->clear();
543 }
544 } else if (job_queue_ != nullptr) {
545 job_queue_->stop();
546 if (immediately_stop) {
547 job_queue_->clear();
548 }
549 }
550
551 // Stop workers while holding lock to ensure consistent iteration
552 // This is safe because worker->stop() only signals and joins threads,
553 // it does not call back into thread_pool methods
554 std::scoped_lock<std::mutex> lock(workers_mutex_);
555 for (auto& worker : workers_) {
556 auto stop_result = worker->stop();
557 if (stop_result.is_err()) {
558 context_.log(common::interfaces::log_level::error, formatter::format("error stopping worker: {}",
559 stop_result.error().message));
560 }
561 }
562
563 return common::ok();
564}
565
584auto thread_pool::stop_unsafe() noexcept -> common::VoidResult {
585 // Use compare_exchange_strong to atomically check and set state
586 // Same atomic transition as stop() to prevent race conditions
587 bool expected = true;
588 if (!start_pool_.compare_exchange_strong(expected, false, std::memory_order_acq_rel,
589 std::memory_order_acquire)) {
590 // Pool is already stopped or being stopped by another thread
591 return common::ok();
592 }
593
594 // Cancel pool-level token to propagate cancellation to all workers and jobs
596
597 // Stop the queue (supports both queue_adapter_ and job_queue_)
598 if (queue_adapter_) {
599 queue_adapter_->stop();
600 } else if (job_queue_ != nullptr) {
601 job_queue_->stop();
602 }
603
604 // Stop workers while holding lock to ensure consistent iteration
605 // No logging to avoid accessing potentially destroyed singletons
606 std::scoped_lock<std::mutex> lock(workers_mutex_);
607 for (auto& worker : workers_) {
608 // Stop worker without checking result to avoid any potential exceptions
609 // during static destruction
610 worker->stop();
611 }
612
613 return common::ok();
614}
615
616auto thread_pool::to_string(void) const -> std::string {
617 std::string format_string;
618
619 // Use relaxed memory order for diagnostic/logging purposes
620 // Exact state ordering is not critical for debug output
621 formatter::format_to(std::back_inserter(format_string), "{} is {},\n", thread_title_,
622 start_pool_.load(std::memory_order_relaxed) ? "running" : "stopped");
623
624 // Get queue string representation (supports both queue_adapter_ and job_queue_)
625 std::string queue_str;
626 if (queue_adapter_) {
627 queue_str = queue_adapter_->to_string();
628 } else if (job_queue_ != nullptr) {
629 queue_str = job_queue_->to_string();
630 } else {
631 queue_str = "nullptr";
632 }
633 formatter::format_to(std::back_inserter(format_string), "\tjob_queue: {}\n\n", queue_str);
634
635 // Protect workers_ access with lock
636 std::scoped_lock<std::mutex> lock(workers_mutex_);
637 formatter::format_to(std::back_inserter(format_string), "\tworkers: {}\n", workers_.size());
638 for (const auto& worker : workers_) {
639 formatter::format_to(std::back_inserter(format_string), "\t{}\n", worker->to_string());
640 }
641
642 return format_string;
643}
644
645auto thread_pool::get_context(void) const -> const thread_context& {
646 return context_;
647}
648
650 return pool_instance_id_;
651}
652
654 if (!context_.monitoring()) {
655 return;
656 }
657
658 common::interfaces::thread_pool_metrics metrics(thread_title_, pool_instance_id_);
659
660 // Protect workers_ access with lock
661 {
662 std::scoped_lock<std::mutex> lock(workers_mutex_);
663 metrics.worker_threads.value = static_cast<double>(workers_.size());
664 }
665
666 metrics.idle_threads.value = static_cast<double>(get_idle_worker_count());
667
668 // Get pending jobs count (supports both queue_adapter_ and job_queue_)
669 if (queue_adapter_) {
670 metrics.jobs_pending.value = static_cast<double>(queue_adapter_->size());
671 } else if (job_queue_) {
672 metrics.jobs_pending.value = static_cast<double>(job_queue_->size());
673 }
674
675 // Report metrics with pool identification
677}
678
680 // Count idle workers by checking each worker's idle state
681 // Thread safety: workers_mutex_ protects access to workers_ vector
682 std::scoped_lock<std::mutex> lock(workers_mutex_);
683
684 return static_cast<std::size_t>(std::count_if(
685 workers_.begin(), workers_.end(),
686 [](const std::unique_ptr<thread_worker>& worker) { return worker && worker->is_idle(); }));
687}
688
689auto thread_pool::is_running() const -> bool {
690 // Use acquire to ensure we see the latest pool state
691 // This is important for callers making decisions based on running state
692 return start_pool_.load(std::memory_order_acquire);
693}
694
696 // Supports both queue_adapter_ and job_queue_
697 if (queue_adapter_) {
698 return queue_adapter_->size();
699 }
700 if (job_queue_) {
701 return job_queue_->size();
702 }
703 return 0;
704}
705
706auto thread_pool::check_worker_health(bool restart_failed) -> std::size_t {
707 std::scoped_lock<std::mutex> lock(workers_mutex_);
708
709 std::size_t failed_count = 0;
710
711 // Remove dead workers using erase-remove idiom
712 auto remove_iter =
713 std::remove_if(workers_.begin(), workers_.end(),
714 [&failed_count](const std::unique_ptr<thread_worker>& worker) {
715 if (!worker || !worker->is_running()) {
716 ++failed_count;
717 return true; // Remove this worker
718 }
719 return false; // Keep this worker
720 });
721
722 workers_.erase(remove_iter, workers_.end());
723
724 // Restart workers if requested and pool is running
725 if (restart_failed && failed_count > 0 && is_running()) {
726 // Get diagnostics pointer and config for new workers
727 auto* diag = &diagnostics();
728 const auto sample_rate = diag->get_config().event_sample_rate;
729
730 // Create new workers to replace failed ones
731 for (std::size_t i = 0; i < failed_count; ++i) {
732 // Create worker with default settings and context
733 auto worker = std::make_unique<thread_worker>(true, context_);
734
735 // Set job queue and diagnostics
736 worker->set_job_queue(job_queue_);
737 worker->set_metrics(metrics_service_->get_basic_metrics());
738 worker->set_diagnostics(diag);
739 worker->set_diagnostics_sample_rate(sample_rate);
740
741 // Start the new worker
742 auto start_result = worker->start();
743 if (start_result.is_err()) {
744 // Failed to start, skip this worker
745 continue;
746 }
747
748 workers_.push_back(std::move(worker));
749 }
750 }
751
752 return failed_count;
753}
754
755auto thread_pool::get_active_worker_count() const -> std::size_t {
756 std::scoped_lock<std::mutex> lock(workers_mutex_);
757
758 return static_cast<std::size_t>(std::count_if(workers_.begin(), workers_.end(),
759 [](const std::unique_ptr<thread_worker>& worker) {
760 return worker && worker->is_running();
761 }));
762}
763
764// ============================================================================
765// Diagnostics
766// ============================================================================
767
768auto thread_pool::diagnostics() -> diagnostics::thread_pool_diagnostics& {
769 std::call_once(diagnostics_init_flag_, [this]() {
770 diagnostics_ = std::make_unique<diagnostics::thread_pool_diagnostics>(*this);
771 });
772 return *diagnostics_;
773}
774
775auto thread_pool::diagnostics() const -> const diagnostics::thread_pool_diagnostics& {
776 std::call_once(diagnostics_init_flag_, [this]() {
777 diagnostics_ = std::make_unique<diagnostics::thread_pool_diagnostics>(
778 const_cast<thread_pool&>(*this));
779 });
780 return *diagnostics_;
781}
782
783auto thread_pool::collect_worker_diagnostics() const
784 -> std::vector<diagnostics::thread_info> {
785 std::scoped_lock<std::mutex> lock(workers_mutex_);
786
787 std::vector<diagnostics::thread_info> result;
788 result.reserve(workers_.size());
789
790 for (std::size_t i = 0; i < workers_.size(); ++i) {
791 const auto& worker = workers_[i];
792 if (!worker) {
793 continue;
794 }
795
797 info.thread_id = worker->get_thread_id();
798 info.thread_name = "Worker-" + std::to_string(i);
799 info.worker_id = worker->get_worker_id();
800
801 // Determine worker state
802 if (!worker->is_running()) {
803 info.state = diagnostics::worker_state::stopped;
804 } else if (worker->is_idle()) {
805 info.state = diagnostics::worker_state::idle;
806 } else {
807 info.state = diagnostics::worker_state::active;
808 }
809
810 info.state_since = worker->get_state_since();
811
812 // Get current job info if active
813 info.current_job = worker->get_current_job_info();
814
815 // Get statistics
816 info.jobs_completed = worker->get_jobs_completed();
817 info.jobs_failed = worker->get_jobs_failed();
818 info.total_busy_time = worker->get_total_busy_time();
819 info.total_idle_time = worker->get_total_idle_time();
820
821 // Calculate utilization
822 info.update_utilization();
823
824 result.push_back(std::move(info));
825 }
826
827 return result;
828}
829
830// ============================================================================
831// Pool Policies
832// ============================================================================
833
834void thread_pool::add_policy(std::unique_ptr<pool_policy> policy) {
835 if (!policy) {
836 return;
837 }
838
839 std::scoped_lock<std::mutex> lock(policies_mutex_);
840 policies_.push_back(std::move(policy));
841}
842
843auto thread_pool::get_policies() const -> const std::vector<std::unique_ptr<pool_policy>>& {
844 return policies_;
845}
846
847auto thread_pool::remove_policy(const std::string& name) -> bool {
848 std::scoped_lock<std::mutex> lock(policies_mutex_);
849
850 auto it = std::remove_if(policies_.begin(), policies_.end(),
851 [&name](const std::unique_ptr<pool_policy>& policy) {
852 return policy && policy->get_name() == name;
853 });
854
855 if (it != policies_.end()) {
856 policies_.erase(it, policies_.end());
857 return true;
858 }
859
860 return false;
861}
862
863// ============================================================================
864// Internal Methods (for friend classes)
865// ============================================================================
866
867auto thread_pool::remove_workers_internal(std::size_t count, std::size_t min_workers)
868 -> common::VoidResult
869{
870 if (count == 0) {
871 return common::ok();
872 }
873
874 std::scoped_lock<std::mutex> lock(workers_mutex_);
875
876 if (workers_.size() <= min_workers) {
877 return common::error_info{
878 static_cast<int>(error_code::invalid_argument),
879 "Cannot remove workers: already at minimum",
880 "thread_system"
881 };
882 }
883
884 // Calculate how many we can actually remove
885 std::size_t max_removable = workers_.size() - min_workers;
886 count = std::min(count, max_removable);
887
888 std::size_t removed = 0;
889
890 // First pass: remove idle workers
891 auto it = workers_.begin();
892 while (it != workers_.end() && removed < count) {
893 if (*it && (*it)->is_idle()) {
894 // Stop the worker
895 (*it)->stop();
896 it = workers_.erase(it);
897 ++removed;
898 } else {
899 ++it;
900 }
901 }
902
903 // If we still need to remove more, wait briefly for workers to become idle
904 if (removed < count) {
905 // Just return success with what we removed
906 // Remaining workers will be removed on subsequent calls
907 context_.log(common::interfaces::log_level::info,
908 formatter::format("Removed {} of {} requested workers (remaining are busy)",
909 removed, count));
910 }
911
912 return common::ok();
913}
914
915} // namespace kcenon::thread
Provides a mechanism for cooperative cancellation of operations.
void cancel()
Cancels the operation.
static cancellation_token create()
Creates a new cancellation token.
Comprehensive diagnostics API for thread pool monitoring.
A thread-safe job queue for managing and dispatching work items.
Definition job_queue.h:65
Represents a unit of work (task) to be executed, typically by a job queue.
Definition job.h:136
Enhanced thread pool metrics with histograms and percentiles.
Lightweight metrics container shared between thread_pool and workers.
void reset() override
Reset all metrics to their initial state.
Base interface for thread pool policies.
Definition pool_policy.h:81
A template class representing either a value or an error.
Context object that provides access to optional services.
std::shared_ptr< IMonitor > monitoring() const
Get the monitoring service.
void update_thread_pool_metrics(const common::interfaces::thread_pool_metrics &metrics) const
Update thread pool metrics if monitoring is available.
static bool is_shutting_down()
Check if shutdown is in progress.
A thread pool for concurrent execution of jobs using multiple worker threads.
static std::atomic< std::uint32_t > next_pool_instance_id_
Static counter for generating unique pool instance IDs.
bool is_enhanced_metrics_enabled() const
Check if enhanced metrics is enabled.
virtual ~thread_pool(void)
Virtual destructor. Cleans up resources used by the thread pool.
auto get_pending_task_count() const -> std::size_t
Get the number of pending tasks in the queue.
metrics::EnhancedSnapshot enhanced_metrics_snapshot() const
Get enhanced metrics snapshot.
void report_metrics()
Collect and report current thread pool metrics.
std::size_t get_idle_worker_count() const
Get the number of idle workers.
thread_pool(const std::string &thread_title="thread_pool", const thread_context &context=thread_context())
Constructs a new thread_pool instance.
auto to_string(void) const -> std::string
Provides a string representation of this thread_pool.
std::unique_ptr< pool_queue_adapter_interface > queue_adapter_
Queue adapter for unified access to different queue types.
auto get_context(void) const -> const thread_context &
Gets the thread context for this pool.
std::shared_ptr< metrics::metrics_service > metrics_service_
Centralized metrics service for all pool and worker metrics.
std::vector< std::unique_ptr< thread_worker > > workers_
A collection of worker threads associated with this pool.
void reset_metrics()
Reset accumulated metrics.
const metrics::EnhancedThreadPoolMetrics & enhanced_metrics() const
Access enhanced metrics (read-only reference).
auto enqueue(std::unique_ptr< job > &&job) -> common::VoidResult
Enqueues a new job into the shared job_queue.
auto is_running() const -> bool
Check if the thread pool is currently running.
auto enqueue_batch(std::vector< std::unique_ptr< job > > &&jobs) -> common::VoidResult
Enqueues a batch of jobs into the shared job_queue.
std::string thread_title_
A title or name for this thread pool, useful for identification and logging.
std::uint32_t get_pool_instance_id() const
Get the pool instance id.
auto stop_unsafe() noexcept -> common::VoidResult
Stops the thread pool without logging (for use during static destruction).
void set_enhanced_metrics_enabled(bool enabled)
Enable or disable enhanced metrics collection.
std::atomic< bool > start_pool_
Indicates whether the pool is currently running.
cancellation_token pool_cancellation_token_
Pool-level cancellation token.
const metrics::ThreadPoolMetrics & metrics() const noexcept
Access aggregated runtime metrics (read-only reference).
std::shared_ptr< job_queue > job_queue_
The shared job queue where jobs (job objects) are enqueued.
auto check_worker_health(bool restart_failed=true) -> std::size_t
Check health of all worker threads and restart failed workers.
std::uint32_t pool_instance_id_
Unique instance ID for this pool (for multi-pool scenarios).
thread_context context_
The thread context providing access to logging and monitoring services.
auto get_job_queue(void) -> std::shared_ptr< job_queue >
Returns the shared job_queue used by this thread pool.
auto stop(const bool &immediately_stop=false) -> common::VoidResult
Stops the thread pool and all worker threads.
auto get_ptr(void) -> std::shared_ptr< thread_pool >
Retrieves a std::shared_ptr to this thread_pool instance.
auto start(void) -> common::VoidResult
Starts the thread pool and all associated workers.
std::mutex workers_mutex_
Mutex protecting concurrent access to the workers_ vector.
static auto format_to(OutputIt out, const char *formats, const FormatArgs &... args) -> OutputIt
Formats a narrow-character string directly to an output iterator.
Definition formatter.h:162
static auto format(const char *formats, const FormatArgs &... args) -> std::string
Formats a narrow-character string with the given arguments.
Definition formatter.h:132
Core thread pool implementation with work stealing and auto-scaling.
Generic formatter for enum types using user-provided converter functors.
Thread-safe FIFO job queue with optional bounded size.
Adapter bridging job_queue to pool_queue_adapter_interface.
Core threading foundation of the thread system library.
Definition thread_impl.h:17
@ info
Informational messages highlighting progress.
@ latency
Latency threshold exceeded.
STL namespace.
Information about a worker thread in the pool.
Definition thread_info.h:88
Enhanced snapshot with latency percentiles and throughput.
Internal logging interface for the thread system.
Runtime diagnostics, health monitoring, and execution tracing for thread pools.