Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
work_stealing_deque.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
12#pragma once
13
14#include <algorithm>
15#include <atomic>
16#include <cstdint>
17#include <memory>
18#include <optional>
19#include <vector>
20
22
/**
 * @brief Dynamic circular array for the work-stealing deque.
 *
 * A fixed, power-of-two-sized ring of atomic slots. Logical deque indices
 * grow monotonically and are wrapped onto the ring with a bit mask. All
 * slot accesses use relaxed ordering: the owning deque's fences and CAS
 * operations provide the required inter-thread ordering.
 */
template<typename T>
class circular_array {
public:
    /**
     * @brief Constructs a circular array with capacity 2^log_size.
     * @param log_size Log2 of the desired capacity.
     *
     * Every slot is initialized to T{} so that a racy read of a
     * never-written slot still yields a well-defined value.
     */
    explicit circular_array(std::size_t log_size)
        : log_size_(log_size)
        , size_(std::size_t{1} << log_size)
        , mask_(size_ - 1)
        , buffer_(std::make_unique<std::atomic<T>[]>(size_)) {
        for (std::size_t i = 0; i < size_; ++i) {
            buffer_[i].store(T{}, std::memory_order_relaxed);
        }
    }

    /// @brief Destructor — the unique_ptr releases the slot storage.
    ~circular_array() = default;

    // Non-copyable: the buffer has exactly one owner.
    circular_array(const circular_array&) = delete;
    circular_array& operator=(const circular_array&) = delete;

    /// @brief Get the capacity of the array.
    [[nodiscard]] std::size_t size() const noexcept {
        return size_;
    }

    /**
     * @brief Get element at @p index (wrapped by capacity) with relaxed ordering.
     * @param index Logical deque index; only the low log_size_ bits select the slot.
     */
    [[nodiscard]] T get(std::int64_t index) const noexcept {
        return buffer_[static_cast<std::size_t>(index) & mask_].load(std::memory_order_relaxed);
    }

    /**
     * @brief Store @p value at @p index (wrapped by capacity) with relaxed ordering.
     */
    void put(std::int64_t index, T value) noexcept {
        buffer_[static_cast<std::size_t>(index) & mask_].store(value, std::memory_order_relaxed);
    }

    /**
     * @brief Create a new array with double the capacity, copying the
     *        live range [top, bottom).
     * @param bottom One past the youngest live logical index.
     * @param top    The oldest live logical index.
     * @return Heap-allocated replacement array; ownership passes to the caller.
     */
    [[nodiscard]] circular_array* grow(std::int64_t bottom, std::int64_t top) const {
        auto* new_array = new circular_array(log_size_ + 1);
        for (std::int64_t i = top; i < bottom; ++i) {
            new_array->put(i, get(i));
        }
        return new_array;
    }

private:
    std::size_t log_size_;  ///< Log2 of the capacity.
    std::size_t size_;      ///< Capacity (always a power of two).
    std::size_t mask_;      ///< size_ - 1, wraps logical indices onto the ring.
    std::unique_ptr<std::atomic<T>[]> buffer_;  ///< Slot storage (RAII-owned).
};
103
144template<typename T>
146public:
150 static constexpr std::size_t LOG_INITIAL_SIZE = 5;
151
156 explicit work_stealing_deque(std::size_t log_initial_size = LOG_INITIAL_SIZE)
157 : top_(0)
158 , bottom_(0)
159 , array_(new circular_array<T>(log_initial_size)) {
160 }
161
166 delete array_.load(std::memory_order_relaxed);
167 }
168
169 // Non-copyable and non-movable
174
184 void push(T item) {
185 std::int64_t b = bottom_.load(std::memory_order_relaxed);
186 std::int64_t t = top_.load(std::memory_order_acquire);
187 circular_array<T>* a = array_.load(std::memory_order_relaxed);
188
189 // Check if array needs to grow
190 if (b - t > static_cast<std::int64_t>(a->size()) - 1) {
191 // Grow the array
192 circular_array<T>* new_array = a->grow(b, t);
193 // Store old array for cleanup (in a real implementation,
194 // you would use hazard pointers or epoch-based reclamation)
195 old_arrays_.push_back(a);
196 array_.store(new_array, std::memory_order_release);
197 a = new_array;
198 }
199
200 a->put(b, item);
201 std::atomic_thread_fence(std::memory_order_release);
202 bottom_.store(b + 1, std::memory_order_relaxed);
203 }
204
214 [[nodiscard]] std::optional<T> pop() {
215 std::int64_t b = bottom_.load(std::memory_order_relaxed) - 1;
216 circular_array<T>* a = array_.load(std::memory_order_relaxed);
217 bottom_.store(b, std::memory_order_relaxed);
218 std::atomic_thread_fence(std::memory_order_seq_cst);
219 std::int64_t t = top_.load(std::memory_order_relaxed);
220
221 if (t <= b) {
222 // Non-empty queue
223 T item = a->get(b);
224 if (t == b) {
225 // Last element - compete with thieves
226 if (!top_.compare_exchange_strong(
227 t, t + 1,
228 std::memory_order_seq_cst,
229 std::memory_order_relaxed)) {
230 // Lost race with a thief
231 bottom_.store(b + 1, std::memory_order_relaxed);
232 return std::nullopt;
233 }
234 bottom_.store(b + 1, std::memory_order_relaxed);
235 }
236 return item;
237 } else {
238 // Empty queue
239 bottom_.store(b + 1, std::memory_order_relaxed);
240 return std::nullopt;
241 }
242 }
243
253 [[nodiscard]] std::optional<T> steal() {
254 std::int64_t t = top_.load(std::memory_order_acquire);
255 std::atomic_thread_fence(std::memory_order_seq_cst);
256 std::int64_t b = bottom_.load(std::memory_order_acquire);
257
258 if (t < b) {
259 // Non-empty queue
260 circular_array<T>* a = array_.load(std::memory_order_consume);
261 T item = a->get(t);
262
263 if (!top_.compare_exchange_strong(
264 t, t + 1,
265 std::memory_order_seq_cst,
266 std::memory_order_relaxed)) {
267 // Lost race with another thief or owner
268 return std::nullopt;
269 }
270 return item;
271 }
272 return std::nullopt;
273 }
274
299 [[nodiscard]] std::vector<T> steal_batch(std::size_t max_count) {
300 if (max_count == 0) {
301 return {};
302 }
303
304 std::int64_t t = top_.load(std::memory_order_acquire);
305 std::atomic_thread_fence(std::memory_order_seq_cst);
306 std::int64_t b = bottom_.load(std::memory_order_acquire);
307
308 if (t >= b) {
309 // Empty queue
310 return {};
311 }
312
313 // Calculate how many we can actually steal
314 std::int64_t available = b - t;
315 std::size_t to_steal = std::min(
316 max_count,
317 static_cast<std::size_t>(available)
318 );
319
320 // Try to atomically claim the range [t, t + to_steal)
321 std::int64_t new_top = t + static_cast<std::int64_t>(to_steal);
322
323 if (!top_.compare_exchange_strong(
324 t, new_top,
325 std::memory_order_seq_cst,
326 std::memory_order_relaxed)) {
327 // Lost race with another thief or owner
328 // Return empty and let caller retry if needed
329 return {};
330 }
331
332 // Successfully claimed the range - now read the elements
333 // The CAS already ensured we have exclusive access to [t, new_top)
334 circular_array<T>* a = array_.load(std::memory_order_consume);
335 std::vector<T> result;
336 result.reserve(to_steal);
337
338 for (std::int64_t i = t; i < new_top; ++i) {
339 result.push_back(a->get(i));
340 }
341
342 return result;
343 }
344
352 [[nodiscard]] bool empty() const noexcept {
353 std::int64_t b = bottom_.load(std::memory_order_relaxed);
354 std::int64_t t = top_.load(std::memory_order_relaxed);
355 return b <= t;
356 }
357
364 [[nodiscard]] std::size_t size() const noexcept {
365 std::int64_t b = bottom_.load(std::memory_order_relaxed);
366 std::int64_t t = top_.load(std::memory_order_relaxed);
367 std::int64_t diff = b - t;
368 return diff > 0 ? static_cast<std::size_t>(diff) : 0;
369 }
370
375 [[nodiscard]] std::size_t capacity() const noexcept {
376 return array_.load(std::memory_order_relaxed)->size();
377 }
378
386 for (auto* old_array : old_arrays_) {
387 delete old_array;
388 }
389 old_arrays_.clear();
390 }
391
392private:
393 // Cache line padding to prevent false sharing
394 static constexpr std::size_t CACHE_LINE_SIZE = 64;
395
396 alignas(CACHE_LINE_SIZE) std::atomic<std::int64_t> top_;
397 alignas(CACHE_LINE_SIZE) std::atomic<std::int64_t> bottom_;
399
400 // Storage for old arrays (simple approach - could use hazard pointers)
401 std::vector<circular_array<T>*> old_arrays_;
402};
403
404} // namespace kcenon::thread::lockfree
Dynamic circular array for work-stealing deque.
circular_array * grow(std::int64_t bottom, std::int64_t top) const
Create a new array with double the capacity, copying elements.
void put(std::int64_t index, T value) noexcept
Store element at index with relaxed memory ordering.
circular_array(std::size_t log_size)
Constructs a circular array with given capacity.
T get(std::int64_t index) const noexcept
Get element at index with relaxed memory ordering.
std::size_t size() const noexcept
Get the capacity of the array.
circular_array(const circular_array &)=delete
circular_array & operator=(const circular_array &)=delete
Lock-free work-stealing deque based on the Chase-Lev algorithm.
work_stealing_deque(const work_stealing_deque &)=delete
std::size_t capacity() const noexcept
Get the capacity of the current array.
std::size_t size() const noexcept
Get approximate size of the deque.
~work_stealing_deque()
Destructor - cleans up the circular array.
work_stealing_deque & operator=(work_stealing_deque &&)=delete
static constexpr std::size_t LOG_INITIAL_SIZE
Default initial log capacity (2^LOG_INITIAL_SIZE = 32 elements)
bool empty() const noexcept
Check if the deque appears empty.
work_stealing_deque(std::size_t log_initial_size=LOG_INITIAL_SIZE)
Constructs an empty work-stealing deque.
std::vector< T > steal_batch(std::size_t max_count)
Steal multiple elements from the top of the deque (thief threads)
work_stealing_deque(work_stealing_deque &&)=delete
std::atomic< circular_array< T > * > array_
std::optional< T > steal()
Steal an element from the top of the deque (thief threads)
std::vector< circular_array< T > * > old_arrays_
void cleanup_old_arrays()
Clear all old arrays (for memory cleanup)
work_stealing_deque & operator=(const work_stealing_deque &)=delete
void push(T item)
Push an element onto the bottom of the deque (owner only)
std::optional< T > pop()
Pop an element from the bottom of the deque (owner only)
A template class representing either a value or an error.
STL namespace.