15#ifndef PACS_ENCODING_SIMD_TYPES_HPP
16#define PACS_ENCODING_SIMD_TYPES_HPP
// Short aliases for the platform's native SIMD register types.
// x86 / x86-64 branch: each alias names the matching Intel intrinsic type
// (suffix i = integer, f = single-precision float, d = double-precision float).
30#if defined(PACS_ARCH_X64) || defined(PACS_ARCH_X86)
// 128-bit types, declared only when PACS_SIMD_SSE2 is defined.
33#if defined(PACS_SIMD_SSE2)
34using vec128i = __m128i;
35using vec128f = __m128;
36using vec128d = __m128d;
// 256-bit types, declared only when PACS_SIMD_AVX2 is defined.
40#if defined(PACS_SIMD_AVX2)
41using vec256i = __m256i;
42using vec256f = __m256;
43using vec256d = __m256d;
// 512-bit types, declared only when PACS_SIMD_AVX512 is defined.
47#if defined(PACS_SIMD_AVX512)
48using vec512i = __m512i;
49using vec512f = __m512;
50using vec512d = __m512d;
// NEON branch: NEON vector types carry their lane type, so there is one alias
// per lane width and signedness instead of a single integer type.
53#elif defined(PACS_SIMD_NEON)
56using vec128i8 = int8x16_t;
57using vec128u8 = uint8x16_t;
58using vec128i16 = int16x8_t;
59using vec128u16 = uint16x8_t;
60using vec128i32 = int32x4_t;
61using vec128u32 = uint32x4_t;
62using vec128i64 = int64x2_t;
63using vec128u64 = uint64x2_t;
64using vec128f = float32x4_t;
// The double-precision vector alias is declared only when __aarch64__ is defined.
66#if defined(__aarch64__)
67using vec128d = float64x2_t;
76#if defined(PACS_SIMD_SSE2)
/// Default constructor: initialises `data` with _mm_setzero_si128(), i.e. all bits zero.
79 vec128_int() noexcept : data(_mm_setzero_si128()) {}
/// Wraps an existing __m128i value. Marked explicit, so a raw __m128i is never
/// converted to vec128_int implicitly.
80 explicit vec128_int(__m128i v) noexcept : data(v) {}
/// Load 128 bits (16 bytes) starting at `ptr` and return them as a vec128_int.
/// Uses _mm_loadu_si128, the unaligned SSE2 load, so `ptr` needs no particular
/// alignment. `ptr` is dereferenced without any null or bounds check.
82 static vec128_int load(
const void* ptr)
noexcept {
83 return vec128_int(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(ptr)));
/// Load 128 bits (16 bytes) starting at `ptr` and return them as a vec128_int.
/// Uses _mm_load_si128, the aligned SSE2 load: `ptr` must be 16-byte aligned.
/// No alignment, null or bounds check is performed here.
86 static vec128_int load_aligned(
const void* ptr)
noexcept {
87 return vec128_int(_mm_load_si128(
reinterpret_cast<const __m128i*
>(ptr)));
/// Write the 128 bits held in `data` to memory starting at `ptr`.
/// Uses _mm_storeu_si128, the unaligned SSE2 store, so `ptr` needs no
/// particular alignment. The vector itself is not modified (const member).
90 void store(
void* ptr)
const noexcept {
91 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(ptr), data);
/// Write the 128 bits held in `data` to memory starting at `ptr`.
/// Uses _mm_store_si128, the aligned SSE2 store: `ptr` must be 16-byte aligned.
/// No alignment check is performed here.
94 void store_aligned(
void* ptr)
const noexcept {
95 _mm_store_si128(
reinterpret_cast<__m128i*
>(ptr), data);
98#elif defined(PACS_SIMD_NEON)
/// Default constructor: initialises `data` with vdupq_n_u8(0), i.e. the byte
/// value 0 duplicated into all 16 lanes.
101 vec128_int() noexcept : data(vdupq_n_u8(0)) {}
/// Wraps an existing uint8x16_t value. Marked explicit, so a raw NEON vector is
/// never converted to vec128_int implicitly.
102 explicit vec128_int(uint8x16_t v) noexcept : data(v) {}
/// Load 16 bytes starting at `ptr` and return them as a vec128_int.
/// The pointer is reinterpreted as `const uint8_t*` and read with vld1q_u8.
/// `ptr` is dereferenced without any null or bounds check.
104 static vec128_int load(
const void* ptr)
noexcept {
105 return vec128_int(vld1q_u8(
reinterpret_cast<const uint8_t*
>(ptr)));
108 static vec128_int load_aligned(
const void* ptr)
noexcept {
/// Write the 16 byte lanes held in `data` to memory starting at `ptr`.
/// The pointer is reinterpreted as `uint8_t*` and written with vst1q_u8.
/// The vector itself is not modified (const member).
112 void store(
void* ptr)
const noexcept {
113 vst1q_u8(
reinterpret_cast<uint8_t*
>(ptr), data);
116 void store_aligned(
void* ptr)
const noexcept {
// Plain 16-byte storage: aligned to a 16-byte boundary by alignas(16) and
// zero-filled by the empty brace initialiser.
122 alignas(16) uint8_t bytes[16]{};
128 std::memcpy(result.
bytes, ptr, 16);
/// Copy the 16 bytes held in `bytes` to memory starting at `ptr` using
/// std::memcpy. memcpy imposes no alignment requirement on `ptr`; the
/// destination must have room for 16 bytes. The object is not modified.
136 void store(
void* ptr)
const noexcept {
137 std::memcpy(ptr, bytes, 16);
150#if defined(PACS_SIMD_AVX2)
/// Default constructor: initialises `data` with _mm256_setzero_si256(), i.e. all bits zero.
153 vec256_int() noexcept : data(_mm256_setzero_si256()) {}
/// Wraps an existing __m256i value. Marked explicit, so a raw __m256i is never
/// converted to vec256_int implicitly.
154 explicit vec256_int(__m256i v) noexcept : data(v) {}
156 static vec256_int load(
const void* ptr)
noexcept {
158 _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(ptr)));
161 static vec256_int load_aligned(
const void* ptr)
noexcept {
163 _mm256_load_si256(
reinterpret_cast<const __m256i*
>(ptr)));
/// Write the 256 bits (32 bytes) held in `data` to memory starting at `ptr`.
/// Uses _mm256_storeu_si256, the unaligned AVX store, so `ptr` needs no
/// particular alignment. The vector itself is not modified (const member).
166 void store(
void* ptr)
const noexcept {
167 _mm256_storeu_si256(
reinterpret_cast<__m256i*
>(ptr), data);
/// Write the 256 bits (32 bytes) held in `data` to memory starting at `ptr`.
/// Uses _mm256_store_si256, the aligned AVX store: `ptr` must be 32-byte
/// aligned. No alignment check is performed here.
170 void store_aligned(
void* ptr)
const noexcept {
171 _mm256_store_si256(
reinterpret_cast<__m256i*
>(ptr), data);
183 const auto* p =
reinterpret_cast<const uint8_t*
>(ptr);
184 return vec256_int(vec128_int::load(p), vec128_int::load(p + 16));
188 const auto* p =
reinterpret_cast<const uint8_t*
>(ptr);
189 return vec256_int(vec128_int::load_aligned(p),
190 vec128_int::load_aligned(p + 16));
193 void store(
void* ptr)
const noexcept {
194 auto* p =
reinterpret_cast<uint8_t*
>(ptr);
200 auto* p =
reinterpret_cast<uint8_t*
>(ptr);
201 low.store_aligned(p);
202 high.store_aligned(p + 16);
/// @brief Check if a pointer is aligned to the specified boundary.
///
/// @tparam Alignment Boundary in bytes. Must be non-zero; any non-zero value
///                   is accepted, not only powers of two.
/// @param ptr Pointer whose address is tested. It is never dereferenced.
/// @return true when the address of @p ptr is an exact multiple of Alignment.
template <size_t Alignment>
[[nodiscard]] constexpr bool is_aligned(const void* ptr) noexcept {
    // A zero boundary would turn the modulo below into a division by zero.
    static_assert(Alignment != 0, "Alignment must be non-zero");

    // reinterpret_cast is not allowed in a constant expression, so although the
    // function is declared constexpr it can only be evaluated at run time.
    const auto address = reinterpret_cast<uintptr_t>(ptr);
    return (address % Alignment) == 0;
}
/// @brief Get the aligned portion start offset.
///
/// Computes how many bytes must be skipped from @p ptr to reach the next
/// address that is a multiple of Alignment.
///
/// @tparam Alignment Boundary in bytes. Must be non-zero.
/// @param ptr Pointer whose address is examined. It is never dereferenced.
/// @return 0 when @p ptr already sits on the boundary, otherwise
///         Alignment minus (address modulo Alignment).
template <size_t Alignment>
[[nodiscard]] constexpr size_t align_offset(const void* ptr) noexcept {
    // A zero boundary would turn the modulo below into a division by zero.
    static_assert(Alignment != 0, "Alignment must be non-zero");

    // reinterpret_cast is not allowed in a constant expression, so although the
    // function is declared constexpr it can only be evaluated at run time.
    const auto addr = reinterpret_cast<uintptr_t>(ptr);
    const auto remainder = addr % Alignment;
    return remainder == 0 ? 0 : Alignment - remainder;
}
constexpr size_t AVX_ALIGNMENT
constexpr size_t align_offset(const void *ptr) noexcept
Get the aligned portion start offset.
constexpr bool is_aligned(const void *ptr) noexcept
Check if a pointer is aligned to the specified boundary.
constexpr size_t SSE_ALIGNMENT
constexpr size_t AVX512_ALIGNMENT
SIMD configuration and CPU feature detection.
Portable 128-bit integer vector wrapper.
void store(void *ptr) const noexcept
vec128_int() noexcept=default
static vec128_int load_aligned(const void *ptr) noexcept
void store_aligned(void *ptr) const noexcept
Portable 256-bit integer vector wrapper.
void store(void *ptr) const noexcept
void store_aligned(void *ptr) const noexcept
vec256_int() noexcept=default
static vec256_int load(const void *ptr) noexcept
static vec256_int load_aligned(const void *ptr) noexcept