PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
simd_types.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
15#ifndef PACS_ENCODING_SIMD_TYPES_HPP
16#define PACS_ENCODING_SIMD_TYPES_HPP
17
#include "simd_config.h"

#include <cstddef>
#include <cstdint>
#include <cstring>
22
24
// Memory alignment requirements, in bytes, for each SIMD register width.
// Declared `inline` so that every translation unit including this header
// refers to one shared definition of each constant.
inline constexpr std::size_t SSE_ALIGNMENT = 16;     // 128-bit vectors
inline constexpr std::size_t AVX_ALIGNMENT = 32;     // 256-bit vectors
inline constexpr std::size_t AVX512_ALIGNMENT = 64;  // 512-bit vectors
29
#if defined(PACS_ARCH_X64) || defined(PACS_ARCH_X86)

// 128-bit register aliases, available when SSE2 is enabled
#ifdef PACS_SIMD_SSE2
using vec128i = __m128i;
using vec128f = __m128;
using vec128d = __m128d;
#endif  // PACS_SIMD_SSE2

// 256-bit register aliases, available when AVX2 is enabled
#ifdef PACS_SIMD_AVX2
using vec256i = __m256i;
using vec256f = __m256;
using vec256d = __m256d;
#endif  // PACS_SIMD_AVX2

// 512-bit register aliases, available when AVX-512 is enabled
#ifdef PACS_SIMD_AVX512
using vec512i = __m512i;
using vec512f = __m512;
using vec512d = __m512d;
#endif  // PACS_SIMD_AVX512

#elif defined(PACS_SIMD_NEON)

// 128-bit NEON register aliases, one per element layout
using vec128i8 = int8x16_t;
using vec128u8 = uint8x16_t;
using vec128i16 = int16x8_t;
using vec128u16 = uint16x8_t;
using vec128i32 = int32x4_t;
using vec128u32 = uint32x4_t;
using vec128i64 = int64x2_t;
using vec128u64 = uint64x2_t;
using vec128f = float32x4_t;

// The float64x2_t alias is only provided when __aarch64__ is defined
#ifdef __aarch64__
using vec128d = float64x2_t;
#endif  // __aarch64__

#endif  // Architecture
71
75struct alignas(SSE_ALIGNMENT) vec128_int {
76#if defined(PACS_SIMD_SSE2)
77 __m128i data;
78
79 vec128_int() noexcept : data(_mm_setzero_si128()) {}
80 explicit vec128_int(__m128i v) noexcept : data(v) {}
81
82 static vec128_int load(const void* ptr) noexcept {
83 return vec128_int(_mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr)));
84 }
85
86 static vec128_int load_aligned(const void* ptr) noexcept {
87 return vec128_int(_mm_load_si128(reinterpret_cast<const __m128i*>(ptr)));
88 }
89
90 void store(void* ptr) const noexcept {
91 _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr), data);
92 }
93
94 void store_aligned(void* ptr) const noexcept {
95 _mm_store_si128(reinterpret_cast<__m128i*>(ptr), data);
96 }
97
98#elif defined(PACS_SIMD_NEON)
99 uint8x16_t data;
100
101 vec128_int() noexcept : data(vdupq_n_u8(0)) {}
102 explicit vec128_int(uint8x16_t v) noexcept : data(v) {}
103
104 static vec128_int load(const void* ptr) noexcept {
105 return vec128_int(vld1q_u8(reinterpret_cast<const uint8_t*>(ptr)));
106 }
107
108 static vec128_int load_aligned(const void* ptr) noexcept {
109 return load(ptr); // NEON handles unaligned loads efficiently
110 }
111
112 void store(void* ptr) const noexcept {
113 vst1q_u8(reinterpret_cast<uint8_t*>(ptr), data);
114 }
115
116 void store_aligned(void* ptr) const noexcept {
117 store(ptr);
118 }
119
120#else
121 // Scalar fallback
122 alignas(16) uint8_t bytes[16]{};
123
124 vec128_int() noexcept = default;
125
126 static vec128_int load(const void* ptr) noexcept {
127 vec128_int result;
128 std::memcpy(result.bytes, ptr, 16);
129 return result;
130 }
131
132 static vec128_int load_aligned(const void* ptr) noexcept {
133 return load(ptr);
134 }
135
136 void store(void* ptr) const noexcept {
137 std::memcpy(ptr, bytes, 16);
138 }
139
140 void store_aligned(void* ptr) const noexcept {
141 store(ptr);
142 }
143#endif
144};
145
149struct alignas(AVX_ALIGNMENT) vec256_int {
150#if defined(PACS_SIMD_AVX2)
151 __m256i data;
152
153 vec256_int() noexcept : data(_mm256_setzero_si256()) {}
154 explicit vec256_int(__m256i v) noexcept : data(v) {}
155
156 static vec256_int load(const void* ptr) noexcept {
157 return vec256_int(
158 _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr)));
159 }
160
161 static vec256_int load_aligned(const void* ptr) noexcept {
162 return vec256_int(
163 _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr)));
164 }
165
166 void store(void* ptr) const noexcept {
167 _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), data);
168 }
169
170 void store_aligned(void* ptr) const noexcept {
171 _mm256_store_si256(reinterpret_cast<__m256i*>(ptr), data);
172 }
173
174#else
175 // Fallback using two 128-bit vectors
178
179 vec256_int() noexcept = default;
180 vec256_int(vec128_int l, vec128_int h) noexcept : low(l), high(h) {}
181
182 static vec256_int load(const void* ptr) noexcept {
183 const auto* p = reinterpret_cast<const uint8_t*>(ptr);
184 return vec256_int(vec128_int::load(p), vec128_int::load(p + 16));
185 }
186
187 static vec256_int load_aligned(const void* ptr) noexcept {
188 const auto* p = reinterpret_cast<const uint8_t*>(ptr);
189 return vec256_int(vec128_int::load_aligned(p),
190 vec128_int::load_aligned(p + 16));
191 }
192
193 void store(void* ptr) const noexcept {
194 auto* p = reinterpret_cast<uint8_t*>(ptr);
195 low.store(p);
196 high.store(p + 16);
197 }
198
199 void store_aligned(void* ptr) const noexcept {
200 auto* p = reinterpret_cast<uint8_t*>(ptr);
201 low.store_aligned(p);
202 high.store_aligned(p + 16);
203 }
204#endif
205};
206
/**
 * @brief Check if a pointer is aligned to the specified boundary.
 *
 * @tparam Alignment Boundary in bytes; must be non-zero (enforced at
 *                   compile time, since a zero boundary would be a
 *                   modulo-by-zero).
 * @param ptr Address to test.
 * @return true when the numeric value of @p ptr is a multiple of Alignment.
 *
 * @note Although declared constexpr, the body uses reinterpret_cast, which
 *       is not permitted in a constant expression, so calls are evaluated
 *       at run time.
 */
template <std::size_t Alignment>
[[nodiscard]] constexpr bool is_aligned(const void* ptr) noexcept {
    static_assert(Alignment != 0, "Alignment must be non-zero");
    return (reinterpret_cast<std::uintptr_t>(ptr) % Alignment) == 0;
}
214
/**
 * @brief Number of bytes from @p ptr up to the next Alignment-aligned address.
 *
 * @tparam Alignment Boundary in bytes; must be non-zero (enforced at
 *                   compile time, since a zero boundary would be a
 *                   modulo-by-zero).
 * @param ptr Address to measure from.
 * @return 0 when @p ptr is already a multiple of Alignment; otherwise the
 *         distance in bytes to the next multiple, in the range
 *         [1, Alignment - 1].
 *
 * @note Although declared constexpr, the body uses reinterpret_cast, which
 *       is not permitted in a constant expression, so calls are evaluated
 *       at run time.
 */
template <std::size_t Alignment>
[[nodiscard]] constexpr std::size_t align_offset(const void* ptr) noexcept {
    static_assert(Alignment != 0, "Alignment must be non-zero");
    const auto addr = reinterpret_cast<std::uintptr_t>(ptr);
    const auto remainder = static_cast<std::size_t>(addr % Alignment);
    return remainder == 0 ? std::size_t{0} : Alignment - remainder;
}
224
225} // namespace kcenon::pacs::encoding::simd
226
227#endif // PACS_ENCODING_SIMD_TYPES_HPP
constexpr size_t AVX_ALIGNMENT
Definition simd_types.h:27
constexpr size_t align_offset(const void *ptr) noexcept
Get the number of bytes from ptr to the next address aligned to the specified boundary (0 if ptr is already aligned).
Definition simd_types.h:219
constexpr bool is_aligned(const void *ptr) noexcept
Check if a pointer is aligned to the specified boundary.
Definition simd_types.h:211
constexpr size_t SSE_ALIGNMENT
Definition simd_types.h:26
constexpr size_t AVX512_ALIGNMENT
Definition simd_types.h:28
SIMD configuration and CPU feature detection.
Portable 128-bit integer vector wrapper.
Definition simd_types.h:75
void store(void *ptr) const noexcept
Definition simd_types.h:136
static vec128_int load_aligned(const void *ptr) noexcept
Definition simd_types.h:132
void store_aligned(void *ptr) const noexcept
Definition simd_types.h:140
Portable 256-bit integer vector wrapper.
Definition simd_types.h:149
void store(void *ptr) const noexcept
Definition simd_types.h:193
void store_aligned(void *ptr) const noexcept
Definition simd_types.h:199
static vec256_int load(const void *ptr) noexcept
Definition simd_types.h:182
static vec256_int load_aligned(const void *ptr) noexcept
Definition simd_types.h:187