21#ifndef PACS_ENCODING_SIMD_WINDOWING_HPP
22#define PACS_ENCODING_SIMD_WINDOWING_HPP
44 bool inv =
false) noexcept
51 const window_level_params& params)
noexcept;
54 const window_level_params& params)
noexcept;
57 const window_level_params& params)
noexcept;
// Fills the 256-entry table used for 8-bit input. Each index i is mapped
// linearly so that [center - width/2, center + width/2] spans 0..255.
// NOTE(review): the enclosing signature and the table allocation are not
// visible in this excerpt; presumably window_level_lut::create_8bit — confirm.
71 const double min_val = params.
center - params.
width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here; a zero width
// would divide by zero — confirm it is validated elsewhere.
72 const double scale = 255.0 / params.
width;
74 for (
int i = 0; i < 256; ++i) {
75 double val = (i - min_val) * scale;
// Saturate to the output range before narrowing to a byte.
76 val = std::clamp(val, 0.0, 255.0);
// Entries are rounded to nearest (std::round), not truncated.
80 lut.
lut_8bit_[i] =
static_cast<uint8_t
>(std::round(val));
// Fills a 4096-entry table (12-bit input range) stored in lut_16bit_.
// Each index i is mapped linearly so that [center - width/2, center + width/2]
// spans 0..255.
// NOTE(review): the enclosing signature and the table allocation are not
// visible in this excerpt; presumably window_level_lut::create_12bit — confirm.
93 const double min_val = params.
center - params.
width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here.
94 const double scale = 255.0 / params.
width;
96 for (
int i = 0; i < 4096; ++i) {
97 double val = (i - min_val) * scale;
// Saturate to the output range before narrowing to a byte.
98 val = std::clamp(val, 0.0, 255.0);
// Entries are rounded to nearest (std::round), not truncated.
102 lut.
lut_16bit_[i] =
static_cast<uint8_t
>(std::round(val));
// Fills a 65536-entry table (full 16-bit input range) stored in lut_16bit_.
// Each index i is mapped linearly so that [center - width/2, center + width/2]
// spans 0..255.
// NOTE(review): the enclosing signature and the table allocation are not
// visible in this excerpt; presumably window_level_lut::create_16bit — confirm.
115 const double min_val = params.
center - params.
width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here.
116 const double scale = 255.0 / params.
width;
118 for (
int i = 0; i < 65536; ++i) {
119 double val = (i - min_val) * scale;
// Saturate to the output range before narrowing to a byte.
120 val = std::clamp(val, 0.0, 255.0);
// Entries are rounded to nearest (std::round), not truncated.
124 lut.
lut_16bit_[i] =
static_cast<uint8_t
>(std::round(val));
// Tail of a const noexcept member that takes a pixel count.
// NOTE(review): the start of the signature and the loop body are not visible
// in this excerpt; presumably window_level_lut::apply_8bit — confirm.
134 size_t pixel_count)
const noexcept {
// Visits each of the pixel_count elements once, in order.
135 for (
size_t i = 0; i < pixel_count; ++i) {
// Tail of a const noexcept member that takes a pixel count.
// NOTE(review): the start of the signature, the definition of lut_size and
// both branch bodies are not visible in this excerpt; presumably
// window_level_lut::apply_16bit — confirm.
144 size_t pixel_count)
const noexcept {
146 for (
size_t i = 0; i < pixel_count; ++i) {
147 const uint16_t val = src[i];
// Only values below lut_size take this branch; larger values are handled by
// code that is not shown here.
148 if (val < lut_size) {
// Scalar window/level loop: maps each src[i] linearly so that
// [center - width/2, center + width/2] spans 0..255, clamps, and stores a byte.
// NOTE(review): the signature is not visible in this excerpt; presumably
// apply_window_level_8bit_scalar — confirm.
181 const double min_val = params.center - params.width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here.
182 const double scale = 255.0 / params.width;
184 for (
size_t i = 0; i < pixel_count; ++i) {
185 double val = (src[i] - min_val) * scale;
186 val = std::clamp(val, 0.0, 255.0);
// NOTE(review): this cast truncates toward zero, whereas the LUT builders in
// this file use std::round — outputs can differ by one; confirm intent.
190 dst[i] =
static_cast<uint8_t
>(val);
// Scalar window/level loop: maps each src[i] linearly so that
// [center - width/2, center + width/2] spans 0..255, clamps, and stores a byte.
// NOTE(review): the signature is not visible in this excerpt; presumably
// apply_window_level_16bit_scalar — confirm.
200 const double min_val = params.center - params.width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here.
201 const double scale = 255.0 / params.width;
203 for (
size_t i = 0; i < pixel_count; ++i) {
204 double val = (src[i] - min_val) * scale;
205 val = std::clamp(val, 0.0, 255.0);
// NOTE(review): this cast truncates toward zero, whereas the LUT builders in
// this file use std::round — outputs can differ by one; confirm intent.
209 dst[i] =
static_cast<uint8_t
>(val);
// Scalar window/level loop for signed 16-bit input: maps each src[i] linearly
// so that [center - width/2, center + width/2] spans 0..255, clamps, and
// stores a byte.
// NOTE(review): the function name line is not visible in this excerpt;
// presumably apply_window_level_16bit_signed_scalar — confirm.
217 const int16_t* src, uint8_t* dst,
size_t pixel_count,
219 const double min_val = params.center - params.width / 2.0;
// NOTE(review): no guard for params.width == 0 is visible here.
220 const double scale = 255.0 / params.width;
222 for (
size_t i = 0; i < pixel_count; ++i) {
223 double val = (src[i] - min_val) * scale;
224 val = std::clamp(val, 0.0, 255.0);
// NOTE(review): this cast truncates toward zero, whereas the LUT builders in
// this file use std::round — outputs can differ by one; confirm intent.
228 dst[i] =
static_cast<uint8_t
>(val);
236#if defined(PACS_SIMD_SSE2)
// SSE2 window/level for 8-bit pixels, 16 per iteration, using 16-bit integer
// arithmetic with an 8.8 fixed-point scale.
// NOTE(review): parts of the signature, the declaration of i, the loop
// epilogue and the remainder handling are not visible in this excerpt.
244inline void apply_window_level_8bit_sse2(
const uint8_t* src, uint8_t* dst,
// Window lower bound and scale, both multiplied by 256 (8.8 fixed point).
248 const int32_t min_val_fp =
249 static_cast<int32_t
>((params.center - params.width / 2.0) * 256);
250 const int32_t scale_fp =
251 static_cast<int32_t
>((255.0 / params.width) * 256);
// Only the integer part of the lower bound is broadcast (fraction dropped).
253 const __m128i min_vec = _mm_set1_epi16(
static_cast<int16_t
>(min_val_fp >> 8));
// NOTE(review): scale_fp is narrowed to int16_t; values of 32768 or more
// (window width below roughly 2) will not fit — confirm widths are bounded.
254 const __m128i scale_vec = _mm_set1_epi16(
static_cast<int16_t
>(scale_fp));
255 const __m128i zero = _mm_setzero_si128();
256 const __m128i max_255 = _mm_set1_epi16(255);
257 const __m128i all_ones = _mm_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 16 not exceeding pixel_count.
259 const size_t simd_count = (pixel_count / 16) * 16;
262 for (; i < simd_count; i += 16) {
264 __m128i pixels = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(src + i));
// Zero-extend the low and high 8 bytes to eight 16-bit lanes each.
267 __m128i pixels_lo = _mm_unpacklo_epi8(pixels, zero);
269 __m128i pixels_hi = _mm_unpackhi_epi8(pixels, zero);
272 pixels_lo = _mm_sub_epi16(pixels_lo, min_vec);
273 pixels_hi = _mm_sub_epi16(pixels_hi, min_vec);
// NOTE(review): _mm_mulhi_epi16 keeps the HIGH 16 bits of the 32-bit product,
// i.e. (x * scale_fp) >> 16, while an 8.8 scale needs >> 8. No compensating
// shift is visible in this excerpt, so output may be 256x too small — verify
// against the scalar path.
276 pixels_lo = _mm_mulhi_epi16(pixels_lo, scale_vec);
277 pixels_hi = _mm_mulhi_epi16(pixels_hi, scale_vec);
// Clamp each lane to 0..255.
280 pixels_lo = _mm_max_epi16(_mm_min_epi16(pixels_lo, max_255), zero);
281 pixels_hi = _mm_max_epi16(_mm_min_epi16(pixels_hi, max_255), zero);
// Narrow both halves back to 16 bytes.
284 __m128i result = _mm_packus_epi16(pixels_lo, pixels_hi);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
288 result = _mm_xor_si128(result, all_ones);
291 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(dst + i), result);
// SSE2 window/level for unsigned 16-bit pixels, 8 per iteration, computed in
// single-precision float.
// NOTE(review): the pixel_count parameter line, the declaration of i, the loop
// epilogue and the remainder handling are not visible in this excerpt.
302inline void apply_window_level_16bit_sse2(
const uint16_t* src, uint8_t* dst,
304 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
306 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
307 const float scale = 255.0f /
static_cast<float>(params.width);
309 const __m128 min_vec = _mm_set1_ps(min_val);
310 const __m128 scale_vec = _mm_set1_ps(scale);
311 const __m128 zero_f = _mm_setzero_ps();
312 const __m128 max_255_f = _mm_set1_ps(255.0f);
313 const __m128i all_ones = _mm_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 8 not exceeding pixel_count.
315 const size_t simd_count = (pixel_count / 8) * 8;
318 for (; i < simd_count; i += 8) {
320 __m128i pixels = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(src + i));
// Zero-extend the eight 16-bit values to two vectors of four 32-bit ints.
323 __m128i lo = _mm_unpacklo_epi16(pixels, _mm_setzero_si128());
324 __m128i hi = _mm_unpackhi_epi16(pixels, _mm_setzero_si128());
326 __m128 lo_f = _mm_cvtepi32_ps(lo);
327 __m128 hi_f = _mm_cvtepi32_ps(hi);
// (pixel - min_val) * scale
330 lo_f = _mm_mul_ps(_mm_sub_ps(lo_f, min_vec), scale_vec);
331 hi_f = _mm_mul_ps(_mm_sub_ps(hi_f, min_vec), scale_vec);
// Clamp to 0..255.
334 lo_f = _mm_max_ps(_mm_min_ps(lo_f, max_255_f), zero_f);
335 hi_f = _mm_max_ps(_mm_min_ps(hi_f, max_255_f), zero_f);
// NOTE(review): _mm_cvtps_epi32 rounds using the current MXCSR mode (nearest
// by default), while the scalar loops in this file truncate — confirm whether
// the one-off differences are acceptable.
338 __m128i lo_i = _mm_cvtps_epi32(lo_f);
339 __m128i hi_i = _mm_cvtps_epi32(hi_f);
// Narrow 32 -> 16 -> 8 bits; the eight results end up in the low 8 bytes.
342 __m128i packed16 = _mm_packs_epi32(lo_i, hi_i);
343 __m128i packed8 = _mm_packus_epi16(packed16, packed16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
347 packed8 = _mm_xor_si128(packed8, all_ones);
// Store only the low 64 bits (8 output bytes).
351 _mm_storel_epi64(
reinterpret_cast<__m128i*
>(dst + i), packed8);
// SSE2 window/level for signed 16-bit pixels, 8 per iteration, computed in
// single-precision float.
// NOTE(review): the declaration of i, the loop epilogue and the remainder
// handling are not visible in this excerpt.
361inline void apply_window_level_16bit_signed_sse2(
362 const int16_t* src, uint8_t* dst,
size_t pixel_count,
363 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
364 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
365 const float scale = 255.0f /
static_cast<float>(params.width);
367 const __m128 min_vec = _mm_set1_ps(min_val);
368 const __m128 scale_vec = _mm_set1_ps(scale);
369 const __m128 zero_f = _mm_setzero_ps();
370 const __m128 max_255_f = _mm_set1_ps(255.0f);
371 const __m128i all_ones = _mm_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 8 not exceeding pixel_count.
373 const size_t simd_count = (pixel_count / 8) * 8;
376 for (; i < simd_count; i += 8) {
377 __m128i pixels = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(src + i));
// Sign-extend to 32 bits: duplicate each 16-bit value into both halves of a
// 32-bit lane, then arithmetic-shift right by 16.
380 __m128i lo = _mm_srai_epi32(_mm_unpacklo_epi16(pixels, pixels), 16);
381 __m128i hi = _mm_srai_epi32(_mm_unpackhi_epi16(pixels, pixels), 16);
383 __m128 lo_f = _mm_cvtepi32_ps(lo);
384 __m128 hi_f = _mm_cvtepi32_ps(hi);
// (pixel - min_val) * scale
386 lo_f = _mm_mul_ps(_mm_sub_ps(lo_f, min_vec), scale_vec);
387 hi_f = _mm_mul_ps(_mm_sub_ps(hi_f, min_vec), scale_vec);
// Clamp to 0..255.
389 lo_f = _mm_max_ps(_mm_min_ps(lo_f, max_255_f), zero_f);
390 hi_f = _mm_max_ps(_mm_min_ps(hi_f, max_255_f), zero_f);
// NOTE(review): rounds per MXCSR (nearest by default), unlike the truncating
// scalar loops in this file — confirm intent.
392 __m128i lo_i = _mm_cvtps_epi32(lo_f);
393 __m128i hi_i = _mm_cvtps_epi32(hi_f);
// Narrow 32 -> 16 -> 8 bits; the eight results end up in the low 8 bytes.
395 __m128i packed16 = _mm_packs_epi32(lo_i, hi_i);
396 __m128i packed8 = _mm_packus_epi16(packed16, packed16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
399 packed8 = _mm_xor_si128(packed8, all_ones);
// Store only the low 64 bits (8 output bytes).
402 _mm_storel_epi64(
reinterpret_cast<__m128i*
>(dst + i), packed8);
414#if defined(PACS_SIMD_AVX2)
// AVX2 window/level for 8-bit pixels, 32 per iteration, using 16-bit integer
// arithmetic with an 8.8 fixed-point scale. Leftover runs of at least 16
// pixels are handed to the SSE2 variant when it is compiled in.
// NOTE(review): the pixel_count parameter line, the declaration of i, the loop
// epilogue and the final remainder handling are not visible in this excerpt.
420inline void apply_window_level_8bit_avx2(
const uint8_t* src, uint8_t* dst,
422 const window_level_params& params)
noexcept {
// Window lower bound and scale, both multiplied by 256 (8.8 fixed point).
423 const int32_t min_val_fp =
424 static_cast<int32_t
>((params.center - params.width / 2.0) * 256);
425 const int32_t scale_fp =
426 static_cast<int32_t
>((255.0 / params.width) * 256);
// Only the integer part of the lower bound is broadcast (fraction dropped).
428 const __m256i min_vec = _mm256_set1_epi16(
static_cast<int16_t
>(min_val_fp >> 8));
// NOTE(review): scale_fp is narrowed to int16_t; values of 32768 or more
// (window width below roughly 2) will not fit — confirm widths are bounded.
429 const __m256i scale_vec = _mm256_set1_epi16(
static_cast<int16_t
>(scale_fp));
430 const __m256i zero = _mm256_setzero_si256();
431 const __m256i max_255 = _mm256_set1_epi16(255);
432 const __m256i all_ones = _mm256_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 32 not exceeding pixel_count.
434 const size_t simd_count = (pixel_count / 32) * 32;
437 for (; i < simd_count; i += 32) {
438 __m256i pixels = _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(src + i));
// Zero-extend bytes to 16-bit lanes. These unpacks work per 128-bit lane.
441 __m256i pixels_lo = _mm256_unpacklo_epi8(pixels, zero);
442 __m256i pixels_hi = _mm256_unpackhi_epi8(pixels, zero);
445 pixels_lo = _mm256_sub_epi16(pixels_lo, min_vec);
446 pixels_hi = _mm256_sub_epi16(pixels_hi, min_vec);
// NOTE(review): _mm256_mulhi_epi16 keeps the HIGH 16 bits of the product,
// i.e. (x * scale_fp) >> 16, while an 8.8 scale needs >> 8. No compensating
// shift is visible in this excerpt — verify against the scalar path.
448 pixels_lo = _mm256_mulhi_epi16(pixels_lo, scale_vec);
449 pixels_hi = _mm256_mulhi_epi16(pixels_hi, scale_vec);
// Clamp each lane to 0..255.
452 pixels_lo = _mm256_max_epi16(_mm256_min_epi16(pixels_lo, max_255), zero);
453 pixels_hi = _mm256_max_epi16(_mm256_min_epi16(pixels_hi, max_255), zero);
// Narrow back to bytes; this pack also works per 128-bit lane.
456 __m256i result = _mm256_packus_epi16(pixels_lo, pixels_hi);
// NOTE(review): the in-lane unpack followed by the in-lane pack already puts
// bytes 0..31 back in source order, so swapping the two middle 64-bit
// quarters (0xD8) appears to scramble the output to 0-7,16-23,8-15,24-31.
// Verify with a ramp-input test; the permute may need to be removed.
459 result = _mm256_permute4x64_epi64(result, 0xD8);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
462 result = _mm256_xor_si256(result, all_ones);
465 _mm256_storeu_si256(
reinterpret_cast<__m256i*
>(dst + i), result);
// Delegate the remaining pixels to the SSE2 variant when at least one full
// 16-pixel vector is left.
469#if defined(PACS_SIMD_SSE2)
470 if (pixel_count - i >= 16) {
471 apply_window_level_8bit_sse2(src + i, dst + i, pixel_count - i, params);
// AVX2 window/level for unsigned 16-bit pixels, 16 per iteration, computed in
// single-precision float. Leftover runs of at least 8 pixels are handed to the
// SSE2 variant when it is compiled in.
// NOTE(review): the pixel_count parameter line, the declaration of i, the loop
// epilogue and the final remainder handling are not visible in this excerpt.
483inline void apply_window_level_16bit_avx2(
const uint16_t* src, uint8_t* dst,
485 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
486 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
487 const float scale = 255.0f /
static_cast<float>(params.width);
489 const __m256 min_vec = _mm256_set1_ps(min_val);
490 const __m256 scale_vec = _mm256_set1_ps(scale);
491 const __m256 zero_f = _mm256_setzero_ps();
492 const __m256 max_255_f = _mm256_set1_ps(255.0f);
493 const __m128i all_ones_128 = _mm_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 16 not exceeding pixel_count.
495 const size_t simd_count = (pixel_count / 16) * 16;
498 for (; i < simd_count; i += 16) {
500 __m256i pixels = _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(src + i));
// Split into two 128-bit halves of eight pixels each.
503 __m128i lo_128 = _mm256_castsi256_si128(pixels);
504 __m128i hi_128 = _mm256_extracti128_si256(pixels, 1);
// Zero-extend each half to eight 32-bit ints.
506 __m256i lo_32 = _mm256_cvtepu16_epi32(lo_128);
507 __m256i hi_32 = _mm256_cvtepu16_epi32(hi_128);
509 __m256 lo_f = _mm256_cvtepi32_ps(lo_32);
510 __m256 hi_f = _mm256_cvtepi32_ps(hi_32);
// (pixel - min_val) * scale
513 lo_f = _mm256_mul_ps(_mm256_sub_ps(lo_f, min_vec), scale_vec);
514 hi_f = _mm256_mul_ps(_mm256_sub_ps(hi_f, min_vec), scale_vec);
// Clamp to 0..255.
517 lo_f = _mm256_max_ps(_mm256_min_ps(lo_f, max_255_f), zero_f);
518 hi_f = _mm256_max_ps(_mm256_min_ps(hi_f, max_255_f), zero_f);
// NOTE(review): rounds per MXCSR (nearest by default), unlike the truncating
// scalar loops in this file — confirm intent.
521 __m256i lo_i = _mm256_cvtps_epi32(lo_f);
522 __m256i hi_i = _mm256_cvtps_epi32(hi_f);
// The pack works per 128-bit lane and yields 0-3,8-11 | 4-7,12-15; swapping
// the middle 64-bit quarters (0xD8) restores sequential order.
525 __m256i packed16 = _mm256_packs_epi32(lo_i, hi_i);
526 packed16 = _mm256_permute4x64_epi64(packed16, 0xD8);
// Narrow the sixteen 16-bit values to sixteen bytes.
529 __m128i lo_16 = _mm256_castsi256_si128(packed16);
530 __m128i hi_16 = _mm256_extracti128_si256(packed16, 1);
531 __m128i packed8 = _mm_packus_epi16(lo_16, hi_16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
534 packed8 = _mm_xor_si128(packed8, all_ones_128);
537 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(dst + i), packed8);
// Delegate the remaining pixels to the SSE2 variant when at least one full
// 8-pixel vector is left.
541#if defined(PACS_SIMD_SSE2)
542 if (pixel_count - i >= 8) {
543 apply_window_level_16bit_sse2(src + i, dst + i, pixel_count - i, params);
// AVX2 window/level for signed 16-bit pixels, 16 per iteration, computed in
// single-precision float. Leftover runs of at least 8 pixels are handed to the
// SSE2 variant when it is compiled in.
// NOTE(review): the declaration of i, the loop epilogue and the final
// remainder handling are not visible in this excerpt.
554inline void apply_window_level_16bit_signed_avx2(
555 const int16_t* src, uint8_t* dst,
size_t pixel_count,
556 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
557 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
558 const float scale = 255.0f /
static_cast<float>(params.width);
560 const __m256 min_vec = _mm256_set1_ps(min_val);
561 const __m256 scale_vec = _mm256_set1_ps(scale);
562 const __m256 zero_f = _mm256_setzero_ps();
563 const __m256 max_255_f = _mm256_set1_ps(255.0f);
564 const __m128i all_ones_128 = _mm_set1_epi8(
static_cast<char>(0xFF));
// Largest multiple of 16 not exceeding pixel_count.
566 const size_t simd_count = (pixel_count / 16) * 16;
569 for (; i < simd_count; i += 16) {
570 __m256i pixels = _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(src + i));
// Split into two 128-bit halves of eight pixels each.
573 __m128i lo_128 = _mm256_castsi256_si128(pixels);
574 __m128i hi_128 = _mm256_extracti128_si256(pixels, 1);
// Sign-extend each half to eight 32-bit ints.
576 __m256i lo_32 = _mm256_cvtepi16_epi32(lo_128);
577 __m256i hi_32 = _mm256_cvtepi16_epi32(hi_128);
579 __m256 lo_f = _mm256_cvtepi32_ps(lo_32);
580 __m256 hi_f = _mm256_cvtepi32_ps(hi_32);
// (pixel - min_val) * scale
582 lo_f = _mm256_mul_ps(_mm256_sub_ps(lo_f, min_vec), scale_vec);
583 hi_f = _mm256_mul_ps(_mm256_sub_ps(hi_f, min_vec), scale_vec);
// Clamp to 0..255.
585 lo_f = _mm256_max_ps(_mm256_min_ps(lo_f, max_255_f), zero_f);
586 hi_f = _mm256_max_ps(_mm256_min_ps(hi_f, max_255_f), zero_f);
// NOTE(review): rounds per MXCSR (nearest by default), unlike the truncating
// scalar loops in this file — confirm intent.
588 __m256i lo_i = _mm256_cvtps_epi32(lo_f);
589 __m256i hi_i = _mm256_cvtps_epi32(hi_f);
// The pack works per 128-bit lane and yields 0-3,8-11 | 4-7,12-15; swapping
// the middle 64-bit quarters (0xD8) restores sequential order.
591 __m256i packed16 = _mm256_packs_epi32(lo_i, hi_i);
592 packed16 = _mm256_permute4x64_epi64(packed16, 0xD8);
// Narrow the sixteen 16-bit values to sixteen bytes.
594 __m128i lo_16 = _mm256_castsi256_si128(packed16);
595 __m128i hi_16 = _mm256_extracti128_si256(packed16, 1);
596 __m128i packed8 = _mm_packus_epi16(lo_16, hi_16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
599 packed8 = _mm_xor_si128(packed8, all_ones_128);
602 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(dst + i), packed8);
// Delegate the remaining pixels to the SSE2 variant when at least one full
// 8-pixel vector is left.
605#if defined(PACS_SIMD_SSE2)
606 if (pixel_count - i >= 8) {
607 apply_window_level_16bit_signed_sse2(src + i, dst + i, pixel_count - i, params);
621#if defined(PACS_SIMD_NEON)
// NEON window/level for 8-bit pixels, 16 per iteration, computed in
// single-precision float as four vectors of four lanes.
// NOTE(review): the pixel_count parameter line, the declaration of i, the loop
// epilogue and the remainder handling are not visible in this excerpt.
627inline void apply_window_level_8bit_neon(
const uint8_t* src, uint8_t* dst,
629 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
630 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
631 const float scale = 255.0f /
static_cast<float>(params.width);
633 const float32x4_t min_vec = vdupq_n_f32(min_val);
634 const float32x4_t scale_vec = vdupq_n_f32(scale);
635 const float32x4_t zero_f = vdupq_n_f32(0.0f);
636 const float32x4_t max_255_f = vdupq_n_f32(255.0f);
637 const uint8x16_t all_ones = vdupq_n_u8(0xFF);
// Largest multiple of 16 not exceeding pixel_count.
639 const size_t simd_count = (pixel_count / 16) * 16;
642 for (; i < simd_count; i += 16) {
643 uint8x16_t pixels = vld1q_u8(src + i);
// Widen 8 -> 16 -> 32 bits (zero-extending), giving four groups of four.
646 uint8x8_t lo8 = vget_low_u8(pixels);
647 uint8x8_t hi8 = vget_high_u8(pixels);
649 uint16x8_t lo16 = vmovl_u8(lo8);
650 uint16x8_t hi16 = vmovl_u8(hi8);
653 uint32x4_t p0 = vmovl_u16(vget_low_u16(lo16));
654 uint32x4_t p1 = vmovl_u16(vget_high_u16(lo16));
655 uint32x4_t p2 = vmovl_u16(vget_low_u16(hi16));
656 uint32x4_t p3 = vmovl_u16(vget_high_u16(hi16));
658 float32x4_t f0 = vcvtq_f32_u32(p0);
659 float32x4_t f1 = vcvtq_f32_u32(p1);
660 float32x4_t f2 = vcvtq_f32_u32(p2);
661 float32x4_t f3 = vcvtq_f32_u32(p3);
// (pixel - min_val) * scale
664 f0 = vmulq_f32(vsubq_f32(f0, min_vec), scale_vec);
665 f1 = vmulq_f32(vsubq_f32(f1, min_vec), scale_vec);
666 f2 = vmulq_f32(vsubq_f32(f2, min_vec), scale_vec);
667 f3 = vmulq_f32(vsubq_f32(f3, min_vec), scale_vec);
// Clamp to 0..255.
670 f0 = vmaxq_f32(vminq_f32(f0, max_255_f), zero_f);
671 f1 = vmaxq_f32(vminq_f32(f1, max_255_f), zero_f);
672 f2 = vmaxq_f32(vminq_f32(f2, max_255_f), zero_f);
673 f3 = vmaxq_f32(vminq_f32(f3, max_255_f), zero_f);
// NOTE(review): vcvtq_u32_f32 truncates toward zero, whereas the SSE/AVX
// float paths in this file convert with _mm*_cvtps_epi32 (round to nearest by
// default) — results may differ by one across platforms; confirm intent.
676 uint32x4_t i0 = vcvtq_u32_f32(f0);
677 uint32x4_t i1 = vcvtq_u32_f32(f1);
678 uint32x4_t i2 = vcvtq_u32_f32(f2);
679 uint32x4_t i3 = vcvtq_u32_f32(f3);
// Narrow 32 -> 16 -> 8 bits; values are already within 0..255.
682 uint16x4_t n0 = vmovn_u32(i0);
683 uint16x4_t n1 = vmovn_u32(i1);
684 uint16x4_t n2 = vmovn_u32(i2);
685 uint16x4_t n3 = vmovn_u32(i3);
687 uint16x8_t n_lo = vcombine_u16(n0, n1);
688 uint16x8_t n_hi = vcombine_u16(n2, n3);
690 uint8x8_t r_lo = vmovn_u16(n_lo);
691 uint8x8_t r_hi = vmovn_u16(n_hi);
693 uint8x16_t result = vcombine_u8(r_lo, r_hi);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
696 result = veorq_u8(result, all_ones);
699 vst1q_u8(dst + i, result);
// NEON window/level for unsigned 16-bit pixels, 8 per iteration, computed in
// single-precision float.
// NOTE(review): the pixel_count parameter line, the declaration of i, the loop
// epilogue and the remainder handling are not visible in this excerpt.
708inline void apply_window_level_16bit_neon(
const uint16_t* src, uint8_t* dst,
710 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
711 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
712 const float scale = 255.0f /
static_cast<float>(params.width);
714 const float32x4_t min_vec = vdupq_n_f32(min_val);
715 const float32x4_t scale_vec = vdupq_n_f32(scale);
716 const float32x4_t zero_f = vdupq_n_f32(0.0f);
717 const float32x4_t max_255_f = vdupq_n_f32(255.0f);
718 const uint8x8_t all_ones = vdup_n_u8(0xFF);
// Largest multiple of 8 not exceeding pixel_count.
720 const size_t simd_count = (pixel_count / 8) * 8;
723 for (; i < simd_count; i += 8) {
724 uint16x8_t pixels = vld1q_u16(src + i);
// Zero-extend to two vectors of four 32-bit values.
726 uint32x4_t lo32 = vmovl_u16(vget_low_u16(pixels));
727 uint32x4_t hi32 = vmovl_u16(vget_high_u16(pixels));
729 float32x4_t lo_f = vcvtq_f32_u32(lo32);
730 float32x4_t hi_f = vcvtq_f32_u32(hi32);
// (pixel - min_val) * scale
732 lo_f = vmulq_f32(vsubq_f32(lo_f, min_vec), scale_vec);
733 hi_f = vmulq_f32(vsubq_f32(hi_f, min_vec), scale_vec);
// Clamp to 0..255.
735 lo_f = vmaxq_f32(vminq_f32(lo_f, max_255_f), zero_f);
736 hi_f = vmaxq_f32(vminq_f32(hi_f, max_255_f), zero_f);
// NOTE(review): vcvtq_u32_f32 truncates toward zero, unlike the rounding
// conversion used by the SSE/AVX paths in this file — confirm intent.
738 uint32x4_t lo_i = vcvtq_u32_f32(lo_f);
739 uint32x4_t hi_i = vcvtq_u32_f32(hi_f);
// Narrow 32 -> 16 -> 8 bits; values are already within 0..255.
741 uint16x4_t lo16 = vmovn_u32(lo_i);
742 uint16x4_t hi16 = vmovn_u32(hi_i);
744 uint16x8_t packed16 = vcombine_u16(lo16, hi16);
745 uint8x8_t packed8 = vmovn_u16(packed16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
748 packed8 = veor_u8(packed8, all_ones);
751 vst1_u8(dst + i, packed8);
// NEON window/level for signed 16-bit pixels, 8 per iteration, computed in
// single-precision float.
// NOTE(review): the declaration of i, the loop epilogue and the remainder
// handling are not visible in this excerpt.
760inline void apply_window_level_16bit_signed_neon(
761 const int16_t* src, uint8_t* dst,
size_t pixel_count,
762 const window_level_params& params)
noexcept {
// Window lower bound and scale, narrowed from double to float.
763 const float min_val =
static_cast<float>(params.center - params.width / 2.0);
764 const float scale = 255.0f /
static_cast<float>(params.width);
766 const float32x4_t min_vec = vdupq_n_f32(min_val);
767 const float32x4_t scale_vec = vdupq_n_f32(scale);
768 const float32x4_t zero_f = vdupq_n_f32(0.0f);
769 const float32x4_t max_255_f = vdupq_n_f32(255.0f);
770 const uint8x8_t all_ones = vdup_n_u8(0xFF);
// Largest multiple of 8 not exceeding pixel_count.
772 const size_t simd_count = (pixel_count / 8) * 8;
775 for (; i < simd_count; i += 8) {
776 int16x8_t pixels = vld1q_s16(src + i);
// Sign-extend to two vectors of four 32-bit values.
778 int32x4_t lo32 = vmovl_s16(vget_low_s16(pixels));
779 int32x4_t hi32 = vmovl_s16(vget_high_s16(pixels));
781 float32x4_t lo_f = vcvtq_f32_s32(lo32);
782 float32x4_t hi_f = vcvtq_f32_s32(hi32);
// (pixel - min_val) * scale
784 lo_f = vmulq_f32(vsubq_f32(lo_f, min_vec), scale_vec);
785 hi_f = vmulq_f32(vsubq_f32(hi_f, min_vec), scale_vec);
// Clamp to 0..255, so the unsigned conversion below never sees a negative.
787 lo_f = vmaxq_f32(vminq_f32(lo_f, max_255_f), zero_f);
788 hi_f = vmaxq_f32(vminq_f32(hi_f, max_255_f), zero_f);
// NOTE(review): vcvtq_u32_f32 truncates toward zero, unlike the rounding
// conversion used by the SSE/AVX paths in this file — confirm intent.
790 uint32x4_t lo_i = vcvtq_u32_f32(lo_f);
791 uint32x4_t hi_i = vcvtq_u32_f32(hi_f);
// Narrow 32 -> 16 -> 8 bits; values are already within 0..255.
793 uint16x4_t lo16 = vmovn_u32(lo_i);
794 uint16x4_t hi16 = vmovn_u32(hi_i);
796 uint16x8_t packed16 = vcombine_u16(lo16, hi16);
797 uint8x8_t packed8 = vmovn_u16(packed16);
// XOR with 0xFF flips every bit, i.e. 255 - value.
// NOTE(review): presumably guarded by params.invert on a line not shown here.
800 packed8 = veor_u8(packed8, all_ones);
803 vst1_u8(dst + i, packed8);
// Dispatch fragment for 8-bit input: forwards to the AVX2, SSE2 or NEON
// implementation in detail::, each compiled only when its macro is defined.
// NOTE(review): the signature, any runtime feature checks, the #endif lines
// and the scalar fallback are not visible in this excerpt.
831#if defined(PACS_SIMD_AVX2)
833 detail::apply_window_level_8bit_avx2(src, dst, pixel_count, params);
837#if defined(PACS_SIMD_SSE2)
839 detail::apply_window_level_8bit_sse2(src, dst, pixel_count, params);
843#if defined(PACS_SIMD_NEON)
845 detail::apply_window_level_8bit_neon(src, dst, pixel_count, params);
// Dispatch fragment for unsigned 16-bit input: forwards to the AVX2, SSE2 or
// NEON implementation in detail::, each compiled only when its macro is
// defined.
// NOTE(review): the signature, any runtime feature checks, the #endif lines
// and the scalar fallback are not visible in this excerpt.
863#if defined(PACS_SIMD_AVX2)
865 detail::apply_window_level_16bit_avx2(src, dst, pixel_count, params);
869#if defined(PACS_SIMD_SSE2)
871 detail::apply_window_level_16bit_sse2(src, dst, pixel_count, params);
875#if defined(PACS_SIMD_NEON)
877 detail::apply_window_level_16bit_neon(src, dst, pixel_count, params);
// Dispatch fragment for signed 16-bit input: forwards to the AVX2, SSE2 or
// NEON implementation in detail::, each compiled only when its macro is
// defined.
// NOTE(review): the signature, any runtime feature checks, the #endif lines
// and the scalar fallback are not visible in this excerpt.
897#if defined(PACS_SIMD_AVX2)
899 detail::apply_window_level_16bit_signed_avx2(src, dst, pixel_count, params);
903#if defined(PACS_SIMD_SSE2)
905 detail::apply_window_level_16bit_signed_sse2(src, dst, pixel_count, params);
909#if defined(PACS_SIMD_NEON)
911 detail::apply_window_level_16bit_signed_neon(src, dst, pixel_count, params);
Precomputed LUT for fast repeated window/level application.
std::vector< uint8_t > lut_8bit_
void apply_16bit(const uint16_t *src, uint8_t *dst, size_t pixel_count) const noexcept
Apply LUT to 16-bit data (uses clamping for out-of-range values)
void apply_8bit(const uint8_t *src, uint8_t *dst, size_t pixel_count) const noexcept
Apply LUT to 8-bit data.
static window_level_lut create_8bit(const window_level_params &params)
Construct LUT for 8-bit input.
static window_level_lut create_16bit(const window_level_params &params)
Construct LUT for 16-bit input.
bool is_valid_16bit() const noexcept
std::vector< uint8_t > lut_16bit_
bool is_valid_8bit() const noexcept
static window_level_lut create_12bit(const window_level_params &params)
Construct LUT for 12-bit input.
void apply_window_level_16bit_scalar(const uint16_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Scalar 16-bit window/level application.
void apply_window_level_16bit_signed_scalar(const int16_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Scalar signed 16-bit window/level application.
void apply_window_level_8bit_scalar(const uint8_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Scalar 8-bit window/level application.
bool has_neon() noexcept
Check if NEON is available.
bool has_sse2() noexcept
Check if SSE2 is available.
void apply_window_level_16bit_signed(const int16_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Apply window/level transformation to 16-bit signed grayscale data.
bool has_avx2() noexcept
Check if AVX2 is available.
void apply_window_level_16bit(const uint16_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Apply window/level transformation to 16-bit unsigned grayscale data.
void apply_window_level_8bit(const uint8_t *src, uint8_t *dst, size_t pixel_count, const window_level_params &params) noexcept
Apply window/level transformation to 8-bit grayscale data.
SIMD configuration and CPU feature detection.
Platform-specific SIMD type definitions and wrappers.
constexpr window_level_params(double c=128.0, double w=256.0, bool inv=false) noexcept
bool invert
Invert output (for MONOCHROME1)
double width
Window width.
double center
Window center (level)