PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
explicit_vr_big_endian_codec.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11
15
16#include <cstring>
17
18namespace kcenon::pacs::encoding {
19
20namespace {
21
22// ============================================================================
23// DICOM Special Tags
24// ============================================================================
25
// Group number shared by the Item tag and both delimitation tags.
constexpr uint16_t ITEM_GROUP{0xFFFE};

// Element numbers that, paired with ITEM_GROUP, form the special tags.
constexpr uint16_t ITEM_TAG_ELEMENT{0xE000};    // Item
constexpr uint16_t ITEM_DELIM_ELEMENT{0xE00D};  // Item Delimitation Item
constexpr uint16_t SEQ_DELIM_ELEMENT{0xE0DD};   // Sequence Delimitation Item

// Length field value that marks an element or item as undefined-length.
constexpr uint32_t UNDEFINED_LENGTH{0xFFFFFFFF};
32
33constexpr bool is_sequence_delimiter(core::dicom_tag tag) {
34 return tag.group() == ITEM_GROUP && tag.element() == SEQ_DELIM_ELEMENT;
35}
36
37constexpr bool is_item_delimiter(core::dicom_tag tag) {
38 return tag.group() == ITEM_GROUP && tag.element() == ITEM_DELIM_ELEMENT;
39}
40
41constexpr bool is_item_tag(core::dicom_tag tag) {
42 return tag.group() == ITEM_GROUP && tag.element() == ITEM_TAG_ELEMENT;
43}
44
45constexpr bool is_special_tag(core::dicom_tag tag) {
46 return tag.group() == ITEM_GROUP;
47}
48
49// ============================================================================
50// VR Classification for Byte Swapping
51// ============================================================================
52
58constexpr bool requires_byte_swap(vr_type vr) {
59 switch (vr) {
60 // 16-bit numeric
61 case vr_type::US:
62 case vr_type::SS:
63 // 32-bit numeric
64 case vr_type::UL:
65 case vr_type::SL:
66 case vr_type::FL:
67 // 64-bit numeric
68 case vr_type::FD:
69 // Attribute tag (2x 16-bit)
70 case vr_type::AT:
71 // Bulk data with word/long ordering
72 case vr_type::OW:
73 case vr_type::OL:
74 case vr_type::OF:
75 case vr_type::OD:
76 return true;
77
78 default:
79 // String VRs and OB don't need byte swapping
80 return false;
81 }
82}
83
89constexpr size_t swap_unit_size(vr_type vr) {
90 switch (vr) {
91 case vr_type::US:
92 case vr_type::SS:
93 case vr_type::OW:
94 case vr_type::AT:
95 return 2;
96
97 case vr_type::UL:
98 case vr_type::SL:
99 case vr_type::FL:
100 case vr_type::OL:
101 case vr_type::OF:
102 return 4;
103
104 case vr_type::FD:
105 case vr_type::OD:
106 return 8;
107
108 default:
109 return 1; // No swapping
110 }
111}
112
113} // namespace
114
115// ============================================================================
116// Error Helpers
117// ============================================================================
118
119namespace {
120
121template <typename T>
122kcenon::pacs::Result<T> make_codec_error(int code, const std::string& message) {
123 return kcenon::pacs::pacs_error<T>(code, message);
124}
125
126} // namespace
127
128// ============================================================================
129// Byte Order Conversion
130// ============================================================================
131
133 vr_type vr, std::span<const uint8_t> data) {
134 if (!requires_byte_swap(vr) || data.empty()) {
135 return {data.begin(), data.end()};
136 }
137
138 switch (swap_unit_size(vr)) {
139 case 2:
140 return swap_ow_bytes(data);
141 case 4:
142 return swap_ol_bytes(data);
143 case 8:
144 return swap_od_bytes(data);
145 default:
146 return {data.begin(), data.end()};
147 }
148}
149
151 vr_type vr, std::span<const uint8_t> data) {
152 // Byte swapping is symmetric - same operation in both directions
153 return to_big_endian(vr, data);
154}
155
156// ============================================================================
157// Dataset Encoding
158// ============================================================================
159
161 const core::dicom_dataset& dataset) {
162 std::vector<uint8_t> buffer;
163 buffer.reserve(4096); // Initial capacity
164
165 for (const auto& [tag, element] : dataset) {
166 auto encoded = encode_element(element);
167 buffer.insert(buffer.end(), encoded.begin(), encoded.end());
168 }
169
170 return buffer;
171}
172
174 const core::dicom_element& element) {
175 std::vector<uint8_t> buffer;
176
177 // Write tag (4 bytes: group + element, big-endian)
178 write_be16(buffer, element.tag().group());
179 write_be16(buffer, element.tag().element());
180
181 // Write VR (2 ASCII characters)
182 auto vr_str = to_string(element.vr());
183 buffer.push_back(static_cast<uint8_t>(vr_str[0]));
184 buffer.push_back(static_cast<uint8_t>(vr_str[1]));
185
186 // Handle sequences specially
187 if (element.is_sequence()) {
188 encode_sequence(buffer, element);
189 return buffer;
190 }
191
192 // Get padded data and convert to big-endian
193 auto data = element.raw_data();
194 auto padded_data = pad_to_even(element.vr(), data);
195 auto be_data = to_big_endian(element.vr(), padded_data);
196 uint32_t length = static_cast<uint32_t>(be_data.size());
197
198 // Write length based on VR type
199 if (has_explicit_32bit_length(element.vr())) {
200 // Extended format: 2 reserved bytes + 4 byte length
201 write_be16(buffer, 0x0000); // Reserved
202 write_be32(buffer, length);
203 } else {
204 // Standard format: 2 byte length
205 write_be16(buffer, static_cast<uint16_t>(length));
206 }
207
208 // Write value (already byte-swapped)
209 buffer.insert(buffer.end(), be_data.begin(), be_data.end());
210
211 return buffer;
212}
213
215 std::vector<uint8_t>& buffer,
216 const core::dicom_element& element) {
217 // Write reserved bytes for SQ VR (uses 32-bit length)
218 write_be16(buffer, 0x0000);
219
220 // Write undefined length for sequence (big-endian)
221 write_be32(buffer, UNDEFINED_LENGTH);
222
223 // Encode each sequence item
224 const auto& items = element.sequence_items();
225 for (const auto& item : items) {
226 encode_sequence_item(buffer, item);
227 }
228
229 // Write sequence delimitation item (big-endian for tag and length)
230 write_be16(buffer, ITEM_GROUP);
231 write_be16(buffer, SEQ_DELIM_ELEMENT);
232 write_be32(buffer, 0); // Length is always 0 for delimiter
233}
234
236 std::vector<uint8_t>& buffer,
237 const core::dicom_dataset& item) {
238 // Write Item tag (big-endian)
239 write_be16(buffer, ITEM_GROUP);
240 write_be16(buffer, ITEM_TAG_ELEMENT);
241
242 // Encode item content first to determine length
243 auto item_content = encode(item);
244
245 // Write item length (big-endian)
246 write_be32(buffer, static_cast<uint32_t>(item_content.size()));
247
248 // Write item content
249 buffer.insert(buffer.end(), item_content.begin(), item_content.end());
250}
251
252// ============================================================================
253// Dataset Decoding
254// ============================================================================
255
257explicit_vr_big_endian_codec::decode(std::span<const uint8_t> data) {
258 core::dicom_dataset dataset;
259
260 while (!data.empty()) {
261 // Peek at tag to check for sequence delimiters
262 if (data.size() >= 4) {
263 uint16_t group = read_be16(data.data());
264 uint16_t elem = read_be16(data.data() + 2);
265 core::dicom_tag tag{group, elem};
266
267 // Stop at sequence delimiter
268 if (is_sequence_delimiter(tag)) {
269 break;
270 }
271 // Stop at item delimiter
272 if (is_item_delimiter(tag)) {
273 break;
274 }
275 }
276
277 auto result = decode_element(data);
278 if (!result.is_ok()) {
280 result.error().code,
281 result.error().message);
282 }
283
284 dataset.insert(std::move(result.value()));
285 }
286
287 return dataset;
288}
289
291explicit_vr_big_endian_codec::decode_element(std::span<const uint8_t>& data) {
292 // Need at least 8 bytes for standard format: tag (4) + VR (2) + length (2)
293 if (data.size() < 8) {
294 return make_codec_error<core::dicom_element>(
296 "Insufficient data to decode element");
297 }
298
299 // Read tag (big-endian)
300 uint16_t group = read_be16(data.data());
301 uint16_t elem = read_be16(data.data() + 2);
302 core::dicom_tag tag{group, elem};
303
304 // Special handling for sequence delimiter/item tags
305 if (is_special_tag(tag)) {
306 data = data.subspan(8);
307
308 // For delimiter tags, return a placeholder element
309 return core::dicom_element(tag, vr_type::UN);
310 }
311
312 // Read VR (2 ASCII characters)
313 char vr_chars[2];
314 vr_chars[0] = static_cast<char>(data[4]);
315 vr_chars[1] = static_cast<char>(data[5]);
316 std::string_view vr_str(vr_chars, 2);
317
318 auto vr_opt = from_string(vr_str);
319 if (!vr_opt) {
320 return make_codec_error<core::dicom_element>(
322 "Unknown VR type");
323 }
324 vr_type vr = *vr_opt;
325
326 uint32_t length;
327 size_t header_size;
328
329 // Determine length format based on VR
331 // Extended format: need 12 bytes total
332 if (data.size() < 12) {
333 return make_codec_error<core::dicom_element>(
335 "Insufficient data for extended VR format");
336 }
337 // Skip reserved 2 bytes, then read 4-byte length (big-endian)
338 length = read_be32(data.data() + 8);
339 header_size = 12;
340 } else {
341 // Standard format: 2-byte length (big-endian)
342 length = read_be16(data.data() + 6);
343 header_size = 8;
344 }
345
346 data = data.subspan(header_size);
347
348 // Handle undefined length (sequences and encapsulated data)
349 if (length == UNDEFINED_LENGTH) {
350 return decode_undefined_length(tag, vr, data);
351 }
352
353 // Check if we have enough data
354 if (data.size() < length) {
355 return make_codec_error<core::dicom_element>(
357 "Insufficient data for element value");
358 }
359
360 // Read value and convert from big-endian
361 auto value_data = data.subspan(0, length);
362 data = data.subspan(length);
363
364 // Convert from big-endian to native (little-endian)
365 auto le_data = from_big_endian(vr, value_data);
366
367 return core::dicom_element(tag, vr, le_data);
368}
369
373 std::span<const uint8_t>& data) {
374 // If this is a sequence (SQ), decode sequence items
375 if (vr == vr_type::SQ) {
376 core::dicom_element seq_element(tag, vr_type::SQ);
377
378 while (!data.empty()) {
379 // Check for sequence delimitation
380 if (data.size() < 8) {
381 return make_codec_error<core::dicom_element>(
383 "Insufficient data for sequence delimiter");
384 }
385
386 uint16_t item_group = read_be16(data.data());
387 uint16_t item_elem = read_be16(data.data() + 2);
388 core::dicom_tag item_tag{item_group, item_elem};
389
390 // Check for sequence delimitation item
391 if (is_sequence_delimiter(item_tag)) {
392 // Skip the delimiter (8 bytes: tag + length)
393 data = data.subspan(8);
394 break;
395 }
396
397 // Must be an Item tag
398 if (!is_item_tag(item_tag)) {
399 return make_codec_error<core::dicom_element>(
401 "Expected Item tag in sequence");
402 }
403
404 // Decode the sequence item
405 auto item_result = decode_sequence_item(data);
406 if (!item_result.is_ok()) {
407 return make_codec_error<core::dicom_element>(
408 item_result.error().code,
409 item_result.error().message);
410 }
411
412 seq_element.sequence_items().push_back(std::move(item_result.value()));
413 }
414
415 return seq_element;
416 }
417
418 // For other undefined-length elements (like encapsulated pixel data),
419 // read until we find a sequence delimitation item
420 std::vector<uint8_t> accumulated_data;
421
422 while (!data.empty()) {
423 if (data.size() < 8) {
424 return make_codec_error<core::dicom_element>(
426 "Insufficient data for encapsulated data");
427 }
428
429 uint16_t item_group = read_be16(data.data());
430 uint16_t item_elem = read_be16(data.data() + 2);
431 core::dicom_tag item_tag{item_group, item_elem};
432
433 // Check for sequence delimitation item
434 if (is_sequence_delimiter(item_tag)) {
435 data = data.subspan(8);
436 break;
437 }
438
439 // For encapsulated data, read item tag + length + data
440 if (is_item_tag(item_tag)) {
441 uint32_t item_length = read_be32(data.data() + 4);
442 data = data.subspan(8);
443
444 if (item_length != UNDEFINED_LENGTH && data.size() >= item_length) {
445 // Convert from big-endian
446 auto item_data = data.subspan(0, item_length);
447 auto le_item = from_big_endian(vr, item_data);
448 accumulated_data.insert(accumulated_data.end(),
449 le_item.begin(), le_item.end());
450 data = data.subspan(item_length);
451 }
452 } else {
453 return make_codec_error<core::dicom_element>(
455 "Invalid tag in encapsulated data");
456 }
457 }
458
459 return core::dicom_element(tag, vr, accumulated_data);
460}
461
464 std::span<const uint8_t>& data) {
465 // Read Item tag and length (big-endian)
466 if (data.size() < 8) {
467 return make_codec_error<core::dicom_dataset>(
469 "Insufficient data for sequence item");
470 }
471
472 uint16_t group = read_be16(data.data());
473 uint16_t elem = read_be16(data.data() + 2);
474 core::dicom_tag tag{group, elem};
475
476 if (!is_item_tag(tag)) {
477 return make_codec_error<core::dicom_dataset>(
479 "Expected Item tag for sequence item");
480 }
481
482 uint32_t item_length = read_be32(data.data() + 4);
483 data = data.subspan(8);
484
485 if (item_length == UNDEFINED_LENGTH) {
486 // Decode until Item Delimitation Item
488
489 while (!data.empty()) {
490 if (data.size() < 4) {
491 return make_codec_error<core::dicom_dataset>(
493 "Insufficient data for item delimiter check");
494 }
495
496 uint16_t elem_group = read_be16(data.data());
497 uint16_t elem_elem = read_be16(data.data() + 2);
498 core::dicom_tag elem_tag{elem_group, elem_elem};
499
500 if (is_item_delimiter(elem_tag)) {
501 // Skip delimiter
502 data = data.subspan(8);
503 break;
504 }
505
506 auto elem_result = decode_element(data);
507 if (!elem_result.is_ok()) {
508 return make_codec_error<core::dicom_dataset>(
509 elem_result.error().code,
510 elem_result.error().message);
511 }
512
513 item.insert(std::move(elem_result.value()));
514 }
515
516 return item;
517 }
518
519 // Explicit item length
520 if (data.size() < item_length) {
521 return make_codec_error<core::dicom_dataset>(
523 "Insufficient data for item content");
524 }
525
526 auto item_data = data.subspan(0, item_length);
527 data = data.subspan(item_length);
528
529 return decode(item_data);
530}
531
532} // namespace kcenon::pacs::encoding
Byte swapping utilities for endianness conversion.
void insert(dicom_element element)
Insert or replace an element in the dataset.
auto is_sequence() const noexcept -> bool
Check if this element is a sequence.
constexpr auto tag() const noexcept -> dicom_tag
Get the element's tag.
auto raw_data() const noexcept -> std::span< const uint8_t >
Get the raw data bytes.
constexpr auto vr() const noexcept -> encoding::vr_type
Get the element's VR.
auto sequence_items() -> std::vector< dicom_dataset > &
Get mutable access to sequence items.
static std::vector< uint8_t > from_big_endian(vr_type vr, std::span< const uint8_t > data)
Convert element value from big-endian to little-endian.
static void encode_sequence_item(std::vector< uint8_t > &buffer, const core::dicom_dataset &item)
static std::vector< uint8_t > to_big_endian(vr_type vr, std::span< const uint8_t > data)
Convert element value from little-endian to big-endian.
static result< core::dicom_dataset > decode(std::span< const uint8_t > data)
Decode bytes to a dataset using Explicit VR Big Endian.
static result< core::dicom_element > decode_undefined_length(core::dicom_tag tag, vr_type vr, std::span< const uint8_t > &data)
static result< core::dicom_element > decode_element(std::span< const uint8_t > &data)
Decode a single element from bytes.
static std::vector< uint8_t > encode_element(const core::dicom_element &element)
Encode a single element to bytes.
static void encode_sequence(std::vector< uint8_t > &buffer, const core::dicom_element &element)
static std::vector< uint8_t > encode(const core::dicom_dataset &dataset)
Encode a dataset to bytes using Explicit VR Big Endian.
static result< core::dicom_dataset > decode_sequence_item(std::span< const uint8_t > &data)
Encoder/decoder for Explicit VR Big Endian transfer syntax.
constexpr uint32_t read_be32(const uint8_t *data) noexcept
Reads a 32-bit value from big-endian bytes.
Definition byte_swap.h:96
std::vector< uint8_t > swap_od_bytes(std::span< const uint8_t > data)
Returns a byte-swapped copy of OD (Other Double) data; the input is not modified.
Definition byte_swap.h:212
vr_type
DICOM Value Representation (VR) types.
Definition vr_type.h:29
@ UN
Unknown (variable length)
@ SQ
Sequence of Items (undefined length)
@ UL
Unsigned Long (4 bytes)
@ SL
Signed Long (4 bytes)
@ US
Unsigned Short (2 bytes)
@ OD
Other Double (variable length)
@ OF
Other Float (variable length)
@ OW
Other Word (variable length)
@ FD
Floating Point Double (8 bytes)
@ FL
Floating Point Single (4 bytes)
@ OL
Other Long (variable length)
@ SS
Signed Short (2 bytes)
@ AT
Attribute Tag (4 bytes)
std::vector< uint8_t > swap_ow_bytes(std::span< const uint8_t > data)
Returns a byte-swapped copy of OW (Other Word) data; the input is not modified.
Definition byte_swap.h:170
constexpr std::optional< vr_type > from_string(std::string_view str) noexcept
Parses a two-character string to a vr_type.
Definition vr_type.h:132
std::vector< uint8_t > pad_to_even(vr_type vr, std::span< const uint8_t > data)
Pads data to even length as required by DICOM.
Definition vr_info.cpp:196
void write_be32(std::vector< uint8_t > &buffer, uint32_t value)
Writes a 32-bit value in big-endian byte order.
Definition byte_swap.h:134
void write_be16(std::vector< uint8_t > &buffer, uint16_t value)
Writes a 16-bit value in big-endian byte order.
Definition byte_swap.h:124
constexpr uint16_t read_be16(const uint8_t *data) noexcept
Reads a 16-bit value from big-endian bytes.
Definition byte_swap.h:86
constexpr bool has_explicit_32bit_length(vr_type vr) noexcept
Checks if a VR requires 32-bit length field in Explicit VR encoding.
Definition vr_type.h:235
constexpr std::string_view to_string(vr_type vr) noexcept
Converts a vr_type to its two-character string representation.
Definition vr_type.h:83
std::vector< uint8_t > swap_ol_bytes(std::span< const uint8_t > data)
Returns a byte-swapped copy of OL (Other Long) data; the input is not modified.
Definition byte_swap.h:185
constexpr int insufficient_data
Definition result.h:82
constexpr int invalid_sequence
Definition result.h:83
constexpr int unknown_vr
Definition result.h:84
Result< T > pacs_error(int code, const std::string &message, const std::string &details="")
Create a PACS error result with module context.
Definition result.h:234
std::string_view code
vr_encoding vr