/// XML document to convert; convert_file() loads it with read_file().
45 std::filesystem::path input_path;
/// Destination passed to dicom_file::save() in convert_file().
46 std::filesystem::path output_path;
/// Optional template DICOM file; when non-empty, convert_file() opens it with dicom_file::open().
47 std::filesystem::path template_path;
/// Base directory that read_bulk_data() prepends to relative BulkData URIs (ignored when empty).
48 std::filesystem::path bulk_data_dir;
/// Transfer Syntax UID set by -t/--transfer-syntax; convert_file() treats an empty string as "not specified".
49 std::string transfer_syntax;
/// Attribute name -> value pairs of this element; written by xml_parser::parse_element(), read by get_attr().
63 std::map<std::string, std::string> attributes;
/// Child elements in the order they were appended by the parser; scanned by has_child()/find_child()/find_children().
65 std::vector<xml_node> children;
67 [[nodiscard]]
bool has_child(
const std::string& child_name)
const {
68 for (
const auto& child : children) {
69 if (child.name == child_name)
return true;
74 [[nodiscard]]
const xml_node* find_child(
const std::string& child_name)
const {
75 for (
const auto& child : children) {
76 if (child.name == child_name)
return &child;
81 [[nodiscard]] std::vector<const xml_node*> find_children(
const std::string& child_name)
const {
82 std::vector<const xml_node*> result;
83 for (
const auto& child : children) {
84 if (child.name == child_name) {
85 result.push_back(&child);
91 [[nodiscard]] std::string get_attr(
const std::string& attr_name,
92 const std::string& default_value =
"")
const {
93 auto it = attributes.find(attr_name);
94 return it != attributes.end() ? it->second : default_value;
/// @brief Creates a parser over @p input with the cursor (pos_) at offset 0.
/// @param input XML text to parse.
/// @note The member input_ is declared as `const std::string&`, so the text is not copied:
///       @p input must stay alive for as long as this parser is used. Constructing the
///       parser from a temporary string leaves input_ dangling.
103 explicit xml_parser(
const std::string& input) : input_(input),
pos_(0) {}
107 skip_xml_declaration();
109 return parse_element();
/// Non-owning reference to the text being parsed (bound in the constructor); the referenced
/// string must outlive the parser.
113 const std::string& input_;
116 [[nodiscard]]
char peek()
const {
117 return pos_ < input_.size() ? input_[
pos_] :
'\0';
121 return pos_ < input_.size() ? input_[
pos_++] :
'\0';
124 void skip_whitespace() {
125 while (pos_ < input_.size() && std::isspace(
static_cast<unsigned char>(input_[pos_]))) {
130 void skip_xml_declaration() {
131 if (input_.substr(pos_, 5) ==
"<?xml") {
132 while (pos_ < input_.size() && input_.substr(pos_, 2) !=
"?>") {
135 if (pos_ < input_.size())
pos_ += 2;
139 void skip_comment() {
140 if (input_.substr(pos_, 4) ==
"<!--") {
142 while (pos_ < input_.size() && input_.substr(pos_, 3) !=
"-->") {
145 if (pos_ < input_.size())
pos_ += 3;
149 [[nodiscard]] std::string parse_name() {
151 while (pos_ < input_.size() &&
152 (std::isalnum(
static_cast<unsigned char>(input_[pos_])) ||
153 input_[pos_] ==
'_' || input_[pos_] ==
':' || input_[pos_] ==
'-')) {
156 return input_.substr(start, pos_ - start);
/// @brief Parses an attribute value, expanding entity references through parse_entity().
/// NOTE(review): the lines that initialise `quote` and `result`, the non-entity branch and
/// the return statement are not present in this listing — confirm against the full source.
159 [[nodiscard]] std::string parse_attribute_value() {
// Scan until the delimiter held in `quote`, or until peek() reports end of input ('\0').
163 while (peek() != quote && peek() !=
'\0') {
// NOTE(review): presumably guarded by a check for '&' on a line not shown here — confirm.
165 result += parse_entity();
174 [[nodiscard]] std::string parse_entity() {
177 while (peek() !=
';' && peek() !=
'\0') {
182 if (entity ==
"lt")
return "<";
183 if (entity ==
"gt")
return ">";
184 if (entity ==
"amp")
return "&";
185 if (entity ==
"quot")
return "\"";
186 if (entity ==
"apos")
return "'";
187 if (!entity.empty() && entity[0] ==
'#') {
190 if (entity.size() > 1 && entity[1] ==
'x') {
191 code = std::stoi(entity.substr(2),
nullptr, 16);
193 code = std::stoi(entity.substr(1));
195 return std::string(1,
static_cast<char>(
code));
197 return "&" + entity +
";";
200 [[nodiscard]] std::string parse_text() {
202 while (peek() !=
'<' && peek() !=
'\0') {
204 result += parse_entity();
210 size_t start = result.find_first_not_of(
" \t\n\r");
211 size_t end = result.find_last_not_of(
" \t\n\r");
212 if (start == std::string::npos)
return "";
213 return result.substr(start, end - start + 1);
/// @brief Parses one element at the cursor: name, attributes, then child elements and text
///        up to the closing tag. Recurses for nested elements.
/// @return The parsed node.
/// @throws std::runtime_error when '<' or '>' is not found where expected (messages below).
/// NOTE(review): several lines of this function are not present in this listing (comment
/// skipping bodies, the '<' / '>' checks, self-closing handling, loop headers) — the
/// comments here only describe what the visible lines do.
216 xml_node parse_element() {
// Leading comments in front of the element.
221 while (input_.substr(pos_, 4) ==
"<!--") {
227 throw std::runtime_error(
"Expected '<' at position " + std::to_string(pos_));
232 node.name = parse_name();
// Attribute list: runs until '>', '/' or end of input.
236 while (peek() !=
'>' && peek() !=
'/' && peek() !=
'\0') {
237 std::string attr_name = parse_name();
242 std::string attr_value = parse_attribute_value();
// operator[] assignment: a repeated attribute name overwrites the earlier value.
243 node.attributes[attr_name] = attr_value;
258 throw std::runtime_error(
"Expected '>' at position " + std::to_string(pos_));
// Comments between child nodes.
267 while (input_.substr(pos_, 4) ==
"<!--") {
// Content dispatch: closing tag, nested element, or character data.
272 if (input_.substr(pos_, 2) ==
"</") {
// NOTE(review): no comparison of end_name with node.name is visible in this listing — confirm
// whether mismatched closing tags are detected.
275 std::string end_name = parse_name();
277 if (peek() ==
'>')
get();
279 }
else if (peek() ==
'<') {
281 node.children.push_back(parse_element());
282 }
else if (peek() !=
'\0') {
// Each text run is trimmed by parse_text() and then concatenated, so text split by child
// elements is joined without the whitespace that separated the runs.
284 node.text += parse_text();
/// Maps a 7-bit ASCII code to its 6-bit Base64 value (standard alphabet: A-Z, a-z, 0-9, '+', '/').
/// Entries for characters outside the alphabet are -1. The table has exactly 128 entries,
/// so it must only be indexed with values below 128.
constexpr int8_t base64_decode_table[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

/// @brief Decodes Base64 text into raw bytes.
///
/// The decoder is a bit accumulator rather than a fixed four-character grouper, which makes
/// it tolerant of how XML text is usually laid out:
/// - whitespace (and any other character outside the alphabet) is skipped wherever it
///   occurs, including in the middle of a four-character group;
/// - a final group without '=' padding yields only the bytes that are fully present,
///   instead of extra zero-derived bytes;
/// - '=' ends the current group: leftover bits are discarded and decoding continues, so
///   concatenated padded payloads decode group by group.
///
/// @param input Base64 text.
/// @return The decoded bytes; empty for empty or alphabet-free input.
[[nodiscard]] std::vector<uint8_t> from_base64(const std::string& input) {
    std::vector<uint8_t> result;
    result.reserve((input.size() * 3) / 4);

    uint32_t accumulator = 0;  // only the low `pending_bits` bits are meaningful
    int pending_bits = 0;

    for (const char c : input) {
        if (c == '=') {
            // Padding closes the group; the bits still pending are filler, not data.
            accumulator = 0;
            pending_bits = 0;
            continue;
        }

        const auto index = static_cast<unsigned char>(c);
        if (index >= 128 || base64_decode_table[index] < 0) {
            continue;  // whitespace or a character outside the alphabet
        }

        accumulator = (accumulator << 6) | static_cast<uint32_t>(base64_decode_table[index]);
        pending_bits += 6;
        if (pending_bits >= 8) {
            pending_bits -= 8;
            result.push_back(static_cast<uint8_t>((accumulator >> pending_bits) & 0xFF));
        }
    }

    return result;
}
// Lookup table from the two-letter VR code to its vr_type enumerator. Codes that are not
// in the table resolve to vr_type::UN (see the find() at the end of this block).
// NOTE(review): the function header is not present in this listing, and the entries visible
// here do not include "UV" although the numeric-VR switch later in this file dispatches on
// vr_type::UV — confirm that "UV" is mapped on a line not shown here.
351 static const std::map<std::string, vr_type> vr_map = {
352 {
"AE", vr_type::AE}, {
"AS", vr_type::AS}, {
"AT", vr_type::AT},
353 {
"CS", vr_type::CS}, {
"DA", vr_type::DA}, {
"DS", vr_type::DS},
354 {
"DT", vr_type::DT}, {
"FL", vr_type::FL}, {
"FD", vr_type::FD},
355 {
"IS", vr_type::IS}, {
"LO", vr_type::LO}, {
"LT", vr_type::LT},
356 {
"OB", vr_type::OB}, {
"OD", vr_type::OD}, {
"OF", vr_type::OF},
357 {
"OL", vr_type::OL}, {
"OV", vr_type::OV}, {
"OW", vr_type::OW},
358 {
"PN", vr_type::PN}, {
"SH", vr_type::SH}, {
"SL", vr_type::SL},
359 {
"SQ", vr_type::SQ}, {
"SS", vr_type::SS}, {
"ST", vr_type::ST},
360 {
"SV", vr_type::SV}, {
"TM", vr_type::TM}, {
"UC", vr_type::UC},
361 {
"UI", vr_type::UI}, {
"UL", vr_type::UL}, {
"UN", vr_type::UN},
362 {
"UR", vr_type::UR}, {
"US", vr_type::US}, {
"UT", vr_type::UT},
// The lookup is an exact string match, so it is case-sensitive.
366 auto it = vr_map.find(vr_str);
367 return it != vr_map.end() ? it->second : vr_type::UN;
370[[nodiscard]] std::optional<kcenon::pacs::core::dicom_tag> parse_tag(
const std::string& tag_str) {
371 if (tag_str.length() != 8) {
376 uint16_t group =
static_cast<uint16_t
>(std::stoul(tag_str.substr(0, 4),
nullptr, 16));
377 uint16_t elem =
static_cast<uint16_t
>(std::stoul(tag_str.substr(4, 4),
nullptr, 16));
/// @brief Reads a whole file into a string.
///
/// The file is opened in binary mode, so line endings and NUL bytes are preserved as stored.
/// @param path File to read.
/// @return The complete file contents (empty for an empty file).
/// @throws std::runtime_error if the file cannot be opened.
[[nodiscard]] std::string read_file(const std::filesystem::path& path) {
    std::ifstream file(path, std::ios::binary);
    if (!file) {
        throw std::runtime_error("Cannot open file: " + path.string());
    }

    std::ostringstream oss;
    oss << file.rdbuf();
    return oss.str();
}
/// @brief Loads the bytes referenced by a BulkData URI.
///
/// A leading "file://" is stripped; the remainder (or the whole @p uri when there is no such
/// prefix) is used as a filesystem path. Relative paths are resolved against @p bulk_dir
/// when it is non-empty.
///
/// NOTE(security): the URI comes from the input document. Absolute paths are used as given
/// and ".." components are not rejected, so a document can name any file the process can
/// read. Restrict this in the caller if the XML is untrusted.
///
/// @param uri      BulkData URI or plain path.
/// @param bulk_dir Base directory for relative paths; ignored when empty.
/// @return The complete file contents.
/// @throws std::runtime_error if the file cannot be opened.
[[nodiscard]] std::vector<uint8_t> read_bulk_data(const std::string& uri,
                                                  const std::filesystem::path& bulk_dir) {
    static const std::string kFileScheme = "file://";

    const bool has_scheme = uri.compare(0, kFileScheme.size(), kFileScheme) == 0;
    std::filesystem::path file_path = has_scheme ? uri.substr(kFileScheme.size()) : uri;

    if (!file_path.is_absolute() && !bulk_dir.empty()) {
        file_path = bulk_dir / file_path;
    }

    std::ifstream file(file_path, std::ios::binary);
    if (!file) {
        throw std::runtime_error("Cannot open bulk data file: " + file_path.string());
    }

    return std::vector<uint8_t>(std::istreambuf_iterator<char>(file),
                                std::istreambuf_iterator<char>());
}
// Forward declaration: the element builder below calls parse_dataset() for each <Item> of a
// sequence (SQ) attribute, and parse_dataset() in turn builds elements, so the two recurse.
420void parse_dataset(
const xml_node& node,
422 const options& opts);
427[[nodiscard]] std::string build_person_name(
const xml_node& pn_node) {
430 auto build_component = [](
const xml_node* rep_node) -> std::string {
431 if (!rep_node)
return "";
433 std::string family, given, middle, prefix, suffix;
434 if (
auto* n = rep_node->find_child(
"FamilyName")) family = n->text;
435 if (
auto* n = rep_node->find_child(
"GivenName")) given = n->text;
436 if (
auto* n = rep_node->find_child(
"MiddleName")) middle = n->text;
437 if (
auto* n = rep_node->find_child(
"NamePrefix")) prefix = n->text;
438 if (
auto* n = rep_node->find_child(
"NameSuffix")) suffix = n->text;
440 std::string comp = family;
441 if (!given.empty() || !middle.empty() || !prefix.empty() || !suffix.empty()) {
444 if (!middle.empty() || !prefix.empty() || !suffix.empty()) {
445 comp +=
"^" + middle;
447 if (!prefix.empty() || !suffix.empty()) {
448 comp +=
"^" + prefix;
450 if (!suffix.empty()) {
451 comp +=
"^" + suffix;
457 std::string alphabetic = build_component(pn_node.find_child(
"Alphabetic"));
458 std::string ideographic = build_component(pn_node.find_child(
"Ideographic"));
459 std::string phonetic = build_component(pn_node.find_child(
"Phonetic"));
462 if (!ideographic.empty() || !phonetic.empty()) {
463 result +=
"=" + ideographic;
465 if (!phonetic.empty()) {
466 result +=
"=" + phonetic;
// Builds one dicom_element from a <DicomAttribute> node: sequences, inline/bulk binary,
// person names, and <Value> children are handled in that order.
// NOTE(review): the first line of this signature (return type, function name, tag parameter)
// and a number of interior lines are not present in this listing; comments below describe
// only what the visible lines do.
477 const xml_node& attr_node,
478 const options& opts) {
// The VR is taken from the "vr" attribute; a missing attribute is treated as "UN".
482 std::string vr_str = attr_node.get_attr(
"vr",
"UN");
483 auto vr = parse_vr(vr_str);
// Sequence: every <Item> child is parsed recursively into its own dataset.
486 if (
vr == vr_type::SQ) {
487 dicom_element elem{tag,
vr};
488 auto items = attr_node.find_children(
"Item");
489 for (
const auto* item_node : items) {
490 dicom_dataset item_dataset;
491 parse_dataset(*item_node, item_dataset, opts);
492 elem.sequence_items().push_back(std::move(item_dataset));
// <InlineBinary>: the element text is Base64-decoded.
// NOTE(review): `data` is a local vector and only a span over it is passed on — presumably
// dicom_element copies the bytes; confirm, otherwise the element would refer to freed memory.
498 if (
auto* inline_binary = attr_node.find_child(
"InlineBinary")) {
499 auto data = from_base64(inline_binary->text);
500 return dicom_element{tag,
vr, std::span<const uint8_t>(data)};
// <BulkData uri="...">: the bytes are read from the referenced file.
504 if (
auto* bulk_data = attr_node.find_child(
"BulkData")) {
505 std::string uri = bulk_data->get_attr(
"uri");
507 auto data = read_bulk_data(uri, opts.bulk_data_dir);
508 return dicom_element{tag,
vr, std::span<const uint8_t>(data)};
// PN: each <PersonName> child becomes one value; values are joined with a backslash.
513 if (
vr == vr_type::PN) {
514 auto pn_nodes = attr_node.find_children(
"PersonName");
515 if (!pn_nodes.empty()) {
516 std::string combined;
517 for (
size_t i = 0; i < pn_nodes.size(); ++i) {
518 if (i > 0) combined +=
"\\";
519 combined += build_person_name(*pn_nodes[i]);
521 return dicom_element::from_string(tag,
vr, combined);
// No <Value> children: an element with no value is returned.
526 auto value_nodes = attr_node.find_children(
"Value");
527 if (value_nodes.empty()) {
528 return dicom_element{tag,
vr};
// Values are ordered by their "number" attribute (treated as 1 when absent).
// std::sort is not stable, so values sharing a number have no guaranteed relative order.
532 std::vector<std::pair<int, std::string>> numbered_values;
533 for (
const auto* val_node : value_nodes) {
536 num = std::stoi(val_node->get_attr(
"number",
"1"));
538 numbered_values.emplace_back(num, val_node->text);
540 std::sort(numbered_values.begin(), numbered_values.end(),
541 [](
const auto& a,
const auto& b) { return a.first < b.first; });
// Values joined with a backslash and handed to from_string().
545 std::string combined;
546 for (
size_t i = 0; i < numbered_values.size(); ++i) {
547 if (i > 0) combined +=
"\\";
548 combined += numbered_values[i].second;
550 return dicom_element::from_string(tag,
vr, combined);
// Binary numeric encoding: each value is converted to T and its bytes appended to `data`.
555 std::vector<uint8_t> data;
557 auto write_values = [&]<
typename T>() {
558 for (
const auto& [num, val_str] : numbered_values) {
// NOTE(review): integral types are parsed with std::stoll, including uint64_t (used for
// vr_type::UV below). Values above INT64_MAX make std::stoll throw std::out_of_range, and
// out-of-range values for the narrower types are truncated by the static_cast — consider
// std::stoull for unsigned types plus a range check.
561 if constexpr (std::is_floating_point_v<T>) {
562 num_val =
static_cast<T
>(std::stod(val_str));
564 num_val =
static_cast<T
>(std::stoll(val_str));
// The object representation is copied as-is, i.e. in the byte order of the host.
567 const uint8_t* ptr =
reinterpret_cast<const uint8_t*
>(&num_val);
568 data.insert(data.end(), ptr, ptr +
sizeof(T));
573 case vr_type::US: write_values.template operator()<uint16_t>();
break;
574 case vr_type::SS: write_values.template operator()<int16_t>();
break;
575 case vr_type::UL: write_values.template operator()<uint32_t>();
break;
576 case vr_type::SL: write_values.template operator()<int32_t>();
break;
577 case vr_type::FL: write_values.template operator()<
float>();
break;
578 case vr_type::FD: write_values.template operator()<
double>();
break;
579 case vr_type::UV: write_values.template operator()<uint64_t>();
break;
580 case vr_type::SV: write_values.template operator()<int64_t>();
break;
584 return dicom_element{tag,
vr, std::span<const uint8_t>(data)};
// AT: each value is parsed as an 8-hex-digit tag and written as group then element,
// each 16-bit half low byte first.
588 if (
vr == vr_type::AT) {
589 std::vector<uint8_t> data;
590 for (
const auto& [num, val_str] : numbered_values) {
591 auto tag_opt = parse_tag(val_str);
593 uint16_t group = tag_opt->group();
594 uint16_t elem = tag_opt->element();
595 data.push_back(
static_cast<uint8_t
>(group & 0xFF));
596 data.push_back(
static_cast<uint8_t
>((group >> 8) & 0xFF));
597 data.push_back(
static_cast<uint8_t
>(elem & 0xFF));
598 data.push_back(
static_cast<uint8_t
>((elem >> 8) & 0xFF));
601 return dicom_element{tag,
vr, std::span<const uint8_t>(data)};
// Fallback: remaining VRs are stored as backslash-joined text (same join as above).
605 if (!numbered_values.empty()) {
606 std::string combined;
607 for (
size_t i = 0; i < numbered_values.size(); ++i) {
608 if (i > 0) combined +=
"\\";
609 combined += numbered_values[i].second;
611 return dicom_element::from_string(tag,
vr, combined);
614 return dicom_element{tag,
vr};
// Converts the <DicomAttribute> children of `node` into elements and inserts them into the
// dataset. Children with any other name are ignored. A child whose "tag" attribute does not
// parse, or whose element construction throws, produces a warning on std::cerr.
// NOTE(review): the dataset parameter line and the control-flow lines around the warnings
// are not present in this listing.
620void parse_dataset(
const xml_node& node,
622 const options& opts) {
623 for (
const auto& child : node.children) {
624 if (child.name !=
"DicomAttribute")
continue;
626 std::string tag_str = child.get_attr(
"tag");
627 auto tag_opt = parse_tag(tag_str);
630 std::cerr <<
"Warning: Invalid tag '" << tag_str <<
"', skipping\n";
636 auto element = create_element(*tag_opt, child, opts);
637 dataset.
insert(std::move(element));
638 }
catch (
const std::exception& e) {
640 std::cerr <<
"Warning: Failed to parse element " << tag_str
641 <<
": " << e.what() <<
"\n";
// Emits the help text: arguments, options, transfer-syntax UIDs, examples and a sample of
// the expected input format. `program_name` is streamed into the text (visible on the
// "Usage:" line). Everything below the signature is raw-string content, so it is left
// exactly as written.
// NOTE(review): the statement that opens the stream and the raw string is not present in
// this listing.
647void print_usage(
const char* program_name) {
649XML to DICOM Converter (DICOM Native XML PS3.19)
651Usage: )" << program_name
652 << R"( <xml-file> <output-dcm> [options]
655 xml-file Input XML file (DICOM Native XML PS3.19 format)
656 output-dcm Output DICOM file
659 -h, --help Show this help message
660 -t, --transfer-syntax Transfer Syntax UID (default: Explicit VR Little Endian)
661 --template <dcm> Template DICOM file (copies pixel data and missing tags)
662 --bulk-data-dir <dir> Directory for BulkData URI resolution
663 -v, --verbose Verbose output
664 -q, --quiet Quiet mode (errors only)
666Transfer Syntax Options:
667 1.2.840.10008.1.2 Implicit VR Little Endian
668 1.2.840.10008.1.2.1 Explicit VR Little Endian (default)
669 1.2.840.10008.1.2.2 Explicit VR Big Endian
673 << R"( metadata.xml output.dcm
675 << R"( metadata.xml output.dcm --template original.dcm
677 << R"( metadata.xml output.dcm --bulk-data-dir ./bulk/
679 << R"( metadata.xml output.dcm -t 1.2.840.10008.1.2
681Input Format (DICOM Native XML PS3.19):
682 <?xml version="1.0" encoding="UTF-8"?>
684 <DicomAttribute tag="00100010" vr="PN" keyword="PatientName">
687 <FamilyName>DOE</FamilyName>
688 <GivenName>JOHN</GivenName>
697 2 File error or invalid XML
// Fills `opts` from the command line. Options may appear anywhere; the first non-option
// argument becomes the input path and the second the output path. Errors are reported on
// std::cerr.
// NOTE(review): the return statements and the bodies of the --help/--verbose/--quiet
// branches are not present in this listing.
701bool parse_arguments(
int argc,
char* argv[], options& opts) {
706 for (
int i = 1; i < argc; ++i) {
707 std::string arg = argv[i];
709 if (arg ==
"--help" || arg ==
"-h") {
// Options that take a value only match when another argument follows (i + 1 < argc).
// When the value is missing, none of these branches match and the argument falls through
// to the "Unknown option" branch below, so the message names the option itself as unknown.
711 }
else if ((arg ==
"--transfer-syntax" || arg ==
"-t") && i + 1 < argc) {
712 opts.transfer_syntax = argv[++i];
713 }
else if (arg ==
"--template" && i + 1 < argc) {
714 opts.template_path = argv[++i];
715 }
else if (arg ==
"--bulk-data-dir" && i + 1 < argc) {
716 opts.bulk_data_dir = argv[++i];
717 }
else if (arg ==
"--verbose" || arg ==
"-v") {
719 }
else if (arg ==
"--quiet" || arg ==
"-q") {
// arg[0] on an empty std::string yields '\0', so an empty argument is treated as positional.
721 }
else if (arg[0] ==
'-') {
722 std::cerr <<
"Error: Unknown option '" << arg <<
"'\n";
724 }
else if (opts.input_path.empty()) {
725 opts.input_path = arg;
726 }
else if (opts.output_path.empty()) {
727 opts.output_path = arg;
729 std::cerr <<
"Error: Too many arguments\n";
// Both positional arguments are mandatory.
734 if (opts.input_path.empty()) {
735 std::cerr <<
"Error: No input file specified\n";
739 if (opts.output_path.empty()) {
740 std::cerr <<
"Error: No output file specified\n";
// NOTE(review): presumably inside a "quiet overrides verbose" condition on a line not shown — confirm.
745 opts.verbose =
false;
// Runs one conversion: read the XML file, parse it, build the dataset, optionally merge a
// template file, choose the transfer syntax, and save the result. Each failing stage prints
// an "Error: ..." line to std::cerr.
// NOTE(review): the return statements (exit codes) and several condition lines are not
// present in this listing.
751int convert_file(
const options& opts) {
// Stage 1: load the XML text.
756 std::string xml_content;
758 xml_content = read_file(opts.input_path);
759 }
catch (
const std::exception& e) {
760 std::cerr <<
"Error: " << e.what() <<
"\n";
// Stage 2: parse. xml_content is a named local that outlives `parser`, which matters
// because the parser only keeps a reference to its input.
766 xml_parser parser(xml_content);
767 root = parser.parse();
768 }
catch (
const std::exception& e) {
769 std::cerr <<
"Error: Failed to parse XML: " << e.what() <<
"\n";
// The document element must be named exactly "NativeDicomModel".
773 if (root.name !=
"NativeDicomModel") {
774 std::cerr <<
"Error: XML root element must be 'NativeDicomModel', got '"
775 << root.name <<
"'\n";
// Stage 3: convert the attributes into a dataset.
780 dicom_dataset dataset;
782 parse_dataset(root, dataset, opts);
783 }
catch (
const std::exception& e) {
784 std::cerr <<
"Error: Failed to create dataset: " << e.what() <<
"\n";
// Stage 4 (optional): open the template and copy over every element whose tag is not
// already present in the dataset built from the XML.
789 std::optional<dicom_file> template_file;
790 if (!opts.template_path.empty()) {
791 auto result = dicom_file::open(opts.template_path);
792 if (result.is_err()) {
793 std::cerr <<
"Error: Failed to open template file: "
794 << result.error().message <<
"\n";
797 template_file = std::move(result.value());
800 for (
const auto& [tag, element] : template_file->dataset()) {
801 if (dataset.get(tag) ==
nullptr) {
802 dataset.insert(element);
// Stage 5: transfer syntax. Starts from Explicit VR Little Endian; an explicit option is
// consulted first (unknown UIDs produce a warning), otherwise the template's syntax is used
// when a template was loaded.
808 transfer_syntax
ts = transfer_syntax::explicit_vr_little_endian;
809 if (!opts.transfer_syntax.empty()) {
814 std::cerr <<
"Warning: Unknown transfer syntax '" << opts.transfer_syntax
815 <<
"', using Explicit VR Little Endian\n";
817 }
else if (template_file) {
818 ts = template_file->transfer_syntax();
// Stage 6: create and save the output file.
822 auto file = dicom_file::create(dataset,
ts);
825 auto save_result = file.save(opts.output_path);
826 if (save_result.is_err()) {
827 std::cerr <<
"Error: Failed to save DICOM file: "
828 << save_result.error().message <<
"\n";
833 std::cout <<
"Successfully converted: " << opts.input_path.string()
834 <<
" -> " << opts.output_path.string() <<
"\n";
// Program entry point. Parses the command line; when that fails the banner and the usage
// text are shown. Otherwise it checks that the input file exists and hands over to
// convert_file(), whose result becomes the return value on that path.
// The banner lines below are raw-string content and are left exactly as written.
// NOTE(review): the stream statements around the banners, the `opts` declaration and the
// early return statements are not present in this listing.
842int main(
int argc,
char* argv[]) {
845 if (!parse_arguments(argc, argv, opts)) {
847 __ ____ __ _ _____ ___ ____ ____ __ __
848 \ \/ / \/ | | |_ _|/ _ \ | _ \ / ___| \/ |
849 \ /| |\/| | | | | | | | | | | | | | | |\/| |
850 / \| | | | |___ | | | |_| | | |_| | |___| | | |
851 /_/\_\_| |_|_____| |_| \___/ |____/ \____|_| |_|
853 XML to DICOM Converter (PS3.19)
855 print_usage(argv[0]);
860 if (!std::filesystem::exists(opts.input_path)) {
861 std::cerr <<
"Error: Input file does not exist: " << opts.input_path.string() <<
"\n";
868 __ ____ __ _ _____ ___ ____ ____ __ __
869 \ \/ / \/ | | |_ _|/ _ \ | _ \ / ___| \/ |
870 \ /| |\/| | | | | | | | | | | | | | | |\/| |
871 / \| | | | |___ | | | |_| | | |_| | |___| | | |
872 /_/\_\_| |_|_____| |_| \___/ |____/ \____|_| |_|
874 XML to DICOM Converter (PS3.19)
878 return convert_file(opts);
void insert(dicom_element element)
Insert or replace an element in the dataset.
DICOM Data Dictionary for tag metadata lookup.
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
std::optional< transfer_syntax > find_transfer_syntax(std::string_view uid)
Looks up a Transfer Syntax by its UID.
constexpr bool is_numeric_vr(vr_type vr) noexcept
Checks if a VR is a numeric type.
vr_type
DICOM Value Representation (VR) types.
constexpr bool is_string_vr(vr_type vr) noexcept
Checks if a VR is a string type.
@ get
C-GET retrieve request/response.
@ num
NUM - Numeric measurement.