42enum class bulk_data_mode {
52 std::filesystem::path input_path;
53 std::filesystem::path output_path;
54 bool pretty_print{
true};
56 bulk_data_mode bulk_mode{bulk_data_mode::exclude};
57 std::string bulk_data_uri_prefix{
"file://"};
58 std::filesystem::path bulk_data_dir;
59 std::vector<std::string> filter_tags;
61 bool recursive{
false};
62 bool include_meta{
true};
65 std::string charset{
"UTF-8"};
71constexpr char base64_chars[] =
72 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
73 "abcdefghijklmnopqrstuvwxyz"
81[[nodiscard]] std::string to_base64(std::span<const uint8_t> data) {
83 result.reserve(((data.size() + 2) / 3) * 4);
86 while (i < data.size()) {
87 uint32_t octet_a = i < data.size() ? data[i++] : 0;
88 uint32_t octet_b = i < data.size() ? data[i++] : 0;
89 uint32_t octet_c = i < data.size() ? data[i++] : 0;
91 uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c;
93 result += base64_chars[(triple >> 18) & 0x3F];
94 result += base64_chars[(triple >> 12) & 0x3F];
95 result += (i > data.size() + 1) ?
'=' : base64_chars[(triple >> 6) & 0x3F];
96 result += (i > data.size()) ?
'=' : base64_chars[triple & 0x3F];
107[[nodiscard]] std::string xml_escape(
const std::string& str) {
108 std::ostringstream oss;
109 for (
unsigned char c : str) {
127 if (c < 0x20 && c !=
'\t' && c !=
'\n' && c !=
'\r') {
128 oss <<
"&#" <<
static_cast<int>(c) <<
";";
143 std::ostringstream oss;
144 oss << std::hex << std::uppercase << std::setfill(
'0')
145 << std::setw(4) << tag.
group()
146 << std::setw(4) << tag.
element();
157 return vr == vr_type::OB ||
vr == vr_type::OD ||
vr == vr_type::OF ||
158 vr == vr_type::OL ||
vr == vr_type::OV ||
vr == vr_type::OW ||
168 return tag.
group() == 0x7FE0 && tag.
element() == 0x0010;
175void print_usage(
const char* program_name) {
177DICOM to XML Converter (DICOM Native XML PS3.19)
179Usage: )" << program_name
180 << R"( <dicom-file> [output-file] [options]
183 dicom-file Input DICOM file or directory
184 output-file Output XML file (optional, stdout if omitted)
187 -h, --help Show this help message
188 -p, --pretty Pretty-print formatting (default)
189 -c, --compact Compact output (no formatting)
190 --bulk-data <mode> Binary data handling:
191 inline - Include as Base64 (InlineBinary)
192 uri - Save to file, reference by URI
193 exclude - Completely exclude (default)
194 --bulk-data-uri <pfx> BulkDataURI prefix (default: file://)
195 --bulk-data-dir <dir> Directory for bulk data files
196 -t, --tag <tag> Output specific tag only (e.g., 0010,0010)
197 --no-pixel Exclude pixel data
198 --no-meta Exclude File Meta Information
199 --charset <charset> XML encoding declaration (default: UTF-8)
200 -r, --recursive Process directories recursively
201 -v, --verbose Verbose output
202 -q, --quiet Quiet mode (errors only)
208 << R"( image.dcm output.xml --pretty
210 << R"( image.dcm --bulk-data inline
212 << R"( image.dcm --bulk-data uri --bulk-data-dir ./bulk/
214 << R"( image.dcm -t 0010,0010 -t 0010,0020
216 << R"( ./dicom_folder/ --recursive --no-pixel
218Output Format (DICOM Native XML PS3.19):
219 <?xml version="1.0" encoding="UTF-8"?>
221 <DicomAttribute tag="00100010" vr="PN" keyword="PatientName">
224 <FamilyName>DOE</FamilyName>
225 <GivenName>JOHN</GivenName>
234 2 File error or invalid DICOM
243[[nodiscard]] std::optional<kcenon::pacs::core::dicom_tag> parse_tag_string(
244 const std::string& tag_str) {
245 std::string s = tag_str;
247 if (!s.empty() && s.front() ==
'(') s.erase(0, 1);
248 if (!s.empty() && s.back() ==
')') s.pop_back();
250 s.erase(std::remove(s.begin(), s.end(),
','), s.end());
252 if (s.length() != 8) {
257 uint16_t group =
static_cast<uint16_t
>(std::stoul(s.substr(0, 4),
nullptr, 16));
258 uint16_t elem =
static_cast<uint16_t
>(std::stoul(s.substr(4, 4),
nullptr, 16));
272bool parse_arguments(
int argc,
char* argv[], options& opts) {
277 for (
int i = 1; i < argc; ++i) {
278 std::string arg = argv[i];
280 if (arg ==
"--help" || arg ==
"-h") {
282 }
else if (arg ==
"--pretty" || arg ==
"-p") {
283 opts.pretty_print =
true;
284 opts.compact =
false;
285 }
else if (arg ==
"--compact" || arg ==
"-c") {
287 opts.pretty_print =
false;
288 }
else if (arg ==
"--bulk-data" && i + 1 < argc) {
289 std::string mode = argv[++i];
290 if (mode ==
"inline") {
291 opts.bulk_mode = bulk_data_mode::inline_base64;
292 }
else if (mode ==
"uri") {
293 opts.bulk_mode = bulk_data_mode::uri;
294 }
else if (mode ==
"exclude") {
295 opts.bulk_mode = bulk_data_mode::exclude;
297 std::cerr <<
"Error: Unknown bulk-data mode '" << mode
298 <<
"'. Use: inline, uri, exclude\n";
301 }
else if (arg ==
"--bulk-data-uri" && i + 1 < argc) {
302 opts.bulk_data_uri_prefix = argv[++i];
303 }
else if (arg ==
"--bulk-data-dir" && i + 1 < argc) {
304 opts.bulk_data_dir = argv[++i];
305 }
else if ((arg ==
"--tag" || arg ==
"-t") && i + 1 < argc) {
306 opts.filter_tags.push_back(argv[++i]);
307 }
else if (arg ==
"--no-pixel") {
308 opts.no_pixel =
true;
309 }
else if (arg ==
"--no-meta") {
310 opts.include_meta =
false;
311 }
else if (arg ==
"--charset" && i + 1 < argc) {
312 opts.charset = argv[++i];
313 }
else if (arg ==
"--recursive" || arg ==
"-r") {
314 opts.recursive =
true;
315 }
else if (arg ==
"--verbose" || arg ==
"-v") {
317 }
else if (arg ==
"--quiet" || arg ==
"-q") {
319 }
else if (arg[0] ==
'-') {
320 std::cerr <<
"Error: Unknown option '" << arg <<
"'\n";
322 }
else if (opts.input_path.empty()) {
323 opts.input_path = arg;
324 }
else if (opts.output_path.empty()) {
325 opts.output_path = arg;
327 std::cerr <<
"Error: Too many arguments\n";
332 if (opts.input_path.empty()) {
333 std::cerr <<
"Error: No input file specified\n";
338 opts.verbose =
false;
351 const options& opts) {
353 if (opts.no_pixel && is_pixel_data_tag(tag)) {
358 if (!opts.filter_tags.empty()) {
359 for (
const auto& filter : opts.filter_tags) {
360 auto parsed = parse_tag_string(filter);
361 if (parsed && *parsed == tag) {
372void write_dataset_xml(std::ostream& out,
375 const std::filesystem::path& base_path,
377 size_t& bulk_data_counter);
/**
 * @brief Split one Person Name component group on '^' into its five parts.
 *
 * The pieces are taken positionally as family name, given name, middle
 * name, name prefix and name suffix. Positions that are absent from the
 * input come back as empty strings; anything after the fifth piece is
 * ignored.
 *
 * @param pn_string A single component group (no '=' or '\\' separators
 *                  are interpreted here).
 * @return Tuple of (family, given, middle, prefix, suffix).
 */
[[nodiscard]] std::tuple<std::string, std::string, std::string, std::string, std::string>
parse_person_name_components(const std::string& pn_string) {
    constexpr size_t component_count = 5;

    std::vector<std::string> parts;
    parts.reserve(component_count);

    // Walk the string caret by caret, stopping once five pieces are
    // collected or the final (caret-less) piece has been taken.
    size_t start = 0;
    while (parts.size() < component_count) {
        const auto caret = pn_string.find('^', start);
        if (caret == std::string::npos) {
            parts.push_back(pn_string.substr(start));
            break;
        }
        parts.push_back(pn_string.substr(start, caret - start));
        start = caret + 1;
    }

    // Pad missing trailing components with empty strings.
    parts.resize(component_count);

    return {parts[0], parts[1], parts[2], parts[3], parts[4]};
}
411void write_element_value_xml(std::ostream& out,
415 const std::filesystem::path& base_path,
417 size_t& bulk_data_counter) {
421 const std::string indent = opts.compact ?
"" : std::string(indent_level * 2,
' ');
422 const std::string newline = opts.compact ?
"" :
"\n";
424 auto vr = element.
vr();
425 auto vr_str = to_string(
vr);
428 auto& dict = dicom_dictionary::instance();
429 auto info = dict.find(tag);
430 std::string keyword =
info ? std::string(
info->keyword) :
"";
433 std::ostringstream attr_oss;
434 attr_oss << indent <<
"<DicomAttribute tag=\"" << format_tag_key(tag)
435 <<
"\" vr=\"" << vr_str <<
"\"";
436 if (!keyword.empty()) {
437 attr_oss <<
" keyword=\"" << xml_escape(keyword) <<
"\"";
442 out << attr_oss.str() <<
">" << newline;
446 for (
const auto& item : items) {
447 out << indent <<
" <Item number=\"" << item_num++ <<
"\">" << newline;
448 write_dataset_xml(out, item, opts, base_path, indent_level + 2, bulk_data_counter);
449 out << indent <<
" </Item>" << newline;
452 out << indent <<
"</DicomAttribute>" << newline;
458 out << attr_oss.str() <<
"/>" << newline;
463 if (is_bulk_data_vr(
vr)) {
464 switch (opts.bulk_mode) {
465 case bulk_data_mode::inline_base64: {
467 out << attr_oss.str() <<
">" << newline;
468 out << indent <<
" <InlineBinary>" << to_base64(data) <<
"</InlineBinary>" << newline;
469 out << indent <<
"</DicomAttribute>" << newline;
472 case bulk_data_mode::uri: {
473 std::string filename =
"bulk_" + std::to_string(bulk_data_counter++) +
".raw";
474 std::filesystem::path bulk_path = opts.bulk_data_dir.empty()
475 ? base_path / filename
476 : opts.bulk_data_dir / filename;
479 std::ofstream bulk_file(bulk_path, std::ios::binary);
482 bulk_file.write(
reinterpret_cast<const char*
>(data.data()),
483 static_cast<std::streamsize
>(data.size()));
486 std::string uri = opts.bulk_data_uri_prefix + bulk_path.string();
487 out << attr_oss.str() <<
">" << newline;
488 out << indent <<
" <BulkData uri=\"" << xml_escape(uri) <<
"\"/>" << newline;
489 out << indent <<
"</DicomAttribute>" << newline;
492 case bulk_data_mode::exclude:
494 out << attr_oss.str() <<
"/>" << newline;
501 if (
vr == vr_type::PN) {
502 out << attr_oss.str() <<
">" << newline;
505 if (result.is_ok()) {
506 std::string value = result.value();
508 std::vector<std::string> names;
509 std::stringstream ss(value);
511 while (std::getline(ss,
name,
'\\')) {
512 names.push_back(
name);
516 for (
const auto& pn : names) {
518 std::vector<std::string> representations;
519 std::stringstream rep_ss(pn);
521 while (std::getline(rep_ss, rep,
'=')) {
522 representations.push_back(rep);
525 out << indent <<
" <PersonName number=\"" << pn_number++ <<
"\">" << newline;
528 if (!representations.empty() && !representations[0].empty()) {
529 auto [family, given, middle, prefix, suffix] =
530 parse_person_name_components(representations[0]);
531 out << indent <<
" <Alphabetic>" << newline;
533 out << indent <<
" <FamilyName>" << xml_escape(family) <<
"</FamilyName>" << newline;
535 out << indent <<
" <GivenName>" << xml_escape(given) <<
"</GivenName>" << newline;
537 out << indent <<
" <MiddleName>" << xml_escape(middle) <<
"</MiddleName>" << newline;
539 out << indent <<
" <NamePrefix>" << xml_escape(prefix) <<
"</NamePrefix>" << newline;
541 out << indent <<
" <NameSuffix>" << xml_escape(suffix) <<
"</NameSuffix>" << newline;
542 out << indent <<
" </Alphabetic>" << newline;
546 if (representations.size() > 1 && !representations[1].empty()) {
547 auto [family, given, middle, prefix, suffix] =
548 parse_person_name_components(representations[1]);
549 out << indent <<
" <Ideographic>" << newline;
551 out << indent <<
" <FamilyName>" << xml_escape(family) <<
"</FamilyName>" << newline;
553 out << indent <<
" <GivenName>" << xml_escape(given) <<
"</GivenName>" << newline;
555 out << indent <<
" <MiddleName>" << xml_escape(middle) <<
"</MiddleName>" << newline;
557 out << indent <<
" <NamePrefix>" << xml_escape(prefix) <<
"</NamePrefix>" << newline;
559 out << indent <<
" <NameSuffix>" << xml_escape(suffix) <<
"</NameSuffix>" << newline;
560 out << indent <<
" </Ideographic>" << newline;
564 if (representations.size() > 2 && !representations[2].empty()) {
565 auto [family, given, middle, prefix, suffix] =
566 parse_person_name_components(representations[2]);
567 out << indent <<
" <Phonetic>" << newline;
569 out << indent <<
" <FamilyName>" << xml_escape(family) <<
"</FamilyName>" << newline;
571 out << indent <<
" <GivenName>" << xml_escape(given) <<
"</GivenName>" << newline;
573 out << indent <<
" <MiddleName>" << xml_escape(middle) <<
"</MiddleName>" << newline;
575 out << indent <<
" <NamePrefix>" << xml_escape(prefix) <<
"</NamePrefix>" << newline;
577 out << indent <<
" <NameSuffix>" << xml_escape(suffix) <<
"</NameSuffix>" << newline;
578 out << indent <<
" </Phonetic>" << newline;
581 out << indent <<
" </PersonName>" << newline;
585 out << indent <<
"</DicomAttribute>" << newline;
591 out << attr_oss.str() <<
">" << newline;
594 if (result.is_ok()) {
595 std::string value = result.value();
597 std::vector<std::string> values;
598 std::stringstream ss(value);
600 while (std::getline(ss, v,
'\\')) {
605 for (
const auto& val : values) {
606 out << indent <<
" <Value number=\"" << val_number++ <<
"\">"
607 << xml_escape(val) <<
"</Value>" << newline;
611 out << indent <<
"</DicomAttribute>" << newline;
617 out << attr_oss.str() <<
">" << newline;
619 auto write_numeric_values = [&]<
typename T>() {
621 if (result.is_ok()) {
622 const auto& values = result.value();
624 for (
const auto& val : values) {
625 out << indent <<
" <Value number=\"" << val_number++ <<
"\">";
626 if constexpr (std::is_floating_point_v<T>) {
627 out << std::setprecision(17) << val;
629 out << static_cast<int64_t>(val);
631 out <<
"</Value>" << newline;
638 write_numeric_values.template operator()<uint16_t>();
641 write_numeric_values.template operator()<int16_t>();
644 write_numeric_values.template operator()<uint32_t>();
647 write_numeric_values.template operator()<int32_t>();
650 write_numeric_values.template operator()<
float>();
653 write_numeric_values.template operator()<
double>();
656 write_numeric_values.template operator()<uint64_t>();
659 write_numeric_values.template operator()<int64_t>();
663 if (
auto result = element.
as_string(); result.is_ok()) {
664 out << indent <<
" <Value number=\"1\">" << xml_escape(result.value())
665 <<
"</Value>" << newline;
670 out << indent <<
"</DicomAttribute>" << newline;
675 if (
vr == vr_type::AT) {
676 out << attr_oss.str() <<
">" << newline;
680 for (
size_t i = 0; i + 4 <= data.size(); i += 4) {
681 uint16_t group =
static_cast<uint16_t
>(data[i] | (data[i + 1] << 8));
682 uint16_t elem =
static_cast<uint16_t
>(data[i + 2] | (data[i + 3] << 8));
683 out << indent <<
" <Value number=\"" << val_number++ <<
"\">";
684 out << std::hex << std::uppercase << std::setfill(
'0')
685 << std::setw(4) << group << std::setw(4) << elem;
686 out <<
"</Value>" << newline << std::dec;
689 out << indent <<
"</DicomAttribute>" << newline;
694 out << attr_oss.str() <<
">" << newline;
695 if (
auto result = element.
as_string(); result.is_ok()) {
696 out << indent <<
" <Value number=\"1\">" << xml_escape(result.value())
697 <<
"</Value>" << newline;
699 out << indent <<
"</DicomAttribute>" << newline;
711void write_dataset_xml(std::ostream& out,
714 const std::filesystem::path& base_path,
716 size_t& bulk_data_counter) {
717 for (
const auto& [tag, element] : dataset) {
718 if (!should_include_tag(tag, opts)) {
722 write_element_value_xml(out, element, tag, opts, base_path,
723 indent_level, bulk_data_counter);
734int convert_file(
const std::filesystem::path& input_path,
735 std::ostream& output,
736 const options& opts) {
739 auto result = dicom_file::open(input_path);
740 if (result.is_err()) {
741 std::cerr <<
"Error: Failed to open '" << input_path.string()
742 <<
"': " << result.error().message <<
"\n";
746 auto& file = result.value();
747 size_t bulk_data_counter = 0;
748 std::filesystem::path base_path = input_path.parent_path();
750 const std::string newline = opts.compact ?
"" :
"\n";
753 output <<
"<?xml version=\"1.0\" encoding=\"" << opts.charset <<
"\"?>" << newline;
756 output <<
"<NativeDicomModel";
757 output <<
" xmlns=\"http://dicom.nema.org/PS3.19/models/NativeDICOM\"";
758 output <<
">" << newline;
761 if (opts.include_meta && !file.meta_information().empty()) {
762 write_dataset_xml(output, file.meta_information(), opts, base_path, 1, bulk_data_counter);
766 write_dataset_xml(output, file.dataset(), opts, base_path, 1, bulk_data_counter);
768 output <<
"</NativeDicomModel>" << newline;
779int process_directory(
const std::filesystem::path& dir_path,
const options& opts) {
782 auto process = [&](
const std::filesystem::path& file_path) {
783 auto ext = file_path.extension().string();
784 std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
785 if (ext !=
".dcm" && ext !=
".dicom" && !ext.empty()) {
790 std::filesystem::path output_path = file_path;
791 output_path.replace_extension(
".xml");
794 std::cout <<
"Converting: " << file_path.string() <<
" -> "
795 << output_path.string() <<
"\n";
798 std::ofstream output(output_path);
800 std::cerr <<
"Error: Cannot create output file: " << output_path.string() <<
"\n";
805 if (convert_file(file_path, output, opts) != 0) {
810 if (opts.recursive) {
811 for (
const auto& entry :
812 std::filesystem::recursive_directory_iterator(dir_path)) {
813 if (entry.is_regular_file()) {
814 process(entry.path());
818 for (
const auto& entry : std::filesystem::directory_iterator(dir_path)) {
819 if (entry.is_regular_file()) {
820 process(entry.path());
830int main(
int argc,
char* argv[]) {
833 if (!parse_arguments(argc, argv, opts)) {
835 ____ ____ __ __ _____ ___ __ ____ __ _
836 | _ \ / ___| \/ | |_ _|/ _ \ \ \/ / \/ | |
837 | | | | | | |\/| | | | | | | | \ /| |\/| | |
838 | |_| | |___| | | | | | | |_| | / \| | | | |___
839 |____/ \____|_| |_| |_| \___/ /_/\_\_| |_|_____|
841 DICOM to XML Converter (PS3.19)
843 print_usage(argv[0]);
848 if (!std::filesystem::exists(opts.input_path)) {
849 std::cerr <<
"Error: Path does not exist: " << opts.input_path.string() <<
"\n";
854 if (opts.bulk_mode == bulk_data_mode::uri && !opts.bulk_data_dir.empty()) {
855 std::filesystem::create_directories(opts.bulk_data_dir);
861 ____ ____ __ __ _____ ___ __ ____ __ _
862 | _ \ / ___| \/ | |_ _|/ _ \ \ \/ / \/ | |
863 | | | | | | |\/| | | | | | | | \ /| |\/| | |
864 | |_| | |___| | | | | | | |_| | / \| | | | |___
865 |____/ \____|_| |_| |_| \___/ /_/\_\_| |_|_____|
867 DICOM to XML Converter (PS3.19)
872 if (std::filesystem::is_directory(opts.input_path)) {
873 return process_directory(opts.input_path, opts);
876 if (opts.output_path.empty()) {
878 return convert_file(opts.input_path, std::cout, opts);
881 std::ofstream output(opts.output_path);
883 std::cerr <<
"Error: Cannot create output file: "
884 << opts.output_path.string() <<
"\n";
887 return convert_file(opts.input_path, output, opts);
auto is_sequence() const noexcept -> bool
Check if this element is a sequence.
auto raw_data() const noexcept -> std::span< const uint8_t >
Get the raw data bytes.
auto as_numeric_list() const -> kcenon::pacs::Result< std::vector< T > >
Get multi-valued numeric data as a list.
constexpr auto vr() const noexcept -> encoding::vr_type
Get the element's VR.
auto as_string() const -> kcenon::pacs::Result< std::string >
Get the value as a string.
auto sequence_items() -> std::vector< dicom_dataset > &
Get mutable access to sequence items.
auto is_empty() const noexcept -> bool
Check if the element has no value.
constexpr auto group() const noexcept -> uint16_t
Get the group number.
constexpr auto element() const noexcept -> uint16_t
Get the element number.
DICOM Data Dictionary for tag metadata lookup.
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
constexpr bool is_numeric_vr(vr_type vr) noexcept
Checks if a VR is a numeric type.
vr_type
DICOM Value Representation (VR) types.
constexpr bool is_string_vr(vr_type vr) noexcept
Checks if a VR is a string type.