42enum class bulk_data_mode {
52 std::filesystem::path input_path;
53 std::filesystem::path output_path;
54 bool pretty_print{
true};
56 bulk_data_mode bulk_mode{bulk_data_mode::exclude};
57 std::string bulk_data_uri_prefix{
"file://"};
58 std::filesystem::path bulk_data_dir;
59 std::vector<std::string> filter_tags;
61 bool recursive{
false};
62 bool include_meta{
true};
70constexpr char base64_chars[] =
71 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
72 "abcdefghijklmnopqrstuvwxyz"
80[[nodiscard]] std::string to_base64(std::span<const uint8_t> data) {
82 result.reserve(((data.size() + 2) / 3) * 4);
85 while (i < data.size()) {
86 uint32_t octet_a = i < data.size() ? data[i++] : 0;
87 uint32_t octet_b = i < data.size() ? data[i++] : 0;
88 uint32_t octet_c = i < data.size() ? data[i++] : 0;
90 uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c;
92 result += base64_chars[(triple >> 18) & 0x3F];
93 result += base64_chars[(triple >> 12) & 0x3F];
94 result += (i > data.size() + 1) ?
'=' : base64_chars[(triple >> 6) & 0x3F];
95 result += (i > data.size()) ?
'=' : base64_chars[triple & 0x3F];
106[[nodiscard]] std::string json_escape(
const std::string& str) {
107 std::ostringstream oss;
108 for (
unsigned char c : str) {
133 oss <<
"\\u" << std::hex << std::setfill(
'0') << std::setw(4)
134 <<
static_cast<int>(c);
149 std::ostringstream oss;
150 oss << std::hex << std::uppercase << std::setfill(
'0')
151 << std::setw(4) << tag.
group()
152 << std::setw(4) << tag.
element();
163 return vr == vr_type::OB ||
vr == vr_type::OD ||
vr == vr_type::OF ||
164 vr == vr_type::OL ||
vr == vr_type::OV ||
vr == vr_type::OW ||
174 return tag.
group() == 0x7FE0 && tag.
element() == 0x0010;
181void print_usage(
const char* program_name) {
183DICOM to JSON Converter (DICOM PS3.18)
185Usage: )" << program_name
186 << R"( <dicom-file> [output-file] [options]
189 dicom-file Input DICOM file or directory
190 output-file Output JSON file (optional, stdout if omitted)
193 -h, --help Show this help message
194 -p, --pretty Pretty-print formatting (default)
195 -c, --compact Compact output (no formatting)
196 --bulk-data <mode> Binary data handling:
197 inline - Include as Base64 (InlineBinary)
198 uri - Save to file, reference by URI
199 exclude - Completely exclude (default)
200 --bulk-data-uri <pfx> BulkDataURI prefix (default: file://)
201 --bulk-data-dir <dir> Directory for bulk data files
202 -t, --tag <tag> Output specific tag only (e.g., 0010,0010)
203 --no-pixel Exclude pixel data
204 --no-meta Exclude File Meta Information
205 -r, --recursive Process directories recursively
206 -v, --verbose Verbose output
207 -q, --quiet Quiet mode (errors only)
213 << R"( image.dcm output.json --pretty
215 << R"( image.dcm --bulk-data inline
217 << R"( image.dcm --bulk-data uri --bulk-data-dir ./bulk/
219 << R"( image.dcm -t 0010,0010 -t 0010,0020
221 << R"( ./dicom_folder/ --recursive --no-pixel
223Output Format (DICOM PS3.18 JSON):
227 "Value": [{"Alphabetic": "DOE^JOHN"}]
231 "Value": ["12345678"]
238 2 File error or invalid DICOM
// Parses a textual DICOM tag such as "0010,0010" or "(0010,0010)".
//
// One leading '(' and one trailing ')' are dropped, every ',' is removed, and
// the remainder must be exactly 8 characters: the first four are read as the
// hexadecimal group number and the last four as the element number.
//
// The lines that construct the returned tag and that handle the failure
// paths are not visible in this listing.
247[[nodiscard]] std::optional<kcenon::pacs::core::dicom_tag> parse_tag_string(
248 const std::string& tag_str) {
// Work on a copy so the caller's string is left untouched.
249 std::string s = tag_str;
// Strip optional surrounding parentheses (each side independently).
251 if (!s.empty() && s.front() ==
'(') s.erase(0, 1);
252 if (!s.empty() && s.back() ==
')') s.pop_back();
// Remove the group/element separator (all commas, wherever they occur).
254 s.erase(std::remove(s.begin(), s.end(),
','), s.end());
// Anything other than 8 remaining characters is handled by this branch.
256 if (s.length() != 8) {
// NOTE(review): std::stoul with base 16 skips leading whitespace, accepts a
// sign and a "0x" prefix, and stops at the first non-hex character without
// signalling an error as long as one digit was consumed. Inputs such as
// "00zz0010" therefore appear to be accepted as valid tags - confirm, and
// consider checking every character with std::isxdigit first.
// NOTE(review): std::stoul throws when no digits can be read; whether that
// is caught is decided on lines not visible here.
261 uint16_t group =
static_cast<uint16_t
>(std::stoul(s.substr(0, 4),
nullptr, 16));
262 uint16_t elem =
static_cast<uint16_t
>(std::stoul(s.substr(4, 4),
nullptr, 16));
// Fills `opts` from the command line.
//
// Walks argv[1] .. argv[argc - 1]. Tokens matching a known option update the
// corresponding field of `opts`; the first two tokens that do not start with
// '-' become input_path and output_path in that order. Problems are reported
// on std::cerr ("Unknown bulk-data mode", "Unknown option", "Too many
// arguments", "No input file specified").
//
// Returns bool; the individual return statements are on lines not visible in
// this listing.
276bool parse_arguments(
int argc,
char* argv[], options& opts) {
281 for (
int i = 1; i < argc; ++i) {
282 std::string arg = argv[i];
284 if (arg ==
"--help" || arg ==
"-h") {
286 }
else if (arg ==
"--pretty" || arg ==
"-p") {
// Pretty and compact are kept as two separate flags that are updated together.
287 opts.pretty_print =
true;
288 opts.compact =
false;
289 }
else if (arg ==
"--compact" || arg ==
"-c") {
291 opts.pretty_print =
false;
292 }
// NOTE(review): options that take a value are only matched when a following
// argument exists (i + 1 < argc). When the value is missing the chain falls
// through to the generic dash-prefixed branch below, so the user sees
// "Unknown option" rather than a "missing value" message - confirm whether
// that is intended.
else if (arg ==
"--bulk-data" && i + 1 < argc) {
// Consumes the next argument as the mode name.
293 std::string mode = argv[++i];
294 if (mode ==
"inline") {
295 opts.bulk_mode = bulk_data_mode::inline_base64;
296 }
else if (mode ==
"uri") {
297 opts.bulk_mode = bulk_data_mode::uri;
298 }
else if (mode ==
"exclude") {
299 opts.bulk_mode = bulk_data_mode::exclude;
301 std::cerr <<
"Error: Unknown bulk-data mode '" << mode
302 <<
"'. Use: inline, uri, exclude\n";
305 }
else if (arg ==
"--bulk-data-uri" && i + 1 < argc) {
306 opts.bulk_data_uri_prefix = argv[++i];
307 }
else if (arg ==
"--bulk-data-dir" && i + 1 < argc) {
308 opts.bulk_data_dir = argv[++i];
309 }
else if ((arg ==
"--tag" || arg ==
"-t") && i + 1 < argc) {
// May be given repeatedly; the strings are stored unparsed.
310 opts.filter_tags.push_back(argv[++i]);
311 }
else if (arg ==
"--no-pixel") {
312 opts.no_pixel =
true;
313 }
else if (arg ==
"--no-meta") {
314 opts.include_meta =
false;
315 }
else if (arg ==
"--recursive" || arg ==
"-r") {
316 opts.recursive =
true;
317 }
else if (arg ==
"--verbose" || arg ==
"-v") {
319 }
else if (arg ==
"--quiet" || arg ==
"-q") {
321 }
// Any other token starting with '-' is rejected as an unknown option.
else if (arg[0] ==
'-') {
322 std::cerr <<
"Error: Unknown option '" << arg <<
"'\n";
324 }
// Positional arguments: first the input path, then the output path.
else if (opts.input_path.empty()) {
325 opts.input_path = arg;
326 }
else if (opts.output_path.empty()) {
327 opts.output_path = arg;
329 std::cerr <<
"Error: Too many arguments\n";
// An input path is mandatory.
334 if (opts.input_path.empty()) {
335 std::cerr <<
"Error: No input file specified\n";
// NOTE(review): the condition guarding this reset is not visible in this
// listing - presumably it applies when quiet mode was requested; confirm.
340 opts.verbose =
false;
353 const options& opts) {
355 if (opts.no_pixel && is_pixel_data_tag(tag)) {
360 if (!opts.filter_tags.empty()) {
361 for (
const auto& filter : opts.filter_tags) {
362 auto parsed = parse_tag_string(filter);
363 if (parsed && *parsed == tag) {
374void write_dataset_json(std::ostream& out,
377 const std::filesystem::path& base_path,
379 size_t& bulk_data_counter);
// Writes one data element to `out` as a JSON object that always carries a
// "vr" member and, depending on the branch taken, a "Value", "InlineBinary"
// or "BulkDataURI" member.
//
// `base_path` is the fallback directory for bulk-data files and
// `bulk_data_counter` (taken by reference) is incremented each time a
// bulk-data file name is generated.
//
// This listing omits several lines, including part of the parameter list and
// most of the conditions that select between the branches below; comments on
// those branches describe only what the visible statements do.
390void write_element_value_json(std::ostream& out,
393 const std::filesystem::path& base_path,
395 size_t& bulk_data_counter) {
// Formatting pieces; all three collapse to empty strings in compact mode.
398 const std::string indent = opts.compact ?
"" : std::string(indent_level * 2,
' ');
399 const std::string newline = opts.compact ?
"" :
"\n";
400 const std::string space = opts.compact ?
"" :
" ";
402 auto vr = element.
vr();
403 auto vr_str = to_string(
vr);
// Sequence output ("SQ"): when there are items, each one is written through
// write_dataset_json two indent levels deeper, separated inside a "Value"
// array.
407 out <<
"{" << newline;
408 out << indent <<
" \"vr\":" << space <<
"\"SQ\"";
411 if (!items.empty()) {
412 out <<
"," << newline << indent <<
" \"Value\":" << space <<
"[" << newline;
413 for (
size_t i = 0; i < items.size(); ++i) {
414 out << indent <<
" ";
415 write_dataset_json(out, items[i], opts, base_path,
416 indent_level + 2, bulk_data_counter);
// True for every item except the last one.
417 if (i < items.size() - 1) {
422 out << indent <<
" ]" << newline;
426 out << indent <<
"}";
// Object that carries only "vr" and no value member.
// NOTE(review): the condition selecting this branch is not visible here -
// presumably an element without a value; confirm.
432 out <<
"{" << newline;
433 out << indent <<
" \"vr\":" << space <<
"\"" << vr_str <<
"\"" << newline;
434 out << indent <<
"}";
// Binary VRs (as decided by is_bulk_data_vr): the representation depends on
// opts.bulk_mode.
439 if (is_bulk_data_vr(
vr)) {
440 out <<
"{" << newline;
441 out << indent <<
" \"vr\":" << space <<
"\"" << vr_str <<
"\"";
443 switch (opts.bulk_mode) {
// inline: the bytes are Base64-encoded into "InlineBinary".
444 case bulk_data_mode::inline_base64: {
446 out <<
"," << newline << indent <<
" \"InlineBinary\":" << space
447 <<
"\"" << to_base64(data) <<
"\"" << newline;
// uri: the bytes are written to a file named bulk_<counter>.raw and the JSON
// references that file through "BulkDataURI".
450 case bulk_data_mode::uri: {
452 std::string filename =
"bulk_" + std::to_string(bulk_data_counter++) +
".raw";
// The file goes into opts.bulk_data_dir when that is set, else base_path.
453 std::filesystem::path bulk_path = opts.bulk_data_dir.empty()
454 ? base_path / filename
455 : opts.bulk_data_dir / filename;
458 std::ofstream bulk_file(bulk_path, std::ios::binary);
461 bulk_file.write(
reinterpret_cast<const char*
>(data.data()),
462 static_cast<std::streamsize
>(data.size()));
// NOTE(review): the URI is the prefix followed by the native path string;
// no percent-encoding or conversion to an absolute path is visible, so a
// relative or Windows-style path may not yield a valid URI - confirm.
465 std::string uri = opts.bulk_data_uri_prefix + bulk_path.string();
466 out <<
"," << newline << indent <<
" \"BulkDataURI\":" << space
467 <<
"\"" << json_escape(uri) <<
"\"" << newline;
// exclude: no value member is written in the visible lines.
470 case bulk_data_mode::exclude:
475 out << indent <<
"}";
// Person Name: the string value is split on '\' and every part becomes an
// object with a single "Alphabetic" member.
// NOTE(review): each part is emitted whole under "Alphabetic"; no handling
// of '=' separated component groups is visible - confirm against the
// DICOM JSON model for PN.
480 if (
vr == vr_type::PN) {
481 out <<
"{" << newline;
482 out << indent <<
" \"vr\":" << space <<
"\"PN\"," << newline;
483 out << indent <<
" \"Value\":" << space <<
"[" << newline;
486 if (result.is_ok()) {
487 std::string value = result.value();
489 std::vector<std::string> names;
490 std::stringstream ss(value);
492 while (std::getline(ss,
name,
'\\')) {
493 names.push_back(
name);
496 for (
size_t i = 0; i < names.size(); ++i) {
497 out << indent <<
" {" << newline;
498 out << indent <<
" \"Alphabetic\":" << space
499 <<
"\"" << json_escape(names[i]) <<
"\"" << newline;
500 out << indent <<
" }";
501 if (i < names.size() - 1) {
508 out << indent <<
" ]" << newline;
509 out << indent <<
"}";
// Multi-valued text output: the string value is split on '\' and written as
// a "Value" array of escaped JSON strings.
// NOTE(review): the branch condition and the body of the split loop are not
// visible here - presumably is_string_vr(vr) and a push_back; confirm.
515 out <<
"{" << newline;
516 out << indent <<
" \"vr\":" << space <<
"\"" << vr_str <<
"\"," << newline;
517 out << indent <<
" \"Value\":" << space <<
"[";
520 if (result.is_ok()) {
521 std::string value = result.value();
523 std::vector<std::string> values;
524 std::stringstream ss(value);
526 while (std::getline(ss, v,
'\\')) {
530 for (
size_t i = 0; i < values.size(); ++i) {
531 if (i > 0) out <<
",";
532 out <<
"\"" << json_escape(values[i]) <<
"\"";
536 out <<
"]" << newline;
537 out << indent <<
"}";
// Numeric output: "Value" is an array of JSON numbers.
543 out <<
"{" << newline;
544 out << indent <<
" \"vr\":" << space <<
"\"" << vr_str <<
"\"," << newline;
545 out << indent <<
" \"Value\":" << space <<
"[";
// Templated lambda: writes the values obtained for element type T as a
// comma-separated list. Nothing is written when the result is not ok.
547 auto write_numeric_values = [&]<
typename T>() {
549 if (result.is_ok()) {
550 const auto& values = result.value();
551 for (
size_t i = 0; i < values.size(); ++i) {
552 if (i > 0) out <<
",";
553 if constexpr (std::is_floating_point_v<T>) {
// NOTE(review): setprecision(17) stays in effect on `out` after this call,
// and non-finite values would be printed as inf/nan, which is not valid
// JSON - confirm.
554 out << std::setprecision(17) << values[i];
// NOTE(review): every integer is converted to int64_t before printing, so
// uint64_t values above INT64_MAX (see the uint64_t instantiation below)
// would be printed as negative numbers - confirm.
556 out << static_cast<int64_t>(values[i]);
// One instantiation per numeric element type; the labels that select between
// them are not visible in this listing.
564 write_numeric_values.template operator()<uint16_t>();
567 write_numeric_values.template operator()<int16_t>();
570 write_numeric_values.template operator()<uint32_t>();
573 write_numeric_values.template operator()<int32_t>();
576 write_numeric_values.template operator()<
float>();
579 write_numeric_values.template operator()<
double>();
582 write_numeric_values.template operator()<uint64_t>();
585 write_numeric_values.template operator()<int64_t>();
// Writes the string form as one JSON string when as_string() succeeds.
589 if (
auto result = element.
as_string(); result.is_ok()) {
590 out <<
"\"" << json_escape(result.value()) <<
"\"";
595 out <<
"]" << newline;
596 out << indent <<
"}";
// Attribute Tag: the raw bytes are consumed four at a time as two
// little-endian 16-bit values (group, then element) and each pair is written
// as an 8-digit uppercase hexadecimal string. Trailing bytes that do not
// form a full group of four are ignored.
601 if (
vr == vr_type::AT) {
602 out <<
"{" << newline;
603 out << indent <<
" \"vr\":" << space <<
"\"AT\"," << newline;
604 out << indent <<
" \"Value\":" << space <<
"[";
607 for (
size_t i = 0; i + 4 <= data.size(); i += 4) {
608 if (i > 0) out <<
",";
609 uint16_t group =
static_cast<uint16_t
>(data[i] | (data[i + 1] << 8));
610 uint16_t elem =
static_cast<uint16_t
>(data[i + 2] | (data[i + 3] << 8));
// NOTE(review): std::hex, std::uppercase and the '0' fill are set on `out`
// here; only std::dec is restored below, so uppercase and the fill
// character remain in effect for later output on the same stream.
611 out <<
"\"" << std::hex << std::uppercase << std::setfill(
'0')
612 << std::setw(4) << group << std::setw(4) << elem <<
"\"";
615 out <<
"]" << newline << std::dec;
616 out << indent <<
"}";
// Remaining case: the string form is written as a single-element "Value"
// array; in the visible lines an unsuccessful as_string() leaves the array
// empty.
621 out <<
"{" << newline;
622 out << indent <<
" \"vr\":" << space <<
"\"" << vr_str <<
"\"," << newline;
623 out << indent <<
" \"Value\":" << space <<
"[";
625 if (
auto result = element.
as_string(); result.is_ok()) {
626 out <<
"\"" << json_escape(result.value()) <<
"\"";
629 out <<
"]" << newline;
630 out << indent <<
"}";
// Writes a dataset to `out` as one JSON object.
//
// Iterates the dataset as (tag, element) pairs, consults should_include_tag
// for each tag, and writes the remaining elements as
// "<format_tag_key(tag)>": <element object>, delegating the value to
// write_element_value_json one indent level deeper. Entries are separated by
// "," and the object is closed with "}" at this level's indentation.
//
// Part of the parameter list is on lines not visible in this listing.
642void write_dataset_json(std::ostream& out,
645 const std::filesystem::path& base_path,
647 size_t& bulk_data_counter) {
// Formatting pieces; all three collapse to empty strings in compact mode.
648 const std::string indent = opts.compact ?
"" : std::string(indent_level * 2,
' ');
649 const std::string newline = opts.compact ?
"" :
"\n";
650 const std::string space = opts.compact ?
"" :
" ";
652 out <<
"{" << newline;
655 for (
const auto& [tag, element] : dataset) {
// Tags rejected by the pixel-data / tag-filter options are handled here.
656 if (!should_include_tag(tag, opts)) {
// Separator between consecutive entries.
661 out <<
"," << newline;
665 out << indent <<
" \"" << format_tag_key(tag) <<
"\":" << space;
666 write_element_value_json(out, element, opts, base_path,
667 indent_level + 1, bulk_data_counter);
670 out << newline << indent <<
"}";
// Converts one DICOM file to JSON written on `output`.
//
// The file is opened with dicom_file::open; a failure is reported on
// std::cerr together with the error message. On success a single JSON object
// is written: optionally the File Meta Information nested under the key
// "00020000", followed by the main dataset's elements at the top level.
//
// Returns int; the concrete return values are on lines not visible in this
// listing.
680int convert_file(
const std::filesystem::path& input_path,
681 std::ostream& output,
682 const options& opts) {
685 auto result = dicom_file::open(input_path);
686 if (result.is_err()) {
687 std::cerr <<
"Error: Failed to open '" << input_path.string()
688 <<
"': " << result.error().message <<
"\n";
692 auto& file = result.value();
// NOTE(review): the bulk-data counter restarts at 0 for every file. Bulk
// files are named bulk_<counter>.raw and go to opts.bulk_data_dir when that
// is set, so converting several files (directory mode) with a shared bulk
// directory would reuse the same names and overwrite earlier files' bulk
// data - confirm.
693 size_t bulk_data_counter = 0;
// Directory of the input file; used for bulk files when no bulk dir is set.
694 std::filesystem::path base_path = input_path.parent_path();
// Formatting pieces; both collapse to empty strings in compact mode.
696 const std::string newline = opts.compact ?
"" :
"\n";
697 const std::string space = opts.compact ?
"" :
" ";
699 output <<
"{" << newline;
// File Meta Information is written as a nested object, only when requested
// and non-empty.
704 if (opts.include_meta && !file.meta_information().empty()) {
706 output <<
" \"00020000\":" << space;
707 write_dataset_json(output, file.meta_information(), opts, base_path, 1, bulk_data_counter);
// Main dataset: elements are written directly into the top-level object.
711 for (
const auto& [tag, element] : file.dataset()) {
712 if (!should_include_tag(tag, opts)) {
// Separator between consecutive entries.
717 output <<
"," << newline;
721 const std::string indent = opts.compact ?
"" :
" ";
722 output << indent <<
"\"" << format_tag_key(tag) <<
"\":" << space;
723 write_element_value_json(output, element, opts, base_path, 1, bulk_data_counter);
726 output << newline <<
"}" << newline;
// Converts the regular files found in `dir_path`.
//
// Uses a recursive directory walk when opts.recursive is set and a flat one
// otherwise. Each regular file is passed to the local `process` lambda, which
// derives the output name by replacing the file's extension with ".json" and
// calls convert_file on it.
//
// Returns int; how per-file failures are folded into the result is on lines
// not visible in this listing.
737int process_directory(
const std::filesystem::path& dir_path,
const options& opts) {
740 auto process = [&](
const std::filesystem::path& file_path) {
// Lower-case the extension so the comparison below is case-insensitive.
// NOTE(review): ::tolower is applied to plain char values; for characters
// with a negative value this is undefined behaviour - consider converting
// through unsigned char.
741 auto ext = file_path.extension().string();
742 std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
// Matches files whose extension is non-empty and is neither ".dcm" nor
// ".dicom"; files without any extension do not match.
// NOTE(review): the body is not visible - presumably such files are skipped.
743 if (ext !=
".dcm" && ext !=
".dicom" && !ext.empty()) {
// The JSON file is written next to the input file.
748 std::filesystem::path output_path = file_path;
749 output_path.replace_extension(
".json");
752 std::cout <<
"Converting: " << file_path.string() <<
" -> "
753 << output_path.string() <<
"\n";
756 std::ofstream output(output_path);
758 std::cerr <<
"Error: Cannot create output file: " << output_path.string() <<
"\n";
// A non-zero result from convert_file marks this file as failed.
763 if (convert_file(file_path, output, opts) != 0) {
768 if (opts.recursive) {
769 for (
const auto& entry :
770 std::filesystem::recursive_directory_iterator(dir_path)) {
771 if (entry.is_regular_file()) {
772 process(entry.path());
776 for (
const auto& entry : std::filesystem::directory_iterator(dir_path)) {
777 if (entry.is_regular_file()) {
778 process(entry.path());
// Program entry point.
//
// Parses the command line; when parsing reports failure the banner text and
// the usage text (print_usage) are involved. Otherwise the input path must
// exist, the bulk-data directory is created when URI mode is used with an
// explicit directory, and the input is dispatched: directories go to
// process_directory, single files to convert_file writing either to
// std::cout (no output path given) or to the requested output file.
//
// Several lines, including the statements surrounding the two banner texts,
// are not visible in this listing.
788int main(
int argc,
char* argv[]) {
791 if (!parse_arguments(argc, argv, opts)) {
793 ____ ____ __ __ _____ ___ _ ____ ___ _ _
794 | _ \ / ___| \/ | |_ _|/ _ \ | / ___| / _ \| \ | |
795 | | | | | | |\/| | | | | | | | _ | \___ \| | | | \| |
796 | |_| | |___| | | | | | | |_| | | |_| |___) | |_| | |\ |
797 |____/ \____|_| |_| |_| \___/ \___/|____/ \___/|_| \_|
799 DICOM to JSON Converter (PS3.18)
801 print_usage(argv[0]);
// The input path must exist before any conversion is attempted.
806 if (!std::filesystem::exists(opts.input_path)) {
807 std::cerr <<
"Error: Path does not exist: " << opts.input_path.string() <<
"\n";
// Create the bulk-data directory up front so later file writes can succeed.
// NOTE(review): this overload of create_directories reports failure by
// throwing; no handler is visible in this listing - confirm.
812 if (opts.bulk_mode == bulk_data_mode::uri && !opts.bulk_data_dir.empty()) {
813 std::filesystem::create_directories(opts.bulk_data_dir);
819 ____ ____ __ __ _____ ___ _ ____ ___ _ _
820 | _ \ / ___| \/ | |_ _|/ _ \ | / ___| / _ \| \ | |
821 | | | | | | |\/| | | | | | | | _ | \___ \| | | | \| |
822 | |_| | |___| | | | | | | |_| | | |_| |___) | |_| | |\ |
823 |____/ \____|_| |_| |_| \___/ \___/|____/ \___/|_| \_|
825 DICOM to JSON Converter (PS3.18)
// Directory input: every contained file is converted next to its source.
830 if (std::filesystem::is_directory(opts.input_path)) {
831 return process_directory(opts.input_path, opts);
// Single file without an output path: JSON goes to standard output.
834 if (opts.output_path.empty()) {
836 return convert_file(opts.input_path, std::cout, opts);
// Single file with an explicit output path.
839 std::ofstream output(opts.output_path);
841 std::cerr <<
"Error: Cannot create output file: "
842 << opts.output_path.string() <<
"\n";
845 return convert_file(opts.input_path, output, opts);
auto is_sequence() const noexcept -> bool
Check if this element is a sequence.
auto raw_data() const noexcept -> std::span< const uint8_t >
Get the raw data bytes.
auto as_numeric_list() const -> kcenon::pacs::Result< std::vector< T > >
Get multi-valued numeric data as a list.
constexpr auto vr() const noexcept -> encoding::vr_type
Get the element's VR.
auto as_string() const -> kcenon::pacs::Result< std::string >
Get the value as a string.
auto sequence_items() -> std::vector< dicom_dataset > &
Get mutable access to sequence items.
auto is_empty() const noexcept -> bool
Check if the element has no value.
constexpr auto group() const noexcept -> uint16_t
Get the group number.
constexpr auto element() const noexcept -> uint16_t
Get the element number.
DICOM Data Dictionary for tag metadata lookup.
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
constexpr bool is_numeric_vr(vr_type vr) noexcept
Checks if a VR is a numeric type.
vr_type
DICOM Value Representation (VR) types.
constexpr bool is_string_vr(vr_type vr) noexcept
Checks if a VR is a string type.