PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
main.cpp
Go to the documentation of this file.
1
30
31#include <algorithm>
32#include <chrono>
33#include <filesystem>
34#include <fstream>
35#include <iomanip>
36#include <iostream>
37#include <sstream>
38#include <string>
39#include <vector>
40
41namespace {
42
/// Command-line options gathered by parse_arguments().
///
/// NOTE(review): the listing's embedded line numbers skip between
/// `output_path` and `new_patient_id`, and `opts.profile` is read elsewhere
/// in this file, so a `profile` member declaration appears to have been
/// elided here -- confirm against the original source.
struct options {
    /// Positional arguments: files or directories to anonymize.
    std::vector<std::filesystem::path> input_paths;
    /// Destination file or directory; when empty, inputs are rewritten in place.
    std::filesystem::path output_path;
    /// Replacement PatientID from --patient-id, if given.
    std::optional<std::string> new_patient_id;
    /// Replacement PatientName from --patient-name, if given.
    std::optional<std::string> new_patient_name;
    /// Tags named by -k/--keep.
    std::vector<kcenon::pacs::core::dicom_tag> keep_tags;
    /// Tag -> replacement value pairs from -r/--replace.
    std::map<kcenon::pacs::core::dicom_tag, std::string> replace_tags;
    /// Path from -m/--mapping-file; empty when no mapping file is used.
    std::filesystem::path mapping_file;
    /// --retain-uid: keep study/series/SOP instance UIDs.
    bool retain_uid{false};
    /// --recursive: descend into sub-directories of directory inputs.
    bool recursive{false};
    /// --verify: run verify_anonymization() on each processed dataset.
    bool verify{false};
    /// -v/--verbose: print per-file progress.
    bool verbose{false};
    /// --dry-run: describe the work without reading or writing DICOM files.
    bool dry_run{false};
    /// Cleared by --no-backup; when true an in-place rewrite first copies the input to "<input>.bak".
    bool create_backup{true};
    /// --detailed: print the per-file anonymization report counters.
    bool detailed_report{false};
    /// Day offset from --date-offset, if given.
    std::optional<int> date_offset_days;
};
65
/// Counters accumulated over one run of the tool.
///
/// The file counters are bumped by process_inputs(); the tag counters are
/// summed from each file's anonymization report in process_file().
struct process_stats {
    std::size_t total_files = 0;    ///< Files handed to process_file().
    std::size_t successful = 0;     ///< Files for which process_file() returned true.
    std::size_t failed = 0;         ///< Missing inputs plus files that failed processing.
    std::size_t tags_removed = 0;   ///< Sum of report.tags_removed.
    std::size_t tags_replaced = 0;  ///< Sum of report.tags_replaced.
    std::size_t tags_kept = 0;      ///< Sum of report.tags_kept.
};
77
/// Write the full help text to standard output.
///
/// @param program_name Name shown in the "Usage:" line and in every example
///                     command (normally argv[0]).
void print_usage(const char* program_name) {
    std::ostream& out = std::cout;

    // Each example is the program name followed by a fixed argument tail.
    const auto example = [&out, program_name](const char* tail) {
        out << " " << program_name << tail;
    };

    out << "\nDICOM Anonymize - De-identification Utility\n\n";
    out << "Usage: " << program_name << " [options] <input> [output]\n\n";

    out << "Arguments:\n"
        << " input Input DICOM file or directory to anonymize\n"
        << " output Output file or directory (optional for single file)\n\n";

    out << "Profile Options:\n"
        << " -p, --profile <name> Anonymization profile (default: basic)\n"
        << " Available profiles:\n"
        << " basic - Remove direct identifiers\n"
        << " clean_pixel - Remove burned-in annotations\n"
        << " clean_descriptions - Clean free-text fields\n"
        << " retain_longitudinal - Preserve temporal relationships\n"
        << " retain_patient_characteristics - Keep demographics\n"
        << " hipaa_safe_harbor - HIPAA 18-identifier removal\n"
        << " gdpr_compliant - GDPR pseudonymization\n\n";

    out << "Tag Customization Options:\n"
        << " -k, --keep <tag> Keep specific tag unchanged\n"
        << " Example: -k \"(0010,0040)\" or -k PatientSex\n"
        << " -r, --replace <tag=val> Replace tag with specific value\n"
        << " Example: -r \"PatientName=Anonymous\"\n"
        << " --patient-id <id> Set new PatientID\n"
        << " --patient-name <n> Set new PatientName\n"
        << " --retain-uid Retain original UIDs\n"
        << " --date-offset <days> Shift dates by specified days\n\n";

    out << "Mapping Options:\n"
        << " -m, --mapping-file <f> UID mapping file (JSON format)\n"
        << " Used for consistent anonymization across files\n\n";

    out << "Output Options:\n"
        << " -o, --output-dir <dir> Output directory for batch processing\n"
        << " --no-backup Do not create backup file\n\n";

    out << "Processing Options:\n"
        << " --recursive Process directories recursively\n"
        << " --verify Verify anonymization result\n"
        << " --dry-run Show what would be done without modifying\n"
        << " --detailed Show detailed anonymization report\n"
        << " -v, --verbose Verbose output\n"
        << " -h, --help Show this help message\n\n";

    out << "Examples:\n";
    example(" patient.dcm anonymous.dcm\n");
    example(" --profile hipaa_safe_harbor patient.dcm output.dcm\n");
    example(" --patient-id \"STUDY001_001\" -m mapping.json patient.dcm\n");
    example(" --recursive -o anonymized/ ./originals/\n");
    example(" -k PatientSex -r \"InstitutionName=Research\" patient.dcm\n\n");

    out << "Anonymization Profiles (DICOM PS3.15 Annex E):\n"
        << " basic - Removes patient name, ID, birth date, etc.\n"
        << " clean_pixel - Extends basic with pixel data cleaning\n"
        << " clean_descriptions - Extends basic with description field cleaning\n"
        << " retain_longitudinal - Date shifting for temporal studies\n"
        << " retain_patient_characteristics - Keeps sex, age, size, weight\n"
        << " hipaa_safe_harbor - Full HIPAA Safe Harbor compliance\n"
        << " gdpr_compliant - GDPR pseudonymization requirements\n\n";

    out << "Exit Codes:\n"
        << " 0 Success\n"
        << " 1 Invalid arguments\n"
        << " 2 File/processing error\n";
}
181
/// Resolve a user-supplied tag specification to a DICOM tag.
///
/// A string containing '(' or ',' is treated as a numeric tag written as
/// hexadecimal "GGGG,EEEE", optionally wrapped in parentheses and with spaces
/// ignored. Anything else -- and a numeric-looking string without a comma --
/// is looked up as a dictionary keyword.
///
/// @param tag_str Tag text exactly as typed on the command line.
/// @return The resolved tag, or std::nullopt when the hex fields cannot be
///         parsed or the keyword lookup finds nothing.
std::optional<kcenon::pacs::core::dicom_tag> resolve_tag(const std::string& tag_str) {
    // Work on a copy: the keyword lookup below still needs the untouched input.
    std::string s = tag_str;

    // Check if it's a numeric tag format
    if (s.find('(') != std::string::npos || s.find(',') != std::string::npos) {
        // Remove parentheses if present
        if (!s.empty() && s.front() == '(') {
            s.erase(0, 1);
        }
        if (!s.empty() && s.back() == ')') {
            s.pop_back();
        }

        // Remove spaces
        s.erase(std::remove(s.begin(), s.end(), ' '), s.end());

        // Parse GGGG,EEEE format
        std::size_t comma_pos = s.find(',');
        if (comma_pos != std::string::npos) {
            try {
                // NOTE(review): std::stoul stops at the first non-hex character
                // and the cast narrows to 16 bits, so inputs such as
                // "0010zz,0040" or "10010,0040" are accepted rather than
                // rejected -- confirm whether that is intended.
                auto group = static_cast<std::uint16_t>(
                    std::stoul(s.substr(0, comma_pos), nullptr, 16));
                auto element = static_cast<std::uint16_t>(
                    std::stoul(s.substr(comma_pos + 1), nullptr, 16));
                return kcenon::pacs::core::dicom_tag{group, element};
            } catch (...) {
                // stoul throws on empty or non-numeric fields.
                return std::nullopt;
            }
        }
    }

    // Try as keyword
    // NOTE(review): `dict` is not declared anywhere in the visible text; the
    // listing skips a line number right here, so its declaration appears to
    // have been elided -- confirm against the original source.
    auto info = dict.find_by_keyword(tag_str);
    if (info) {
        return info->tag;
    }

    return std::nullopt;
}
227
/// Parse the command line into `opts`.
///
/// @param argc Argument count as passed to main().
/// @param argv Argument vector as passed to main().
/// @param opts Receives the parsed settings; may be partially filled when
///             parsing fails.
/// @return true when parsing succeeded and at least one input path was
///         given; false for -h/--help, no arguments, or any error (errors
///         are reported on stderr).
bool parse_arguments(int argc, char* argv[], options& opts) {
    if (argc < 2) {
        return false;
    }

    // Options that take a value are only matched when a following argument
    // exists (`i + 1 < argc`). A value-taking option given last therefore
    // falls through to the generic "Unknown option" branch further down.
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];

        if (arg == "--help" || arg == "-h") {
            // Help is signalled the same way as a parse failure.
            return false;
        } else if ((arg == "-p" || arg == "--profile") && i + 1 < argc) {
            std::string profile_name = argv[++i];
            // NOTE(review): the initializer of `profile` is missing from the
            // visible text (the listing skips a line number here) -- confirm
            // against the original source.
            auto profile =
            if (!profile) {
                std::cerr << "Error: Unknown profile '" << profile_name
                          << "'\n";
                std::cerr << "Available profiles: basic, clean_pixel, "
                             "clean_descriptions,\n";
                std::cerr << " retain_longitudinal, "
                             "retain_patient_characteristics,\n";
                std::cerr << " hipaa_safe_harbor, gdpr_compliant\n";
                return false;
            }
            opts.profile = *profile;
        } else if ((arg == "-k" || arg == "--keep") && i + 1 < argc) {
            auto tag = resolve_tag(argv[++i]);
            if (!tag) {
                std::cerr << "Error: Invalid tag format for --keep\n";
                return false;
            }
            opts.keep_tags.push_back(*tag);
        } else if ((arg == "-r" || arg == "--replace") && i + 1 < argc) {
            std::string replace_arg = argv[++i];
            // Split on the first '=' only, so the value itself may contain '='.
            auto eq_pos = replace_arg.find('=');
            if (eq_pos == std::string::npos) {
                std::cerr << "Error: --replace requires tag=value format\n";
                return false;
            }
            std::string tag_str = replace_arg.substr(0, eq_pos);
            std::string value = replace_arg.substr(eq_pos + 1);
            auto tag = resolve_tag(tag_str);
            if (!tag) {
                std::cerr << "Error: Invalid tag format for --replace\n";
                return false;
            }
            // A later --replace for the same tag overwrites the earlier value.
            opts.replace_tags[*tag] = value;
        } else if (arg == "--patient-id" && i + 1 < argc) {
            opts.new_patient_id = argv[++i];
        } else if (arg == "--patient-name" && i + 1 < argc) {
            opts.new_patient_name = argv[++i];
        } else if (arg == "--retain-uid") {
            opts.retain_uid = true;
        } else if (arg == "--date-offset" && i + 1 < argc) {
            try {
                opts.date_offset_days = std::stoi(argv[++i]);
            } catch (...) {
                std::cerr << "Error: Invalid date offset value\n";
                return false;
            }
        } else if ((arg == "-m" || arg == "--mapping-file") && i + 1 < argc) {
            opts.mapping_file = argv[++i];
        } else if ((arg == "-o" || arg == "--output-dir") && i + 1 < argc) {
            opts.output_path = argv[++i];
        } else if (arg == "--no-backup") {
            opts.create_backup = false;
        } else if (arg == "--recursive") {
            opts.recursive = true;
        } else if (arg == "--verify") {
            opts.verify = true;
        } else if (arg == "--dry-run") {
            opts.dry_run = true;
        } else if (arg == "--detailed") {
            opts.detailed_report = true;
        } else if (arg == "-v" || arg == "--verbose") {
            opts.verbose = true;
        } else if (arg[0] == '-') {
            std::cerr << "Error: Unknown option '" << arg << "'\n";
            return false;
        } else {
            // Anything not starting with '-' is a positional path.
            opts.input_paths.emplace_back(arg);
        }
    }

    // Validation
    if (opts.input_paths.empty()) {
        std::cerr << "Error: No input files specified\n";
        return false;
    }

    // If two positional args and first is file, second is output
    // (this overrides any value given earlier via -o/--output-dir).
    if (opts.input_paths.size() == 2 &&
        std::filesystem::is_regular_file(opts.input_paths[0])) {
        opts.output_path = opts.input_paths[1];
        opts.input_paths.resize(1);
    }

    return true;
}
334
/// Load a previously saved mapping from a JSON file into `mapping`.
///
/// A missing file is not an error: the function returns true and leaves
/// `mapping` untouched.
///
/// @param path Location of the mapping file.
/// @return false (with a message on stderr) when the file exists but cannot
///         be opened or `mapping.from_json` reports an error; true otherwise.
///
/// NOTE(review): the second line of the signature (the `mapping` parameter)
/// is missing from the visible text -- the listing skips a line number after
/// the first parameter. Confirm against the original source.
bool load_mapping(const std::filesystem::path& path,
    if (!std::filesystem::exists(path)) {
        // File doesn't exist yet - will be created after anonymization
        return true;
    }

    std::ifstream file(path);
    if (!file) {
        std::cerr << "Error: Cannot open mapping file: " << path.string()
                  << "\n";
        return false;
    }

    // Read the whole file into memory before handing it to the JSON parser.
    std::stringstream buffer;
    buffer << file.rdbuf();
    auto result = mapping.from_json(buffer.str());
    if (result.is_err()) {
        std::cerr << "Error: Invalid mapping file format\n";
        return false;
    }

    return true;
}
365
/// Write `mapping` to `path` as JSON, replacing any existing file.
///
/// @param path Destination of the mapping file.
/// @return false (with a message on stderr) when the file cannot be opened
///         for writing; true otherwise.
///
/// NOTE(review): the second line of the signature (the `mapping` parameter)
/// is missing from the visible text -- the listing skips a line number after
/// the first parameter. Confirm against the original source.
bool save_mapping(const std::filesystem::path& path,
    std::ofstream file(path);
    if (!file) {
        std::cerr << "Error: Cannot write mapping file: " << path.string()
                  << "\n";
        return false;
    }

    // NOTE(review): the stream state is not checked after the write, so a
    // failed write still returns true.
    file << mapping.to_json();
    return true;
}
384
/// Copy `file_path` to a sibling named "<file_path>.bak".
///
/// An existing backup with that name is overwritten. Failures are reported
/// as a warning on stderr -- using the plain path text, like the other
/// messages in this tool, plus the OS-level reason -- and never throw.
///
/// @param file_path File to back up.
/// @return true when the copy succeeded, false otherwise.
bool create_backup(const std::filesystem::path& file_path) {
    // Append to the full name ("a.dcm" -> "a.dcm.bak") instead of replacing
    // the extension, so backups of "a.dcm" and "a.dicom" cannot collide.
    auto backup_path = file_path;
    backup_path += ".bak";

    std::error_code ec;
    std::filesystem::copy_file(
        file_path, backup_path,
        std::filesystem::copy_options::overwrite_existing, ec);
    if (ec) {
        // Streaming a path object directly would print it quoted and escaped;
        // `.string()` keeps the message consistent with the rest of the tool.
        std::cerr << "Warning: Failed to create backup: "
                  << backup_path.string() << " (" << ec.message() << ")\n";
        return false;
    }

    return true;
}
406
/// Check a processed dataset for values that still look identifying.
///
/// For a fixed list of tags, any non-empty string value that is not
/// "Anonymous"/"ANONYMOUS" and does not contain "ANON" produces a warning.
///
/// @return One human-readable warning per suspicious tag; empty when every
///         check passed.
///
/// NOTE(review): the parameter lines of the signature are missing from the
/// visible text (the listing skips line numbers after the opening
/// parenthesis). The body reads a `dataset`, and the call site passes a
/// dataset and a profile; no use of the profile is visible in this body.
/// Confirm against the original source.
std::vector<std::string> verify_anonymization(
    std::vector<std::string> warnings;

    using namespace kcenon::pacs::core;

    // Check critical identifiers based on profile
    std::vector<std::pair<dicom_tag, std::string>> checks = {
        {tags::patient_name, "PatientName"},
        {tags::patient_id, "PatientID"},
        {tags::patient_birth_date, "PatientBirthDate"},
        {dicom_tag{0x0010, 0x0050}, "PatientInsurancePlanCode"},
        {dicom_tag{0x0010, 0x1000}, "OtherPatientIDs"},
        {dicom_tag{0x0008, 0x0080}, "InstitutionName"},
        {dicom_tag{0x0008, 0x0081}, "InstitutionAddress"},
    };

    for (const auto& [tag, name] : checks) {
        auto value = dataset.get_string(tag);
        // Heuristic only: values that are empty or carry an "ANON" marker are
        // treated as already anonymized; everything else is flagged.
        if (!value.empty() && value != "Anonymous" && value != "ANONYMOUS" &&
            value.find("ANON") == std::string::npos) {
            // The warning text includes the remaining value itself.
            warnings.push_back("Tag " + name + " may contain identifying "
                               "information: " + value);
        }
    }

    return warnings;
}
442
/// Anonymize one DICOM file and write the result to `output_path`.
///
/// Steps: open the input, configure an anonymizer from `opts`, anonymize the
/// dataset (through the shared mapping when one is in use), optionally print
/// a report and verification warnings, then save with the input's transfer
/// syntax. In dry-run mode only a description is printed and nothing is read
/// or written.
///
/// @param input_path  File to read.
/// @param output_path File to write; equal to `input_path` for in-place runs.
/// @param opts        Parsed command-line options.
/// @param stats       Tag counters are added to this on success of the
///                    anonymization step.
/// @return true on success (including dry-run); false after reporting an
///         open, anonymization, or save error on stderr.
///
/// NOTE(review): one parameter line is missing from the visible text (the
/// listing skips a line number between `opts` and `stats`); the body uses a
/// `mapping` object that would be declared there. Confirm against the
/// original source.
bool process_file(const std::filesystem::path& input_path,
                  const std::filesystem::path& output_path,
                  const options& opts,
                  process_stats& stats) {
    using namespace kcenon::pacs::core;
    using namespace kcenon::pacs::security;

    if (opts.verbose) {
        std::cout << "Processing: " << input_path.string() << "\n";
    }

    // Dry run mode
    if (opts.dry_run) {
        std::cout << "Would anonymize: " << input_path.string() << "\n";
        std::cout << " Profile: " << to_string(opts.profile) << "\n";
        std::cout << " Output: " << output_path.string() << "\n";
        if (!opts.keep_tags.empty()) {
            std::cout << " Keep tags: " << opts.keep_tags.size() << "\n";
        }
        if (!opts.replace_tags.empty()) {
            std::cout << " Replace tags: " << opts.replace_tags.size() << "\n";
        }
        // Counted as a success by the caller even though nothing was touched.
        return true;
    }

    // Open input file
    auto result = dicom_file::open(input_path);
    if (result.is_err()) {
        std::cerr << "Error: Failed to open '" << input_path.string()
                  << "': " << result.error().message << "\n";
        return false;
    }

    auto file = std::move(result.value());
    auto& dataset = file.dataset();

    // Create anonymizer with profile
    anonymizer anon(opts.profile);

    // Configure date offset if specified
    if (opts.date_offset_days) {
        anon.set_date_offset(std::chrono::days{*opts.date_offset_days});
    }

    // Add keep tags (action: keep)
    for (const auto& tag : opts.keep_tags) {
        anon.add_tag_action(tag, tag_action_config::make_keep());
        if (opts.verbose) {
            std::cout << " Keeping tag: " << tag.to_string() << "\n";
        }
    }

    // Add replace tags
    for (const auto& [tag, value] : opts.replace_tags) {
        anon.add_tag_action(tag, tag_action_config::make_replace(value));
        if (opts.verbose) {
            std::cout << " Replacing tag: " << tag.to_string()
                      << " = " << value << "\n";
        }
    }

    // Handle patient ID/name replacements
    // (registered after the -k/-r actions above for the same tags).
    if (opts.new_patient_id) {
        anon.add_tag_action(
            tags::patient_id,
            tag_action_config::make_replace(*opts.new_patient_id));
    }
    if (opts.new_patient_name) {
        anon.add_tag_action(
            tags::patient_name,
            tag_action_config::make_replace(*opts.new_patient_name));
    }

    // Handle UID retention
    if (opts.retain_uid) {
        anon.add_tag_action(tags::study_instance_uid,
                            tag_action_config::make_keep());
        anon.add_tag_action(tags::series_instance_uid,
                            tag_action_config::make_keep());
        anon.add_tag_action(tags::sop_instance_uid,
                            tag_action_config::make_keep());
    }

    // Perform anonymization
    // The mapping-aware path is taken when a mapping file was requested or
    // the mapping already holds entries.
    auto anon_result = (!opts.mapping_file.empty() || !mapping.empty())
                           ? anon.anonymize_with_mapping(dataset, mapping)
                           : anon.anonymize(dataset);

    if (anon_result.is_err()) {
        std::cerr << "Error: Anonymization failed for '"
                  << input_path.string()
                  << "': " << anon_result.error().message << "\n";
        return false;
    }

    // Fold this file's counters into the run totals. This happens before the
    // save below, so a later save failure still leaves them counted.
    auto report = anon_result.value();
    stats.tags_removed += report.tags_removed;
    stats.tags_replaced += report.tags_replaced;
    stats.tags_kept += report.tags_kept;

    // Show summary if detailed report requested
    if (opts.detailed_report) {
        std::cout << " Processed: " << report.total_tags_processed << " tags\n";
        std::cout << " Removed: " << report.tags_removed << "\n";
        std::cout << " Replaced: " << report.tags_replaced << "\n";
        std::cout << " Emptied: " << report.tags_emptied << "\n";
        std::cout << " UIDs replaced: " << report.uids_replaced << "\n";
        std::cout << " Dates shifted: " << report.dates_shifted << "\n";
        std::cout << " Kept: " << report.tags_kept << "\n";
    }

    // Verify if requested
    // Warnings are informational only: they do not fail the file.
    if (opts.verify) {
        auto warnings = verify_anonymization(dataset, opts.profile);
        if (!warnings.empty()) {
            std::cout << " Verification warnings:\n";
            for (const auto& warning : warnings) {
                std::cout << " - " << warning << "\n";
            }
        } else if (opts.verbose) {
            std::cout << " Verification: PASSED\n";
        }
    }

    // Create output file with same transfer syntax
    // `dataset` is moved from here and must not be used afterwards.
    auto output_file =
        dicom_file::create(std::move(dataset), file.transfer_syntax());

    // Ensure output directory exists
    // NOTE(review): this create_directories overload takes no error_code, so
    // a failure would surface as an exception rather than a false return.
    auto output_dir = output_path.parent_path();
    if (!output_dir.empty() && !std::filesystem::exists(output_dir)) {
        std::filesystem::create_directories(output_dir);
    }

    // Create backup if in-place and backup is enabled
    // NOTE(review): the result of create_backup() is ignored, so the original
    // is still overwritten below when the backup could not be made.
    if (input_path == output_path && opts.create_backup) {
        create_backup(input_path);
    }

    // Save
    auto save_result = output_file.save(output_path);
    if (save_result.is_err()) {
        std::cerr << "Error: Failed to save '" << output_path.string()
                  << "': " << save_result.error().message << "\n";
        return false;
    }

    if (opts.verbose) {
        std::cout << " Saved: " << output_path.string() << "\n";
        std::cout << " Tags removed: " << report.tags_removed
                  << ", replaced: " << report.tags_replaced
                  << ", kept: " << report.tags_kept << "\n";
    }

    return true;
}
609
/// Run process_file() over every input path in `opts`.
///
/// Directory inputs are scanned (recursively when `opts.recursive` is set)
/// and each regular file with a ".dcm"/".dicom" extension, or no extension,
/// is processed. Any other existing input is processed as a single file.
///
/// @param opts  Parsed command-line options.
/// @param stats Receives file counts; a non-existent input path counts as one
///              failure without being added to `total_files`.
///
/// NOTE(review): the last parameter line is missing from the visible text
/// (the listing skips a line number after `stats`); the body forwards a
/// `mapping` object that would be declared there. Confirm against the
/// original source.
void process_inputs(const options& opts, process_stats& stats,
    for (const auto& input_path : opts.input_paths) {
        if (!std::filesystem::exists(input_path)) {
            std::cerr << "Error: Path does not exist: " << input_path.string()
                      << "\n";
            ++stats.failed;
            continue;
        }

        if (std::filesystem::is_directory(input_path)) {
            // Directory mode
            auto process_entry = [&](const std::filesystem::path& file_path) {
                // Case-insensitive extension filter; extension-less files are
                // let through as well.
                auto ext = file_path.extension().string();
                std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
                if (ext != ".dcm" && ext != ".dicom" && !ext.empty()) {
                    return;  // Skip non-DICOM files
                }

                ++stats.total_files;

                std::filesystem::path output_path;
                if (!opts.output_path.empty()) {
                    // Mirror the file's position under the input directory
                    // inside the output directory.
                    auto relative =
                        std::filesystem::relative(file_path, input_path);
                    output_path = opts.output_path / relative;
                } else {
                    // In-place anonymization
                    output_path = file_path;
                }

                if (process_file(file_path, output_path, opts, mapping,
                                 stats)) {
                    ++stats.successful;
                } else {
                    ++stats.failed;
                }
            };

            if (opts.recursive) {
                for (const auto& entry :
                     std::filesystem::recursive_directory_iterator(
                         input_path)) {
                    if (entry.is_regular_file()) {
                        process_entry(entry.path());
                    }
                }
            } else {
                for (const auto& entry :
                     std::filesystem::directory_iterator(input_path)) {
                    if (entry.is_regular_file()) {
                        process_entry(entry.path());
                    }
                }
            }
        } else {
            // Single file mode
            // (no extension filter is applied to an explicitly named file)
            ++stats.total_files;

            std::filesystem::path output_path;
            if (!opts.output_path.empty()) {
                output_path = opts.output_path;
            } else {
                // In-place anonymization
                output_path = input_path;
            }

            if (process_file(input_path, output_path, opts, mapping, stats)) {
                ++stats.successful;
            } else {
                ++stats.failed;
            }
        }
    }
}
691
696void print_summary(const process_stats& stats) {
697 if (stats.total_files > 1 || stats.tags_removed > 0) {
698 std::cout << "\n";
699 std::cout << "========================================\n";
700 std::cout << " Anonymization Summary\n";
701 std::cout << "========================================\n";
702 std::cout << " Total files: " << stats.total_files << "\n";
703 std::cout << " Successful: " << stats.successful << "\n";
704 std::cout << " Failed: " << stats.failed << "\n";
705 std::cout << " ----------------------------------------\n";
706 std::cout << " Tags removed: " << stats.tags_removed << "\n";
707 std::cout << " Tags replaced: " << stats.tags_replaced << "\n";
708 std::cout << " Tags kept: " << stats.tags_kept << "\n";
709 std::cout << "========================================\n";
710 }
711}
712
713} // namespace
714
/// Program entry point: print the banner, parse arguments, load the optional
/// mapping file, process all inputs, save the mapping, and print a summary.
///
/// @return 0 when every file succeeded; 1 when parse_arguments() returned
///         false (this includes -h/--help, after printing the usage text);
///         2 when the mapping file could not be loaded or any file failed.
int main(int argc, char* argv[]) {
    // The banner is printed unconditionally, before argument parsing.
    std::cout << R"(
 ____ ____ __ __ _ _ _ ___ _ ___ ____ __ ___ ________
 | _ \ / ___| \/ | / \ | \ | |/ _ \| \ | \ \ / / \/ |_ _|__ / __|
 | | | | | | |\/| | / _ \ | \| | | | | \| |\ V /| |\/| || | / /| _|
 | |_| | |___| | | | / ___ \| |\ | |_| | |\ | | | | | | || | / /_| |__
 |____/ \____|_| |_| /_/ \_\_| \_|\___/|_| \_| |_| |_| |_|___/____|____|

 DICOM De-identification Utility (PS3.15 Compliant)
)" << "\n";

    options opts;

    if (!parse_arguments(argc, argv, opts)) {
        print_usage(argv[0]);
        return 1;
    }

    // Load existing UID mapping if specified
    // NOTE(review): the declaration of `mapping` is missing from the visible
    // text (the listing skips a line number here) -- confirm against the
    // original source.
    if (!opts.mapping_file.empty()) {
        if (!load_mapping(opts.mapping_file, mapping)) {
            return 2;
        }
        if (opts.verbose && !mapping.empty()) {
            std::cout << "Loaded " << mapping.size()
                      << " existing UID mappings\n";
        }
    }

    // Show profile info
    if (opts.verbose) {
        std::cout << "Anonymization profile: " << to_string(opts.profile)
                  << "\n";
    }

    // Process files
    process_stats stats;
    process_inputs(opts, stats, mapping);

    // Save UID mapping if specified
    // Skipped for dry runs and when the mapping is empty; a failed save is
    // only a warning and does not change the exit code.
    if (!opts.mapping_file.empty() && !mapping.empty() && !opts.dry_run) {
        if (!save_mapping(opts.mapping_file, mapping)) {
            std::cerr << "Warning: Failed to save UID mapping file\n";
        } else if (opts.verbose) {
            std::cout << "Saved " << mapping.size() << " UID mappings to "
                      << opts.mapping_file.string() << "\n";
        }
    }

    // Print summary
    print_summary(stats);

    if (stats.failed > 0) {
        return 2;
    }

    // print_summary() stays silent for a single file with no removed tags, so
    // give the single-file case an explicit confirmation.
    if (stats.total_files == 1 && stats.successful == 1) {
        std::cout << "Successfully anonymized file.\n";
    }

    return 0;
}
DICOM de-identification profiles per PS3.15 Annex E.
auto get_string(dicom_tag tag, std::string_view default_value="") const -> std::string
Get the string value of an element.
static auto instance() -> dicom_dictionary &
Get the singleton instance.
auto to_json() const -> std::string
Export mappings to JSON format.
auto empty() const -> bool
Check if the mapping is empty.
auto size() const -> std::size_t
Get the number of mappings.
auto from_json(std::string_view json) -> kcenon::common::VoidResult
Import mappings from JSON format.
DICOM Data Dictionary for tag metadata lookup.
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
DICOM de-identification/anonymization per PS3.15 Annex E.
int main()
Definition main.cpp:84
anonymization_profile
DICOM de-identification profiles based on PS3.15 Annex E.
@ basic
Basic Profile - Remove direct identifiers.
auto profile_from_string(std::string_view name) -> std::optional< anonymization_profile >
Parse profile from string.
std::string_view name
UID mapping for consistent de-identification across studies.