// Settings reached through `opts` in parse_arguments(), process_file(),
// process_inputs() and main().

// Positional arguments: the files or directories to anonymize.
47 std::vector<std::filesystem::path> input_paths;
// Destination file or directory. Set by -o/--output-dir, or by the second
// positional argument when the first one is a regular file.
48 std::filesystem::path output_path;
// Replacement PatientID from --patient-id; unset when the flag is absent.
51 std::optional<std::string> new_patient_id;
// Replacement PatientName from --patient-name; unset when the flag is absent.
52 std::optional<std::string> new_patient_name;
// Tags given with -k/--keep; process_file() registers a "keep" action for each.
53 std::vector<kcenon::pacs::core::dicom_tag> keep_tags;
// Tag -> literal value pairs given with -r/--replace.
54 std::map<kcenon::pacs::core::dicom_tag, std::string> replace_tags;
// UID mapping file from -m/--mapping-file; callers test empty() for "not given".
55 std::filesystem::path mapping_file;
// --retain-uid: keep the Study/Series/SOP Instance UID tags.
56 bool retain_uid{
false};
// --recursive: walk input directories with a recursive iterator.
57 bool recursive{
false};
// Cleared by --no-backup; only consulted when a file is overwritten in place.
61 bool create_backup{
true};
// --detailed: print the per-file breakdown of the anonymization report.
62 bool detailed_report{
false};
// Day shift from --date-offset, handed to the anonymizer as std::chrono::days.
63 std::optional<int> date_offset_days;
// Run-wide counters reached through `stats`; print_summary() reports them.

// Number of files reported as "Total files" in the summary.
70 std::size_t total_files{0};
// Number of files reported as "Successful" in the summary.
71 std::size_t successful{0};
// The three tag counters are summed from each file's report in process_file().
73 std::size_t tags_removed{0};
74 std::size_t tags_replaced{0};
75 std::size_t tags_kept{0};
/**
 * Writes the command-line help text to std::cout.
 *
 * @param program_name Executable name; inserted into the "Usage:" line and
 *                     into each entry of the "Examples:" section.
 */
82void print_usage(
const char* program_name) {
83 std::cout <<
"\nDICOM Anonymize - De-identification Utility\n\n";
84 std::cout <<
"Usage: " << program_name <<
" [options] <input> [output]\n\n";
// Positional arguments.
85 std::cout <<
"Arguments:\n";
87 <<
" input Input DICOM file or directory to anonymize\n";
89 <<
" output Output file or directory (optional for "
// Profile selection (-p/--profile) and the list of accepted profile names.
92 std::cout <<
"Profile Options:\n";
93 std::cout <<
" -p, --profile <name> Anonymization profile (default: "
95 std::cout <<
" Available profiles:\n";
97 <<
" basic - Remove direct "
100 <<
" clean_pixel - Remove burned-in "
103 <<
" clean_descriptions - Clean free-text "
106 <<
" retain_longitudinal - Preserve temporal "
108 std::cout <<
" retain_patient_characteristics - "
109 "Keep demographics\n";
111 <<
" hipaa_safe_harbor - HIPAA 18-identifier "
114 <<
" gdpr_compliant - GDPR pseudonymization\n\n";
// Per-tag overrides: keep, replace, patient identity, UID retention, date shift.
116 std::cout <<
"Tag Customization Options:\n";
118 <<
" -k, --keep <tag> Keep specific tag unchanged\n";
119 std::cout <<
" Example: -k \"(0010,0040)\" or -k "
121 std::cout <<
" -r, --replace <tag=val> Replace tag with specific value\n";
122 std::cout <<
" Example: -r \"PatientName=Anonymous\"\n";
123 std::cout <<
" --patient-id <id> Set new PatientID\n";
124 std::cout <<
" --patient-name <n> Set new PatientName\n";
125 std::cout <<
" --retain-uid Retain original UIDs\n";
126 std::cout <<
" --date-offset <days> Shift dates by specified days\n\n";
// UID mapping file option.
128 std::cout <<
"Mapping Options:\n";
129 std::cout <<
" -m, --mapping-file <f> UID mapping file (JSON format)\n";
130 std::cout <<
" Used for consistent anonymization "
// Output location and backup behaviour.
133 std::cout <<
"Output Options:\n";
134 std::cout <<
" -o, --output-dir <dir> Output directory for batch "
136 std::cout <<
" --no-backup Do not create backup file\n\n";
// Traversal, verification, dry-run and verbosity switches.
138 std::cout <<
"Processing Options:\n";
139 std::cout <<
" --recursive Process directories recursively\n";
140 std::cout <<
" --verify Verify anonymization result\n";
141 std::cout <<
" --dry-run Show what would be done without "
143 std::cout <<
" --detailed Show detailed anonymization "
145 std::cout <<
" -v, --verbose Verbose output\n";
146 std::cout <<
" -h, --help Show this help message\n\n";
// Worked examples, each prefixed with the program name.
148 std::cout <<
"Examples:\n";
149 std::cout <<
" " << program_name <<
" patient.dcm anonymous.dcm\n";
150 std::cout <<
" " << program_name
151 <<
" --profile hipaa_safe_harbor patient.dcm output.dcm\n";
152 std::cout <<
" " << program_name <<
" --patient-id \"STUDY001_001\" -m "
153 "mapping.json patient.dcm\n";
154 std::cout <<
" " << program_name
155 <<
" --recursive -o anonymized/ ./originals/\n";
156 std::cout <<
" " << program_name
157 <<
" -k PatientSex -r \"InstitutionName=Research\" patient.dcm\n\n";
// One-line description of every profile.
159 std::cout <<
"Anonymization Profiles (DICOM PS3.15 Annex E):\n";
161 <<
" basic - Removes patient name, ID, birth date, "
164 <<
" clean_pixel - Extends basic with pixel data cleaning\n";
166 <<
" clean_descriptions - Extends basic with description field cleaning\n";
168 <<
" retain_longitudinal - Date shifting for temporal studies\n";
170 <<
" retain_patient_characteristics - Keeps sex, age, size, weight\n";
172 <<
" hipaa_safe_harbor - Full HIPAA Safe Harbor compliance\n";
174 <<
" gdpr_compliant - GDPR pseudonymization requirements\n\n";
// Process exit codes as advertised to the user.
176 std::cout <<
"Exit Codes:\n";
177 std::cout <<
" 0 Success\n";
178 std::cout <<
" 1 Invalid arguments\n";
179 std::cout <<
" 2 File/processing error\n";
/**
 * Turns a user-supplied tag string into a dicom_tag.
 *
 * A string containing '(' or ',' is handled as a numeric tag: spaces are
 * removed, the text is split at the comma, and both halves are parsed as
 * hexadecimal. Any other string is looked up as a keyword via
 * `dict.find_by_keyword`.
 *
 * @param tag_str Tag in numeric "(GGGG,EEEE)" style or as a dictionary keyword.
 * @return The resolved tag, or an empty optional; callers test the result
 *         before dereferencing it.
 */
187std::optional<kcenon::pacs::core::dicom_tag> resolve_tag(
const std::string& tag_str) {
// Work on a copy so the original text stays available for the keyword lookup.
188 std::string s = tag_str;
// Numeric form is detected by the presence of a parenthesis or a comma.
191 if (s.find(
'(') != std::string::npos || s.find(
',') != std::string::npos) {
// Leading '(' and trailing ')' are tested for here — presumably to strip
// them before parsing; confirm against the full source.
193 if (!s.empty() && s.front() ==
'(') {
196 if (!s.empty() && s.back() ==
')') {
// Drop every space so "0010, 0040" and "0010,0040" parse the same way.
201 s.erase(std::remove(s.begin(), s.end(),
' '), s.end());
204 std::size_t comma_pos = s.find(
',');
205 if (comma_pos != std::string::npos) {
// Base-16 parse of the group and element halves. std::stoul throws
// std::invalid_argument / std::out_of_range on bad input.
// NOTE(review): the cast narrows to 16 bits, so a value above 0xFFFF is
// truncated rather than rejected — no range check is visible here.
207 auto group =
static_cast<std::uint16_t
>(
208 std::stoul(s.substr(0, comma_pos),
nullptr, 16));
209 auto element =
static_cast<std::uint16_t
>(
210 std::stoul(s.substr(comma_pos + 1),
nullptr, 16));
// Keyword path: uses the unmodified input string, not the stripped copy.
220 auto info = dict.find_by_keyword(tag_str);
/**
 * Walks argv[1..argc) and records the recognised options in `opts`.
 *
 * Options that take a value are matched only when another argument follows
 * (`i + 1 < argc`); the value is consumed with `argv[++i]`. Arguments that do
 * not begin with '-' are collected as input paths.
 *
 * @param argc Argument count as passed to main().
 * @param argv Argument vector as passed to main().
 * @param opts Receives the parsed settings.
 * @return bool; main() prints the usage text when the result is false.
 */
235bool parse_arguments(
int argc,
char* argv[], options& opts) {
240 for (
int i = 1; i < argc; ++i) {
241 std::string arg = argv[i];
243 if (arg ==
"--help" || arg ==
"-h") {
245 }
else if ((arg ==
"-p" || arg ==
"--profile") && i + 1 < argc) {
246 std::string profile_name = argv[++i];
// Diagnostic for a profile name that could not be resolved; lists the
// accepted names.
250 std::cerr <<
"Error: Unknown profile '" << profile_name
252 std::cerr <<
"Available profiles: basic, clean_pixel, "
253 "clean_descriptions,\n";
254 std::cerr <<
" retain_longitudinal, "
255 "retain_patient_characteristics,\n";
256 std::cerr <<
" hipaa_safe_harbor, gdpr_compliant\n";
259 opts.profile = *profile;
260 }
else if ((arg ==
"-k" || arg ==
"--keep") && i + 1 < argc) {
// The tag may be numeric "(GGGG,EEEE)" or a keyword; see resolve_tag().
261 auto tag = resolve_tag(argv[++i]);
263 std::cerr <<
"Error: Invalid tag format for --keep\n";
266 opts.keep_tags.push_back(*tag);
267 }
else if ((arg ==
"-r" || arg ==
"--replace") && i + 1 < argc) {
268 std::string replace_arg = argv[++i];
// Split at the FIRST '=', so the value part may itself contain '='.
269 auto eq_pos = replace_arg.find(
'=');
270 if (eq_pos == std::string::npos) {
271 std::cerr <<
"Error: --replace requires tag=value format\n";
274 std::string tag_str = replace_arg.substr(0, eq_pos);
275 std::string value = replace_arg.substr(eq_pos + 1);
276 auto tag = resolve_tag(tag_str);
278 std::cerr <<
"Error: Invalid tag format for --replace\n";
// A repeated tag overwrites the earlier value (map operator[] assignment).
281 opts.replace_tags[*tag] = value;
282 }
else if (arg ==
"--patient-id" && i + 1 < argc) {
283 opts.new_patient_id = argv[++i];
284 }
else if (arg ==
"--patient-name" && i + 1 < argc) {
285 opts.new_patient_name = argv[++i];
286 }
else if (arg ==
"--retain-uid") {
287 opts.retain_uid =
true;
288 }
else if (arg ==
"--date-offset" && i + 1 < argc) {
// std::stoi throws on non-numeric or out-of-range text; the message below
// is presumably emitted from the matching handler — confirm.
290 opts.date_offset_days = std::stoi(argv[++i]);
292 std::cerr <<
"Error: Invalid date offset value\n";
295 }
else if ((arg ==
"-m" || arg ==
"--mapping-file") && i + 1 < argc) {
296 opts.mapping_file = argv[++i];
297 }
else if ((arg ==
"-o" || arg ==
"--output-dir") && i + 1 < argc) {
298 opts.output_path = argv[++i];
299 }
else if (arg ==
"--no-backup") {
300 opts.create_backup =
false;
301 }
else if (arg ==
"--recursive") {
302 opts.recursive =
true;
303 }
else if (arg ==
"--verify") {
305 }
else if (arg ==
"--dry-run") {
307 }
else if (arg ==
"--detailed") {
308 opts.detailed_report =
true;
309 }
else if (arg ==
"-v" || arg ==
"--verbose") {
311 }
// Catch-all for anything starting with '-'. A value-taking option given as
// the last argument also ends up here, because its own branch required
// `i + 1 < argc`; it is then reported as an unknown option.
else if (arg[0] ==
'-') {
312 std::cerr <<
"Error: Unknown option '" << arg <<
"'\n";
// Everything else is a positional input path.
315 opts.input_paths.emplace_back(arg);
320 if (opts.input_paths.empty()) {
321 std::cerr <<
"Error: No input files specified\n";
// Exactly two positionals with a regular file first: the second one is the
// output path, not another input.
326 if (opts.input_paths.size() == 2 &&
327 std::filesystem::is_regular_file(opts.input_paths[0])) {
328 opts.output_path = opts.input_paths[1];
329 opts.input_paths.resize(1);
/**
 * Reads a UID mapping from a JSON file into `mapping`.
 *
 * The whole file is read into memory and handed to `mapping.from_json`.
 * main() calls this as `load_mapping(opts.mapping_file, mapping)`.
 *
 * @param path Location of the mapping file.
 * @return bool; main() tests the negated result after the call.
 */
341bool load_mapping(
const std::filesystem::path& path,
// The file's existence is tested first; what happens for a missing file is
// not shown in this excerpt.
343 if (!std::filesystem::exists(path)) {
348 std::ifstream file(path);
350 std::cerr <<
"Error: Cannot open mapping file: " << path.string()
// Read the entire stream in one go, then parse the text as JSON.
355 std::stringstream buffer;
356 buffer << file.rdbuf();
357 auto result = mapping.
from_json(buffer.str());
358 if (result.is_err()) {
359 std::cerr <<
"Error: Invalid mapping file format\n";
/**
 * Writes the UID mapping to `path`.
 *
 * main() calls this as `save_mapping(opts.mapping_file, mapping)` and prints
 * a warning when the result is false.
 *
 * @param path Destination file, opened with std::ofstream.
 */
372bool save_mapping(
const std::filesystem::path& path,
374 std::ofstream file(path);
376 std::cerr <<
"Error: Cannot write mapping file: " << path.string()
/**
 * Copies `file_path` to a sibling file with ".bak" appended to its name,
 * replacing any existing backup.
 *
 * @param file_path File about to be overwritten.
 * @return bool; process_file() ignores the result.
 */
390bool create_backup(
const std::filesystem::path& file_path) {
// path::operator+= appends to the name without a separator:
// "a.dcm" -> "a.dcm.bak".
391 auto backup_path = file_path;
392 backup_path +=
".bak";
// error_code overload: a failed copy is reported through `ec`, not thrown.
395 std::filesystem::copy_file(
396 file_path, backup_path,
397 std::filesystem::copy_options::overwrite_existing, ec);
399 std::cerr <<
"Warning: Failed to create backup: " << backup_path
/**
 * Looks for values that still appear identifying after anonymization.
 *
 * process_file() calls this as `verify_anonymization(dataset, opts.profile)`.
 * For each tag in a fixed list, a value that is non-empty and does not look
 * like an anonymization placeholder produces one warning string.
 *
 * @return One human-readable warning per suspicious tag.
 */
413std::vector<std::string> verify_anonymization(
416 std::vector<std::string> warnings;
// Tags checked, paired with the label used in the warning text.
421 std::vector<std::pair<dicom_tag, std::string>> checks = {
422 {tags::patient_name,
"PatientName"},
423 {tags::patient_id,
"PatientID"},
424 {tags::patient_birth_date,
"PatientBirthDate"},
425 {dicom_tag{0x0010, 0x0050},
"PatientInsurancePlanCode"},
426 {dicom_tag{0x0010, 0x1000},
"OtherPatientIDs"},
427 {dicom_tag{0x0008, 0x0080},
"InstitutionName"},
428 {dicom_tag{0x0008, 0x0081},
"InstitutionAddress"},
431 for (
const auto& [tag,
name] : checks) {
// `value` is obtained for `tag` on a line not shown in this excerpt.
// A value passes if it is empty, equals "Anonymous", or contains "ANON"
// (which already covers "ANONYMOUS"); the match is case-sensitive.
433 if (!value.empty() && value !=
"Anonymous" && value !=
"ANONYMOUS" &&
434 value.find(
"ANON") == std::string::npos) {
// The warning includes the remaining value itself.
435 warnings.push_back(
"Tag " +
name +
" may contain identifying "
436 "information: " + value);
/**
 * Anonymizes one DICOM file and writes the result.
 *
 * Visible steps: open the input, configure an anonymizer from `opts`, run it
 * (with the UID mapping when one is in use), add the report's tag counters to
 * `stats`, optionally print details and verification warnings, then save to
 * `output_path`.
 *
 * @param input_path  File to read.
 * @param output_path File to write; may equal `input_path`.
 * @param stats       Run totals; the tag counters are incremented here.
 * @return bool; process_inputs() branches on the result.
 */
452bool process_file(
const std::filesystem::path& input_path,
453 const std::filesystem::path& output_path,
456 process_stats& stats) {
461 std::cout <<
"Processing: " << input_path.string() <<
"\n";
// Dry-run report: describes what would be done for this file.
466 std::cout <<
"Would anonymize: " << input_path.string() <<
"\n";
467 std::cout <<
" Profile: " << to_string(opts.profile) <<
"\n";
468 std::cout <<
" Output: " << output_path.string() <<
"\n";
469 if (!opts.keep_tags.empty()) {
470 std::cout <<
" Keep tags: " << opts.keep_tags.size() <<
"\n";
472 if (!opts.replace_tags.empty()) {
473 std::cout <<
" Replace tags: " << opts.replace_tags.size() <<
"\n";
// Load the input; a failure is reported with the library's error message.
479 auto result = dicom_file::open(input_path);
480 if (result.is_err()) {
481 std::cerr <<
"Error: Failed to open '" << input_path.string()
482 <<
"': " << result.error().message <<
"\n";
// `dataset` is a reference into `file`, not a copy.
486 auto file = std::move(result.value());
487 auto& dataset = file.dataset();
// Start from the selected profile, then layer the per-run overrides on top.
490 anonymizer anon(opts.profile);
493 if (opts.date_offset_days) {
494 anon.set_date_offset(std::chrono::days{*opts.date_offset_days});
// -k/--keep tags.
498 for (
const auto& tag : opts.keep_tags) {
499 anon.add_tag_action(tag, tag_action_config::make_keep());
501 std::cout <<
" Keeping tag: " << tag.to_string() <<
"\n";
// -r/--replace tag=value pairs.
506 for (
const auto& [tag, value] : opts.replace_tags) {
507 anon.add_tag_action(tag, tag_action_config::make_replace(value));
509 std::cout <<
" Replacing tag: " << tag.to_string()
510 <<
" = " << value <<
"\n";
// --patient-id / --patient-name become replace actions.
515 if (opts.new_patient_id) {
518 tag_action_config::make_replace(*opts.new_patient_id));
520 if (opts.new_patient_name) {
523 tag_action_config::make_replace(*opts.new_patient_name));
// --retain-uid: keep the three instance-level UID tags.
527 if (opts.retain_uid) {
528 anon.add_tag_action(tags::study_instance_uid,
529 tag_action_config::make_keep());
530 anon.add_tag_action(tags::series_instance_uid,
531 tag_action_config::make_keep());
532 anon.add_tag_action(tags::sop_instance_uid,
533 tag_action_config::make_keep());
// Use the mapping-aware call when a mapping file was requested or the
// in-memory mapping already has entries; otherwise anonymize without one.
537 auto anon_result = (!opts.mapping_file.empty() || !mapping.
empty())
538 ? anon.anonymize_with_mapping(dataset, mapping)
539 : anon.anonymize(dataset);
541 if (anon_result.is_err()) {
542 std::cerr <<
"Error: Anonymization failed for '"
543 << input_path.string()
544 <<
"': " << anon_result.error().message <<
"\n";
// Add this file's counts to the run totals.
548 auto report = anon_result.value();
549 stats.tags_removed +=
report.tags_removed;
550 stats.tags_replaced +=
report.tags_replaced;
551 stats.tags_kept +=
report.tags_kept;
// --detailed: full per-file breakdown of the report.
554 if (opts.detailed_report) {
555 std::cout <<
" Processed: " <<
report.total_tags_processed <<
" tags\n";
556 std::cout <<
" Removed: " <<
report.tags_removed <<
"\n";
557 std::cout <<
" Replaced: " <<
report.tags_replaced <<
"\n";
558 std::cout <<
" Emptied: " <<
report.tags_emptied <<
"\n";
559 std::cout <<
" UIDs replaced: " <<
report.uids_replaced <<
"\n";
560 std::cout <<
" Dates shifted: " <<
report.dates_shifted <<
"\n";
561 std::cout <<
" Kept: " <<
report.tags_kept <<
"\n";
// Check the dataset for values that still look identifying and list them.
566 auto warnings = verify_anonymization(dataset, opts.profile);
567 if (!warnings.empty()) {
568 std::cout <<
" Verification warnings:\n";
569 for (
const auto& warning : warnings) {
570 std::cout <<
" - " <<
warning <<
"\n";
572 }
else if (opts.verbose) {
573 std::cout <<
" Verification: PASSED\n";
// Build the output file from the dataset, reusing the input's transfer
// syntax. `dataset` is moved from here and is not used afterwards.
579 dicom_file::create(std::move(dataset), file.transfer_syntax());
// Make sure the destination directory exists.
// NOTE(review): this create_directories overload throws
// std::filesystem::filesystem_error on failure; no handler is visible here.
582 auto output_dir = output_path.parent_path();
583 if (!output_dir.empty() && !std::filesystem::exists(output_dir)) {
584 std::filesystem::create_directories(output_dir);
// In-place overwrite: back up the original first unless --no-backup was given.
// The backup's result is not checked.
588 if (input_path == output_path && opts.create_backup) {
589 create_backup(input_path);
593 auto save_result = output_file.save(output_path);
594 if (save_result.is_err()) {
595 std::cerr <<
"Error: Failed to save '" << output_path.string()
596 <<
"': " << save_result.error().message <<
"\n";
// Short per-file confirmation.
601 std::cout <<
" Saved: " << output_path.string() <<
"\n";
602 std::cout <<
" Tags removed: " <<
report.tags_removed
603 <<
", replaced: " <<
report.tags_replaced
604 <<
", kept: " <<
report.tags_kept <<
"\n";
/**
 * Runs process_file() over every input path in `opts.input_paths`.
 *
 * A directory is expanded to its regular files (recursively when
 * `opts.recursive` is set); a plain file is processed directly. When no
 * output path was given, each file is written back over itself.
 *
 * @param opts  Parsed command-line settings.
 * @param stats Run totals passed through to process_file().
 */
616void process_inputs(
const options& opts, process_stats& stats,
618 for (
const auto& input_path : opts.input_paths) {
619 if (!std::filesystem::exists(input_path)) {
620 std::cerr <<
"Error: Path does not exist: " << input_path.string()
626 if (std::filesystem::is_directory(input_path)) {
// Per-file handler shared by the recursive and the flat directory walk.
628 auto process_entry = [&](
const std::filesystem::path& file_path) {
// Compare the extension case-insensitively.
// NOTE(review): ::tolower on a plain `char` is undefined for negative
// values (non-ASCII bytes); a cast to unsigned char is the usual guard.
629 auto ext = file_path.extension().string();
630 std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
// True only for a non-empty extension other than .dcm/.dicom, so files
// with no extension are not caught by this test. Such files are
// presumably skipped — the branch body is not shown in this excerpt.
631 if (ext !=
".dcm" && ext !=
".dicom" && !ext.empty()) {
// With an output directory, mirror the file's path relative to the input
// directory underneath it; otherwise overwrite in place.
637 std::filesystem::path output_path;
638 if (!opts.output_path.empty()) {
640 std::filesystem::relative(file_path, input_path);
641 output_path = opts.output_path /
relative;
644 output_path = file_path;
647 if (process_file(file_path, output_path, opts, mapping,
// Only regular files are handed to the handler in either walk.
655 if (opts.recursive) {
656 for (
const auto& entry :
657 std::filesystem::recursive_directory_iterator(
659 if (entry.is_regular_file()) {
660 process_entry(entry.path());
664 for (
const auto& entry :
665 std::filesystem::directory_iterator(input_path)) {
666 if (entry.is_regular_file()) {
667 process_entry(entry.path());
// Single-file input: write to the requested output path, or in place.
675 std::filesystem::path output_path;
676 if (!opts.output_path.empty()) {
677 output_path = opts.output_path;
680 output_path = input_path;
683 if (process_file(input_path, output_path, opts, mapping, stats)) {
/**
 * Prints the end-of-run totals to std::cout.
 *
 * @param stats Counters accumulated during the run.
 */
696void print_summary(
const process_stats& stats) {
// The summary is printed only when more than one file was counted or at
// least one tag was removed.
697 if (stats.total_files > 1 || stats.tags_removed > 0) {
699 std::cout <<
"========================================\n";
700 std::cout <<
" Anonymization Summary\n";
701 std::cout <<
"========================================\n";
// File counts.
702 std::cout <<
" Total files: " << stats.total_files <<
"\n";
703 std::cout <<
" Successful: " << stats.successful <<
"\n";
704 std::cout <<
" Failed: " << stats.failed <<
"\n";
705 std::cout <<
" ----------------------------------------\n";
// Tag counts summed over all files.
706 std::cout <<
" Tags removed: " << stats.tags_removed <<
"\n";
707 std::cout <<
" Tags replaced: " << stats.tags_replaced <<
"\n";
708 std::cout <<
" Tags kept: " << stats.tags_kept <<
"\n";
709 std::cout <<
"========================================\n";
/**
 * Program entry point.
 *
 * Visible flow: show the banner, parse the arguments (printing usage on
 * failure), load the UID mapping when a mapping file was given, process all
 * inputs, save the mapping back, then print the summary.
 */
715int main(
int argc,
char* argv[]) {
717 ____ ____ __ __ _ _ _ ___ _ ___ ____ __ ___ ________
718 | _ \ / ___| \/ | / \ | \ | |/ _ \| \ | \ \ / / \/ |_ _|__ / __|
719 | | | | | | |\/| | / _ \ | \| | | | | \| |\ V /| |\/| || | / /| _|
720 | |_| | |___| | | | / ___ \| |\ | |_| | |\ | | | | | | || | / /_| |__
721 |____/ \____|_| |_| /_/ \_\_| \_|\___/|_| \_| |_| |_| |_|___/____|____|
723 DICOM De-identification Utility (PS3.15 Compliant)
// Bad arguments: show the usage text.
728 if (!parse_arguments(argc, argv, opts)) {
729 print_usage(argv[0]);
// Load existing mappings only when a mapping file was given.
735 if (!opts.mapping_file.empty()) {
736 if (!load_mapping(opts.mapping_file, mapping)) {
739 if (opts.verbose && !mapping.
empty()) {
740 std::cout <<
"Loaded " << mapping.
size()
741 <<
" existing UID mappings\n";
747 std::cout <<
"Anonymization profile: " << to_string(opts.profile)
753 process_inputs(opts, stats, mapping);
// Write the mapping back only when a file was given, there is something to
// write, and this is not a dry run. A failed save is reported as a warning.
756 if (!opts.mapping_file.empty() && !mapping.
empty() && !opts.dry_run) {
757 if (!save_mapping(opts.mapping_file, mapping)) {
758 std::cerr <<
"Warning: Failed to save UID mapping file\n";
759 }
else if (opts.verbose) {
760 std::cout <<
"Saved " << mapping.
size() <<
" UID mappings to "
761 << opts.mapping_file.string() <<
"\n";
766 print_summary(stats);
// Any failed file is tested for here; the resulting action is not shown in
// this excerpt.
768 if (stats.failed > 0) {
// Single-file run that succeeded: print a one-line confirmation.
772 if (stats.total_files == 1 && stats.successful == 1) {
773 std::cout <<
"Successfully anonymized file.\n";
DICOM de-identification profiles per PS3.15 Annex E.
auto get_string(dicom_tag tag, std::string_view default_value="") const -> std::string
Get the string value of an element.
static auto instance() -> dicom_dictionary &
Get the singleton instance.
auto to_json() const -> std::string
Export mappings to JSON format.
auto empty() const -> bool
Check if the mapping is empty.
auto size() const -> std::size_t
Get the number of mappings.
auto from_json(std::string_view json) -> kcenon::common::VoidResult
Import mappings from JSON format.
DICOM Data Dictionary for tag metadata lookup.
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
DICOM de-identification/anonymization per PS3.15 Annex E.
@ failed
Job failed with error.
anonymization_profile
DICOM de-identification profiles based on PS3.15 Annex E.
@ basic
Basic Profile - Remove direct identifiers.
auto profile_from_string(std::string_view name) -> std::optional< anonymization_profile >
Parse profile from string.
@ relative
RELATIVE - Relative dose.
UID mapping for consistent de-identification across studies.