PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
file_storage.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11
15
16#include <algorithm>
17#include <chrono>
18#include <fstream>
19#include <random>
20#include <set>
21
22namespace kcenon::pacs::storage {
23
24using kcenon::common::make_error;
25using kcenon::common::ok;
26
namespace {

// Error codes handed to make_error() by the storage operations in this file.
constexpr int kMissingRequiredUid = -1;
constexpr int kDuplicateInstance = -2;
constexpr int kFileNotFound = -3;
constexpr int kFileWriteError = -4;
constexpr int kFileReadError = -5;
constexpr int kDirectoryCreateError = -6;
constexpr int kIntegrityError = -7;

/**
 * @brief Build a randomized sibling path used to stage a write next to @p base.
 *
 * The result is located in the parent directory of @p base and is named
 * "<filename of base>.tmp.<random unsigned 64-bit number>".
 *
 * @param base Final destination path of the file being written.
 * @return Path of the temporary file in the same directory as @p base.
 *
 * @note The random engine is thread_local. store() invokes this helper
 *       without holding mutex_, so an engine shared between threads would be
 *       mutated concurrently (a data race).
 */
auto generate_temp_filename(const std::filesystem::path& base)
    -> std::filesystem::path {
    // One engine per thread, seeded once from the system entropy source.
    thread_local std::mt19937_64 gen{std::random_device{}()};
    std::uniform_int_distribution<uint64_t> dist;

    auto temp_name = base.filename().string() + ".tmp." +
                     std::to_string(dist(gen));
    return base.parent_path() / temp_name;
}

}  // namespace
51
52// ============================================================================
53// Construction
54// ============================================================================
55
57 : config_(config) {
58 // Create root directory if configured
60 std::error_code ec;
61 std::filesystem::create_directories(config_.root_path, ec);
62 // Ignore error - will be caught during store operations
63 }
64
65 // Rebuild index from existing files
66 if (std::filesystem::exists(config_.root_path)) {
67 (void)rebuild_index();
68 }
69}
70
71// ============================================================================
72// storage_interface Implementation
73// ============================================================================
74
75auto file_storage::store(const core::dicom_dataset& dataset) -> VoidResult {
76 // Extract required UIDs
77 auto study_uid = dataset.get_string(core::tags::study_instance_uid);
78 auto series_uid = dataset.get_string(core::tags::series_instance_uid);
79 auto sop_uid = dataset.get_string(core::tags::sop_instance_uid);
80
81 if (study_uid.empty() || series_uid.empty() || sop_uid.empty()) {
82 return make_error<std::monostate>(
83 kMissingRequiredUid,
84 "Missing required UID (Study, Series, or SOP Instance UID)",
85 "file_storage");
86 }
87
88 // Build file path based on naming scheme
89 std::filesystem::path file_path;
90 switch (config_.naming) {
92 file_path = build_path(study_uid, series_uid, sop_uid);
93 break;
95 auto study_date = dataset.get_string(core::tags::study_date);
96 if (study_date.empty()) {
97 // Use current date if study date not available
98 auto now = std::chrono::system_clock::now();
99 auto time = std::chrono::system_clock::to_time_t(now);
100 std::tm tm_buf{};
101#ifdef _WIN32
102 localtime_s(&tm_buf, &time);
103#else
104 localtime_r(&time, &tm_buf);
105#endif
106 char date_str[9];
107 std::strftime(date_str, sizeof(date_str), "%Y%m%d", &tm_buf);
108 study_date = date_str;
109 }
110 file_path = build_date_path(study_date, study_uid, sop_uid);
111 break;
112 }
114 file_path = config_.root_path /
115 (sanitize_uid(sop_uid) + config_.file_extension);
116 break;
117 }
118
119 // Handle duplicate checking
120 {
121 std::shared_lock lock(mutex_);
122 if (index_.contains(sop_uid)) {
123 switch (config_.duplicate) {
125 return make_error<std::monostate>(
126 kDuplicateInstance,
127 "Instance already exists: " + sop_uid,
128 "file_storage");
130 return ok();
132 // Continue to overwrite
133 break;
134 }
135 }
136 }
137
138 // Create directories if needed
139 if (config_.create_directories) {
140 std::error_code ec;
141 std::filesystem::create_directories(file_path.parent_path(), ec);
142 if (ec) {
143 return make_error<std::monostate>(
144 kDirectoryCreateError,
145 "Failed to create directory: " + ec.message(),
146 "file_storage");
147 }
148 }
149
150 // Create DICOM file and write atomically
151 auto dicom_file = core::dicom_file::create(
153
154 // Write to temporary file first
155 auto temp_path = generate_temp_filename(file_path);
156 auto save_result = dicom_file.save(temp_path);
157
158 if (save_result.is_err()) {
159 std::filesystem::remove(temp_path);
160 return make_error<std::monostate>(
161 kFileWriteError,
162 "Failed to write DICOM file: " + save_result.error().message,
163 "file_storage");
164 }
165
166 // Atomic rename
167 std::error_code ec;
168 std::filesystem::rename(temp_path, file_path, ec);
169 if (ec) {
170 std::filesystem::remove(temp_path);
171 return make_error<std::monostate>(
172 kFileWriteError,
173 "Failed to rename temp file: " + ec.message(),
174 "file_storage");
175 }
176
177 // Update index
178 {
179 std::unique_lock lock(mutex_);
180 index_[sop_uid] = file_path;
181 }
182
183 return ok();
184}
185
186auto file_storage::retrieve(std::string_view sop_instance_uid)
188 std::filesystem::path file_path;
189
190 {
191 std::shared_lock lock(mutex_);
192 auto it = index_.find(std::string{sop_instance_uid});
193 if (it == index_.end()) {
194 return make_error<core::dicom_dataset>(
195 kFileNotFound,
196 "Instance not found: " + std::string{sop_instance_uid},
197 "file_storage");
198 }
199 file_path = it->second;
200 }
201
202 // Read DICOM file
203 auto open_result = core::dicom_file::open(file_path);
204 if (open_result.is_err()) {
205 return make_error<core::dicom_dataset>(
206 kFileReadError,
207 "Failed to read DICOM file: " + open_result.error().message,
208 "file_storage");
209 }
210
211 return open_result.value().dataset();
212}
213
214auto file_storage::remove(std::string_view sop_instance_uid) -> VoidResult {
215 std::filesystem::path file_path;
216
217 {
218 std::unique_lock lock(mutex_);
219 auto it = index_.find(std::string{sop_instance_uid});
220 if (it == index_.end()) {
221 // Not found is not an error for remove
222 return ok();
223 }
224 file_path = it->second;
225 index_.erase(it);
226 }
227
228 // Delete the file
229 std::error_code ec;
230 std::filesystem::remove(file_path, ec);
231 // Ignore errors - file might have been deleted externally
232
233 // Try to clean up empty parent directories
234 auto parent = file_path.parent_path();
235 while (parent != config_.root_path) {
236 if (std::filesystem::is_empty(parent)) {
237 std::filesystem::remove(parent, ec);
238 parent = parent.parent_path();
239 } else {
240 break;
241 }
242 }
243
244 return ok();
245}
246
247auto file_storage::exists(std::string_view sop_instance_uid) const -> bool {
248 std::shared_lock lock(mutex_);
249 return index_.contains(std::string{sop_instance_uid});
250}
251
254 std::vector<core::dicom_dataset> results;
255
256 std::vector<std::filesystem::path> paths_to_check;
257 {
258 std::shared_lock lock(mutex_);
259 paths_to_check.reserve(index_.size());
260 for (const auto& [uid, path] : index_) {
261 paths_to_check.push_back(path);
262 }
263 }
264
265 for (const auto& path : paths_to_check) {
266 auto open_result = core::dicom_file::open(path);
267 if (open_result.is_err()) {
268 continue; // Skip files that can't be read
269 }
270
271 const auto& dataset = open_result.value().dataset();
272 if (matches_query(dataset, query)) {
273 results.push_back(dataset);
274 }
275 }
276
277 return results;
278}
279
281 storage_statistics stats;
282
283 std::set<std::string> studies;
284 std::set<std::string> series;
285 std::set<std::string> patients;
286
287 std::vector<std::filesystem::path> paths;
288 {
289 std::shared_lock lock(mutex_);
290 stats.total_instances = index_.size();
291 paths.reserve(index_.size());
292 for (const auto& [uid, path] : index_) {
293 paths.push_back(path);
294 }
295 }
296
297 for (const auto& path : paths) {
298 std::error_code ec;
299 stats.total_bytes += std::filesystem::file_size(path, ec);
300
301 // Read file to get study/series/patient info
302 auto open_result = core::dicom_file::open(path);
303 if (open_result.is_ok()) {
304 const auto& ds = open_result.value().dataset();
305 auto study_uid = ds.get_string(core::tags::study_instance_uid);
306 auto series_uid = ds.get_string(core::tags::series_instance_uid);
307 auto patient_id = ds.get_string(core::tags::patient_id);
308
309 if (!study_uid.empty()) {
310 studies.insert(study_uid);
311 }
312 if (!series_uid.empty()) {
313 series.insert(series_uid);
314 }
315 if (!patient_id.empty()) {
316 patients.insert(patient_id);
317 }
318 }
319 }
320
321 stats.studies_count = studies.size();
322 stats.series_count = series.size();
323 stats.patients_count = patients.size();
324
325 return stats;
326}
327
328auto file_storage::verify_integrity() -> VoidResult {
329 std::vector<std::pair<std::string, std::filesystem::path>> entries;
330 {
331 std::shared_lock lock(mutex_);
332 entries.reserve(index_.size());
333 for (const auto& [uid, path] : index_) {
334 entries.emplace_back(uid, path);
335 }
336 }
337
338 std::vector<std::string> invalid_entries;
339
340 for (const auto& [uid, path] : entries) {
341 if (!std::filesystem::exists(path)) {
342 invalid_entries.push_back(uid + " (file missing)");
343 continue;
344 }
345
346 auto open_result = core::dicom_file::open(path);
347 if (open_result.is_err()) {
348 invalid_entries.push_back(uid + " (invalid DICOM)");
349 continue;
350 }
351
352 // Verify UID matches
353 auto file_uid =
354 open_result.value().dataset().get_string(core::tags::sop_instance_uid);
355 if (file_uid != uid) {
356 invalid_entries.push_back(uid + " (UID mismatch)");
357 }
358 }
359
360 if (!invalid_entries.empty()) {
361 std::string message = "Integrity check failed for " +
362 std::to_string(invalid_entries.size()) +
363 " entries";
364 return make_error<std::monostate>(kIntegrityError, message,
365 "file_storage");
366 }
367
368 return ok();
369}
370
371// ============================================================================
372// File-specific Operations
373// ============================================================================
374
375auto file_storage::get_file_path(std::string_view sop_instance_uid) const
376 -> std::filesystem::path {
377 std::shared_lock lock(mutex_);
378 auto it = index_.find(std::string{sop_instance_uid});
379 if (it != index_.end()) {
380 return it->second;
381 }
382 return {};
383}
384
385auto file_storage::import_directory(const std::filesystem::path& source)
386 -> VoidResult {
387 if (!std::filesystem::exists(source)) {
388 return make_error<std::monostate>(
389 kFileNotFound,
390 "Source directory does not exist: " + source.string(),
391 "file_storage");
392 }
393
394 std::error_code ec;
395 for (const auto& entry :
396 std::filesystem::recursive_directory_iterator(source, ec)) {
397 if (!entry.is_regular_file()) {
398 continue;
399 }
400
401 // Try to open as DICOM file
402 auto open_result = core::dicom_file::open(entry.path());
403 if (open_result.is_err()) {
404 continue; // Not a DICOM file, skip
405 }
406
407 // Store the dataset
408 auto store_result = store(open_result.value().dataset());
409 if (store_result.is_err()) {
410 // Log error but continue with other files
411 // In production, this should be logged
412 }
413 }
414
415 return ok();
416}
417
418auto file_storage::root_path() const -> const std::filesystem::path& {
419 return config_.root_path;
420}
421
422auto file_storage::rebuild_index() -> VoidResult {
423 std::unique_lock lock(mutex_);
424 index_.clear();
425
426 if (!std::filesystem::exists(config_.root_path)) {
427 return ok();
428 }
429
430 std::error_code ec;
431 for (const auto& entry :
432 std::filesystem::recursive_directory_iterator(config_.root_path, ec)) {
433 if (!entry.is_regular_file()) {
434 continue;
435 }
436
437 // Check file extension
438 if (!config_.file_extension.empty() &&
439 entry.path().extension() != config_.file_extension) {
440 continue;
441 }
442
443 // Try to read as DICOM file
444 auto open_result = core::dicom_file::open(entry.path());
445 if (open_result.is_err()) {
446 continue;
447 }
448
449 auto sop_uid =
450 open_result.value().dataset().get_string(core::tags::sop_instance_uid);
451 if (!sop_uid.empty()) {
452 index_[sop_uid] = entry.path();
453 }
454 }
455
456 return ok();
457}
458
459// ============================================================================
460// Internal Helper Methods
461// ============================================================================
462
463auto file_storage::build_path(std::string_view study_uid,
464 std::string_view series_uid,
465 std::string_view sop_uid) const
466 -> std::filesystem::path {
467 return config_.root_path / sanitize_uid(study_uid) /
468 sanitize_uid(series_uid) /
469 (sanitize_uid(sop_uid) + config_.file_extension);
470}
471
472auto file_storage::build_date_path(std::string_view study_date,
473 std::string_view study_uid,
474 std::string_view sop_uid) const
475 -> std::filesystem::path {
476 // Parse date YYYYMMDD
477 std::string year = "unknown";
478 std::string month = "01";
479 std::string day = "01";
480
481 if (study_date.length() >= 8) {
482 year = std::string{study_date.substr(0, 4)};
483 month = std::string{study_date.substr(4, 2)};
484 day = std::string{study_date.substr(6, 2)};
485 }
486
487 return config_.root_path / year / month / day / sanitize_uid(study_uid) /
488 (sanitize_uid(sop_uid) + config_.file_extension);
489}
490
491void file_storage::update_index(const std::string& sop_uid,
492 const std::filesystem::path& path) {
493 std::unique_lock lock(mutex_);
494 index_[sop_uid] = path;
495}
496
497void file_storage::remove_from_index(const std::string& sop_uid) {
498 std::unique_lock lock(mutex_);
499 index_.erase(sop_uid);
500}
501
503 const core::dicom_dataset& query) -> bool {
504 // If query is empty, match all
505 if (query.empty()) {
506 return true;
507 }
508
509 // Check each query element
510 for (const auto& [tag, element] : query) {
511 auto query_value = element.as_string().unwrap_or("");
512 if (query_value.empty()) {
513 continue; // Empty value acts as wildcard
514 }
515
516 auto dataset_value = dataset.get_string(tag);
517
518 // Support basic wildcard matching (* and ?)
519 if (query_value.find('*') != std::string::npos ||
520 query_value.find('?') != std::string::npos) {
521 // Convert to regex-like matching
522 std::string pattern = query_value;
523 // Escape regex special characters except * and ?
524 std::string escaped;
525 for (char c : pattern) {
526 if (c == '*') {
527 escaped += ".*";
528 } else if (c == '?') {
529 escaped += ".";
530 } else if (c == '.' || c == '[' || c == ']' || c == '(' ||
531 c == ')' || c == '+' || c == '^' || c == '$' ||
532 c == '|' || c == '\\') {
533 escaped += '\\';
534 escaped += c;
535 } else {
536 escaped += c;
537 }
538 }
539
540 // Simple pattern matching (without full regex for performance)
541 // For now, just check if pattern starts with or ends with value
542 if (query_value.front() == '*' && query_value.back() == '*') {
543 // Contains
544 auto inner =
545 query_value.substr(1, query_value.length() - 2);
546 if (dataset_value.find(inner) == std::string::npos) {
547 return false;
548 }
549 } else if (query_value.front() == '*') {
550 // Ends with
551 auto suffix = query_value.substr(1);
552 if (dataset_value.length() < suffix.length() ||
553 dataset_value.substr(dataset_value.length() -
554 suffix.length()) != suffix) {
555 return false;
556 }
557 } else if (query_value.back() == '*') {
558 // Starts with
559 auto prefix = query_value.substr(0, query_value.length() - 1);
560 if (dataset_value.substr(0, prefix.length()) != prefix) {
561 return false;
562 }
563 }
564 } else {
565 // Exact match
566 if (dataset_value != query_value) {
567 return false;
568 }
569 }
570 }
571
572 return true;
573}
574
575auto file_storage::sanitize_uid(std::string_view uid) -> std::string {
576 std::string result;
577 result.reserve(uid.length());
578
579 for (char c : uid) {
580 // UIDs contain digits and dots, which are safe for filesystems
581 // Replace any other characters with underscore
582 if (std::isalnum(static_cast<unsigned char>(c)) || c == '.') {
583 result += c;
584 } else {
585 result += '_';
586 }
587 }
588
589 return result;
590}
591
592} // namespace kcenon::pacs::storage
if(!color.empty()) style.color
static auto open(const std::filesystem::path &path) -> kcenon::pacs::Result< dicom_file >
Open and read a DICOM file from disk.
static auto create(dicom_dataset dataset, const encoding::transfer_syntax &ts) -> dicom_file
Create a new DICOM file from a dataset.
static const transfer_syntax explicit_vr_little_endian
Explicit VR Little Endian (1.2.840.10008.1.2.1)
auto retrieve(std::string_view sop_instance_uid) -> Result< core::dicom_dataset > override
Retrieve a DICOM dataset by SOP Instance UID.
auto import_directory(const std::filesystem::path &source) -> VoidResult
Import DICOM files from a directory.
auto build_date_path(std::string_view study_date, std::string_view study_uid, std::string_view sop_uid) const -> std::filesystem::path
Build filesystem path using date-based hierarchy.
auto find(const core::dicom_dataset &query) -> Result< std::vector< core::dicom_dataset > > override
Find DICOM datasets matching query criteria.
std::unordered_map< std::string, std::filesystem::path > index_
Mapping from SOP Instance UID to file path.
void update_index(const std::string &sop_uid, const std::filesystem::path &path)
Update internal index with new mapping.
file_storage(const file_storage_config &config)
Construct file storage with configuration.
auto get_file_path(std::string_view sop_instance_uid) const -> std::filesystem::path
Get the filesystem path for a SOP Instance UID.
void remove_from_index(const std::string &sop_uid)
Remove entry from internal index.
static auto sanitize_uid(std::string_view uid) -> std::string
Sanitize UID for use in filesystem path.
auto rebuild_index() -> VoidResult
Rebuild the internal index from filesystem.
file_storage_config config_
Storage configuration.
std::shared_mutex mutex_
Mutex for thread-safe access.
auto root_path() const -> const std::filesystem::path &
Get the root storage path.
auto get_statistics() const -> storage_statistics override
Get storage statistics.
auto exists(std::string_view sop_instance_uid) const -> bool override
Check if a DICOM instance exists.
auto remove(std::string_view sop_instance_uid) -> VoidResult override
Remove a DICOM file by SOP Instance UID.
auto store(const core::dicom_dataset &dataset) -> VoidResult override
Store a DICOM dataset to filesystem.
static auto matches_query(const core::dicom_dataset &dataset, const core::dicom_dataset &query) -> bool
Check if dataset matches query criteria.
auto verify_integrity() -> VoidResult override
Verify storage integrity.
auto build_path(std::string_view study_uid, std::string_view series_uid, std::string_view sop_uid) const -> std::filesystem::path
Build filesystem path for a dataset.
Compile-time constants for commonly used DICOM tags.
Filesystem-based DICOM storage with hierarchical organization.
constexpr dicom_tag patient_id
Patient ID.
constexpr dicom_tag sop_instance_uid
SOP Instance UID.
constexpr dicom_tag study_instance_uid
Study Instance UID.
constexpr dicom_tag study_date
Study Date.
constexpr dicom_tag series_instance_uid
Series Instance UID.
@ flat
{SOPUID}.dcm (flat structure)
@ date_hierarchical
YYYY/MM/DD/{StudyUID}/{SOPUID}.dcm.
@ uid_hierarchical
{StudyUID}/{SeriesUID}/{SOPUID}.dcm
@ ignore
Skip silently if instance exists.
@ reject
Return error if instance already exists.
@ replace
Overwrite existing instance.
Configuration for file_storage.
bool create_directories
Create directories automatically if they don't exist.
std::filesystem::path root_path
Root directory for storage.
std::string_view uid