PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
hsm_storage.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
11
13
#include <algorithm>
#include <array>
#include <chrono>
#include <iterator>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <stdexcept>
19
20namespace kcenon::pacs::storage {
21
22using kcenon::common::make_error;
23using kcenon::common::ok;
24
25namespace {
26
28constexpr int kInvalidConfiguration = -100;
29constexpr int kInstanceNotFound = -101;
30constexpr int kMigrationFailed = -102;
31constexpr int kTierNotAvailable = -103;
32constexpr int kIntegrityError = -104;
33
34} // namespace
35
36// ============================================================================
37// Construction
38// ============================================================================
39
40hsm_storage::hsm_storage(std::unique_ptr<storage_interface> hot_tier,
41 std::unique_ptr<storage_interface> warm_tier,
42 std::unique_ptr<storage_interface> cold_tier,
43 const hsm_storage_config& config)
44 : hot_tier_(std::move(hot_tier)),
45 warm_tier_(std::move(warm_tier)),
46 cold_tier_(std::move(cold_tier)),
47 config_(config) {
48 if (!hot_tier_) {
49 throw std::invalid_argument("hot_tier cannot be nullptr");
50 }
51}
52
53// ============================================================================
54// storage_interface Implementation
55// ============================================================================
56
57auto hsm_storage::store(const core::dicom_dataset& dataset) -> VoidResult {
58 // Extract SOP Instance UID for metadata tracking
59 auto sop_uid = dataset.get_string(core::tags::sop_instance_uid);
60 if (sop_uid.empty()) {
61 return make_error<std::monostate>(
62 kInvalidConfiguration, "Missing SOP Instance UID", "hsm_storage");
63 }
64
65 // Store in hot tier
66 auto result = hot_tier_->store(dataset);
67 if (!result.is_ok()) {
68 return result;
69 }
70
71 // Update metadata
72 std::unique_lock lock(mutex_);
73 update_metadata(sop_uid, storage_tier::hot, dataset);
74
75 return ok();
76}
77
78auto hsm_storage::retrieve(std::string_view sop_instance_uid)
80 // Find which tier contains the instance
81 auto tier = find_tier(sop_instance_uid);
82 if (!tier.has_value()) {
83 return make_error<core::dicom_dataset>(
84 kInstanceNotFound,
85 "Instance not found: " + std::string(sop_instance_uid),
86 "hsm_storage");
87 }
88
89 // Retrieve from the tier
90 auto* storage = get_storage(*tier);
91 if (storage == nullptr) {
92 return make_error<core::dicom_dataset>(
93 kTierNotAvailable, "Tier storage not available", "hsm_storage");
94 }
95
96 auto result = storage->retrieve(sop_instance_uid);
97 if (!result.is_ok()) {
98 return result;
99 }
100
101 // Update access time if tracking is enabled
102 if (config_.track_access_time) {
103 std::unique_lock lock(mutex_);
104 update_access_time(sop_instance_uid);
105 }
106
107 return result;
108}
109
110auto hsm_storage::remove(std::string_view sop_instance_uid) -> VoidResult {
111 // Find which tier contains the instance
112 auto tier = find_tier(sop_instance_uid);
113 if (!tier.has_value()) {
114 // Not found is not an error for remove
115 return ok();
116 }
117
118 // Remove from the tier
119 auto* storage = get_storage(*tier);
120 if (storage == nullptr) {
121 return make_error<std::monostate>(
122 kTierNotAvailable, "Tier storage not available", "hsm_storage");
123 }
124
125 auto result = storage->remove(sop_instance_uid);
126 if (!result.is_ok()) {
127 return result;
128 }
129
130 // Remove metadata
131 std::unique_lock lock(mutex_);
132 remove_metadata(sop_instance_uid);
133
134 return ok();
135}
136
137auto hsm_storage::exists(std::string_view sop_instance_uid) const -> bool {
138 std::shared_lock lock(mutex_);
139 return metadata_index_.contains(std::string(sop_instance_uid));
140}
141
144 std::vector<core::dicom_dataset> combined_results;
145
146 // Search all tiers and combine results
147 std::vector<storage_interface*> tiers = {hot_tier_.get(), warm_tier_.get(),
148 cold_tier_.get()};
149
150 for (auto* tier : tiers) {
151 if (tier == nullptr) {
152 continue;
153 }
154
155 auto result = tier->find(query);
156 if (result.is_ok()) {
157 auto& datasets = result.value();
158 combined_results.insert(combined_results.end(), datasets.begin(),
159 datasets.end());
160 }
161 }
162
163 return combined_results;
164}
165
167 storage_statistics stats;
168
169 std::shared_lock lock(mutex_);
170
171 // Aggregate from all tiers
172 std::vector<storage_interface*> tiers = {hot_tier_.get(), warm_tier_.get(),
173 cold_tier_.get()};
174
175 std::set<std::string> studies;
176 std::set<std::string> series;
177 std::set<std::string> patients;
178
179 for (auto* tier : tiers) {
180 if (tier == nullptr) {
181 continue;
182 }
183
184 auto tier_stats = tier->get_statistics();
185 stats.total_instances += tier_stats.total_instances;
186 stats.total_bytes += tier_stats.total_bytes;
187 }
188
189 // Count unique studies/series from metadata
190 for (const auto& [uid, meta] : metadata_index_) {
191 if (!meta.study_instance_uid.empty()) {
192 studies.insert(meta.study_instance_uid);
193 }
194 if (!meta.series_instance_uid.empty()) {
195 series.insert(meta.series_instance_uid);
196 }
197 }
198
199 stats.studies_count = studies.size();
200 stats.series_count = series.size();
201
202 return stats;
203}
204
205auto hsm_storage::verify_integrity() -> VoidResult {
206 // Verify each tier
207 std::vector<std::pair<storage_tier, storage_interface*>> tiers = {
208 {storage_tier::hot, hot_tier_.get()},
209 {storage_tier::warm, warm_tier_.get()},
210 {storage_tier::cold, cold_tier_.get()}};
211
212 for (const auto& [tier, storage] : tiers) {
213 if (storage == nullptr) {
214 continue;
215 }
216
217 auto result = storage->verify_integrity();
218 if (!result.is_ok()) {
219 return make_error<std::monostate>(
220 kIntegrityError,
221 "Integrity check failed for " + std::string(to_string(tier)) +
222 " tier: " + std::string(result.error().message),
223 "hsm_storage");
224 }
225 }
226
227 // Verify metadata consistency
228 std::shared_lock lock(mutex_);
229 for (const auto& [uid, meta] : metadata_index_) {
230 auto* storage = get_storage(meta.current_tier);
231 if (storage == nullptr) {
232 continue;
233 }
234
235 if (!storage->exists(uid)) {
236 return make_error<std::monostate>(
237 kIntegrityError,
238 "Metadata references non-existent instance: " + uid,
239 "hsm_storage");
240 }
241 }
242
243 return ok();
244}
245
246// ============================================================================
247// HSM-specific Operations
248// ============================================================================
249
250auto hsm_storage::get_tier(std::string_view sop_instance_uid) const
251 -> std::optional<storage_tier> {
252 std::shared_lock lock(mutex_);
253 auto it = metadata_index_.find(std::string(sop_instance_uid));
254 if (it == metadata_index_.end()) {
255 return std::nullopt;
256 }
257 return it->second.current_tier;
258}
259
260auto hsm_storage::get_tier_metadata(std::string_view sop_instance_uid) const
261 -> std::optional<tier_metadata> {
262 std::shared_lock lock(mutex_);
263 auto it = metadata_index_.find(std::string(sop_instance_uid));
264 if (it == metadata_index_.end()) {
265 return std::nullopt;
266 }
267 return it->second;
268}
269
270auto hsm_storage::migrate(std::string_view sop_instance_uid,
271 storage_tier target_tier) -> VoidResult {
272 // Find current tier
273 auto current_tier_opt = find_tier(sop_instance_uid);
274 if (!current_tier_opt.has_value()) {
275 return make_error<std::monostate>(
276 kInstanceNotFound,
277 "Instance not found: " + std::string(sop_instance_uid),
278 "hsm_storage");
279 }
280
281 auto current_tier = *current_tier_opt;
282 if (current_tier == target_tier) {
283 // Already in target tier
284 return ok();
285 }
286
287 return migrate_instance(sop_instance_uid, current_tier, target_tier);
288}
289
291 storage_tier to_tier) const
292 -> std::vector<tier_metadata> {
293 std::vector<tier_metadata> candidates;
294
295 std::shared_lock lock(mutex_);
296 for (const auto& [uid, meta] : metadata_index_) {
297 if (meta.current_tier == from_tier &&
298 meta.should_migrate(config_.policy, to_tier)) {
299 candidates.push_back(meta);
300 }
301 }
302
303 // Sort by age (oldest first)
304 std::sort(candidates.begin(), candidates.end(),
305 [](const tier_metadata& a, const tier_metadata& b) {
306 return a.stored_at < b.stored_at;
307 });
308
309 return candidates;
310}
311
313 migration_result result;
314 auto start_time = std::chrono::steady_clock::now();
315
316 // Hot to warm migration
317 if (warm_tier_) {
318 auto candidates =
319 get_migration_candidates(storage_tier::hot, storage_tier::warm);
320
321 for (const auto& meta : candidates) {
322 if (result.instances_migrated >=
323 config_.policy.max_instances_per_cycle) {
324 break;
325 }
326 if (result.bytes_migrated >= config_.policy.max_bytes_per_cycle) {
327 break;
328 }
329
330 auto migrate_result = migrate_instance(
331 meta.sop_instance_uid, storage_tier::hot, storage_tier::warm);
332
333 if (migrate_result.is_ok()) {
334 result.instances_migrated++;
335 result.bytes_migrated += meta.size_bytes;
336 } else {
337 result.failed_uids.push_back(meta.sop_instance_uid);
338 }
339 }
340 }
341
342 // Warm to cold migration
343 if (cold_tier_) {
344 auto candidates =
345 get_migration_candidates(storage_tier::warm, storage_tier::cold);
346
347 for (const auto& meta : candidates) {
348 if (result.instances_migrated >=
349 config_.policy.max_instances_per_cycle) {
350 break;
351 }
352 if (result.bytes_migrated >= config_.policy.max_bytes_per_cycle) {
353 break;
354 }
355
356 auto migrate_result = migrate_instance(
357 meta.sop_instance_uid, storage_tier::warm, storage_tier::cold);
358
359 if (migrate_result.is_ok()) {
360 result.instances_migrated++;
361 result.bytes_migrated += meta.size_bytes;
362 } else {
363 result.failed_uids.push_back(meta.sop_instance_uid);
364 }
365 }
366 }
367
368 // Also check hot to cold (if warm tier is skipped)
369 if (!warm_tier_ && cold_tier_) {
370 auto candidates =
371 get_migration_candidates(storage_tier::hot, storage_tier::cold);
372
373 for (const auto& meta : candidates) {
374 if (result.instances_migrated >=
375 config_.policy.max_instances_per_cycle) {
376 break;
377 }
378 if (result.bytes_migrated >= config_.policy.max_bytes_per_cycle) {
379 break;
380 }
381
382 auto migrate_result = migrate_instance(
383 meta.sop_instance_uid, storage_tier::hot, storage_tier::cold);
384
385 if (migrate_result.is_ok()) {
386 result.instances_migrated++;
387 result.bytes_migrated += meta.size_bytes;
388 } else {
389 result.failed_uids.push_back(meta.sop_instance_uid);
390 }
391 }
392 }
393
394 auto end_time = std::chrono::steady_clock::now();
395 result.duration = std::chrono::duration_cast<std::chrono::milliseconds>(
396 end_time - start_time);
397
398 return result;
399}
400
402 std::shared_lock lock(mutex_);
403 return config_.policy;
404}
405
407 std::unique_lock lock(mutex_);
408 config_.policy = policy;
409}
410
412 hsm_statistics stats;
413
414 std::shared_lock lock(mutex_);
415
416 std::set<std::string> hot_studies, hot_series;
417 std::set<std::string> warm_studies, warm_series;
418 std::set<std::string> cold_studies, cold_series;
419
420 for (const auto& [uid, meta] : metadata_index_) {
421 switch (meta.current_tier) {
423 stats.hot.instance_count++;
424 stats.hot.total_bytes += meta.size_bytes;
425 if (!meta.study_instance_uid.empty()) {
426 hot_studies.insert(meta.study_instance_uid);
427 }
428 if (!meta.series_instance_uid.empty()) {
429 hot_series.insert(meta.series_instance_uid);
430 }
431 break;
433 stats.warm.instance_count++;
434 stats.warm.total_bytes += meta.size_bytes;
435 if (!meta.study_instance_uid.empty()) {
436 warm_studies.insert(meta.study_instance_uid);
437 }
438 if (!meta.series_instance_uid.empty()) {
439 warm_series.insert(meta.series_instance_uid);
440 }
441 break;
443 stats.cold.instance_count++;
444 stats.cold.total_bytes += meta.size_bytes;
445 if (!meta.study_instance_uid.empty()) {
446 cold_studies.insert(meta.study_instance_uid);
447 }
448 if (!meta.series_instance_uid.empty()) {
449 cold_series.insert(meta.series_instance_uid);
450 }
451 break;
452 }
453 }
454
455 stats.hot.study_count = hot_studies.size();
456 stats.hot.series_count = hot_series.size();
457 stats.warm.study_count = warm_studies.size();
458 stats.warm.series_count = warm_series.size();
459 stats.cold.study_count = cold_studies.size();
460 stats.cold.series_count = cold_series.size();
461
462 return stats;
463}
464
467 return get_storage(tier);
468}
469
470// ============================================================================
471// Internal Helper Methods
472// ============================================================================
473
474auto hsm_storage::find_tier(std::string_view sop_instance_uid) const
475 -> std::optional<storage_tier> {
476 std::shared_lock lock(mutex_);
477 auto it = metadata_index_.find(std::string(sop_instance_uid));
478 if (it != metadata_index_.end()) {
479 return it->second.current_tier;
480 }
481
482 // Fallback: search each tier directly
483 lock.unlock();
484
485 if (hot_tier_ && hot_tier_->exists(sop_instance_uid)) {
486 return storage_tier::hot;
487 }
488 if (warm_tier_ && warm_tier_->exists(sop_instance_uid)) {
489 return storage_tier::warm;
490 }
491 if (cold_tier_ && cold_tier_->exists(sop_instance_uid)) {
492 return storage_tier::cold;
493 }
494
495 return std::nullopt;
496}
497
499 switch (tier) {
501 return hot_tier_.get();
503 return warm_tier_.get();
505 return cold_tier_.get();
506 }
507 return nullptr;
508}
509
510void hsm_storage::update_metadata(std::string_view sop_instance_uid,
511 storage_tier tier,
512 const core::dicom_dataset& dataset) {
513 std::string uid(sop_instance_uid);
514
515 tier_metadata meta;
516 meta.sop_instance_uid = uid;
517 meta.current_tier = tier;
518 meta.stored_at = std::chrono::system_clock::now();
519 meta.study_instance_uid =
523
524 // Try to get size from dataset (if available)
525 // For now, estimate based on pixel data if present
526 meta.size_bytes = 0; // Will be updated by storage backend
527
528 metadata_index_[uid] = std::move(meta);
529}
530
531void hsm_storage::update_access_time(std::string_view sop_instance_uid) {
532 auto it = metadata_index_.find(std::string(sop_instance_uid));
533 if (it != metadata_index_.end()) {
534 it->second.last_accessed = std::chrono::system_clock::now();
535 }
536}
537
538void hsm_storage::remove_metadata(std::string_view sop_instance_uid) {
539 metadata_index_.erase(std::string(sop_instance_uid));
540}
541
542auto hsm_storage::migrate_instance(std::string_view uid, storage_tier from_tier,
543 storage_tier to_tier) -> VoidResult {
544 auto* source = get_storage(from_tier);
545 auto* target = get_storage(to_tier);
546
547 if (source == nullptr) {
548 return make_error<std::monostate>(
549 kTierNotAvailable,
550 "Source tier not available: " + std::string(to_string(from_tier)),
551 "hsm_storage");
552 }
553 if (target == nullptr) {
554 return make_error<std::monostate>(
555 kTierNotAvailable,
556 "Target tier not available: " + std::string(to_string(to_tier)),
557 "hsm_storage");
558 }
559
560 // Retrieve from source
561 auto retrieve_result = source->retrieve(uid);
562 if (!retrieve_result.is_ok()) {
563 return make_error<std::monostate>(
564 kMigrationFailed,
565 "Failed to retrieve from source: " +
566 std::string(retrieve_result.error().message),
567 "hsm_storage");
568 }
569
570 auto& dataset = retrieve_result.value();
571
572 // Store to target
573 auto store_result = target->store(dataset);
574 if (!store_result.is_ok()) {
575 return make_error<std::monostate>(
576 kMigrationFailed,
577 "Failed to store to target: " +
578 std::string(store_result.error().message),
579 "hsm_storage");
580 }
581
582 // Verify if configured
583 if (config_.verify_after_migration) {
584 if (!target->exists(uid)) {
585 return make_error<std::monostate>(
586 kMigrationFailed,
587 "Verification failed: instance not found in target tier",
588 "hsm_storage");
589 }
590 }
591
592 // Remove from source if configured
593 if (config_.delete_after_migration) {
594 auto remove_result = source->remove(uid);
595 // Ignore remove failure - the instance is already in target tier
596 (void)remove_result;
597 }
598
599 // Update metadata
600 {
601 std::unique_lock lock(mutex_);
602 auto it = metadata_index_.find(std::string(uid));
603 if (it != metadata_index_.end()) {
604 it->second.current_tier = to_tier;
605 }
606 }
607
608 return ok();
609}
610
611} // namespace kcenon::pacs::storage
auto get_string(dicom_tag tag, std::string_view default_value="") const -> std::string
Get the string value of an element.
auto get_tier(std::string_view sop_instance_uid) const -> std::optional< storage_tier >
Get the current tier of an instance.
std::unique_ptr< storage_interface > warm_tier_
Warm tier storage backend (may be nullptr)
std::shared_mutex mutex_
Mutex for thread-safe access.
std::unordered_map< std::string, tier_metadata > metadata_index_
Tier metadata index (SOP Instance UID -> metadata)
auto migrate(std::string_view sop_instance_uid, storage_tier target_tier) -> VoidResult
Manually migrate an instance to a different tier.
void update_metadata(std::string_view sop_instance_uid, storage_tier tier, const core::dicom_dataset &dataset)
Update tier metadata after store/retrieve.
void update_access_time(std::string_view sop_instance_uid)
Update last access time for an instance.
auto get_tier_policy() const -> tier_policy
Get the current tier policy.
auto retrieve(std::string_view sop_instance_uid) -> Result< core::dicom_dataset > override
Retrieve a DICOM dataset by SOP Instance UID.
auto migrate_instance(std::string_view uid, storage_tier from_tier, storage_tier to_tier) -> VoidResult
Migrate a single instance between tiers.
void remove_metadata(std::string_view sop_instance_uid)
Remove tier metadata.
auto remove(std::string_view sop_instance_uid) -> VoidResult override
Remove a DICOM dataset from all tiers.
std::unique_ptr< storage_interface > hot_tier_
Hot tier storage backend.
auto get_tier_metadata(std::string_view sop_instance_uid) const -> std::optional< tier_metadata >
Get tier metadata for an instance.
auto run_migration_cycle() -> migration_result
Run a single migration cycle.
auto verify_integrity() -> VoidResult override
Verify storage integrity across all tiers.
auto find_tier(std::string_view sop_instance_uid) const -> std::optional< storage_tier >
Find which tier contains an instance.
hsm_storage(std::unique_ptr< storage_interface > hot_tier, std::unique_ptr< storage_interface > warm_tier, std::unique_ptr< storage_interface > cold_tier, const hsm_storage_config &config={})
Construct HSM storage with three tier backends.
void set_tier_policy(const tier_policy &policy)
Set the tier policy.
hsm_storage_config config_
HSM configuration.
auto exists(std::string_view sop_instance_uid) const -> bool override
Check if a DICOM instance exists in any tier.
auto get_storage(storage_tier tier) const -> storage_interface *
Get the storage backend for a tier.
auto get_tier_storage(storage_tier tier) const -> storage_interface *
Get the storage backend for a specific tier.
auto get_migration_candidates(storage_tier from_tier, storage_tier to_tier) const -> std::vector< tier_metadata >
Get instances eligible for migration.
auto get_hsm_statistics() const -> hsm_statistics
Get HSM-specific statistics.
auto find(const core::dicom_dataset &query) -> Result< std::vector< core::dicom_dataset > > override
Find DICOM datasets matching query criteria across all tiers.
auto store(const core::dicom_dataset &dataset) -> VoidResult override
Store a DICOM dataset to the hot tier.
std::unique_ptr< storage_interface > cold_tier_
Cold tier storage backend (may be nullptr)
auto get_statistics() const -> storage_statistics override
Get combined storage statistics from all tiers.
Compile-time constants for commonly used DICOM tags.
Hierarchical Storage Management (HSM) for multi-tier DICOM storage.
constexpr dicom_tag sop_instance_uid
SOP Instance UID.
constexpr dicom_tag study_instance_uid
Study Instance UID.
constexpr dicom_tag series_instance_uid
Series Instance UID.
storage_tier
Storage tier classification.
Definition hsm_types.h:37
@ hot
Hot tier - Recent, frequently accessed data (SSD/NVMe)
@ cold
Cold tier - Archive, rarely accessed data (S3/Glacier)
@ warm
Warm tier - Older, occasionally accessed data (HDD)
auto to_string(storage_tier tier) -> std::string
Convert storage_tier to string.
Combined statistics for all HSM tiers.
Definition hsm_types.h:282
Configuration for HSM storage.
Definition hsm_storage.h:32
tier_policy policy
Tier migration policy.
Definition hsm_storage.h:34
Result of a migration operation.
Definition hsm_types.h:230
std::chrono::milliseconds duration
Duration of the migration operation.
Definition hsm_types.h:238
std::size_t bytes_migrated
Total bytes migrated.
Definition hsm_types.h:235
std::size_t instances_migrated
Number of instances successfully migrated.
Definition hsm_types.h:232
std::vector< std::string > failed_uids
SOP Instance UIDs that failed to migrate.
Definition hsm_types.h:241
Metadata for tracking instance tier location.
Definition hsm_types.h:142
std::size_t size_bytes
Size of the instance in bytes.
Definition hsm_types.h:158
std::string series_instance_uid
Series Instance UID (for grouping migrations)
Definition hsm_types.h:164
std::string study_instance_uid
Study Instance UID (for grouping migrations)
Definition hsm_types.h:161
std::string sop_instance_uid
SOP Instance UID of the DICOM instance.
Definition hsm_types.h:144
std::chrono::system_clock::time_point stored_at
Timestamp when instance was stored.
Definition hsm_types.h:150
storage_tier current_tier
Current storage tier.
Definition hsm_types.h:147
std::string_view uid