PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
azure_blob_storage.cpp
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
16
21
22#include <algorithm>
23#include <iomanip>
24#include <set>
25#include <sstream>
26
27#if defined(PACS_WITH_AZURE_SDK) && !defined(PACS_USE_MOCK_AZURE)
28#include <azure/storage/blobs.hpp>
29#endif
30
31namespace kcenon::pacs::storage {
32
33using kcenon::common::make_error;
34using kcenon::common::ok;
35
36namespace {
37
/// Error codes passed to make_error() by this translation unit.
/// The value -5 is not assigned to any constant here.
constexpr int kMissingRequiredUid{-1};  ///< A Study/Series/SOP UID was empty.
constexpr int kBlobNotFound{-2};        ///< Blob or index entry does not exist.
constexpr int kUploadError{-3};         ///< Upload, staging, commit or delete failed.
constexpr int kDownloadError{-4};       ///< Blob download failed.
constexpr int kConnectionError{-6};     ///< Client reports it is not connected.
constexpr int kIntegrityError{-7};      ///< Index refers to blobs that are missing.
constexpr int kSerializationError{-8};  ///< DICOM encode or decode failed.
constexpr int kTierChangeError{-9};     ///< Access tier change failed.
47
/// Compute a simple polynomial rolling hash (multiplier 31) over a byte
/// buffer and render it as lowercase hexadecimal, zero-padded to at least
/// 16 characters.
///
/// @param data Bytes to hash; an empty buffer hashes to all zeros.
/// @return Hexadecimal text of the accumulated std::size_t value.
auto compute_content_hash(const std::vector<std::uint8_t> &data)
    -> std::string {
    std::size_t accumulator = 0;
    for (auto it = data.begin(); it != data.end(); ++it) {
        // Unsigned arithmetic: wrap-around on overflow is well defined.
        accumulator = accumulator * 31 + *it;
    }

    std::ostringstream rendered;
    rendered << std::hex;
    rendered << std::setfill('0');
    rendered << std::setw(16) << accumulator;
    return rendered.str();
}
64
/// Encode text as standard Base64 (RFC 4648 alphabet, '=' padding).
///
/// @param input Raw bytes to encode, carried in a std::string.
/// @return Base64 text whose length is a multiple of four.
auto base64_encode(const std::string &input) -> std::string {
    static constexpr char kAlphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string encoded;
    encoded.reserve(((input.size() + 2) / 3) * 4);

    for (std::size_t i = 0; i < input.size(); i += 3) {
        const std::size_t remaining = input.size() - i;
        const std::uint32_t b0 = static_cast<unsigned char>(input[i]);
        const std::uint32_t b1 =
            remaining > 1 ? static_cast<unsigned char>(input[i + 1]) : 0U;
        const std::uint32_t b2 =
            remaining > 2 ? static_cast<unsigned char>(input[i + 2]) : 0U;
        const std::uint32_t triple = (b0 << 16) | (b1 << 8) | b2;

        encoded += kAlphabet[(triple >> 18) & 0x3FU];
        encoded += kAlphabet[(triple >> 12) & 0x3FU];
        encoded += remaining > 1 ? kAlphabet[(triple >> 6) & 0x3FU] : '=';
        encoded += remaining > 2 ? kAlphabet[triple & 0x3FU] : '=';
    }
    return encoded;
}

/// Build the identifier for one block of a block-blob upload.
///
/// The readable form is "block_" followed by the index zero-padded to six
/// digits. That text is then Base64-encoded, because the Azure Put Block
/// operation only accepts Base64 block ids ('_' is not in the Base64
/// alphabet) and requires every id of a blob to have the same length.
/// Indices below 1,000,000 all yield 16-character ids.
///
/// @param block_index Zero-based position of the block within the blob.
/// @return Base64-encoded block id.
auto generate_block_id(std::size_t block_index) -> std::string {
    std::ostringstream oss;
    oss << "block_" << std::setfill('0') << std::setw(6) << block_index;
    return base64_encode(oss.str());
}
75
76} // namespace
77
78// ============================================================================
79// Azure Client Interface
80// ============================================================================
81
89public:
90 virtual ~azure_client_interface() = default;
91
92 [[nodiscard]] virtual auto put_blob(const std::string &blob_name,
93 const std::vector<std::uint8_t> &data)
94 -> VoidResult = 0;
95
96 [[nodiscard]] virtual auto get_blob(const std::string &blob_name)
98
99 [[nodiscard]] virtual auto delete_blob(const std::string &blob_name)
100 -> VoidResult = 0;
101
102 [[nodiscard]] virtual auto head_blob(const std::string &blob_name) const
103 -> bool = 0;
104
105 [[nodiscard]] virtual auto get_blob_size(const std::string &blob_name) const
106 -> std::size_t = 0;
107
108 [[nodiscard]] virtual auto get_blob_etag(const std::string &blob_name) const
109 -> std::string = 0;
110
111 [[nodiscard]] virtual auto get_blob_md5(const std::string &blob_name) const
112 -> std::string = 0;
113
114 [[nodiscard]] virtual auto list_blobs() const
115 -> std::vector<std::string> = 0;
116
117 [[nodiscard]] virtual auto is_connected() const -> bool = 0;
118
119 [[nodiscard]] virtual auto stage_block(
120 const std::string &blob_name, const std::string &block_id,
121 const std::vector<std::uint8_t> &data) -> VoidResult = 0;
122
123 [[nodiscard]] virtual auto commit_blocks(
124 const std::string &blob_name,
125 const std::vector<std::string> &block_ids) -> VoidResult = 0;
126
127 [[nodiscard]] virtual auto set_tier(const std::string &blob_name,
128 const std::string &tier)
129 -> VoidResult = 0;
130};
131
132// ============================================================================
133// Mock Azure Client Implementation
134// ============================================================================
135
144 : public azure_blob_storage::azure_client_interface {
145public:
146 explicit mock_azure_client(const azure_storage_config & /*config*/)
147 : connected_(true) {}
148
149 [[nodiscard]] auto put_blob(const std::string &blob_name,
150 const std::vector<std::uint8_t> &data)
151 -> VoidResult override {
152 if (!connected_) {
153 return make_error<std::monostate>(
154 kConnectionError, "Azure client not connected",
155 "azure_blob_storage");
156 }
157 blob_data blob;
158 blob.data = data;
159 blob.etag = "\"" + compute_content_hash(data) + "\"";
160 blob.content_md5 = compute_content_hash(data);
161 blob.tier = "Hot";
162 blobs_[blob_name] = std::move(blob);
163 return ok();
164 }
165
166 [[nodiscard]] auto get_blob(const std::string &blob_name)
168 if (!connected_) {
169 return make_error<std::vector<std::uint8_t>>(
170 kConnectionError, "Azure client not connected",
171 "azure_blob_storage");
172 }
173 auto it = blobs_.find(blob_name);
174 if (it == blobs_.end()) {
175 return make_error<std::vector<std::uint8_t>>(
176 kBlobNotFound, "Blob not found: " + blob_name,
177 "azure_blob_storage");
178 }
179 return it->second.data;
180 }
181
182 [[nodiscard]] auto delete_blob(const std::string &blob_name)
183 -> VoidResult override {
184 if (!connected_) {
185 return make_error<std::monostate>(
186 kConnectionError, "Azure client not connected",
187 "azure_blob_storage");
188 }
189 blobs_.erase(blob_name);
190 return ok();
191 }
192
193 [[nodiscard]] auto head_blob(const std::string &blob_name) const
194 -> bool override {
195 if (!connected_) {
196 return false;
197 }
198 return blobs_.contains(blob_name);
199 }
200
201 [[nodiscard]] auto get_blob_size(const std::string &blob_name) const
202 -> std::size_t override {
203 auto it = blobs_.find(blob_name);
204 if (it != blobs_.end()) {
205 return it->second.data.size();
206 }
207 return 0;
208 }
209
210 [[nodiscard]] auto get_blob_etag(const std::string &blob_name) const
211 -> std::string override {
212 auto it = blobs_.find(blob_name);
213 if (it != blobs_.end()) {
214 return it->second.etag;
215 }
216 return {};
217 }
218
219 [[nodiscard]] auto get_blob_md5(const std::string &blob_name) const
220 -> std::string override {
221 auto it = blobs_.find(blob_name);
222 if (it != blobs_.end()) {
223 return it->second.content_md5;
224 }
225 return {};
226 }
227
228 [[nodiscard]] auto list_blobs() const
229 -> std::vector<std::string> override {
230 std::vector<std::string> names;
231 names.reserve(blobs_.size());
232 for (const auto &[name, data] : blobs_) {
233 names.push_back(name);
234 }
235 return names;
236 }
237
238 [[nodiscard]] auto is_connected() const -> bool override {
239 return connected_;
240 }
241
242 [[nodiscard]] auto stage_block(const std::string &blob_name,
243 const std::string &block_id,
244 const std::vector<std::uint8_t> &data)
245 -> VoidResult override {
246 if (!connected_) {
247 return make_error<std::monostate>(
248 kConnectionError, "Azure client not connected",
249 "azure_blob_storage");
250 }
251 staged_blocks_[blob_name][block_id] = data;
252 return ok();
253 }
254
255 [[nodiscard]] auto commit_blocks(const std::string &blob_name,
256 const std::vector<std::string> &block_ids)
257 -> VoidResult override {
258 if (!connected_) {
259 return make_error<std::monostate>(
260 kConnectionError, "Azure client not connected",
261 "azure_blob_storage");
262 }
263
264 auto it = staged_blocks_.find(blob_name);
265 if (it == staged_blocks_.end()) {
266 return make_error<std::monostate>(
267 kUploadError, "No staged blocks found for: " + blob_name,
268 "azure_blob_storage");
269 }
270
271 std::vector<std::uint8_t> combined_data;
272 for (const auto &block_id : block_ids) {
273 auto block_it = it->second.find(block_id);
274 if (block_it == it->second.end()) {
275 return make_error<std::monostate>(
276 kUploadError, "Block not found: " + block_id,
277 "azure_blob_storage");
278 }
279 combined_data.insert(combined_data.end(), block_it->second.begin(),
280 block_it->second.end());
281 }
282
283 blob_data blob;
284 blob.data = std::move(combined_data);
285 blob.etag = "\"" + compute_content_hash(blob.data) + "\"";
286 blob.content_md5 = compute_content_hash(blob.data);
287 blob.tier = "Hot";
288 blobs_[blob_name] = std::move(blob);
289
290 staged_blocks_.erase(blob_name);
291 return ok();
292 }
293
294 [[nodiscard]] auto set_tier(const std::string &blob_name,
295 const std::string &tier)
296 -> VoidResult override {
297 if (!connected_) {
298 return make_error<std::monostate>(
299 kConnectionError, "Azure client not connected",
300 "azure_blob_storage");
301 }
302 auto it = blobs_.find(blob_name);
303 if (it == blobs_.end()) {
304 return make_error<std::monostate>(
305 kBlobNotFound, "Blob not found: " + blob_name,
306 "azure_blob_storage");
307 }
308 it->second.tier = tier;
309 return ok();
310 }
311
312private:
/// One blob as held by the mock: its bytes plus the metadata strings the
/// mock reports back.
struct blob_data {
    std::vector<std::uint8_t> data{};  ///< Blob contents.
    std::string etag{};                ///< Quoted content hash.
    std::string content_md5{};         ///< Bare content hash.
    std::string tier{};                ///< Tier label, e.g. "Hot".
};
319
320 std::unordered_map<std::string, blob_data> blobs_;
321 std::unordered_map<std::string,
322 std::unordered_map<std::string, std::vector<std::uint8_t>>>
325};
326
327// ============================================================================
328// Azure SDK Client Implementation
329// ============================================================================
330
331#if defined(PACS_WITH_AZURE_SDK) && !defined(PACS_USE_MOCK_AZURE)
332
339class azure_sdk_client
341public:
342 explicit azure_sdk_client(const azure_storage_config &config) {
343 if (config.endpoint_url.has_value()) {
344 // Azurite or custom endpoint
345 container_client_ =
346 std::make_unique<Azure::Storage::Blobs::BlobContainerClient>(
347 config.endpoint_url.value() + "/" + config.container_name);
348 } else {
349 container_client_ =
350 std::make_unique<Azure::Storage::Blobs::BlobContainerClient>(
351 Azure::Storage::Blobs::BlobContainerClient::
352 CreateFromConnectionString(config.connection_string,
353 config.container_name));
354 }
355 }
356
357 [[nodiscard]] auto put_blob(const std::string &blob_name,
358 const std::vector<std::uint8_t> &data)
359 -> VoidResult override {
360 try {
361 auto blob_client = container_client_->GetBlockBlobClient(blob_name);
362 Azure::Core::IO::MemoryBodyStream stream(data);
363 blob_client.Upload(stream);
364 return ok();
365 } catch (const Azure::Storage::StorageException &e) {
366 return make_error<std::monostate>(
367 kUploadError,
368 "Azure PutBlob failed: " + std::string(e.what()),
369 "azure_blob_storage");
370 }
371 }
372
373 [[nodiscard]] auto get_blob(const std::string &blob_name)
374 -> Result<std::vector<std::uint8_t>> override {
375 try {
376 auto blob_client = container_client_->GetBlobClient(blob_name);
377 auto response = blob_client.Download();
378 auto &body = response.Value.BodyStream;
379
380 std::vector<std::uint8_t> result(
381 static_cast<std::size_t>(body->Length()));
382 body->ReadToCount(result.data(), result.size());
383 return result;
384 } catch (const Azure::Storage::StorageException &e) {
385 if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) {
386 return make_error<std::vector<std::uint8_t>>(
387 kBlobNotFound, "Blob not found: " + blob_name,
388 "azure_blob_storage");
389 }
390 return make_error<std::vector<std::uint8_t>>(
391 kDownloadError,
392 "Azure GetBlob failed: " + std::string(e.what()),
393 "azure_blob_storage");
394 }
395 }
396
397 [[nodiscard]] auto delete_blob(const std::string &blob_name)
398 -> VoidResult override {
399 try {
400 auto blob_client = container_client_->GetBlobClient(blob_name);
401 blob_client.Delete();
402 return ok();
403 } catch (const Azure::Storage::StorageException &e) {
404 return make_error<std::monostate>(
405 kUploadError,
406 "Azure DeleteBlob failed: " + std::string(e.what()),
407 "azure_blob_storage");
408 }
409 }
410
411 [[nodiscard]] auto head_blob(const std::string &blob_name) const
412 -> bool override {
413 try {
414 auto blob_client = container_client_->GetBlobClient(blob_name);
415 blob_client.GetProperties();
416 return true;
417 } catch (const Azure::Storage::StorageException &) {
418 return false;
419 }
420 }
421
422 [[nodiscard]] auto get_blob_size(const std::string &blob_name) const
423 -> std::size_t override {
424 try {
425 auto blob_client = container_client_->GetBlobClient(blob_name);
426 auto props = blob_client.GetProperties();
427 return static_cast<std::size_t>(props.Value.BlobSize);
428 } catch (const Azure::Storage::StorageException &) {
429 return 0;
430 }
431 }
432
433 [[nodiscard]] auto get_blob_etag(const std::string &blob_name) const
434 -> std::string override {
435 try {
436 auto blob_client = container_client_->GetBlobClient(blob_name);
437 auto props = blob_client.GetProperties();
438 return props.Value.ETag.ToString();
439 } catch (const Azure::Storage::StorageException &) {
440 return {};
441 }
442 }
443
444 [[nodiscard]] auto get_blob_md5(const std::string &blob_name) const
445 -> std::string override {
446 try {
447 auto blob_client = container_client_->GetBlobClient(blob_name);
448 auto props = blob_client.GetProperties();
449 if (props.Value.HttpHeaders.ContentHash.Value.empty()) {
450 return {};
451 }
452 // Convert hash bytes to hex string
453 std::ostringstream oss;
454 for (auto byte : props.Value.HttpHeaders.ContentHash.Value) {
455 oss << std::hex << std::setfill('0') << std::setw(2)
456 << static_cast<int>(byte);
457 }
458 return oss.str();
459 } catch (const Azure::Storage::StorageException &) {
460 return {};
461 }
462 }
463
464 [[nodiscard]] auto list_blobs() const
465 -> std::vector<std::string> override {
466 std::vector<std::string> names;
467 try {
468 for (auto page = container_client_->ListBlobs(); page.HasPage();
469 page.MoveToNextPage()) {
470 for (const auto &blob : page.Blobs) {
471 names.push_back(blob.Name);
472 }
473 }
474 } catch (const Azure::Storage::StorageException &) {
475 // Return whatever was collected
476 }
477 return names;
478 }
479
480 [[nodiscard]] auto is_connected() const -> bool override {
481 return container_client_ != nullptr;
482 }
483
484 [[nodiscard]] auto stage_block(const std::string &blob_name,
485 const std::string &block_id,
486 const std::vector<std::uint8_t> &data)
487 -> VoidResult override {
488 try {
489 auto blob_client = container_client_->GetBlockBlobClient(blob_name);
490 Azure::Core::IO::MemoryBodyStream stream(data);
491 blob_client.StageBlock(block_id, stream);
492 return ok();
493 } catch (const Azure::Storage::StorageException &e) {
494 return make_error<std::monostate>(
495 kUploadError,
496 "Azure StageBlock failed: " + std::string(e.what()),
497 "azure_blob_storage");
498 }
499 }
500
501 [[nodiscard]] auto commit_blocks(const std::string &blob_name,
502 const std::vector<std::string> &block_ids)
503 -> VoidResult override {
504 try {
505 auto blob_client = container_client_->GetBlockBlobClient(blob_name);
506 blob_client.CommitBlockList(block_ids);
507 return ok();
508 } catch (const Azure::Storage::StorageException &e) {
509 return make_error<std::monostate>(
510 kUploadError,
511 "Azure CommitBlockList failed: " + std::string(e.what()),
512 "azure_blob_storage");
513 }
514 }
515
516 [[nodiscard]] auto set_tier(const std::string &blob_name,
517 const std::string &tier)
518 -> VoidResult override {
519 try {
520 auto blob_client = container_client_->GetBlobClient(blob_name);
521 Azure::Storage::Blobs::Models::AccessTier access_tier;
522 if (tier == "Hot") {
523 access_tier = Azure::Storage::Blobs::Models::AccessTier::Hot;
524 } else if (tier == "Cool") {
525 access_tier = Azure::Storage::Blobs::Models::AccessTier::Cool;
526 } else if (tier == "Archive") {
527 access_tier = Azure::Storage::Blobs::Models::AccessTier::Archive;
528 } else {
529 access_tier = Azure::Storage::Blobs::Models::AccessTier::Hot;
530 }
531 blob_client.SetAccessTier(access_tier);
532 return ok();
533 } catch (const Azure::Storage::StorageException &e) {
534 return make_error<std::monostate>(
535 kTierChangeError,
536 "Azure SetAccessTier failed: " + std::string(e.what()),
537 "azure_blob_storage");
538 }
539 }
540
541private:
542 std::unique_ptr<Azure::Storage::Blobs::BlobContainerClient>
543 container_client_;
544};
545
546#endif // PACS_WITH_AZURE_SDK && !PACS_USE_MOCK_AZURE
547
548// ============================================================================
549// Construction
550// ============================================================================
551
553 : config_(config),
554#if defined(PACS_WITH_AZURE_SDK) && !defined(PACS_USE_MOCK_AZURE)
555 client_(std::make_unique<azure_sdk_client>(config))
556#else
557 client_(std::make_unique<mock_azure_client>(config))
558#endif
559{
560}
561
563
564// ============================================================================
565// storage_interface Implementation
566// ============================================================================
567
569 -> VoidResult {
570 return store_with_progress(dataset, nullptr);
571}
572
574 const core::dicom_dataset &dataset, azure_progress_callback callback)
575 -> VoidResult {
576 // Extract required UIDs
577 auto study_uid = dataset.get_string(core::tags::study_instance_uid);
578 auto series_uid = dataset.get_string(core::tags::series_instance_uid);
579 auto sop_uid = dataset.get_string(core::tags::sop_instance_uid);
580
581 if (study_uid.empty() || series_uid.empty() || sop_uid.empty()) {
582 return make_error<std::monostate>(
583 kMissingRequiredUid,
584 "Missing required UID (Study, Series, or SOP Instance UID)",
585 "azure_blob_storage");
586 }
587
588 // Build blob name
589 auto blob_name = build_blob_name(study_uid, series_uid, sop_uid);
590
591 // Create DICOM file and serialize to bytes
592 auto dicom_file = core::dicom_file::create(
594
595 auto data = dicom_file.to_bytes();
596 if (data.empty()) {
597 return make_error<std::monostate>(kSerializationError,
598 "Failed to serialize DICOM dataset",
599 "azure_blob_storage");
600 }
601
602 // Report initial progress
603 if (callback && !callback(0, data.size())) {
604 return make_error<std::monostate>(kUploadError, "Upload cancelled by user",
605 "azure_blob_storage");
606 }
607
608 // Upload to Azure (use block blob for large files)
609 VoidResult upload_result = ok();
610 if (data.size() > config_.block_upload_threshold) {
611 upload_result = upload_block_blob(blob_name, data, callback);
612 } else {
613 upload_result = client_->put_blob(blob_name, data);
614
615 // Report completion progress
616 if (callback) {
617 callback(data.size(), data.size());
618 }
619 }
620
621 if (upload_result.is_err()) {
622 return upload_result;
623 }
624
625 // Update local index
626 {
627 std::unique_lock lock(mutex_);
628 azure_blob_info info;
629 info.blob_name = blob_name;
630 info.sop_instance_uid = sop_uid;
631 info.study_instance_uid = study_uid;
632 info.series_instance_uid = series_uid;
633 info.size_bytes = data.size();
634 info.etag = client_->get_blob_etag(blob_name);
635 info.content_md5 = client_->get_blob_md5(blob_name);
636 index_[sop_uid] = std::move(info);
637 }
638
639 return ok();
640}
641
642auto azure_blob_storage::retrieve(std::string_view sop_instance_uid)
644 return retrieve_with_progress(sop_instance_uid, nullptr);
645}
646
648 std::string_view sop_instance_uid, azure_progress_callback callback)
650 std::string blob_name;
651
652 {
653 std::shared_lock lock(mutex_);
654 auto it = index_.find(std::string{sop_instance_uid});
655 if (it == index_.end()) {
656 return make_error<core::dicom_dataset>(
657 kBlobNotFound,
658 "Instance not found: " + std::string{sop_instance_uid},
659 "azure_blob_storage");
660 }
661 blob_name = it->second.blob_name;
662 }
663
664 // Download from Azure
665 auto download_result = client_->get_blob(blob_name);
666 if (download_result.is_err()) {
667 return make_error<core::dicom_dataset>(kDownloadError,
668 "Failed to download from Azure",
669 "azure_blob_storage");
670 }
671
672 const auto &data = download_result.value();
673
674 // Report progress (download complete)
675 if (callback) {
676 callback(data.size(), data.size());
677 }
678
679 // Deserialize DICOM data
680 auto parse_result = core::dicom_file::from_bytes(data);
681 if (parse_result.is_err()) {
682 return make_error<core::dicom_dataset>(
683 kSerializationError,
684 "Failed to parse DICOM data: " + parse_result.error().message,
685 "azure_blob_storage");
686 }
687
688 return parse_result.value().dataset();
689}
690
691auto azure_blob_storage::remove(std::string_view sop_instance_uid)
692 -> VoidResult {
693 std::string blob_name;
694
695 {
696 std::unique_lock lock(mutex_);
697 auto it = index_.find(std::string{sop_instance_uid});
698 if (it == index_.end()) {
699 // Not found is not an error for remove
700 return ok();
701 }
702 blob_name = it->second.blob_name;
703 index_.erase(it);
704 }
705
706 // Delete from Azure
707 auto delete_result = client_->delete_blob(blob_name);
708 // Ignore delete errors - blob might have been deleted externally
709
710 return ok();
711}
712
713auto azure_blob_storage::exists(std::string_view sop_instance_uid) const
714 -> bool {
715 std::shared_lock lock(mutex_);
716 return index_.contains(std::string{sop_instance_uid});
717}
718
721 std::vector<core::dicom_dataset> results;
722
723 std::vector<std::string> blobs_to_retrieve;
724 {
725 std::shared_lock lock(mutex_);
726 blobs_to_retrieve.reserve(index_.size());
727 for (const auto &[uid, info] : index_) {
728 blobs_to_retrieve.push_back(info.blob_name);
729 }
730 }
731
732 for (const auto &blob_name : blobs_to_retrieve) {
733 auto download_result = client_->get_blob(blob_name);
734 if (download_result.is_err()) {
735 continue; // Skip blobs that can't be downloaded
736 }
737
738 auto parse_result = core::dicom_file::from_bytes(download_result.value());
739 if (parse_result.is_err()) {
740 continue; // Skip invalid DICOM files
741 }
742
743 const auto &dataset = parse_result.value().dataset();
744 if (matches_query(dataset, query)) {
745 results.push_back(dataset);
746 }
747 }
748
749 return results;
750}
751
753 storage_statistics stats;
754
755 std::set<std::string> studies;
756 std::set<std::string> series;
757
758 {
759 std::shared_lock lock(mutex_);
760 stats.total_instances = index_.size();
761
762 for (const auto &[uid, info] : index_) {
763 stats.total_bytes += info.size_bytes;
764
765 if (!info.study_instance_uid.empty()) {
766 studies.insert(info.study_instance_uid);
767 }
768 if (!info.series_instance_uid.empty()) {
769 series.insert(info.series_instance_uid);
770 }
771 }
772 }
773
774 stats.studies_count = studies.size();
775 stats.series_count = series.size();
776 // Note: patient_count requires downloading datasets to extract PatientID
777
778 return stats;
779}
780
782 std::vector<std::pair<std::string, std::string>> entries;
783 {
784 std::shared_lock lock(mutex_);
785 entries.reserve(index_.size());
786 for (const auto &[uid, info] : index_) {
787 entries.emplace_back(uid, info.blob_name);
788 }
789 }
790
791 std::vector<std::string> invalid_entries;
792
793 for (const auto &[uid, blob_name] : entries) {
794 if (!client_->head_blob(blob_name)) {
795 invalid_entries.push_back(uid + " (blob missing)");
796 }
797 }
798
799 if (!invalid_entries.empty()) {
800 std::string message = "Integrity check failed for " +
801 std::to_string(invalid_entries.size()) + " entries";
802 return make_error<std::monostate>(kIntegrityError, message,
803 "azure_blob_storage");
804 }
805
806 return ok();
807}
808
809// ============================================================================
810// Azure-specific Operations
811// ============================================================================
812
814 std::string_view sop_instance_uid) const -> std::string {
815 std::shared_lock lock(mutex_);
816 auto it = index_.find(std::string{sop_instance_uid});
817 if (it != index_.end()) {
818 return it->second.blob_name;
819 }
820 return {};
821}
822
823auto azure_blob_storage::container_name() const -> const std::string & {
824 return config_.container_name;
825}
826
828 std::unique_lock lock(mutex_);
829 index_.clear();
830
831 // List all blobs from Azure
832 auto blob_names = client_->list_blobs();
833
834 for (const auto &blob_name : blob_names) {
835 // Download and parse each blob to rebuild index
836 auto download_result = client_->get_blob(blob_name);
837 if (download_result.is_err()) {
838 continue;
839 }
840
841 auto parse_result = core::dicom_file::from_bytes(download_result.value());
842 if (parse_result.is_err()) {
843 continue;
844 }
845
846 const auto &dataset = parse_result.value().dataset();
847 auto sop_uid = dataset.get_string(core::tags::sop_instance_uid);
848 auto study_uid = dataset.get_string(core::tags::study_instance_uid);
849 auto series_uid = dataset.get_string(core::tags::series_instance_uid);
850
851 if (!sop_uid.empty()) {
852 azure_blob_info info;
853 info.blob_name = blob_name;
854 info.sop_instance_uid = sop_uid;
855 info.study_instance_uid = study_uid;
856 info.series_instance_uid = series_uid;
857 info.size_bytes = client_->get_blob_size(blob_name);
858 info.etag = client_->get_blob_etag(blob_name);
859 info.content_md5 = client_->get_blob_md5(blob_name);
860 index_[sop_uid] = std::move(info);
861 }
862 }
863
864 return ok();
865}
866
868 return client_ && client_->is_connected();
869}
870
871auto azure_blob_storage::set_access_tier(std::string_view sop_instance_uid,
872 std::string_view tier) -> VoidResult {
873 std::string blob_name;
874
875 {
876 std::shared_lock lock(mutex_);
877 auto it = index_.find(std::string{sop_instance_uid});
878 if (it == index_.end()) {
879 return make_error<std::monostate>(
880 kBlobNotFound,
881 "Instance not found: " + std::string{sop_instance_uid},
882 "azure_blob_storage");
883 }
884 blob_name = it->second.blob_name;
885 }
886
887 auto result = client_->set_tier(blob_name, std::string{tier});
888 if (result.is_err()) {
889 return make_error<std::monostate>(
890 kTierChangeError,
891 "Failed to change access tier: " + std::string{sop_instance_uid},
892 "azure_blob_storage");
893 }
894
895 return ok();
896}
897
898// ============================================================================
899// Internal Helper Methods
900// ============================================================================
901
902auto azure_blob_storage::build_blob_name(std::string_view study_uid,
903 std::string_view series_uid,
904 std::string_view sop_uid) const
905 -> std::string {
906 std::ostringstream oss;
907 oss << sanitize_uid(study_uid) << "/" << sanitize_uid(series_uid) << "/"
908 << sanitize_uid(sop_uid) << ".dcm";
909 return oss.str();
910}
911
912auto azure_blob_storage::sanitize_uid(std::string_view uid) -> std::string {
913 std::string result;
914 result.reserve(uid.length());
915
916 for (char c : uid) {
917 // UIDs contain digits and dots, which are safe for blob names
918 // Replace any other characters with underscore
919 if (std::isalnum(static_cast<unsigned char>(c)) || c == '.') {
920 result += c;
921 } else {
922 result += '_';
923 }
924 }
925
926 return result;
927}
928
930 const std::string &blob_name, const std::vector<std::uint8_t> &data,
931 azure_progress_callback callback) -> VoidResult {
932 std::size_t total_bytes = data.size();
933 std::size_t bytes_uploaded = 0;
934 std::vector<std::string> block_ids;
935
936 // Stage blocks
937 std::size_t block_index = 0;
938 while (bytes_uploaded < total_bytes) {
939 std::size_t block_size =
940 (std::min)(config_.block_size, total_bytes - bytes_uploaded);
941
942 auto block_id = generate_block_id(block_index);
943 block_ids.push_back(block_id);
944
945 std::vector<std::uint8_t> block_data(data.begin() + bytes_uploaded,
946 data.begin() + bytes_uploaded +
947 block_size);
948
949 auto stage_result = client_->stage_block(blob_name, block_id, block_data);
950 if (stage_result.is_err()) {
951 return make_error<std::monostate>(kUploadError,
952 "Failed to stage block: " + block_id,
953 "azure_blob_storage");
954 }
955
956 bytes_uploaded += block_size;
957 block_index++;
958
959 if (callback && !callback(bytes_uploaded, total_bytes)) {
960 return make_error<std::monostate>(kUploadError,
961 "Upload cancelled by user",
962 "azure_blob_storage");
963 }
964 }
965
966 // Commit blocks
967 auto commit_result = client_->commit_blocks(blob_name, block_ids);
968 if (commit_result.is_err()) {
969 return make_error<std::monostate>(kUploadError, "Failed to commit blocks",
970 "azure_blob_storage");
971 }
972
973 return ok();
974}
975
977 const core::dicom_dataset &query)
978 -> bool {
979 // If query is empty, match all
980 if (query.empty()) {
981 return true;
982 }
983
984 // Check each query element
985 for (const auto &[tag, element] : query) {
986 auto query_value = element.as_string().unwrap_or("");
987 if (query_value.empty()) {
988 continue; // Empty value acts as wildcard
989 }
990
991 auto dataset_value = dataset.get_string(tag);
992
993 // Support basic wildcard matching (* and ?)
994 if (query_value.find('*') != std::string::npos ||
995 query_value.find('?') != std::string::npos) {
996 // Simple pattern matching
997 if (query_value.front() == '*' && query_value.back() == '*') {
998 // Contains
999 auto inner = query_value.substr(1, query_value.length() - 2);
1000 if (dataset_value.find(inner) == std::string::npos) {
1001 return false;
1002 }
1003 } else if (query_value.front() == '*') {
1004 // Ends with
1005 auto suffix = query_value.substr(1);
1006 if (dataset_value.length() < suffix.length() ||
1007 dataset_value.substr(dataset_value.length() - suffix.length()) !=
1008 suffix) {
1009 return false;
1010 }
1011 } else if (query_value.back() == '*') {
1012 // Starts with
1013 auto prefix = query_value.substr(0, query_value.length() - 1);
1014 if (dataset_value.substr(0, prefix.length()) != prefix) {
1015 return false;
1016 }
1017 }
1018 } else {
1019 // Exact match
1020 if (dataset_value != query_value) {
1021 return false;
1022 }
1023 }
1024 }
1025
1026 return true;
1027}
1028
1029} // namespace kcenon::pacs::storage
if(!color.empty()) style.color
Azure Blob storage backend for DICOM cloud storage support.
static auto from_bytes(std::span< const uint8_t > data) -> kcenon::pacs::Result< dicom_file >
Parse a DICOM file from raw bytes.
static auto create(dicom_dataset dataset, const encoding::transfer_syntax &ts) -> dicom_file
Create a new DICOM file from a dataset.
static const transfer_syntax explicit_vr_little_endian
Explicit VR Little Endian (1.2.840.10008.1.2.1)
Abstract interface for Azure Blob client operations.
virtual auto get_blob_etag(const std::string &blob_name) const -> std::string=0
virtual auto stage_block(const std::string &blob_name, const std::string &block_id, const std::vector< std::uint8_t > &data) -> VoidResult=0
virtual auto get_blob_md5(const std::string &blob_name) const -> std::string=0
virtual auto list_blobs() const -> std::vector< std::string >=0
virtual auto delete_blob(const std::string &blob_name) -> VoidResult=0
virtual auto get_blob_size(const std::string &blob_name) const -> std::size_t=0
virtual auto set_tier(const std::string &blob_name, const std::string &tier) -> VoidResult=0
virtual auto head_blob(const std::string &blob_name) const -> bool=0
virtual auto get_blob(const std::string &blob_name) -> Result< std::vector< std::uint8_t > >=0
virtual auto put_blob(const std::string &blob_name, const std::vector< std::uint8_t > &data) -> VoidResult=0
virtual auto commit_blocks(const std::string &blob_name, const std::vector< std::string > &block_ids) -> VoidResult=0
azure_storage_config config_
Storage configuration.
auto exists(std::string_view sop_instance_uid) const -> bool override
Check if a DICOM instance exists in Azure Storage.
auto is_connected() const -> bool
Check Azure connectivity.
auto find(const core::dicom_dataset &query) -> Result< std::vector< core::dicom_dataset > > override
Find DICOM datasets matching query criteria.
auto build_blob_name(std::string_view study_uid, std::string_view series_uid, std::string_view sop_uid) const -> std::string
Build blob name for a dataset.
std::unordered_map< std::string, azure_blob_info > index_
Mapping from SOP Instance UID to Azure blob info.
static auto sanitize_uid(std::string_view uid) -> std::string
Sanitize UID for use in blob name.
auto remove(std::string_view sop_instance_uid) -> VoidResult override
Remove a DICOM blob from Azure Storage.
auto rebuild_index() -> VoidResult
Rebuild the local index from Azure.
std::unique_ptr< azure_client_interface > client_
Azure client (mock for testing, Azure SDK for production)
auto store_with_progress(const core::dicom_dataset &dataset, azure_progress_callback callback) -> VoidResult
Store with progress tracking.
auto retrieve_with_progress(std::string_view sop_instance_uid, azure_progress_callback callback) -> Result< core::dicom_dataset >
Retrieve with progress tracking.
auto upload_block_blob(const std::string &blob_name, const std::vector< std::uint8_t > &data, azure_progress_callback callback) -> VoidResult
Execute block blob upload for large files.
std::shared_mutex mutex_
Mutex for thread-safe access.
auto verify_integrity() -> VoidResult override
Verify storage integrity.
auto store(const core::dicom_dataset &dataset) -> VoidResult override
Store a DICOM dataset to Azure Blob Storage.
auto set_access_tier(std::string_view sop_instance_uid, std::string_view tier) -> VoidResult
Set blob access tier.
auto get_blob_name(std::string_view sop_instance_uid) const -> std::string
Get the blob name for a SOP Instance UID.
auto retrieve(std::string_view sop_instance_uid) -> Result< core::dicom_dataset > override
Retrieve a DICOM dataset by SOP Instance UID.
auto container_name() const -> const std::string &
Get the container name.
static auto matches_query(const core::dicom_dataset &dataset, const core::dicom_dataset &query) -> bool
Check if dataset matches query criteria.
auto get_statistics() const -> storage_statistics override
Get storage statistics.
azure_blob_storage(const azure_storage_config &config)
Construct Azure Blob storage with configuration.
Mock Azure Blob client for testing without Azure SDK dependency.
auto get_blob_size(const std::string &blob_name) const -> std::size_t override
auto put_blob(const std::string &blob_name, const std::vector< std::uint8_t > &data) -> VoidResult override
auto commit_blocks(const std::string &blob_name, const std::vector< std::string > &block_ids) -> VoidResult override
auto set_tier(const std::string &blob_name, const std::string &tier) -> VoidResult override
auto get_blob(const std::string &blob_name) -> Result< std::vector< std::uint8_t > > override
auto head_blob(const std::string &blob_name) const -> bool override
std::unordered_map< std::string, std::unordered_map< std::string, std::vector< std::uint8_t > > > staged_blocks_
auto get_blob_md5(const std::string &blob_name) const -> std::string override
mock_azure_client(const azure_storage_config &)
std::unordered_map< std::string, blob_data > blobs_
auto delete_blob(const std::string &blob_name) -> VoidResult override
auto list_blobs() const -> std::vector< std::string > override
auto get_blob_etag(const std::string &blob_name) const -> std::string override
auto stage_block(const std::string &blob_name, const std::string &block_id, const std::vector< std::uint8_t > &data) -> VoidResult override
DICOM Part 10 file handling for reading/writing DICOM files.
Compile-time constants for commonly used DICOM tags.
constexpr dicom_tag sop_instance_uid
SOP Instance UID.
constexpr dicom_tag study_instance_uid
Study Instance UID.
constexpr dicom_tag series_instance_uid
Series Instance UID.
@ hash
Hash the value for research linkage.
std::function< bool(std::size_t bytes_transferred, std::size_t total_bytes)> azure_progress_callback
Callback type for upload/download progress tracking.
Information about an Azure Blob object.
Configuration for Azure Blob storage.
std::string container_name
Azure Blob container name for storing DICOM files.
std::optional< std::string > endpoint_url
Optional custom endpoint URL for Azurite emulator If set, this takes precedence over connection_strin...
std::string connection_string
Connection string for Azure Storage account Format: DefaultEndpointsProtocol=https;AccountName=....
std::string_view uid
std::string_view name