PACS System 0.1.0
PACS DICOM system library
Loading...
Searching...
No Matches
character_set.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
35#ifndef PACS_ENCODING_CHARACTER_SET_HPP
36#define PACS_ENCODING_CHARACTER_SET_HPP
37
38#include <cstdint>
39#include <optional>
40#include <string>
41#include <string_view>
42#include <vector>
43
44namespace kcenon::pacs::encoding {
45
53 std::string_view defined_term;
54 std::string_view description;
55 std::string_view iso_ir;
57 std::string_view escape_sequence;
58 std::string_view encoding_name;
60};
61
64
/// @brief Look up character set info by DICOM Defined Term.
/// @param defined_term DICOM Defined Term to search for (e.g., "ISO 2022 IR 149").
/// @return Pointer to the matching character_set_info.
/// NOTE(review): presumably nullptr when no entry matches -- confirm against the implementation.
73[[nodiscard]] const character_set_info* find_character_set(
74 std::string_view defined_term) noexcept;
75
82 int iso_ir_number) noexcept;
83
/// @brief Get the default character set (ISO-IR 6, ASCII).
/// @return Reference to the character_set_info describing the default repertoire.
88[[nodiscard]] const character_set_info& default_character_set() noexcept;
89
/// @brief Get all registered character sets.
/// @return Vector of non-owning pointers, one per registered character_set_info.
/// NOTE(review): declared noexcept but returns a std::vector by value; if the
/// vector's allocation throws, std::terminate would be called -- confirm this is intended.
94[[nodiscard]] std::vector<const character_set_info*> all_character_sets() noexcept;
95
97
100
116
118 std::vector<const character_set_info*> extension_sets;
119
121 [[nodiscard]] bool uses_extensions() const noexcept;
122
124 [[nodiscard]] bool is_single_byte_only() const noexcept;
125
127 [[nodiscard]] bool is_utf8() const noexcept;
128};
129
140 std::string_view value);
141
143
146
155 std::string_view text;
157};
158
/// @brief Split a string into segments by ISO 2022 escape sequences.
/// @param text Encoded input text to split.
/// @param scs  Parsed Specific Character Set (0008,0005) that applies to the text.
/// @return Segments, each pairing raw bytes with the character set for that segment.
/// NOTE(review): text_segment::text is a std::string_view, so the segments presumably
/// refer into `text` and must not outlive it -- confirm against the implementation.
169[[nodiscard]] std::vector<text_segment> split_by_escape_sequences(
170 std::string_view text,
171 const specific_character_set& scs);
172
174
177
/// @brief Decode a DICOM string to UTF-8 using the given character set.
/// @param text Encoded DICOM string value.
/// @param scs  Parsed Specific Character Set (0008,0005) describing the encoding of `text`.
/// @return The decoded text as a UTF-8 std::string.
192[[nodiscard]] std::string decode_to_utf8(
193 std::string_view text,
194 const specific_character_set& scs);
195
/// @brief Decode a single segment from a specific encoding to UTF-8.
/// @param text    Raw bytes of one segment, all in the same character set.
/// @param charset Character set the segment is encoded in.
/// @return The segment converted to UTF-8.
/// NOTE(review): presumably uses the converter named by charset.encoding_name -- confirm.
205[[nodiscard]] std::string convert_to_utf8(
206 std::string_view text,
207 const character_set_info& charset);
208
/// @brief Decode a Person Name (PN) value to UTF-8.
/// @param pn_value Encoded Person Name value.
/// @param scs      Parsed Specific Character Set (0008,0005) describing the encoding.
/// @return The person name as a UTF-8 std::string.
222[[nodiscard]] std::string decode_person_name(
223 std::string_view pn_value,
224 const specific_character_set& scs);
225
/// @brief Encode a UTF-8 string to the target character set encoding.
/// @param utf8_text Input text in UTF-8.
/// @param scs       Parsed Specific Character Set (0008,0005) selecting the target encoding.
/// @return The text encoded in the target character set.
239[[nodiscard]] std::string encode_from_utf8(
240 std::string_view utf8_text,
241 const specific_character_set& scs);
242
/// @brief Encode a single UTF-8 segment to a specific character set.
/// @param utf8_text Input segment in UTF-8.
/// @param charset   Character set to encode the segment into.
/// @return The segment encoded in `charset`.
/// NOTE(review): presumably uses the converter named by charset.encoding_name -- confirm.
249[[nodiscard]] std::string convert_from_utf8(
250 std::string_view utf8_text,
251 const character_set_info& charset);
252
254
255} // namespace kcenon::pacs::encoding
256
257#endif // PACS_ENCODING_CHARACTER_SET_HPP
std::vector< text_segment > split_by_escape_sequences(std::string_view text, const specific_character_set &scs)
Split a string into segments by ISO 2022 escape sequences.
const character_set_info * find_character_set(std::string_view defined_term) noexcept
Look up character set info by DICOM Defined Term.
std::string encode_from_utf8(std::string_view utf8_text, const specific_character_set &scs)
Encode a UTF-8 string to the target character set encoding.
specific_character_set parse_specific_character_set(std::string_view value)
Parse a Specific Character Set (0008,0005) value.
const character_set_info & default_character_set() noexcept
Get the default character set (ISO-IR 6, ASCII).
std::vector< const character_set_info * > all_character_sets() noexcept
Get all registered character sets.
const character_set_info * find_character_set_by_ir(int iso_ir_number) noexcept
Look up character set info by ISO-IR number.
std::string decode_person_name(std::string_view pn_value, const specific_character_set &scs)
Decode a Person Name (PN) value to UTF-8.
std::string decode_to_utf8(std::string_view text, const specific_character_set &scs)
Decode a DICOM string to UTF-8 using the given character set.
std::string convert_from_utf8(std::string_view utf8_text, const character_set_info &charset)
Encode a single UTF-8 segment to a specific character set.
std::string convert_to_utf8(std::string_view text, const character_set_info &charset)
Decode a single segment from a specific encoding to UTF-8.
Information about a DICOM character set.
std::string_view defined_term
DICOM Defined Term (e.g., "ISO 2022 IR 149")
bool uses_code_extensions
true if ISO 2022 escape sequences are used
std::string_view escape_sequence
Raw escape sequence bytes (empty if none)
std::string_view description
Human-readable name (e.g., "Korean (KS X 1001)")
std::string_view encoding_name
ICU converter name (e.g., "EUC-KR")
std::string_view iso_ir
ISO-IR designation (e.g., "ISO-IR 149")
bool is_multi_byte
true if characters can be multi-byte
Parsed representation of a multi-valued Specific Character Set.
std::vector< const character_set_info * > extension_sets
Additional character sets activated by escape sequences.
const character_set_info * default_set
Character set for default (G0) repertoire.
A text segment with its associated character set.
const character_set_info * charset
The character set for this segment.
std::string_view text
The raw bytes of this segment.