#ifndef PACS_ENCODING_CHARACTER_SET_HPP
#define PACS_ENCODING_CHARACTER_SET_HPP
74 std::string_view defined_term)
noexcept;
82 int iso_ir_number)
noexcept;
121 [[nodiscard]]
bool uses_extensions() const noexcept;
124 [[nodiscard]]
bool is_single_byte_only() const noexcept;
127 [[nodiscard]]
bool is_utf8() const noexcept;
140 std::string_view value);
170 std::string_view text,
193 std::string_view text,
206 std::string_view text,
223 std::string_view pn_value,
240 std::string_view utf8_text,
250 std::string_view utf8_text,
std::vector< text_segment > split_by_escape_sequences(std::string_view text, const specific_character_set &scs)
Split a string into segments by ISO 2022 escape sequences.
const character_set_info * find_character_set(std::string_view defined_term) noexcept
Look up character set info by DICOM Defined Term.
std::string encode_from_utf8(std::string_view utf8_text, const specific_character_set &scs)
Encode a UTF-8 string to the target character set encoding.
specific_character_set parse_specific_character_set(std::string_view value)
Parse a Specific Character Set (0008,0005) value.
const character_set_info & default_character_set() noexcept
Get the default character set (ISO-IR 6, ASCII).
std::vector< const character_set_info * > all_character_sets() noexcept
Get all registered character sets.
const character_set_info * find_character_set_by_ir(int iso_ir_number) noexcept
Look up character set info by ISO-IR number.
std::string decode_person_name(std::string_view pn_value, const specific_character_set &scs)
Decode a Person Name (PN) value to UTF-8.
std::string decode_to_utf8(std::string_view text, const specific_character_set &scs)
Decode a DICOM string to UTF-8 using the given character set.
std::string convert_from_utf8(std::string_view utf8_text, const character_set_info &charset)
Encode a single UTF-8 segment to a specific character set.
std::string convert_to_utf8(std::string_view text, const character_set_info &charset)
Decode a single segment from a specific encoding to UTF-8.
Information about a DICOM character set.
std::string_view defined_term
DICOM Defined Term (e.g., "ISO 2022 IR 149")
bool uses_code_extensions
true if ISO 2022 escape sequences are used
std::string_view escape_sequence
Raw escape sequence bytes (empty if none)
std::string_view description
Human-readable name (e.g., "Korean (KS X 1001)")
std::string_view encoding_name
ICU converter name (e.g., "EUC-KR")
std::string_view iso_ir
ISO-IR designation (e.g., "ISO-IR 149")
bool is_multi_byte
true if characters can be multi-byte
Parsed representation of a multi-valued Specific Character Set.
std::vector< const character_set_info * > extension_sets
Additional character sets activated by escape sequences.
const character_set_info * default_set
Character set for default (G0) repertoire.
A text segment with its associated character set.
const character_set_info * charset
The character set for this segment.
std::string_view text
The raw bytes of this segment.