Thread System 0.3.1
High-performance C++20 thread pool with work stealing and DAG scheduling
Loading...
Searching...
No Matches
convert_string.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2024, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
5#pragma once
6
7#include <tuple>
8#include <vector>
9#include <string>
10#include <cstdint>
11#include <optional>
12#include <algorithm>
13#include <string_view>
14
15namespace utility_module
16{
/// Provides utilities for string encoding conversion, Base64
/// encoding/decoding, and substring operations. Every member is static.
///
/// NOTE(review): most members return a std::tuple of two std::optional
/// values; presumably the first is the result and the second an error
/// message, with exactly one of them set — confirm against the implementation.
46 class convert_string
47 {
48 public:
/// Converts a std::wstring to a std::string using the system encoding.
61 static auto to_string(const std::wstring& value)
62 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
63
/// Converts a std::wstring_view to a std::string using the system encoding.
72 static auto to_string(std::wstring_view value)
73 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
74
/// Converts a std::string (system-encoded) to a std::wstring.
82 static auto to_wstring(const std::string& value)
83 -> std::tuple<std::optional<std::wstring>, std::optional<std::string>>;
84
/// Converts a std::string_view (system-encoded) to a std::wstring.
90 static auto to_wstring(std::string_view value)
91 -> std::tuple<std::optional<std::wstring>, std::optional<std::string>>;
92
/// Retrieves the system code page used for conversions.
100 static auto get_system_code_page() -> int;
101
/// Converts a system-encoded string to UTF-8.
109 static auto system_to_utf8(const std::string& value)
110 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
111
/// Converts a UTF-8 encoded string to the system encoding.
117 static auto utf8_to_system(const std::string& value)
118 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
119
/// Splits `source` by the delimiter `token`.
130 static auto split(const std::string& source, const std::string& token)
131 -> std::tuple<std::optional<std::vector<std::string>>, std::optional<std::string>>;
132
/// Converts a system-encoded string to a UTF-8 byte array.
140 static auto to_array(const std::string& value)
141 -> std::tuple<std::optional<std::vector<uint8_t>>, std::optional<std::string>>;
142
/// Converts a UTF-8 byte array to a system-encoded string.
150 static auto to_string(const std::vector<uint8_t>& value)
151 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
152
/// Encodes a byte array into a Base64 string.
163 static auto to_base64(const std::vector<uint8_t>& value)
164 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
165
/// Decodes a Base64 string into a byte array.
/// Unlike the other public conversions, the first tuple element is a plain
/// std::vector rather than a std::optional.
174 static auto from_base64(const std::string& base64_str)
175 -> std::tuple<std::vector<uint8_t>, std::optional<std::string>>;
176
/// Replaces all occurrences of `token` in `source` with `target`, in place.
/// NOTE(review): the returned optional is presumably an error message that
/// is empty on success — confirm against the implementation.
187 static auto replace(std::string& source,
188 const std::string& token,
189 const std::string& target) -> std::optional<std::string>;
190
/// Replaces all occurrences of `token` in `source` with `target`,
/// returning a new string and leaving `source` untouched (it is const).
201 static auto replace2(const std::string& source,
202 const std::string& token,
203 const std::string& target)
204 -> std::tuple<std::optional<std::string>, std::optional<std::string>>;
205
206 private:
/// Possible endianness values for UTF-16 or UTF-32 data.
208 enum class endian_types
209 {
210 little,
211 big,
212 unknown
213 };
214
/// Supported encoding types for Unicode conversion.
216 enum class encoding_types
217 {
218 utf8,
219 utf16,
220 utf32
221 };
222
/// Retrieves a textual name for a code page (e.g., "CP_ACP" or a
/// locale-based name).
228 static auto get_code_page_name(int code_page) -> std::string;
229
/// Returns the encoding name string for the given encoding type and
/// endianness; `endian` defaults to little-endian.
236 static auto get_encoding_name(encoding_types encoding,
237 endian_types endian = endian_types::little) -> std::string;
238
/// Derives the wchar_t encoding name based on its size and endianness;
/// `endian` defaults to little-endian.
244 static auto get_wchar_encoding(endian_types endian = endian_types::little) -> std::string;
245
/// Converts `value` from `from_encoding` to `to_encoding` using simdutf.
/// FromType is the input string type and ToType the output string type.
258 template <typename FromType, typename ToType>
259 static auto convert(const FromType& value,
260 const std::string& from_encoding,
261 const std::string& to_encoding)
262 -> std::tuple<std::optional<ToType>, std::optional<std::string>>;
263
/// Detects the endianness of a UTF-16 string by checking for BOM or
/// content patterns.
269 static auto detect_endian(const std::u16string& value) -> endian_types;
270
/// Detects the endianness of a UTF-32 string by checking for BOM or
/// content patterns.
276 static auto detect_endian(const std::u32string& value) -> endian_types;
277
/// Checks if a string has a UTF-8 BOM (Byte Order Mark).
283 static auto has_utf8_bom(const std::string& value) -> bool;
284
/// Removes a leading UTF-8 BOM from a string, if present.
290 static auto remove_utf8_bom(const std::string& value) -> std::string;
291
/// Adds a UTF-8 BOM to a string if it doesn't already have one.
297 static auto add_utf8_bom(const std::string& value) -> std::string;
298
/// Encodes a byte array into a Base64 string.
304 static auto base64_encode(const std::vector<uint8_t>& data) -> std::string;
305
/// Decodes a Base64 string into a byte array.
/// NOTE(review): the second tuple element is presumably an error message
/// set when decoding fails — confirm against the implementation.
314 static auto base64_decode(const std::string& base64_str)
315 -> std::tuple<std::vector<uint8_t>, std::optional<std::string>>;
316 };
317} // namespace utility_module
Provides utilities for string encoding conversion, Base64 encoding/decoding, and substring operations...
static auto to_string(const std::wstring &value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Converts a std::wstring to a std::string using the system encoding.
static auto to_string(const std::vector< uint8_t > &value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Converts a UTF-8 byte array to a system-encoded string.
static auto base64_encode(const std::vector< uint8_t > &data) -> std::string
Encodes a byte array into a Base64 string.
static auto get_encoding_name(encoding_types encoding, endian_types endian=endian_types::little) -> std::string
Returns the encoding name string for the given encoding type and endianness.
static auto has_utf8_bom(const std::string &value) -> bool
Checks if a string has a UTF-8 BOM (Byte Order Mark).
static auto add_utf8_bom(const std::string &value) -> std::string
Adds a UTF-8 BOM to a string if it doesn't already have one.
static auto to_base64(const std::vector< uint8_t > &value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Encodes a byte array into a Base64 string.
static auto to_wstring(std::string_view value) -> std::tuple< std::optional< std::wstring >, std::optional< std::string > >
Converts a std::string_view (system-encoded) to a std::wstring.
static auto split(const std::string &source, const std::string &token) -> std::tuple< std::optional< std::vector< std::string > >, std::optional< std::string > >
Splits a string by a given delimiter.
static auto get_system_code_page() -> int
Retrieves the system code page used for conversions.
static auto replace(std::string &source, const std::string &token, const std::string &target) -> std::optional< std::string >
Replaces all occurrences of token in source with target, in place.
static auto remove_utf8_bom(const std::string &value) -> std::string
Removes a leading UTF-8 BOM from a string, if present.
encoding_types
Supported encoding types for Unicode conversion.
static auto from_base64(const std::string &base64_str) -> std::tuple< std::vector< uint8_t >, std::optional< std::string > >
Decodes a Base64 string into a byte array.
static auto get_wchar_encoding(endian_types endian=endian_types::little) -> std::string
Derives the wchar_t encoding name based on its size and endianness.
static auto detect_endian(const std::u32string &value) -> endian_types
Detects the endianness of a UTF-32 string by checking for BOM or content patterns.
static auto system_to_utf8(const std::string &value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Converts a system-encoded string to UTF-8.
static auto utf8_to_system(const std::string &value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Converts a UTF-8 encoded string to the system encoding.
static auto detect_endian(const std::u16string &value) -> endian_types
Detects the endianness of a UTF-16 string by checking for BOM or content patterns.
static auto base64_decode(const std::string &base64_str) -> std::tuple< std::vector< uint8_t >, std::optional< std::string > >
Decodes a Base64 string into a byte array.
static auto to_string(std::wstring_view value) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Converts a std::wstring_view to a std::string using the system encoding.
static auto replace2(const std::string &source, const std::string &token, const std::string &target) -> std::tuple< std::optional< std::string >, std::optional< std::string > >
Replaces all occurrences of token in source with target, returning a new string.
static auto get_code_page_name(int code_page) -> std::string
Retrieves a textual name for a code page (e.g., "CP_ACP" or a locale-based name).
static auto to_wstring(const std::string &value) -> std::tuple< std::optional< std::wstring >, std::optional< std::string > >
Converts a std::string (system-encoded) to a std::wstring.
static auto convert(const FromType &value, const std::string &from_encoding, const std::string &to_encoding) -> std::tuple< std::optional< ToType >, std::optional< std::string > >
Converts from one encoding to another using simdutf.
static auto to_array(const std::string &value) -> std::tuple< std::optional< std::vector< uint8_t > >, std::optional< std::string > >
Converts a system-encoded string to a UTF-8 byte array.
endian_types
Possible endianness values for UTF-16 or UTF-32 data.