Logger System 0.1.3
High-performance C++20 thread-safe logging system with asynchronous capabilities
Loading...
Searching...
No Matches
log_sanitizer.h
Go to the documentation of this file.
1// BSD 3-Clause License
2// Copyright (c) 2021-2025, 🍀☀🌕🌥 🌊
3// See the LICENSE file in the project root for full license information.
4
25#pragma once
26
#include <algorithm>
#include <functional>
#include <memory>
#include <regex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
33
35
41 ssn,
42 api_key,
43 password,
44 email,
47 custom
48};
49
54 std::string name;
55 std::regex pattern;
56 std::string replacement;
58
59 sanitization_rule(std::string n, const std::string& p, std::string r, bool pp = false)
60 : name(std::move(n))
61 , pattern(p, std::regex::icase | std::regex::optimize)
62 , replacement(std::move(r))
63 , preserve_partial(pp) {}
64};
65
83public:
90 switch (type) {
93 break;
96 break;
99 break;
102 break;
105 break;
108 break;
111 break;
113 // Custom patterns are added via add_custom_pattern()
114 break;
115 }
116 return *this;
117 }
118
128 std::string_view name,
129 std::string_view regex_pattern,
130 std::string_view replacement = "[REDACTED]",
131 bool preserve_partial = false) {
132 rules_.emplace_back(
133 std::string(name),
134 std::string(regex_pattern),
135 std::string(replacement),
136 preserve_partial);
137 return *this;
138 }
139
145 log_sanitizer& remove_pattern(std::string_view name) {
146 rules_.erase(
147 std::remove_if(rules_.begin(), rules_.end(),
148 [name](const sanitization_rule& rule) {
149 return rule.name == name;
150 }),
151 rules_.end());
152 return *this;
153 }
154
160 std::string sanitize(std::string_view input) const {
161 if (rules_.empty()) {
162 return std::string(input);
163 }
164
165 std::string result(input);
166 for (const auto& rule : rules_) {
167 result = apply_rule(result, rule);
168 }
169 return result;
170 }
171
177 bool contains_sensitive_data(std::string_view input) const {
178 for (const auto& rule : rules_) {
179 if (std::regex_search(input.begin(), input.end(), rule.pattern)) {
180 return true;
181 }
182 }
183 return false;
184 }
185
190 std::vector<std::string> active_patterns() const {
191 std::vector<std::string> names;
192 names.reserve(rules_.size());
193 for (const auto& rule : rules_) {
194 names.push_back(rule.name);
195 }
196 return names;
197 }
198
204 rules_.clear();
205 return *this;
206 }
207
220
221private:
222 std::vector<sanitization_rule> rules_;
223
225 // Matches credit card numbers with or without separators
226 // Preserves last 4 digits
227 rules_.emplace_back(
228 "credit_card",
229 R"(\b(\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?)(\d{4})\b)",
230 "****-****-****-$2",
231 true);
232 }
233
235 // Matches SSN format XXX-XX-XXXX
236 // Preserves last 4 digits
237 rules_.emplace_back(
238 "ssn",
239 R"(\b(\d{3})[-\s]?(\d{2})[-\s]?(\d{4})\b)",
240 "***-**-$3",
241 true);
242 }
243
245 // Matches common API key formats
246 rules_.emplace_back(
247 "api_key",
248 R"(\b(sk[-_]|api[-_]|key[-_]|token[-_]|bearer\s+)([a-zA-Z0-9]{16,})\b)",
249 "$1[REDACTED]",
250 false);
251 }
252
254 // Matches password=xxx, pwd=xxx, passwd=xxx patterns
255 rules_.emplace_back(
256 "password",
257 R"(((?:password|passwd|pwd|secret|credential)[\s]*[=:]\s*)([^\s&]+))",
258 "$1[REDACTED]",
259 false);
260 }
261
263 // Matches email addresses, masks local part partially
264 rules_.emplace_back(
265 "email",
266 R"(\b([a-zA-Z0-9._%+-])([a-zA-Z0-9._%+-]*)(@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b)",
267 "$1***$3",
268 true);
269 }
270
272 // Matches IPv4 addresses, masks last two octets
273 rules_.emplace_back(
274 "ip_address",
275 R"(\b(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\b)",
276 "$1.$2.x.x",
277 true);
278 }
279
281 // Matches phone numbers in various formats
282 rules_.emplace_back(
283 "phone_number",
284 R"(\b(\+?\d{1,3}[-.\s]?)(\d{3})[-.\s]?(\d{3})[-.\s]?(\d{4})\b)",
285 "$1***-***-$4",
286 true);
287 }
288
289 static std::string apply_rule(const std::string& input, const sanitization_rule& rule) {
290 return std::regex_replace(input, rule.pattern, rule.replacement);
291 }
292};
293
299 log_sanitizer sanitizer;
300 sanitizer.add_common_patterns();
301 return sanitizer;
302}
303
304} // namespace kcenon::logger::security
Log sanitizer for masking sensitive data.
log_sanitizer & remove_pattern(std::string_view name)
Remove a pattern by name.
log_sanitizer & add_common_patterns()
Add all common patterns at once.
bool contains_sensitive_data(std::string_view input) const
Check if a string contains sensitive data.
log_sanitizer & add_pattern(sensitive_data_type type)
Add a built-in pattern for sensitive data detection.
log_sanitizer & clear()
Clear all patterns.
static std::string apply_rule(const std::string &input, const sanitization_rule &rule)
log_sanitizer & add_custom_pattern(std::string_view name, std::string_view regex_pattern, std::string_view replacement="[REDACTED]", bool preserve_partial=false)
Add a custom sanitization pattern.
std::vector< sanitization_rule > rules_
std::vector< std::string > active_patterns() const
Get list of active pattern names.
std::string sanitize(std::string_view input) const
Sanitize a string by masking all detected sensitive data.
sensitive_data_type
Types of sensitive data that can be automatically detected.
@ api_key
API keys (sk-, api_, bearer tokens)
@ phone_number
Phone numbers (various formats)
@ password
Password fields in key=value format.
@ custom
Custom user-defined patterns.
@ credit_card
Credit card numbers (Visa, MC, Amex, etc.)
@ ssn
Social Security Numbers (US format)
log_sanitizer make_default_sanitizer()
Create a sanitizer with common patterns pre-configured.
Configuration for a sanitization rule.
std::string name
Identifier for the rule.
std::regex pattern
Regex pattern to match.
sanitization_rule(std::string n, const std::string &p, std::string r, bool pp=false)
std::string replacement
Replacement text or pattern.
bool preserve_partial
Keep last N characters visible.