18 rng_state_(static_cast<std::uint64_t>(
19 std::chrono::steady_clock::now().time_since_epoch().count()) | 1) {
21 auto now = std::chrono::steady_clock::now();
28 : config_(std::move(other.config_)),
29 total_count_(other.total_count_.load()),
30 sampled_count_(other.sampled_count_.load()),
31 dropped_count_(other.dropped_count_.load()),
32 bypassed_count_(other.bypassed_count_.load()),
33 rng_state_(other.rng_state_.load()),
34 rate_limit_count_(other.rate_limit_count_.load()),
35 rate_limit_window_start_(other.rate_limit_window_start_.load()),
36 effective_rate_(other.effective_rate_.load()),
37 adaptive_window_count_(other.adaptive_window_count_.load()),
38 adaptive_window_start_(other.adaptive_window_start_.load()),
39 is_throttling_(other.is_throttling_.load()) {
44 std::unique_lock<std::shared_mutex> lock(config_mutex_);
45 config_ = std::move(other.config_);
46 total_count_.store(other.total_count_.load());
47 sampled_count_.store(other.sampled_count_.load());
48 dropped_count_.store(other.dropped_count_.load());
49 bypassed_count_.store(other.bypassed_count_.load());
50 rng_state_.store(other.rng_state_.load());
51 rate_limit_count_.store(other.rate_limit_count_.load());
52 rate_limit_window_start_.store(other.rate_limit_window_start_.load());
53 effective_rate_.store(other.effective_rate_.load());
54 adaptive_window_count_.store(other.adaptive_window_count_.load());
55 adaptive_window_start_.store(other.adaptive_window_start_.load());
56 is_throttling_.store(other.is_throttling_.load());
104 bool sampled =
false;
130 const std::string& message) {
135 const std::string& message,
136 const std::optional<std::string>& category) {
141 bool enabled =
false;
163 double effective = rate;
164 if (category.has_value()) {
169 bool sampled =
false;
240 return std::find(levels.begin(), levels.end(), level) != levels.end();
244 if (!entry.
fields.has_value()) {
251 for (
const auto& field_name : bypass_fields) {
252 if (entry.
fields->find(field_name) != entry.
fields->end()) {
260 if (!entry.
fields.has_value()) {
268 for (
const auto& [field_name, value_rates] : field_rates) {
269 auto field_it = entry.
fields->find(field_name);
270 if (field_it == entry.
fields->end()) {
275 std::string value_str;
276 if (std::holds_alternative<std::string>(field_it->second)) {
277 value_str = std::get<std::string>(field_it->second);
278 }
else if (std::holds_alternative<int64_t>(field_it->second)) {
279 value_str = std::to_string(std::get<int64_t>(field_it->second));
280 }
else if (std::holds_alternative<double>(field_it->second)) {
281 value_str = std::to_string(std::get<double>(field_it->second));
282 }
else if (std::holds_alternative<bool>(field_it->second)) {
283 value_str = std::get<bool>(field_it->second) ?
"true" :
"false";
287 auto rate_it = value_rates.find(value_str);
288 if (rate_it != value_rates.end()) {
289 return rate_it->second;
315 double normalized =
static_cast<double>(
random) /
316 static_cast<double>(std::numeric_limits<std::uint64_t>::max());
317 return normalized < rate;
321 auto now = std::chrono::steady_clock::now();
322 auto now_count = now.time_since_epoch().count();
324 std::size_t limit_per_second = 0;
325 std::size_t window_ms = 0;
333 std::size_t max_per_window = (limit_per_second * window_ms) / 1000;
334 if (max_per_window == 0) {
339 auto window_start = std::chrono::steady_clock::time_point(
341 auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
342 now - window_start).count();
344 if (
static_cast<std::size_t
>(elapsed_ms) >= window_ms) {
348 window_start = std::chrono::steady_clock::time_point(
350 elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
351 now - window_start).count();
353 if (
static_cast<std::size_t
>(elapsed_ms) >= window_ms) {
360 std::uint64_t current =
rate_limit_count_.fetch_add(1, std::memory_order_relaxed);
361 return current < max_per_window;
374 auto now = std::chrono::steady_clock::now();
375 auto now_count = now.time_since_epoch().count();
377 std::size_t threshold = 0;
378 double min_rate = 0.01;
379 double base_rate = 1.0;
391 auto window_start = std::chrono::steady_clock::time_point(
393 auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
394 now - window_start).count();
396 if (elapsed_ms >= 1000) {
401 if (count > threshold) {
403 double ratio =
static_cast<double>(threshold) /
static_cast<double>(count);
404 double new_rate = base_rate * ratio;
405 new_rate = std::max(new_rate, min_rate);
424 std::uint64_t seed = 0;
435 double normalized =
static_cast<double>(hash) /
436 static_cast<double>(std::numeric_limits<std::uint64_t>::max());
437 return normalized < rate;
441 std::uint64_t x =
rng_state_.load(std::memory_order_relaxed);
445 rng_state_.store(x, std::memory_order_relaxed);
450 constexpr std::uint64_t kFnvOffsetBasis = 14695981039346656037ULL;
451 constexpr std::uint64_t kFnvPrime = 1099511628211ULL;
453 std::uint64_t hash = kFnvOffsetBasis;
455 hash ^=
static_cast<std::uint64_t
>(
static_cast<unsigned char>(c));
478 std::size_t threshold,
485 std::vector<log_level> critical_levels) {
488 config.
rate = base_rate;
494 return std::make_unique<log_sampler>(config);
Thread-safe log sampler with multiple strategy support.
bool random_sample(double rate)
Perform random sampling decision.
void update_adaptive_rate()
Update adaptive sampling rate based on current throughput.
std::mutex rate_limit_mutex_
bool adaptive_sample()
Perform adaptive sampling decision.
std::atomic< std::uint64_t > sampled_count_
std::atomic< std::uint64_t > rate_limit_count_
void set_enabled(bool enabled)
Enable or disable sampling.
std::shared_mutex config_mutex_
sampling_config get_config() const
Get the current configuration.
sampling_stats get_stats() const
Get sampling statistics.
bool rate_limit_sample()
Perform rate limiting sampling decision.
bool is_enabled() const
Check if sampling is enabled.
std::atomic< std::uint64_t > bypassed_count_
std::atomic< std::chrono::steady_clock::time_point::rep > rate_limit_window_start_
double get_field_rate(const log_entry &entry) const
Get the sampling rate based on structured fields.
std::atomic< std::uint64_t > total_count_
std::atomic< std::uint64_t > adaptive_window_count_
bool should_bypass_level(log_level level) const
Check if a level should bypass sampling.
bool should_bypass_field(const log_entry &entry) const
Check if any field should bypass sampling.
std::atomic< std::chrono::steady_clock::time_point::rep > adaptive_window_start_
std::atomic< std::uint64_t > dropped_count_
log_sampler & operator=(const log_sampler &)=delete
Copy assignment operator (deleted)
void reset_stats()
Reset sampling statistics.
double get_effective_rate() const
Get the current effective sampling rate.
void set_config(const sampling_config &config)
Update the sampling configuration.
std::uint64_t xorshift64()
Fast xorshift64 PRNG for random sampling.
std::atomic< bool > is_throttling_
double get_category_rate(const std::string &category) const
Get the sampling rate for a category.
bool should_sample(const log_entry &entry)
Check if a log entry should be sampled (logged)
log_sampler(const sampling_config &config=sampling_config{})
Construct a sampler with the given configuration.
bool hash_sample(const std::string &message, double rate)
Perform hash-based sampling decision.
std::atomic< std::uint64_t > rng_state_
static std::uint64_t fnv1a_hash(const std::string &str)
FNV-1a hash for message hashing.
std::atomic< double > effective_rate_
static std::unique_ptr< log_sampler > create_random(double rate)
Create a random sampling sampler.
static std::unique_ptr< log_sampler > create_adaptive(std::size_t threshold=10000, double min_rate=0.01)
Create an adaptive sampler.
static std::unique_ptr< log_sampler > create_disabled()
Create a disabled sampler (pass-through)
static std::unique_ptr< log_sampler > create_rate_limited(std::size_t max_per_second)
Create a rate limiting sampler.
static std::unique_ptr< log_sampler > create_production(double base_rate=0.1, std::vector< log_level > critical_levels={ log_level::warn, log_level::error, log_level::fatal })
Create a production-ready sampler.
std::string to_string() const
Convert to std::string.
Log sampling implementation for high-volume scenarios kcenon.
sampling_strategy
Defines the sampling algorithm to use.
@ hash_based
Deterministic sampling based on message hash.
@ random
Simple random sampling based on probability.
@ rate_limiting
Rate-based sampling (N logs per time window)
@ adaptive
Adaptive sampling that adjusts based on volume.
Represents a single log entry with all associated metadata.
std::optional< log_fields > fields
Optional structured fields for key-value logging.
log_level level
Severity level of the log message.
std::optional< small_string_128 > category
Optional category for log filtering and routing.
small_string_256 message
The actual log message.
Configuration for log sampling behavior.
static sampling_config random_sampling(double sample_rate)
Create a random sampling configuration.
double rate
Base sampling rate (0.0 to 1.0)
static sampling_config rate_limited(std::size_t max_per_second)
Create a rate limiting configuration.
std::size_t adaptive_threshold
Threshold (messages/second) to trigger adaptive sampling.
std::vector< std::string > always_log_fields
std::unordered_map< std::string, std::unordered_map< std::string, double > > field_rates
std::unordered_map< std::string, double > category_rates
std::size_t rate_limit_per_second
Maximum logs per second for rate limiting strategy.
std::size_t rate_limit_window_ms
Time window for rate limiting (milliseconds)
std::uint64_t hash_seed
Seed for hash-based sampling.
bool adaptive_enabled
Enable adaptive sampling.
double adaptive_min_rate
Minimum sampling rate when adaptive sampling is active.
static sampling_config disabled()
Create a default sampling configuration (disabled)
static sampling_config adaptive(std::size_t threshold, double min_rate=0.01)
Create an adaptive sampling configuration.
sampling_strategy strategy
Sampling strategy to use.
std::vector< log_level > always_log_levels
Log levels that are never sampled (always logged)
bool enabled
Enable or disable sampling.
Statistics about sampling behavior.
std::uint64_t sampled_count
Number of messages that passed sampling (were logged)
std::uint64_t bypassed_count
Number of messages that bypassed sampling (always_log levels)
std::uint64_t dropped_count
Number of messages dropped due to sampling.
double effective_rate
Current effective sampling rate.
bool is_throttling
Whether adaptive sampling is currently reducing the rate.
std::uint64_t total_count
Total number of log messages considered for sampling.