Files
hakmem/core/box/tiny_class_stats_box.h
Moe Charm (CI) 8052e8b320 Phase 24-26: Hot path atomic telemetry prune (+2.00% cumulative)
Summary:
- Phase 24 (alloc stats): +0.93% GO
- Phase 25 (free stats): +1.07% GO
- Phase 26 (diagnostics): -0.33% NEUTRAL (code cleanliness)
- Total: 11 atomics compiled-out, +2.00% improvement

Phase 24: OBSERVE tax prune (tiny_class_stats_box.h)
- Added HAKMEM_TINY_CLASS_STATS_COMPILED (default: 0)
- Wrapped 5 stats functions: uc_miss, warm_hit, shared_lock, tls_carve_*
- Result: +0.93% (baseline 56.675M vs compiled-in 56.151M ops/s)

Phase 25: Tiny free stats prune (tiny_superslab_free.inc.h)
- Added HAKMEM_TINY_FREE_STATS_COMPILED (default: 0)
- Wrapped g_free_ss_enter atomic in free hot path
- Result: +1.07% (baseline 57.017M vs compiled-in 56.415M ops/s)

Phase 26: Hot path diagnostic atomics prune
- Added 5 compile gates for low-frequency error counters:
  - HAKMEM_TINY_C7_FREE_COUNT_COMPILED
  - HAKMEM_TINY_HDR_MISMATCH_LOG_COMPILED
  - HAKMEM_TINY_HDR_META_MISMATCH_COMPILED
  - HAKMEM_TINY_METRIC_BAD_CLASS_COMPILED
  - HAKMEM_TINY_HDR_META_FAST_COMPILED
- Result: -0.33% NEUTRAL (within noise, kept for cleanliness)

Alignment with mimalloc principles:
- "No atomics on hot path" - telemetry moved to compile-time opt-in
- Eliminates the fixed per-operation telemetry cost ("tax")
- Production builds: maximum performance (atomics compiled-out)
- Research builds: full diagnostics (COMPILED=1)

Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 05:35:11 +09:00

109 lines
4.1 KiB
C

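// tiny_class_stats_box.h - Lightweight per-class stats hooks (OBSERVE layer)
//
// Purpose:
// - Provide per-class counters for cheap observation: plain (non-atomic)
//   per-thread counters plus relaxed-atomic global aggregates.
// - Hot paths call small inline helpers; these compile to no-ops unless
//   HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero.
// - Cold-path reset/snapshot/dump helpers are declared at the end of this header.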
#ifndef TINY_CLASS_STATS_BOX_H
#define TINY_CLASS_STATS_BOX_H
#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>
#include "../hakmem_tiny_config.h"
// Per-class event counters, one array per event kind.
// Each array has TINY_NUM_CLASSES slots and is indexed by tiny class index.
// Used as the thread-local counter block (g_tiny_class_stats) and as the
// output type of the snapshot helpers declared in this header.
typedef struct TinyClassStatsThread {
uint64_t uc_miss[TINY_NUM_CLASSES]; // bumped by tiny_class_stats_on_uc_miss(); original note: unified_cache_refill() hits
uint64_t warm_hit[TINY_NUM_CLASSES]; // warm pool successes; bumped by tiny_class_stats_on_warm_hit()
uint64_t shared_lock[TINY_NUM_CLASSES]; // shared pool lock acquisitions (hook as needed); bumped by tiny_class_stats_on_shared_lock()
uint64_t tls_carve_attempt[TINY_NUM_CLASSES]; // Warm/TLS carve attempts; bumped by tiny_class_stats_on_tls_carve_attempt()
uint64_t tls_carve_success[TINY_NUM_CLASSES]; // Warm/TLS carve successes; bumped by tiny_class_stats_on_tls_carve_success()
} TinyClassStatsThread;
// Thread-local counter block: each thread has its own instance, so the inline
// hooks in this header update it with plain (non-atomic) increments.
// Declared here only; the definition lives in another translation unit.
extern __thread TinyClassStatsThread g_tiny_class_stats;
// Global (cross-thread) aggregates for OBSERVE/LEARN
// One atomic array per counter kind, each with TINY_NUM_CLASSES slots indexed
// by class. The inline hooks in this header add to them with
// memory_order_relaxed. Declared here only; defined elsewhere.
extern _Atomic uint64_t g_tiny_class_stats_uc_miss_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_warm_hit_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_shared_lock_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_attempt_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_success_global[TINY_NUM_CLASSES];
// Hot-path hook: record one uc_miss event for tiny class `ci`.
//
// When HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero, an in-range `ci`
// (0 <= ci < TINY_NUM_CLASSES) bumps the calling thread's uc_miss counter and
// adds 1 to the matching global aggregate with relaxed ordering. An
// out-of-range `ci` is ignored. When the gate is 0 (Phase 24 default), the
// whole body compiles out and the call is a no-op.
static inline void tiny_class_stats_on_uc_miss(int ci)
{
#if HAKMEM_TINY_CLASS_STATS_COMPILED
    if (ci < 0 || ci >= TINY_NUM_CLASSES) {
        return;  // not a valid class index: count nothing
    }
    g_tiny_class_stats.uc_miss[ci] += 1;
    atomic_fetch_add_explicit(&g_tiny_class_stats_uc_miss_global[ci], 1,
                              memory_order_relaxed);
#else
    (void)ci;  // stats compiled out; keep the parameter "used"
#endif
}
// Hot-path hook: record one warm-pool hit for tiny class `ci`.
//
// When HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero, an in-range `ci`
// (0 <= ci < TINY_NUM_CLASSES) bumps the calling thread's warm_hit counter and
// adds 1 to the matching global aggregate with relaxed ordering. An
// out-of-range `ci` is ignored. When the gate is 0 (Phase 24 default), the
// whole body compiles out and the call is a no-op.
static inline void tiny_class_stats_on_warm_hit(int ci)
{
#if HAKMEM_TINY_CLASS_STATS_COMPILED
    if (ci < 0 || ci >= TINY_NUM_CLASSES) {
        return;  // not a valid class index: count nothing
    }
    g_tiny_class_stats.warm_hit[ci] += 1;
    atomic_fetch_add_explicit(&g_tiny_class_stats_warm_hit_global[ci], 1,
                              memory_order_relaxed);
#else
    (void)ci;  // stats compiled out; keep the parameter "used"
#endif
}
// Hot-path hook: record one shared-pool lock acquisition for tiny class `ci`.
//
// When HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero, an in-range `ci`
// (0 <= ci < TINY_NUM_CLASSES) bumps the calling thread's shared_lock counter
// and adds 1 to the matching global aggregate with relaxed ordering. An
// out-of-range `ci` is ignored. When the gate is 0 (Phase 24 default), the
// whole body compiles out and the call is a no-op.
static inline void tiny_class_stats_on_shared_lock(int ci)
{
#if HAKMEM_TINY_CLASS_STATS_COMPILED
    if (ci < 0 || ci >= TINY_NUM_CLASSES) {
        return;  // not a valid class index: count nothing
    }
    g_tiny_class_stats.shared_lock[ci] += 1;
    atomic_fetch_add_explicit(&g_tiny_class_stats_shared_lock_global[ci], 1,
                              memory_order_relaxed);
#else
    (void)ci;  // stats compiled out; keep the parameter "used"
#endif
}
// Hot-path hook: record one Warm/TLS carve attempt for tiny class `ci`.
//
// When HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero, an in-range `ci`
// (0 <= ci < TINY_NUM_CLASSES) bumps the calling thread's tls_carve_attempt
// counter and adds 1 to the matching global aggregate with relaxed ordering.
// An out-of-range `ci` is ignored. When the gate is 0 (Phase 24 default), the
// whole body compiles out and the call is a no-op.
static inline void tiny_class_stats_on_tls_carve_attempt(int ci)
{
#if HAKMEM_TINY_CLASS_STATS_COMPILED
    if (ci < 0 || ci >= TINY_NUM_CLASSES) {
        return;  // not a valid class index: count nothing
    }
    g_tiny_class_stats.tls_carve_attempt[ci] += 1;
    atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_attempt_global[ci], 1,
                              memory_order_relaxed);
#else
    (void)ci;  // stats compiled out; keep the parameter "used"
#endif
}
// Hot-path hook: record one successful Warm/TLS carve for tiny class `ci`.
//
// When HAKMEM_TINY_CLASS_STATS_COMPILED is nonzero, an in-range `ci`
// (0 <= ci < TINY_NUM_CLASSES) bumps the calling thread's tls_carve_success
// counter and adds 1 to the matching global aggregate with relaxed ordering.
// An out-of-range `ci` is ignored. When the gate is 0 (Phase 24 default), the
// whole body compiles out and the call is a no-op.
static inline void tiny_class_stats_on_tls_carve_success(int ci)
{
#if HAKMEM_TINY_CLASS_STATS_COMPILED
    if (ci < 0 || ci >= TINY_NUM_CLASSES) {
        return;  // not a valid class index: count nothing
    }
    g_tiny_class_stats.tls_carve_success[ci] += 1;
    atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_success_global[ci], 1,
                              memory_order_relaxed);
#else
    (void)ci;  // stats compiled out; keep the parameter "used"
#endif
}
// Cold-path helpers. Only the prototypes are visible in this header; the
// behavior notes below come from the names and original comments and should
// be confirmed against the implementation.
//
// Optional: reset per-thread counters (cold path only).
// Presumably clears the calling thread's g_tiny_class_stats - TODO confirm.
void tiny_class_stats_reset_thread(void);
// Snapshot helpers (cold path): copy current counters into caller-provided struct.
// `out` is supplied by the caller. The _global variant reports the cross-thread
// aggregates through the same TinyClassStatsThread layout.
// NOTE(review): NULL handling for `out` is not visible here - confirm.
void tiny_class_stats_snapshot_thread(TinyClassStatsThread* out);
void tiny_class_stats_snapshot_global(TinyClassStatsThread* out);
// Simple stderr dump helpers (cold path)
// NOTE(review): both take a FILE* `out`, so the destination is presumably the
// caller's stream, not necessarily stderr. `tag` looks like a label for the
// output. Verify both against the implementation.
void tiny_class_stats_dump_thread(FILE* out, const char* tag);
void tiny_class_stats_dump_global(FILE* out, const char* tag);
#endif // TINY_CLASS_STATS_BOX_H