Remove debug overhead from release builds (19 hotspots)

Problem:
- Release builds (-DHAKMEM_BUILD_RELEASE=1) still execute debug code
- fprintf, getenv(), atomic counters in hot paths
- Performance: 9M ops/s vs System malloc 43M ops/s (4.8x slower)

Fixed hotspots:
1. hak_alloc_api.inc.h - atomic_fetch_add + fprintf every alloc
2. hak_free_api.inc.h - Free wrapper trace + route trace
3. hak_wrappers.inc.h - Malloc wrapper logs
4. tiny_free_fast.inc.h - getenv() every free (CRITICAL!)
5. hakmem_tiny_refill.inc.h - Expensive validation
6. hakmem_tiny_sfc.c - SFC initialization logs
7. tiny_alloc_fast_sfc.inc.h - getenv() caching

Changes:
- Guard all fprintf/printf with #if !HAKMEM_BUILD_RELEASE
- Cache getenv() results in TLS variables (debug builds only)
- Remove atomic counters from hot paths in release builds
- Add no-op stubs for release builds

Impact:
- All debug code completely eliminated in release builds
- Expected improvement: Limited (deeper profiling needed)
- Root cause: Performance bottleneck exists beyond debug overhead

Note: Benchmark results show debug removal alone insufficient for
performance goals. Further investigation required with perf profiling.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-13 13:32:58 +09:00
parent c28314fb96
commit 6570f52f7b
8 changed files with 661 additions and 3 deletions

View File

@ -21,12 +21,14 @@ static inline void* hak_os_map_boundary(size_t size, uintptr_t site_id) {
__attribute__((always_inline))
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint64_t hak_alloc_call_count = 0;
uint64_t call_num = atomic_fetch_add(&hak_alloc_call_count, 1);
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu size=%zu\n", call_num, size);
fflush(stderr);
}
#endif
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t0);
@ -36,24 +38,30 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
uintptr_t site_id = (uintptr_t)site;
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu entering tiny path\n", call_num);
fflush(stderr);
}
#endif
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_tiny);
#endif
void* tiny_ptr = NULL;
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu calling hak_tiny_alloc_fast_wrapper\n", call_num);
fflush(stderr);
}
#endif
tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu hak_tiny_alloc_fast_wrapper returned %p\n", call_num, tiny_ptr);
fflush(stderr);
}
#endif
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
tiny_ptr = hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)

View File

@ -12,6 +12,7 @@
#endif
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
static inline int hak_free_route_trace_on(void) {
static int g_trace = -1;
if (__builtin_expect(g_trace == -1, 0)) {
@ -31,6 +32,9 @@ static inline void hak_free_route_log(const char* tag, void* p) {
(*budget)--;
fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
}
#else
static inline void hak_free_route_log(const char* tag, void* p) { (void)tag; (void)p; }
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
static inline int hak_super_reg_reqtrace_on(void) {
@ -74,6 +78,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#endif
(void)site; (void)size;
// Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
if (__builtin_expect(free_trace_en == -1, 0)) {
const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
@ -85,6 +90,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}
}
#endif
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);

View File

@ -114,15 +114,19 @@ void* malloc(size_t size) {
}
}
#if !HAKMEM_BUILD_RELEASE
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu calling hak_alloc_at\n", count);
fflush(stderr);
}
#endif
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
#if !HAKMEM_BUILD_RELEASE
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu hak_alloc_at returned %p\n", count, ptr);
fflush(stderr);
}
#endif
g_hakmem_lock_depth--;
return ptr;
}

View File

@ -102,7 +102,8 @@ static inline int ultra_sll_cap_for_class(int class_idx);
static void eventq_push(int class_idx, uint32_t size);
// Debug-only: Validate that a base node belongs to the expected Tiny SuperSlab and is stride-aligned
#if !HAKMEM_BUILD_RELEASE
// IMPORTANT: This is expensive validation, ONLY enabled in DEBUG builds
#if !HAKMEM_BUILD_RELEASE && 0 // Disabled by default even in debug (enable with #if 1 if needed)
static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where) {
if ((uintptr_t)node < 4096) {
fprintf(stderr, "[SLL_NODE_SMALL] %s: node=%p cls=%d\n", where, node, class_idx);

View File

@ -121,6 +121,7 @@ void sfc_init(void) {
// Register shutdown hook for optional stats dump
atexit(sfc_shutdown);
#if !HAKMEM_BUILD_RELEASE
// One-shot debug log
static int debug_printed = 0;
if (!debug_printed) {
@ -137,6 +138,7 @@ void sfc_init(void) {
}
}
}
#endif
// Ensure stats (if requested) are printed at process exit.
// This is inexpensive and guarded inside sfc_shutdown by HAKMEM_SFC_STATS_DUMP.

View File

@ -108,8 +108,12 @@ static inline int sfc_free_push(int cls, void* ptr) {
#if !HAKMEM_BUILD_RELEASE && defined(HAKMEM_SFC_DEBUG_LOG)
// Debug logging (compile-time gated; zero cost in release)
do {
static __thread int free_debug_enabled = -1;
static __thread int free_debug_count = 0;
if (getenv("HAKMEM_SFC_DEBUG") && free_debug_count < 20) {
if (__builtin_expect(free_debug_enabled == -1, 0)) {
free_debug_enabled = getenv("HAKMEM_SFC_DEBUG") ? 1 : 0;
}
if (free_debug_enabled && free_debug_count < 20) {
free_debug_count++;
extern int g_sfc_enabled;
fprintf(stderr, "[SFC_FREE_PUSH] cls=%d, ptr=%p, cnt=%u, cap=%u, will_succeed=%d, enabled=%d\n",

View File

@ -102,14 +102,20 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* base, uin
TinySlabMeta* meta = &ss->slabs[slab_idx];
// Debug: Track tiny_free_fast_ss calls
#if !HAKMEM_BUILD_RELEASE
static __thread int free_ss_debug_enabled = -1;
static __thread int free_ss_debug_count = 0;
if (getenv("HAKMEM_SFC_DEBUG") && free_ss_debug_count < 20) {
if (__builtin_expect(free_ss_debug_enabled == -1, 0)) {
free_ss_debug_enabled = getenv("HAKMEM_SFC_DEBUG") ? 1 : 0;
}
if (free_ss_debug_enabled && free_ss_debug_count < 20) {
free_ss_debug_count++;
int is_same = tiny_free_is_same_thread_ss(ss, slab_idx, my_tid);
extern int g_sfc_enabled;
fprintf(stderr, "[FREE_SS] base=%p, cls=%d, same_thread=%d, sfc_enabled=%d\n",
base, ss->size_class, is_same, g_sfc_enabled);
}
#endif
// Box 6 Boundary: Ownership check (TOCTOU-safe)
if (__builtin_expect(!tiny_free_is_same_thread_ss(ss, slab_idx, my_tid), 0)) {