Remove debug overhead from release builds (19 hotspots)

Problem:
- Release builds (-DHAKMEM_BUILD_RELEASE=1) still execute debug code
- fprintf, getenv(), atomic counters in hot paths
- Performance: 9M ops/s vs System malloc 43M ops/s (4.8x slower)

Fixed hotspots:
1. hak_alloc_api.inc.h - atomic_fetch_add + fprintf every alloc
2. hak_free_api.inc.h - Free wrapper trace + route trace
3. hak_wrappers.inc.h - Malloc wrapper logs
4. tiny_free_fast.inc.h - getenv() every free (CRITICAL!)
5. hakmem_tiny_refill.inc.h - Expensive validation
6. hakmem_tiny_sfc.c - SFC initialization logs
7. tiny_alloc_fast_sfc.inc.h - getenv() caching

Changes:
- Guard all fprintf/printf with #if !HAKMEM_BUILD_RELEASE
- Cache getenv() results in TLS variables (debug builds only)
- Remove atomic counters from hot paths in release builds
- Add no-op stubs for release builds

Impact:
- All debug code completely eliminated in release builds
- Expected improvement: Limited (deeper profiling needed)
- Root cause: Performance bottleneck exists beyond debug overhead

Note: Benchmark results show debug removal alone insufficient for
performance goals. Further investigation required with perf profiling.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-13 13:32:58 +09:00
parent c28314fb96
commit 6570f52f7b
8 changed files with 661 additions and 3 deletions

View File

@ -21,12 +21,14 @@ static inline void* hak_os_map_boundary(size_t size, uintptr_t site_id) {
__attribute__((always_inline))
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint64_t hak_alloc_call_count = 0;
uint64_t call_num = atomic_fetch_add(&hak_alloc_call_count, 1);
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu size=%zu\n", call_num, size);
fflush(stderr);
}
#endif
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t0);
@ -36,24 +38,30 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
uintptr_t site_id = (uintptr_t)site;
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu entering tiny path\n", call_num);
fflush(stderr);
}
#endif
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_tiny);
#endif
void* tiny_ptr = NULL;
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu calling hak_tiny_alloc_fast_wrapper\n", call_num);
fflush(stderr);
}
#endif
tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu hak_tiny_alloc_fast_wrapper returned %p\n", call_num, tiny_ptr);
fflush(stderr);
}
#endif
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
tiny_ptr = hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)

View File

@ -12,6 +12,7 @@
#endif
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
static inline int hak_free_route_trace_on(void) {
static int g_trace = -1;
if (__builtin_expect(g_trace == -1, 0)) {
@ -31,6 +32,9 @@ static inline void hak_free_route_log(const char* tag, void* p) {
(*budget)--;
fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
}
#else
static inline void hak_free_route_log(const char* tag, void* p) { (void)tag; (void)p; }
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
static inline int hak_super_reg_reqtrace_on(void) {
@ -74,6 +78,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#endif
(void)site; (void)size;
// Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
if (__builtin_expect(free_trace_en == -1, 0)) {
const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
@ -85,6 +90,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}
}
#endif
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);

View File

@ -114,15 +114,19 @@ void* malloc(size_t size) {
}
}
#if !HAKMEM_BUILD_RELEASE
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu calling hak_alloc_at\n", count);
fflush(stderr);
}
#endif
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
#if !HAKMEM_BUILD_RELEASE
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu hak_alloc_at returned %p\n", count, ptr);
fflush(stderr);
}
#endif
g_hakmem_lock_depth--;
return ptr;
}