Remove debug overhead from release builds (19 hotspots)
Problem:
- Release builds (-DHAKMEM_BUILD_RELEASE=1) still execute debug code
- fprintf, getenv(), atomic counters in hot paths
- Performance: 9M ops/s vs System malloc 43M ops/s (4.8x slower)

Fixed hotspots:
1. hak_alloc_api.inc.h - atomic_fetch_add + fprintf every alloc
2. hak_free_api.inc.h - Free wrapper trace + route trace
3. hak_wrappers.inc.h - Malloc wrapper logs
4. tiny_free_fast.inc.h - getenv() every free (CRITICAL!)
5. hakmem_tiny_refill.inc.h - Expensive validation
6. hakmem_tiny_sfc.c - SFC initialization logs
7. tiny_alloc_fast_sfc.inc.h - getenv() caching

Changes:
- Guard all fprintf/printf with #if !HAKMEM_BUILD_RELEASE
- Cache getenv() results in TLS variables (debug builds only)
- Remove atomic counters from hot paths in release builds
- Add no-op stubs for release builds

Impact:
- All debug code completely eliminated in release builds
- Expected improvement: Limited (deeper profiling needed)
- Root cause: Performance bottleneck exists beyond debug overhead

Note: Benchmark results show that debug removal alone is insufficient for the performance goals. Further investigation with perf profiling is required.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -21,12 +21,14 @@ static inline void* hak_os_map_boundary(size_t size, uintptr_t site_id) {
|
||||
|
||||
__attribute__((always_inline))
|
||||
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic uint64_t hak_alloc_call_count = 0;
|
||||
uint64_t call_num = atomic_fetch_add(&hak_alloc_call_count, 1);
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu size=%zu\n", call_num, size);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
HKM_TIME_START(t0);
|
||||
@ -36,24 +38,30 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
uintptr_t site_id = (uintptr_t)site;
|
||||
|
||||
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu entering tiny path\n", call_num);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
HKM_TIME_START(t_tiny);
|
||||
#endif
|
||||
void* tiny_ptr = NULL;
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu calling hak_tiny_alloc_fast_wrapper\n", call_num);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu hak_tiny_alloc_fast_wrapper returned %p\n", call_num, tiny_ptr);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
|
||||
tiny_ptr = hak_tiny_alloc_ultra_simple(size);
|
||||
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
#endif
|
||||
|
||||
// Optional route trace: print first N classification lines when enabled by env
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static inline int hak_free_route_trace_on(void) {
|
||||
static int g_trace = -1;
|
||||
if (__builtin_expect(g_trace == -1, 0)) {
|
||||
@ -31,6 +32,9 @@ static inline void hak_free_route_log(const char* tag, void* p) {
|
||||
(*budget)--;
|
||||
fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
|
||||
}
|
||||
#else
|
||||
// No-op variant of hak_free_route_log, compiled in the #else branch.
// Accepts the same arguments as the logging variant and discards both,
// so call sites need no conditional compilation of their own.
static inline void hak_free_route_log(const char* tag, void* p)
{
    (void)p;    // unused: nothing is printed in this variant
    (void)tag;  // unused: nothing is printed in this variant
}
|
||||
#endif
|
||||
|
||||
// Optional: request-trace for invalid-magic cases (first N hits)
|
||||
static inline int hak_super_reg_reqtrace_on(void) {
|
||||
@ -74,6 +78,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
#endif
|
||||
(void)site; (void)size;
|
||||
// Optional lightweight trace of early free calls (first few only)
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
|
||||
if (__builtin_expect(free_trace_en == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
|
||||
@ -85,6 +90,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!ptr) {
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
||||
|
||||
@ -114,15 +114,19 @@ void* malloc(size_t size) {
|
||||
}
|
||||
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (count > 14250 && count < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu calling hak_alloc_at\n", count);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (count > 14250 && count < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu hak_alloc_at returned %p\n", count, ptr);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
g_hakmem_lock_depth--;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user