Remove debug overhead from release builds (19 hotspots)
Problem: - Release builds (-DHAKMEM_BUILD_RELEASE=1) still execute debug code - fprintf, getenv(), atomic counters in hot paths - Performance: 9M ops/s vs System malloc 43M ops/s (4.8x slower) Fixed hotspots: 1. hak_alloc_api.inc.h - atomic_fetch_add + fprintf every alloc 2. hak_free_api.inc.h - Free wrapper trace + route trace 3. hak_wrappers.inc.h - Malloc wrapper logs 4. tiny_free_fast.inc.h - getenv() every free (CRITICAL!) 5. hakmem_tiny_refill.inc.h - Expensive validation 6. hakmem_tiny_sfc.c - SFC initialization logs 7. tiny_alloc_fast_sfc.inc.h - getenv() caching Changes: - Guard all fprintf/printf with #if !HAKMEM_BUILD_RELEASE - Cache getenv() results in TLS variables (debug builds only) - Remove atomic counters from hot paths in release builds - Add no-op stubs for release builds Impact: - All debug code completely eliminated in release builds - Expected improvement: Limited (deeper profiling needed) - Root cause: Performance bottleneck exists beyond debug overhead Note: Benchmark results show debug removal alone insufficient for performance goals. Further investigation required with perf profiling. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -21,12 +21,14 @@ static inline void* hak_os_map_boundary(size_t size, uintptr_t site_id) {
|
||||
|
||||
__attribute__((always_inline))
|
||||
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic uint64_t hak_alloc_call_count = 0;
|
||||
uint64_t call_num = atomic_fetch_add(&hak_alloc_call_count, 1);
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu size=%zu\n", call_num, size);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
HKM_TIME_START(t0);
|
||||
@ -36,24 +38,30 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
uintptr_t site_id = (uintptr_t)site;
|
||||
|
||||
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu entering tiny path\n", call_num);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
HKM_TIME_START(t_tiny);
|
||||
#endif
|
||||
void* tiny_ptr = NULL;
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu calling hak_tiny_alloc_fast_wrapper\n", call_num);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
|
||||
fprintf(stderr, "[HAK_ALLOC_AT] call=%lu hak_tiny_alloc_fast_wrapper returned %p\n", call_num, tiny_ptr);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
|
||||
tiny_ptr = hak_tiny_alloc_ultra_simple(size);
|
||||
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
|
||||
|
||||
Reference in New Issue
Block a user