diff --git a/core/hakmem.c b/core/hakmem.c index a297ce9e..f5fc1995 100644 --- a/core/hakmem.c +++ b/core/hakmem.c @@ -1239,23 +1239,37 @@ __thread uint64_t g_malloc_fast_path_tried = 0; __thread uint64_t g_malloc_fast_path_null = 0; __thread uint64_t g_malloc_slow_path = 0; +// Option A (Full): Inline TLS cache access (zero function call overhead) +extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES]; + void* malloc(size_t size) { // ======================================================================== // Phase 6-5: ULTRA-FAST PATH FIRST (mimalloc/tcache style) // Phase 6-1.7: Box Theory Integration - Zero overhead path + // Option A (Full): Inline TLS cache access (LARSON_PERFORMANCE_ANALYSIS.md) // ======================================================================== // CRITICAL: This MUST be before all guard checks to achieve 3-4 instruction fast path! - // Removed all counter overhead for maximum performance + // Eliminates function call overhead by inlining TLS cache pop directly! + // Expected: +200-400% (system tcache equivalent design) // ======================================================================== #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR - if (__builtin_expect(size <= TINY_FAST_THRESHOLD, 1)) { - // Box 5: Ultra-fast TLS freelist pop (3-4 instructions) - // LTO (-flto) should inline this wrapper automatically + if (__builtin_expect(g_initialized && size <= TINY_FAST_THRESHOLD, 1)) { + // Inline size-to-class mapping (LUT: 1 load) + int cls = hak_tiny_size_to_class(size); + if (__builtin_expect(cls >= 0, 1)) { + // Inline TLS cache pop (3-4 instructions, zero function call!) + void* head = g_tls_sll_head[cls]; + if (__builtin_expect(head != NULL, 1)) { + g_tls_sll_head[cls] = *(void**)head; // Pop: next = *head + return head; // 🚀 TRUE FAST PATH: No function calls! + } + } + // Cache miss or invalid class → call wrapper for refill void* ptr = hak_tiny_alloc_fast_wrapper(size); if (__builtin_expect(ptr != NULL, 1)) { - return ptr; // ✅ FAST PATH SUCCESS: Zero overhead! + return ptr; } - // Miss: fall through to slow path with full initialization + // Refill failed: fall through to slow path } #endif // ========================================================================