// hak_wrappers.inc.h — malloc/free/calloc/realloc wrappers (LD_PRELOAD-aware)
#ifndef HAK_WRAPPERS_INC_H
#define HAK_WRAPPERS_INC_H

#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
// Sanitizer/diagnostic builds: bypass the hakmem allocator completely.
void* malloc(size_t size) {
  extern void* __libc_malloc(size_t);
  return __libc_malloc(size);
}
void free(void* ptr) {
  if (!ptr) return;
  extern void __libc_free(void*);
  __libc_free(ptr);
}
void* calloc(size_t nmemb, size_t size) {
  extern void* __libc_calloc(size_t, size_t);
  return __libc_calloc(nmemb, size);
}
void* realloc(void* ptr, size_t size) {
  extern void* __libc_realloc(void*, size_t);
  return __libc_realloc(ptr, size);
}
#else

#include "../ptr_trace.h"                    // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_classifier.h"           // Box FG: pointer classification (header/reg)
#include "../hakmem_pool.h"                  // Mid registry lookup (failsafe for headerless Mid)
#include "../front/malloc_tiny_fast.h"       // Phase 26: Front Gate Unification (Tiny fast alloc)
#include "tiny_alloc_gate_box.h"             // Tiny Alloc Gatekeeper Box (BASE/USER + Bridge entry point)
#include "tiny_front_config_box.h"           // Phase 4-Step3: Compile-time config for dead code elimination
#include "wrapper_env_box.h"                 // Wrapper env cache (step trace / LD safe / free trace)
#include "wrapper_env_cache_box.h"           // Phase 3 D2: TLS cache for wrapper_env_cfg pointer
#include "free_wrapper_env_snapshot_box.h"   // Phase 5 E4-1: Free wrapper ENV snapshot
#include "malloc_wrapper_env_snapshot_box.h" // Phase 5 E4-2: Malloc wrapper ENV snapshot
#include "free_tiny_direct_env_box.h"        // Phase 5 E5-1: Free Tiny direct path ENV gate
#include "free_tiny_direct_stats_box.h"      // Phase 5 E5-1: Free Tiny direct path stats
#include "malloc_tiny_direct_env_box.h"      // Phase 5 E5-4: Malloc Tiny direct path ENV gate
#include "malloc_tiny_direct_stats_box.h"    // Phase 5 E5-4: Malloc Tiny direct path stats
#include "front_fastlane_box.h"              // Phase 6: Front FastLane (Layer Collapse)
#include "fastlane_direct_env_box.h"         // Phase 19-1: FastLane Direct Path (remove wrapper layer)
#include "../hakmem_internal.h"              // AllocHeader helpers for diagnostics
#include "../hakmem_super_registry.h"        // Superslab lookup for diagnostics
#include "../superslab/superslab_inline.h"   // slab_index_for, capacity
#include <sys/mman.h>                        // mincore for safe mapping checks
#include <unistd.h>                          // write for diagnostics
#include <string.h>                          // strlen for diagnostics

// malloc wrapper - intercepts system malloc() calls
__thread uint64_t g_malloc_total_calls     = 0;
__thread uint64_t g_malloc_tiny_size_match = 0;
__thread uint64_t g_malloc_fast_path_tried = 0;
__thread uint64_t g_malloc_fast_path_null  = 0;
__thread uint64_t g_malloc_slow_path       = 0;

extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];

// CRITICAL FIX (BUG #10): Use the cached g_jemalloc_loaded instead of calling hak_jemalloc_loaded().
// The function-call version triggers infinite recursion: malloc → hak_jemalloc_loaded → dlopen → malloc.
extern int g_jemalloc_loaded;  // Cached during hak_init_impl(), defined in hakmem.c

// Global malloc call counter for debugging (exposed for validation code).
// Defined here, accessed from tls_sll_box.h for corruption detection.
_Atomic uint64_t malloc_count = 0;

// Lightweight fallback diagnostics (enabled with HAKMEM_WRAP_DIAG=1)
typedef enum {
  FB_INIT_WAIT_FAIL = 0,
  FB_INIT_LD_WAIT_FAIL,
  FB_FORCE_LIBC,
  FB_LD_SAFE,
  FB_JEMALLOC_BLOCK,
  FB_LOCKDEPTH,
  FB_NOT_OWNED,
  FB_OTHER,
  FB_REASON_COUNT
} wrapper_fb_reason_t;

static _Atomic uint64_t g_fb_counts[FB_REASON_COUNT];
static _Atomic int g_fb_log_count[FB_REASON_COUNT];
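
// ENV gates referenced in this file (defaults as documented at their use sites; presets may
// override some of them):
//   HAKMEM_WRAP_DIAG=1            - log the first few libc fallbacks per reason (wrapper_record_fallback)
//   HAKMEM_WRAP_SHAPE=0/1         - hot/cold split dispatch (default: 0, legacy path)
//   HAKMEM_FRONT_FASTLANE=0/1     - Front FastLane layer collapse (default: 1, opt-out)
//   HAKMEM_FASTLANE_DIRECT=0/1    - FastLane Direct path, bypasses the wrapper layer (default: 0, opt-in)
//   HAKMEM_FRONT_GATE_UNIFIED=1   - Front Gate Unification Tiny fast path (default: OFF)
//   HAKMEM_MALLOC_TINY_DIRECT=0/1 - Malloc Tiny direct path (default: 0, research box)
//   HAKMEM_FREE_TINY_DIRECT=0/1   - Free Tiny direct path (default: 0, research box)
//   HAKMEM_LD_SAFE=N              - N >= 2 forces libc fallback in LD mode (default: 1)
//   HAKMEM_STEP_TRACE=1           - step trace for the 33000-byte debug probe (default: OFF)
//   HAKMEM_FREE_WRAP_TRACE=1      - trace free() / free_cold() entry
// Illustrative usage (the library filename is hypothetical; substitute the actual build output):
//   HAKMEM_WRAP_DIAG=1 LD_PRELOAD=./libhakmem.so ./your_app
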
static inline void wrapper_trace_write(const char* msg, size_t len) {
  ssize_t w = write(2, msg, len);
  (void)w;
}

static inline void wrapper_record_fallback(wrapper_fb_reason_t reason, const char* msg) {
  atomic_fetch_add_explicit(&g_fb_counts[reason], 1, memory_order_relaxed);
  const wrapper_env_cfg_t* wcfg = wrapper_env_cfg();
  if (__builtin_expect(wcfg->wrap_diag, 0)) {
    int n = atomic_fetch_add_explicit(&g_fb_log_count[reason], 1, memory_order_relaxed);
    if (n < 4 && msg) {
      wrapper_trace_write(msg, strlen(msg));
    }
  }
}

// Phase 2 B4: malloc_cold() - Cold path for malloc (noinline,cold)
// Handles: BenchFast, LD mode, jemalloc checks, force_libc, init waits, hak_alloc_at routing
// Note: g_hakmem_lock_depth is ALREADY incremented before calling this function
__attribute__((noinline, cold))
static void* malloc_cold(size_t size, const wrapper_env_cfg_t* wcfg) {
  // BenchFast mode (structural ceiling measurement)
  if (__builtin_expect(!atomic_load(&g_bench_fast_init_in_progress) && bench_fast_enabled(), 0)) {
    if (size <= 1024) {
      void* p = bench_fast_alloc(size);
      g_hakmem_lock_depth--;
      return p;
    }
  }

  // Force libc check
  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc malloc: force_libc\n");
    g_hakmem_lock_depth--;
    extern void* __libc_malloc(size_t);
    return __libc_malloc(size);
  }

  // LD mode checks
  int ld_mode = hak_ld_env_mode();
  if (ld_mode) {
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc malloc: jemalloc block\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      return __libc_malloc(size);
    }
    if (!g_initialized) { hak_init(); }
    int ld_init_wait = hak_init_wait_for_ready();
    if (__builtin_expect(ld_init_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc malloc: ld init_wait\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      return __libc_malloc(size);
    }
    if (wcfg->ld_safe_mode >= 2) {
      wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc malloc: ld_safe\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      return __libc_malloc(size);
    }
  }

  // Mid/Large routing via hak_alloc_at
  void* ptr = hak_alloc_at(size, HAK_CALLSITE());
  g_hakmem_lock_depth--;
  return ptr;
}
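
// Routing overview for the malloc() wrapper below (a summary of the phases documented inline):
//   1. Cached force-libc override       -> __libc_malloc
//   2. BenchFast mode (benchmark-only structural ceiling)
//   3. FastLane Direct (Phase 19-1b)    -> malloc_tiny_fast() once initialized
//   4. Front FastLane (Phase 6)         -> front_fastlane_try_malloc()
//   5. Malloc wrapper ENV snapshot (Phase 5 E4-2), incl. Tiny direct (E5-4) and the unified gate
//   6. HAKMEM_WRAP_SHAPE hot/cold split -> tiny_alloc_gate_fast() or malloc_cold()
//   7. Legacy inline path: safety ladder (lock depth, init, force-libc, LD mode),
//      then the unified gate, then hak_alloc_at()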

void* malloc(size_t size) {
#ifndef NDEBUG
  uint64_t count = atomic_fetch_add(&malloc_count, 1);
#endif
#if !HAKMEM_BUILD_RELEASE
  // Debug-only trace counter: in release builds this atomic increment
  // is disabled to avoid hot-path cache misses and contention.
  static _Atomic int g_wrap_malloc_trace_count = 0;
  if (atomic_fetch_add_explicit(&g_wrap_malloc_trace_count, 1, memory_order_relaxed) < 256) {
    HAK_TRACE("[wrap_malloc_enter]\n");
  }
#endif
  // NDEBUG: malloc_count increment disabled - removes 27.55% bottleneck

  // Force libc must override FastLane/hot wrapper paths.
  // NOTE: Use the cached file-scope g_force_libc_alloc to avoid getenv recursion
  // during early startup (before lock_depth is incremented).
  if (__builtin_expect(g_force_libc_alloc == 1, 0)) {
    extern void* __libc_malloc(size_t);
    return __libc_malloc(size);
  }

  // Phase 20-2: BenchFast mode (structural ceiling measurement)
  // WARNING: Bypasses ALL safety checks - benchmark only!
  // IMPORTANT: Do NOT use BenchFast during preallocation/init to avoid recursion.
  // Phase 8-TLS-Fix: Use atomic_load for cross-thread safety
  if (__builtin_expect(!atomic_load(&g_bench_fast_init_in_progress) && bench_fast_enabled(), 0)) {
    if (size <= 1024) {  // Tiny range
      return bench_fast_alloc(size);
    }
    // Fall back to the normal path for large allocations
  }

  // Phase 19-1b: FastLane Direct Path (bypass wrapper layer, revised)
  // Strategy: Direct call to malloc_tiny_fast() (remove wrapper overhead; miss falls through)
  // Expected: -17.5 instructions/op, -6.0 branches/op, +10-15% throughput
  // ENV: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in)
  // Phase 19-1b changes:
  //   1. Removed __builtin_expect() from the fastlane_direct_enabled() check (unfair A/B)
  //   2. No change to the malloc path (malloc_tiny_fast is already optimal)
  if (fastlane_direct_enabled()) {
    // Fail-fast: match the Front FastLane rule (FastLane is only safe after init completes).
    if (__builtin_expect(!g_initialized, 0)) {
      // Not safe → fall through to the wrapper path (handles init/LD safety).
    } else {
      // Direct path: bypass the front_fastlane_try_malloc() wrapper
      void* ptr = malloc_tiny_fast(size);
      if (__builtin_expect(ptr != NULL, 1)) {
        return ptr;  // Success: handled by the hot path
      }
      // Not handled → fall through to the existing FastLane + wrapper path.
      // This preserves lock_depth/init/LD semantics for Mid/Large allocations.
    }
  }

  // Phase 6: Front FastLane (Layer Collapse)
  // Strategy: Collapse the wrapper→gate→policy→route layers into a single hot box
  // Observed: +11.13% on Mixed 10-run (Phase 6 A/B)
  // ENV: HAKMEM_FRONT_FASTLANE=0/1 (default: 1, opt-out)
  if (__builtin_expect(front_fastlane_enabled(), 1)) {
    void* p = front_fastlane_try_malloc(size);
    if (__builtin_expect(p != NULL, 1)) {
      return p;  // Success: handled by FastLane
    }
    // Fallback: not handled, continue to the existing wrapper path
  }

  // Phase 5 E4-2: Malloc Wrapper ENV Snapshot (optional, ENV-gated)
  // Strategy: Consolidate 2+ TLS reads -> 1 TLS read (50%+ reduction)
  // Expected gain: +2-4% (from malloc 16.13% + tiny_alloc_gate_fast 19.50% reduction)
  // Phase 19-4a: Removed the UNLIKELY hint; the gate is ON by default in presets
  if (malloc_wrapper_env_snapshot_enabled()) {
    // Optimized path: single TLS snapshot (1 TLS read instead of 2+)
    const struct malloc_wrapper_env_snapshot* env = malloc_wrapper_env_get();

    // Phase 5 E5-4: Malloc Tiny Direct Path (ENV-gated, opt-in)
    // Strategy: Bypass the tiny_alloc_gate_fast() "gate tax", go directly to malloc_tiny_fast_for_class()
    // Expected gain: +3-5% (mirrors the E5-1 success pattern on the alloc side)
    // ENV: HAKMEM_MALLOC_TINY_DIRECT=0/1 (default: 0, research box)
    if (__builtin_expect(malloc_tiny_direct_enabled(), 0)) {
      // Safety checks (same as the E5-1 pattern)
      if (__builtin_expect(env->front_gate_unified && env->tiny_max_size_256 && size <= 256, 1)) {
        MALLOC_TINY_DIRECT_STAT_INC(direct_total);
        // Direct class calculation (bypass gate overhead)
        int class_idx = hak_tiny_size_to_class(size);
        if (__builtin_expect(class_idx >= 0 && class_idx < 8, 1)) {
          // Direct Tiny alloc path (bypass gate diagnostics + routing overhead)
          void* ptr = malloc_tiny_fast_for_class(size, class_idx);
          if (__builtin_expect(ptr != NULL, 1)) {
            MALLOC_TINY_DIRECT_STAT_INC(direct_hit);
            return ptr;  // Success
          }
          MALLOC_TINY_DIRECT_STAT_INC(fast_null);
          // Fall through to the normal path (refill failure)
        } else {
          MALLOC_TINY_DIRECT_STAT_INC(class_oob);
        }
      }
    }
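
    // Note: the `class_idx < 8` bound above assumes eight Tiny classes, matching the
    // 0xa0-0xa7 header-magic range checked in the free paths; TINY_NUM_CLASSES is the
    // authoritative constant if the class count ever changes.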

    // Fast path: Front gate unified (LIKELY in current presets)
    if (__builtin_expect(env->front_gate_unified, 1)) {
      // Common case: size <= 256 (pre-cached, no function call)
      if (__builtin_expect(env->tiny_max_size_256 && size <= 256, 1)) {
        void* ptr = tiny_alloc_gate_fast(size);
        if (__builtin_expect(ptr != NULL, 1)) {
          return ptr;
        }
      } else if (size <= tiny_get_max_size()) {
        // Fallback for non-256 max sizes (rare)
        void* ptr = tiny_alloc_gate_fast(size);
        if (__builtin_expect(ptr != NULL, 1)) {
          return ptr;
        }
      }
    }

    // Slow path fallback: Wrap shape dispatch
    if (__builtin_expect(env->wrap_shape, 0)) {
      // Need to increment the lock depth for the malloc_cold path
      g_hakmem_lock_depth++;
      // Guard against recursion during initialization
      int init_wait = hak_init_wait_for_ready();
      if (__builtin_expect(init_wait <= 0, 0)) {
        wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc malloc: init_wait\n");
        g_hakmem_lock_depth--;
        extern void* __libc_malloc(size_t);
        return __libc_malloc(size);
      }
      // Ensure initialization before the cold path
      if (!g_initialized) hak_init();
      // Delegate to the cold path
      const wrapper_env_cfg_t* wcfg = wrapper_env_cfg_fast();
      return malloc_cold(size, wcfg);
    }
    // Fall through to the legacy path below
  }

  // Phase 2 B4: Hot/Cold dispatch (HAKMEM_WRAP_SHAPE)
  // Phase 3 D2: Use wrapper_env_cfg_fast() to reduce hot-path overhead
  const wrapper_env_cfg_t* wcfg = wrapper_env_cfg_fast();
  if (__builtin_expect(wcfg->wrap_shape, 0)) {
    // B4 Optimized: Hot/Cold split
    // CRITICAL FIX (BUG #7): Increment the lock depth FIRST, before ANY libc calls
    g_hakmem_lock_depth++;
    // Guard against recursion during initialization
    int init_wait = hak_init_wait_for_ready();
    if (__builtin_expect(init_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc malloc: init_wait\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      return __libc_malloc(size);
    }
    // Phase 26: CRITICAL - Ensure initialization before the fast path
    if (!g_initialized) hak_init();
    // Phase 26: Front Gate Unification (Tiny fast path)
    if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
      if (size <= tiny_get_max_size()) {
        void* ptr = tiny_alloc_gate_fast(size);
        if (__builtin_expect(ptr != NULL, 1)) {
          g_hakmem_lock_depth--;
          return ptr;
        }
      }
    }
    // Hot path exhausted → delegate to cold
    return malloc_cold(size, wcfg);
  }
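
  // Everything below is the legacy (HAKMEM_WRAP_SHAPE=0) inline path: it repeats the same
  // safety ladder (lock depth, init wait, force-libc, LD mode) before trying the unified
  // gate and finally hak_alloc_at().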

  // DEBUG BAILOUT DISABLED - Testing full path
  // if (__builtin_expect(count >= 14270 && count <= 14285, 0)) {
  //   extern void* __libc_malloc(size_t);
  //   fprintf(stderr, "[MALLOC_WRAPPER] count=%lu size=%zu - BAILOUT TO LIBC!\n", count, size);
  //   fflush(stderr);
  //   return __libc_malloc(size);
  // }

  // CRITICAL FIX (BUG #7): Increment the lock depth FIRST, before ANY libc calls.
  // This prevents infinite recursion when getenv/fprintf/dlopen call malloc.
  g_hakmem_lock_depth++;

  // Debug step trace for 33KB: gated by env HAKMEM_STEP_TRACE (default: OFF)
  if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:1 Lock++\n", 14);

  // Guard against recursion during initialization
  int init_wait = hak_init_wait_for_ready();
  if (__builtin_expect(init_wait <= 0, 0)) {
    wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc malloc: init_wait\n");
    g_hakmem_lock_depth--;
    extern void* __libc_malloc(size_t);
    if (size == 33000) wrapper_trace_write("RET:Initializing\n", 17);
    return __libc_malloc(size);
  }

  // Now safe to call getenv/fprintf/dlopen (they will use __libc_malloc if needed)
  extern int g_sfc_debug;
  static _Atomic int debug_count = 0;
  if (__builtin_expect(g_sfc_debug, 0) && debug_count < 100) {
    int n = atomic_fetch_add(&debug_count, 1);
    if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
  }

  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc malloc: force_libc\n");
    g_hakmem_lock_depth--;
    extern void* __libc_malloc(size_t);
    if (wcfg->step_trace && size == 33000) wrapper_trace_write("RET:ForceLibc\n", 14);
    return __libc_malloc(size);
  }
  if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:2 ForceLibc passed\n", 24);

  int ld_mode = hak_ld_env_mode();
  if (ld_mode) {
    if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:3 LD Mode\n", 15);
    // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger a fallback.
    // Only fall back if jemalloc is ACTUALLY loaded (> 0).
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc malloc: jemalloc block\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      if (wcfg->step_trace && size == 33000) wrapper_trace_write("RET:Jemalloc\n", 13);
      return __libc_malloc(size);
    }
    if (!g_initialized) { hak_init(); }
    int ld_init_wait = hak_init_wait_for_ready();
    if (__builtin_expect(ld_init_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc malloc: ld init_wait\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      if (wcfg->step_trace && size == 33000) wrapper_trace_write("RET:Init2\n", 10);
      return __libc_malloc(size);
    }
    // Cache HAKMEM_LD_SAFE to avoid repeated getenv on the hot path
    if (wcfg->ld_safe_mode >= 2) {
      wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc malloc: ld_safe\n");
      g_hakmem_lock_depth--;
      extern void* __libc_malloc(size_t);
      if (wcfg->step_trace && size == 33000) wrapper_trace_write("RET:LDSafe\n", 11);
      return __libc_malloc(size);
    }
  }
  if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:4 LD Check passed\n", 23);

  // Phase 26: CRITICAL - Ensure initialization before the fast path
  // (the fast path bypasses hak_alloc_at, so we need to init here)
  if (!g_initialized) hak_init();

  // Phase 26: Front Gate Unification (Tiny fast path)
  // Placed AFTER all safety checks (lock depth, initializing, LD_SAFE, jemalloc).
  // Bypasses: hak_alloc_at routing (236 lines) + wrapper diagnostics + tiny overhead
  // Target: +10-15% performance (11.35M → 12.5-13.5M ops/s)
  // ENV: HAKMEM_FRONT_GATE_UNIFIED=1 to enable (default: OFF)
  // Phase 4-Step3: Use the config macro for compile-time optimization
  // Phase 7-Step1: Changed the expect hint from 0→1 (the unified path is now LIKELY)
  if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
    if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:5 Unified Gate check\n", 26);
    if (size <= tiny_get_max_size()) {
      if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:5.1 Inside Unified\n", 24);
      // Tiny Alloc Gate Box: a thin wrapper around malloc_tiny_fast()
      // (with diagnostics OFF, behavior and cost are unchanged)
      void* ptr = tiny_alloc_gate_fast(size);
      if (__builtin_expect(ptr != NULL, 1)) {
        g_hakmem_lock_depth--;
        if (wcfg->step_trace && size == 33000) wrapper_trace_write("RET:TinyFast\n", 13);
        return ptr;
      }
      // Unified Cache miss → fall back to the normal path (hak_alloc_at)
    }
  }
  if (wcfg->step_trace && size == 33000) wrapper_trace_write("STEP:6 All checks passed\n", 25);

#if !HAKMEM_BUILD_RELEASE
  if (count > 14250 && count < 14280 && size <= 1024) {
    fprintf(stderr, "[MALLOC_WRAPPER] count=%lu calling hak_alloc_at\n", count);
    fflush(stderr);
  }
#endif
  void* ptr = hak_alloc_at(size, HAK_CALLSITE());
#if !HAKMEM_BUILD_RELEASE
  if (count > 14250 && count < 14280 && size <= 1024) {
    fprintf(stderr, "[MALLOC_WRAPPER] count=%lu hak_alloc_at returned %p\n", count, ptr);
    fflush(stderr);
  }
#endif
  g_hakmem_lock_depth--;
  return ptr;
}
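
// Re-entrancy note: g_hakmem_lock_depth (declared elsewhere) is the recursion guard shared by
// these wrappers. It is incremented before any call that may re-enter malloc/free
// (getenv/fprintf/dlopen, hak_alloc_at/hak_free_at) and decremented on every exit path;
// free_cold(), calloc(), and realloc() bail out to __libc_* when they observe a non-zero
// depth (see the FB_LOCKDEPTH fallbacks).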

// Phase 2 B4: free_cold() - Cold path for free (noinline,cold)
// Handles: classify_ptr, ownership checks, header checks, hak_free_at routing
// Note: This function contains all the expensive classification and fallback logic
__attribute__((noinline, cold))
static void free_cold(void* ptr, const wrapper_env_cfg_t* wcfg) {
  // Trace
  do {
    static int on = -1;
    if (on == -1) {
      const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
      on = (e && *e && *e != '0') ? 1 : 0;
    }
    if (on) {
      fprintf(stderr, "[WRAP_FREE_COLD] ptr=%p depth=%d\n", ptr, g_hakmem_lock_depth);
    }
  } while (0);
#if !HAKMEM_BUILD_RELEASE
  // Debug safety: guard obviously invalid tiny integers to avoid a libc crash and collect a trace
  if ((uintptr_t)ptr < 4096) {
    ptr_trace_dump_now("wrap_small_ptr");
    fprintf(stderr, "[FREE_SMALL_PTR] ignore ptr=%p (likely header-corruption sentinel)\n", ptr);
    return;
  }
#endif

  // Classify the pointer BEFORE the early libc fallbacks to avoid misrouting Tiny pointers.
  // This is safe: the classifier uses a header probe and the registry; it does not allocate.
  int is_hakmem_owned = 0;
  {
    ptr_classification_t c = classify_ptr(ptr);
    switch (c.kind) {
      case PTR_KIND_TINY_HEADER:
      case PTR_KIND_TINY_HEADERLESS:
      case PTR_KIND_POOL_TLS:
      case PTR_KIND_MID_LARGE:  // FIX: Include Mid-Large (mmap/ACE) pointers
        is_hakmem_owned = 1;
        break;
      default:
        break;
    }
  }
  if (!is_hakmem_owned) {
    // Failsafe: the Mid registry lookup catches headerless/corrupted Mid allocations
    if (hak_pool_mid_lookup(ptr, NULL)) {
      is_hakmem_owned = 1;
    }
  }
  if (is_hakmem_owned) {
    // Route to hak_free_at even if lock_depth > 0 (only ptr_trace is used here, to suppress logging)
    g_hakmem_lock_depth++;
    hak_free_at(ptr, 0, HAK_CALLSITE());
    g_hakmem_lock_depth--;
    return;
  }

  // Front Gate libc bypass detection (quiet in release)
  static _Atomic uint64_t fg_libc_bypass_count = 0;
  if (g_hakmem_lock_depth > 0) {
#if !HAKMEM_BUILD_RELEASE
    uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
    if (count < 10) {
      fprintf(stderr, "[FG_LIBC_BYPASS] lockdepth=%d count=%llu ptr=%p\n",
              g_hakmem_lock_depth, (unsigned long long)count, ptr);
    }
#else
    (void)fg_libc_bypass_count;
#endif
    // Safety: If this is a HAKMEM-owned header allocation, free the raw pointer correctly
    do {
      void* raw = (char*)ptr - HEADER_SIZE;
      int safe_same_page = (((uintptr_t)ptr & 0xFFFu) >= HEADER_SIZE);
      if (!safe_same_page) {
        if (!hak_is_memory_readable(raw)) break;
      }
      AllocHeader* hdr = (AllocHeader*)raw;
      if (hdr->magic == HAKMEM_MAGIC) {
        // Dispatch based on the allocation method
        if (hdr->method == ALLOC_METHOD_MALLOC) {
          extern void __libc_free(void*);
          ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_malloc");
          __libc_free(raw);
          return;
        } else if (hdr->method == ALLOC_METHOD_MMAP) {
          ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_mmap");
          hkm_sys_munmap(raw, hdr->size);
          return;
        }
      }
    } while (0);
    // Unknown pointer or non-HAKMEM: fall back to libc free(ptr)
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_lockdepth");
    wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc free: lockdepth\n");
    __libc_free(ptr);
    return;
  }

  int free_init_wait = hak_init_wait_for_ready();
  if (__builtin_expect(free_init_wait <= 0, 0)) {
    wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc free: init_wait\n");
#if !HAKMEM_BUILD_RELEASE
    uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
    if (count < 10) {
      fprintf(stderr, "[FG_LIBC_BYPASS] init=%d count=%llu ptr=%p\n",
              g_initializing, (unsigned long long)count, ptr);
    }
#endif
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_init");
    __libc_free(ptr);
    return;
  }
  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_force");
    __libc_free(ptr);
    return;
  }
  if (hak_ld_env_mode()) {
    // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger a fallback
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      extern void __libc_free(void*);
      ptr_trace_dump_now("wrap_libc_ld_jemalloc");
      __libc_free(ptr);
      return;
    }
    if (!g_initialized) { hak_init(); }
    int free_ld_wait = hak_init_wait_for_ready();
    if (__builtin_expect(free_ld_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc free: ld init_wait\n");
      extern void __libc_free(void*);
      ptr_trace_dump_now("wrap_libc_ld_init");
      __libc_free(ptr);
      return;
    }
  }

  // Phase 15: Box Separation - Domain check to distinguish hakmem vs external pointers
  // CRITICAL: Prevent BenchMeta (slots[]) from entering CoreAlloc (hak_free_at)
  // Strategy: Check the 1-byte header at ptr-1 for HEADER_MAGIC (0xa0/0xb0)
  //   - If it is a hakmem Tiny allocation → route to hak_free_at()
  //   - Otherwise → delegate to __libc_free() (external/BenchMeta)
  //
  // Safety: Only check the header if ptr is NOT page-aligned (then ptr-1 is safe to read)
  uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
  if (offset_in_page > 0) {
    // Not page-aligned, safe to check ptr-1
    uint8_t header = *((uint8_t*)ptr - 1);
    if ((header & 0xF0) == 0xA0) {
      // Tiny header byte → require a Superslab hit to avoid misclassification
      SuperSlab* ss = hak_super_lookup(ptr);
      if (ss && ss->magic == SUPERSLAB_MAGIC) {
        g_hakmem_lock_depth++;
        hak_free_at(ptr, 0, HAK_CALLSITE());
        g_hakmem_lock_depth--;
        return;
      }
      // No Superslab registered → not managed by hakmem. Do not pass it to libc free either;
      // ignore it (defends against working-set garbage).
      return;
    } else if ((header & 0xF0) == 0xB0) {
      // Pool TLS header (if enabled) — no registry check needed
#ifdef HAKMEM_POOL_TLS_PHASE1
      g_hakmem_lock_depth++;
      hak_free_at(ptr, 0, HAK_CALLSITE());
      g_hakmem_lock_depth--;
      return;
#endif
    }
    // No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.)
    // Phase 5 E4-1: Get wcfg for the wrap_diag check (may be the snapshot path or the legacy path)
    const wrapper_env_cfg_t* wcfg_diag = wrapper_env_cfg_fast();
    if (__builtin_expect(wcfg_diag->wrap_diag, 0)) {
      SuperSlab* ss = hak_super_lookup(ptr);
      int slab_idx = -1;
      int meta_cls = -1;
      int alloc_method = -1;
      if (__builtin_expect(ss && ss->magic == SUPERSLAB_MAGIC, 0)) {
        slab_idx = slab_index_for(ss, (void*)((uint8_t*)ptr - 1));
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
          meta_cls = ss->slabs[slab_idx].class_idx;
        }
      } else if (offset_in_page >= HEADER_SIZE) {
        AllocHeader* ah = hak_header_from_user(ptr);
        if (hak_header_validate(ah)) {
          alloc_method = ah->method;
        }
      }
      fprintf(stderr,
              "[WRAP_FREE_NOT_OWNED] ptr=%p hdr=0x%02x off=0x%lx lockdepth=%d init=%d ss=%p slab=%d meta_cls=%d alloc_method=%d\n",
              ptr, header, (unsigned long)offset_in_page, g_hakmem_lock_depth, g_initializing,
              (void*)ss, slab_idx, meta_cls, alloc_method);
    }
    // Self-heal: if this looks like a SuperSlab (magic matches) but registry lookup failed,
    // re-register on the fly and route to hakmem free to avoid libc abort.
    {
      SuperSlab* ss_guess = (SuperSlab*)((uintptr_t)ptr & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
      long page_sz = sysconf(_SC_PAGESIZE);
      unsigned char mincore_vec = 0;
      int mapped = (page_sz > 0) &&
                   (mincore((void*)((uintptr_t)ss_guess & ~(uintptr_t)(page_sz - 1)),
                            (size_t)page_sz, &mincore_vec) == 0);
      if (mapped && ss_guess->magic == SUPERSLAB_MAGIC) {
        hak_super_register((uintptr_t)ss_guess, ss_guess);  // idempotent if already registered
        g_hakmem_lock_depth++;
        hak_free_at(ptr, 0, HAK_CALLSITE());
        g_hakmem_lock_depth--;
        return;
      }
    }
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_external_nomag");
    wrapper_record_fallback(FB_NOT_OWNED, "[wrap] libc free: not_owned\n");
    __libc_free(ptr);
    return;
  }

  // Page-aligned pointer → cannot safely check header, use full classification
  // (This includes Pool/Mid/L25 allocations which may be page-aligned)
  g_hakmem_lock_depth++;
  hak_free_at(ptr, 0, HAK_CALLSITE());
  g_hakmem_lock_depth--;
}
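
// Routing overview for the free() wrapper below (a summary of the phases documented inline):
//   1. NULL check and the cached force-libc override -> __libc_free
//   2. FastLane Direct (Phase 19-1b)                 -> free_tiny_fast() or free_cold()
//   3. Front FastLane (Phase 6)                      -> front_fastlane_try_free()
//   4. Free Tiny Direct (Phase 5 E5-1): 1-byte header probe -> free_tiny_fast()
//   5. BenchFast (benchmark-only), then the Free wrapper ENV snapshot (E4-1) or the
//      HAKMEM_WRAP_SHAPE hot/cold dispatch           -> free_tiny_fast[_hot]() or free_cold()
//   6. Legacy inline path: an ownership ladder mirroring free_cold() -> hak_free_at() or __libc_free()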

void free(void* ptr) {
#if !HAKMEM_BUILD_RELEASE
  // Debug-only trace counters; disabled in release to keep the free() hot path
  // free of atomic increments.
  static _Atomic int g_wrap_free_trace_count = 0;
  if (atomic_fetch_add_explicit(&g_wrap_free_trace_count, 1, memory_order_relaxed) < 256) {
    HAK_TRACE("[wrap_free_enter]\n");
  }
  atomic_fetch_add_explicit(&g_free_wrapper_calls, 1, memory_order_relaxed);
#endif
  if (!ptr) return;

  // Force libc must override FastLane/hot wrapper paths.
  // NOTE: Use the cached file-scope g_force_libc_alloc (no getenv) to keep
  // this check safe even during early startup/recursion scenarios.
  if (__builtin_expect(g_force_libc_alloc == 1, 0)) {
    extern void __libc_free(void*);
    __libc_free(ptr);
    return;
  }

  // Phase 19-1b: FastLane Direct Path (bypass wrapper layer, revised)
  // Strategy: Direct call to free_tiny_fast() / free_cold() (remove the 30% wrapper overhead)
  // Expected: -17.5 instructions/op, -6.0 branches/op, +10-15% throughput
  // ENV: HAKMEM_FASTLANE_DIRECT=0/1 (default: 0, opt-in)
  // Phase 19-1b changes:
  //   1. Removed __builtin_expect() from the fastlane_direct_enabled() check (unfair A/B)
  //   2. Changed free_tiny_fast_hot() → free_tiny_fast() (use the winning path directly)
  if (fastlane_direct_enabled()) {
    // Fail-fast: match the Front FastLane rule (FastLane is only safe after init completes).
    if (__builtin_expect(!g_initialized, 0)) {
      // Not safe → fall through to the wrapper path (handles init/LD safety).
    } else {
      // Direct path: bypass the front_fastlane_try_free() wrapper
      if (free_tiny_fast(ptr)) {
        return;  // Success: handled by the hot path
      }
      // Fallback: the cold path handles Mid/Large/external pointers
      const wrapper_env_cfg_t* wcfg = wrapper_env_cfg_fast();
      free_cold(ptr, wcfg);
      return;
    }
  }

  // Phase 6: Front FastLane (Layer Collapse) - free path
  // Strategy: Collapse the wrapper→gate→classify layers into a single hot box
  // Observed: +11.13% on Mixed 10-run (Phase 6 A/B)
  // ENV: HAKMEM_FRONT_FASTLANE=0/1 (default: 1, opt-out)
  if (__builtin_expect(front_fastlane_enabled(), 1)) {
    if (front_fastlane_try_free(ptr)) {
      return;  // Success: handled by FastLane
    }
    // Fallback: not handled, continue to the existing wrapper path
  }

  // Phase 5 E5-1: Free Tiny Direct Path (ENV-gated, opt-in)
  // Strategy: Wrapper-level Tiny validation → direct path (skip the ENV snapshot + cold path)
  // Expected gain: +3-5% (reduces the 29.56% overhead by 30-40%)
  // ENV: HAKMEM_FREE_TINY_DIRECT=0/1 (default: 0, research box)
  // Phase 19-4c: Removed the UNLIKELY hint; the gate is ON by default in presets
  if (free_tiny_direct_enabled()) {
#if HAKMEM_TINY_HEADER_CLASSIDX
    // Page boundary guard: ptr must not be page-aligned
    uintptr_t off = (uintptr_t)ptr & 0xFFFu;
    if (__builtin_expect(off != 0, 1)) {
      // Fast header validation (1 load, 1 compare)
      uint8_t header = *((uint8_t*)ptr - 1);
      uint8_t magic = header & 0xF0u;
      if (magic == 0xA0u) {  // Tiny header magic
        int class_idx = (int)(header & 0x0Fu);
        if (__builtin_expect(class_idx < 8, 1)) {
          FREE_TINY_DIRECT_STAT_INC(direct_total);
          // Direct Tiny free path (bypass wrapper overhead)
          if (free_tiny_fast(ptr)) {
            FREE_TINY_DIRECT_STAT_INC(fast_fallback);
            return;  // Success
          }
          FREE_TINY_DIRECT_STAT_INC(fast_failure);
          // Fall through to the normal path (cold-path failure)
        }
      } else if (magic != 0) {
        // Non-Tiny header (Mid/Pool/Large)
        FREE_TINY_DIRECT_STAT_INC(invalid_header);
      }
    }
#endif
  }
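
  // Tiny header byte layout used above (and by the BenchFast / legacy probes below):
  // the byte at ptr-1 encodes 0xA0 | class_idx, so valid Tiny headers fall in 0xa0-0xa7
  // (e.g. 0xa3 decodes to class_idx 3), while 0xB0 marks Pool TLS headers.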

  // Phase 20-2: BenchFast mode (structural ceiling measurement)
  // WARNING: Bypasses ALL safety checks - benchmark only!
  if (__builtin_expect(bench_fast_enabled(), 0)) {
    // Trust the header magic to identify Tiny allocations
#if HAKMEM_TINY_HEADER_CLASSIDX
    uint8_t header = *((uint8_t*)ptr - 1);
    if ((header & 0xf0) == 0xa0) {  // Tiny header magic (0xa0-0xa7)
      bench_fast_free(ptr);
      return;
    }
#endif
    // Fall back to the normal path for non-Tiny or no-header mode
  }

  // Phase 5 E4-1: Free Wrapper ENV Snapshot (optional, ENV-gated)
  // Strategy: Consolidate 2 TLS reads -> 1 TLS read (50% reduction)
  // Expected gain: +1.5-2.5% (from the free() 25.26% self% reduction)
  // Phase 19-4a: Removed the UNLIKELY hint; the gate is ON by default in presets
  if (free_wrapper_env_snapshot_enabled()) {
    // Optimized path: single TLS snapshot (1 TLS read instead of 2)
    const struct free_wrapper_env_snapshot* env = free_wrapper_env_get();

    // Fast path: Front gate unified (LIKELY in current presets)
    if (__builtin_expect(env->front_gate_unified, 1)) {
      int freed;
      if (__builtin_expect(env->hotcold_enabled, 0)) {
        freed = free_tiny_fast_hot(ptr);  // Hot/cold split version
      } else {
        freed = free_tiny_fast(ptr);      // Legacy monolithic version
      }
      if (__builtin_expect(freed, 1)) {
        return;  // Success (pushed to the Unified Cache)
      }
    }

    // Slow path fallback: Wrap shape dispatch
    if (__builtin_expect(env->wrap_shape, 0)) {
      const wrapper_env_cfg_t* wcfg = wrapper_env_cfg_fast();
      return free_cold(ptr, wcfg);
    }
    // Fall through to the legacy classification path below
  } else {
    // Legacy path (SNAPSHOT=0, default): original behavior preserved
    // Phase 3 D2: Use wrapper_env_cfg_fast() to reduce hot-path overhead
    const wrapper_env_cfg_t* wcfg = wrapper_env_cfg_fast();

    // Phase 2 B4: HAKMEM_WRAP_SHAPE dispatch (hot/cold split for free)
    if (__builtin_expect(wcfg->wrap_shape, 0)) {
      // B4 Optimized: The hot path handles simple cases and delegates to free_cold()
      // Phase 26: Front Gate Unification (Tiny free fast path)
      // Placed AFTER the BenchFast check, BEFORE the expensive classify_ptr()
      // Bypasses: hak_free_at routing + wrapper overhead + classification
      // Target: +10-15% performance (pairs with malloc_tiny_fast)
      // ENV: HAKMEM_FRONT_GATE_UNIFIED=1 to enable (default: OFF)
      // Phase 4-Step3: Use the config macro for compile-time optimization
      // Phase 7-Step1: Changed the expect hint from 0→1 (the unified path is now LIKELY)
      if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
        // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split dispatch
        int freed;
        if (__builtin_expect(hak_free_tiny_fast_hotcold_enabled(), 0)) {
          freed = free_tiny_fast_hot(ptr);  // NEW: Hot/Cold split version
        } else {
          freed = free_tiny_fast(ptr);      // OLD: Legacy monolithic version
        }
        if (__builtin_expect(freed, 1)) {
          return;  // Success (pushed to the Unified Cache)
        }
        // Unified Cache full OR invalid header → fall back to the cold path
      }
      // All hot cases exhausted → delegate to free_cold() for classification and fallback
      return free_cold(ptr, wcfg);
    }
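
    // The remainder of this else-branch is the legacy (HAKMEM_WRAP_SHAPE=0) dispatch; after it
    // closes, free() continues with an inline copy of free_cold()'s classification ladder.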

    // Phase 2 B4: Legacy path (HAKMEM_WRAP_SHAPE=0, default)
    // Phase 26: Front Gate Unification (Tiny free fast path)
    // Placed AFTER the BenchFast check, BEFORE the expensive classify_ptr()
    // Bypasses: hak_free_at routing + wrapper overhead + classification
    // Target: +10-15% performance (pairs with malloc_tiny_fast)
    // ENV: HAKMEM_FRONT_GATE_UNIFIED=1 to enable (default: OFF)
    // Phase 4-Step3: Use the config macro for compile-time optimization
    // Phase 7-Step1: Changed the expect hint from 0→1 (the unified path is now LIKELY)
    if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
      // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split dispatch
      int freed;
      if (__builtin_expect(hak_free_tiny_fast_hotcold_enabled(), 0)) {
        freed = free_tiny_fast_hot(ptr);  // NEW: Hot/Cold split version
      } else {
        freed = free_tiny_fast(ptr);      // OLD: Legacy monolithic version
      }
      if (__builtin_expect(freed, 1)) {
        return;  // Success (pushed to the Unified Cache)
      }
      // Unified Cache full OR invalid header → fall back to the normal path
    }
  }

  do {
    static int on = -1;
    if (on == -1) {
      const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
      on = (e && *e && *e != '0') ? 1 : 0;
    }
    if (on) {
      fprintf(stderr, "[WRAP_FREE_ENTER] ptr=%p depth=%d init=%d\n", ptr, g_hakmem_lock_depth, g_initializing);
    }
  } while (0);
#if !HAKMEM_BUILD_RELEASE
  // Debug safety: guard obviously invalid tiny integers to avoid a libc crash and collect a trace
  if ((uintptr_t)ptr < 4096) {
    ptr_trace_dump_now("wrap_small_ptr");
    fprintf(stderr, "[FREE_SMALL_PTR] ignore ptr=%p (likely header-corruption sentinel)\n", ptr);
    return;
  }
#endif

  // Classify the pointer BEFORE the early libc fallbacks to avoid misrouting Tiny pointers.
  // This is safe: the classifier uses a header probe and the registry; it does not allocate.
  int is_hakmem_owned = 0;
  {
    ptr_classification_t c = classify_ptr(ptr);
    switch (c.kind) {
      case PTR_KIND_TINY_HEADER:
      case PTR_KIND_TINY_HEADERLESS:
      case PTR_KIND_POOL_TLS:
      case PTR_KIND_MID_LARGE:  // FIX: Include Mid-Large (mmap/ACE) pointers
        is_hakmem_owned = 1;
        break;
      default:
        break;
    }
  }
  if (!is_hakmem_owned) {
    // Failsafe: the Mid registry lookup catches headerless/corrupted Mid allocations
    if (hak_pool_mid_lookup(ptr, NULL)) {
      is_hakmem_owned = 1;
    }
  }
  if (is_hakmem_owned) {
    // Route to hak_free_at even if lock_depth > 0 (only ptr_trace is used here, to suppress logging)
    g_hakmem_lock_depth++;
    hak_free_at(ptr, 0, HAK_CALLSITE());
    g_hakmem_lock_depth--;
    return;
  }

  // Front Gate libc bypass detection (quiet in release)
  static _Atomic uint64_t fg_libc_bypass_count = 0;
  if (g_hakmem_lock_depth > 0) {
#if !HAKMEM_BUILD_RELEASE
    uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
    if (count < 10) {
      fprintf(stderr, "[FG_LIBC_BYPASS] lockdepth=%d count=%llu ptr=%p\n",
              g_hakmem_lock_depth, (unsigned long long)count, ptr);
    }
#else
    (void)fg_libc_bypass_count;
#endif
    // Safety: If this is a HAKMEM-owned header allocation, free the raw pointer correctly
    do {
      void* raw = (char*)ptr - HEADER_SIZE;
      int safe_same_page = (((uintptr_t)ptr & 0xFFFu) >= HEADER_SIZE);
      if (!safe_same_page) {
        if (!hak_is_memory_readable(raw)) break;
      }
      AllocHeader* hdr = (AllocHeader*)raw;
      if (hdr->magic == HAKMEM_MAGIC) {
        // Dispatch based on the allocation method
        if (hdr->method == ALLOC_METHOD_MALLOC) {
          extern void __libc_free(void*);
          ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_malloc");
          __libc_free(raw);
          return;
        } else if (hdr->method == ALLOC_METHOD_MMAP) {
          ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_mmap");
          hkm_sys_munmap(raw, hdr->size);
          return;
        }
      }
    } while (0);
    // Unknown pointer or non-HAKMEM: fall back to libc free(ptr)
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_lockdepth");
    wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc free: lockdepth\n");
    __libc_free(ptr);
    return;
  }

  int free_init_wait = hak_init_wait_for_ready();
  if (__builtin_expect(free_init_wait <= 0, 0)) {
    wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc free: init_wait\n");
#if !HAKMEM_BUILD_RELEASE
    uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
    if (count < 10) {
      fprintf(stderr, "[FG_LIBC_BYPASS] init=%d count=%llu ptr=%p\n",
              g_initializing, (unsigned long long)count, ptr);
    }
#endif
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_init");
    __libc_free(ptr);
    return;
  }
  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_force");
    __libc_free(ptr);
    return;
  }
  if (hak_ld_env_mode()) {
    // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger a fallback
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      extern void __libc_free(void*);
      ptr_trace_dump_now("wrap_libc_ld_jemalloc");
      __libc_free(ptr);
      return;
    }
    if (!g_initialized) { hak_init(); }
    int free_ld_wait = hak_init_wait_for_ready();
    if (__builtin_expect(free_ld_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc free: ld init_wait\n");
      extern void __libc_free(void*);
      ptr_trace_dump_now("wrap_libc_ld_init");
      __libc_free(ptr);
      return;
    }
  }

  // Phase 15: Box Separation - Domain check to distinguish hakmem vs external pointers
  // CRITICAL: Prevent BenchMeta (slots[]) from entering CoreAlloc (hak_free_at)
  // Strategy: Check the 1-byte header at ptr-1 for HEADER_MAGIC (0xa0/0xb0)
  //   - If it is a hakmem Tiny allocation → route to hak_free_at()
  //   - Otherwise → delegate to __libc_free() (external/BenchMeta)
  //
  // Safety: Only check the header if ptr is NOT page-aligned (then ptr-1 is safe to read)
  uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
  if (offset_in_page > 0) {
    // Not page-aligned, safe to check ptr-1
    uint8_t header = *((uint8_t*)ptr - 1);
    if ((header & 0xF0) == 0xA0) {
      // Tiny header byte → require a Superslab hit to avoid misclassification
      SuperSlab* ss = hak_super_lookup(ptr);
      if (ss && ss->magic == SUPERSLAB_MAGIC) {
        g_hakmem_lock_depth++;
        hak_free_at(ptr, 0, HAK_CALLSITE());
        g_hakmem_lock_depth--;
        return;
      }
      // No Superslab registered → not managed by hakmem. Do not pass it to libc free either;
      // ignore it (defends against working-set garbage).
      return;
    } else if ((header & 0xF0) == 0xB0) {
      // Pool TLS header (if enabled) — no registry check needed
#ifdef HAKMEM_POOL_TLS_PHASE1
      g_hakmem_lock_depth++;
      hak_free_at(ptr, 0, HAK_CALLSITE());
      g_hakmem_lock_depth--;
      return;
#endif
    }
    // No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.)
    // Phase 5 E4-1: Get wcfg for the wrap_diag check (may be the snapshot path or the legacy path)
    const wrapper_env_cfg_t* wcfg_diag = wrapper_env_cfg_fast();
    if (__builtin_expect(wcfg_diag->wrap_diag, 0)) {
      SuperSlab* ss = hak_super_lookup(ptr);
      int slab_idx = -1;
      int meta_cls = -1;
      int alloc_method = -1;
      if (__builtin_expect(ss && ss->magic == SUPERSLAB_MAGIC, 0)) {
        slab_idx = slab_index_for(ss, (void*)((uint8_t*)ptr - 1));
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
          meta_cls = ss->slabs[slab_idx].class_idx;
        }
      } else if (offset_in_page >= HEADER_SIZE) {
        AllocHeader* ah = hak_header_from_user(ptr);
        if (hak_header_validate(ah)) {
          alloc_method = ah->method;
        }
      }
      fprintf(stderr,
              "[WRAP_FREE_NOT_OWNED] ptr=%p hdr=0x%02x off=0x%lx lockdepth=%d init=%d ss=%p slab=%d meta_cls=%d alloc_method=%d\n",
              ptr, header, (unsigned long)offset_in_page, g_hakmem_lock_depth, g_initializing,
              (void*)ss, slab_idx, meta_cls, alloc_method);
    }
    // Self-heal: if this looks like a SuperSlab (magic matches) but registry lookup failed,
    // re-register on the fly and route to hakmem free to avoid libc abort.
    {
      SuperSlab* ss_guess = (SuperSlab*)((uintptr_t)ptr & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
      long page_sz = sysconf(_SC_PAGESIZE);
      unsigned char mincore_vec = 0;
      int mapped = (page_sz > 0) &&
                   (mincore((void*)((uintptr_t)ss_guess & ~(uintptr_t)(page_sz - 1)),
                            (size_t)page_sz, &mincore_vec) == 0);
      if (mapped && ss_guess->magic == SUPERSLAB_MAGIC) {
        hak_super_register((uintptr_t)ss_guess, ss_guess);  // idempotent if already registered
        g_hakmem_lock_depth++;
        hak_free_at(ptr, 0, HAK_CALLSITE());
        g_hakmem_lock_depth--;
        return;
      }
    }
    extern void __libc_free(void*);
    ptr_trace_dump_now("wrap_libc_external_nomag");
    wrapper_record_fallback(FB_NOT_OWNED, "[wrap] libc free: not_owned\n");
    __libc_free(ptr);
    return;
  }

  // Page-aligned pointer → cannot safely check header, use full classification
  // (This includes Pool/Mid/L25 allocations which may be page-aligned)
  g_hakmem_lock_depth++;
  hak_free_at(ptr, 0, HAK_CALLSITE());
  g_hakmem_lock_depth--;
}
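
// calloc() note: the nmemb*size product is overflow-checked with `nmemb > SIZE_MAX / size`
// before any allocation, and zeroing is done with an explicit memset() after hak_alloc_at()
// rather than relying on the backing allocation being zeroed.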

void* calloc(size_t nmemb, size_t size) {
  static _Atomic int g_wrap_calloc_trace_count = 0;
  if (atomic_fetch_add_explicit(&g_wrap_calloc_trace_count, 1, memory_order_relaxed) < 128) {
    HAK_TRACE("[wrap_calloc_enter]\n");
  }

  // CRITICAL FIX (BUG #8): Increment the lock depth FIRST, before ANY libc calls
  g_hakmem_lock_depth++;

  // Early check for recursion (the lock depth was already incremented by an outer call)
  if (g_hakmem_lock_depth > 1) {
    g_hakmem_lock_depth--;
    extern void* __libc_calloc(size_t, size_t);
    wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc calloc: lockdepth\n");
    return __libc_calloc(nmemb, size);
  }
  int calloc_init_wait = hak_init_wait_for_ready();
  if (__builtin_expect(calloc_init_wait <= 0, 0)) {
    g_hakmem_lock_depth--;
    extern void* __libc_calloc(size_t, size_t);
    wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc calloc: init_wait\n");
    return __libc_calloc(nmemb, size);
  }

  // Overflow check
  if (size != 0 && nmemb > (SIZE_MAX / size)) {
    g_hakmem_lock_depth--;
    errno = ENOMEM;
    return NULL;
  }

  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    g_hakmem_lock_depth--;
    extern void* __libc_calloc(size_t, size_t);
    return __libc_calloc(nmemb, size);
  }

  int ld_mode = hak_ld_env_mode();
  if (ld_mode) {
    // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger a fallback
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      g_hakmem_lock_depth--;
      extern void* __libc_calloc(size_t, size_t);
      wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc calloc: jemalloc block\n");
      return __libc_calloc(nmemb, size);
    }
    if (!g_initialized) { hak_init(); }
    int calloc_ld_wait = hak_init_wait_for_ready();
    if (__builtin_expect(calloc_ld_wait <= 0, 0)) {
      g_hakmem_lock_depth--;
      extern void* __libc_calloc(size_t, size_t);
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc calloc: ld init_wait\n");
      return __libc_calloc(nmemb, size);
    }
    // malloc's cached ld_safe_mode cannot be reused here (it lives in a different static
    // variable scope; an inline helper would be needed), so duplicate the caching logic for now.
    static _Atomic int ld_safe_mode_calloc = -1;
    if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
      const char* lds = getenv("HAKMEM_LD_SAFE");
      ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
    }
    size_t total = nmemb * size;
    if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) {
      g_hakmem_lock_depth--;
      extern void* __libc_calloc(size_t, size_t);
      if (ld_safe_mode_calloc >= 2) wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc calloc: ld_safe\n");
      return __libc_calloc(nmemb, size);
    }
  }

  size_t total_size = nmemb * size;
  void* ptr = hak_alloc_at(total_size, HAK_CALLSITE());
  if (ptr) {
    memset(ptr, 0, total_size);
  }
  g_hakmem_lock_depth--;
  return ptr;
}

void* realloc(void* ptr, size_t size) {
  static _Atomic int g_wrap_realloc_trace_count = 0;
  if (atomic_fetch_add_explicit(&g_wrap_realloc_trace_count, 1, memory_order_relaxed) < 128) {
    HAK_TRACE("[wrap_realloc_enter]\n");
  }
  if (g_hakmem_lock_depth > 0) {
    wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc realloc: lockdepth\n");
    extern void* __libc_realloc(void*, size_t);
    return __libc_realloc(ptr, size);
  }
  int realloc_init_wait = hak_init_wait_for_ready();
  if (__builtin_expect(realloc_init_wait <= 0, 0)) {
    wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc realloc: init_wait\n");
    extern void* __libc_realloc(void*, size_t);
    return __libc_realloc(ptr, size);
  }
  if (__builtin_expect(hak_force_libc_alloc(), 0)) {
    wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc realloc: force_libc\n");
    extern void* __libc_realloc(void*, size_t);
    return __libc_realloc(ptr, size);
  }
  int ld_mode = hak_ld_env_mode();
  if (ld_mode) {
    // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger a fallback
    if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
      wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc realloc: jemalloc block\n");
      extern void* __libc_realloc(void*, size_t);
      return __libc_realloc(ptr, size);
    }
    if (!g_initialized) { hak_init(); }
    int realloc_ld_wait = hak_init_wait_for_ready();
    if (__builtin_expect(realloc_ld_wait <= 0, 0)) {
      wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc realloc: ld init_wait\n");
      extern void* __libc_realloc(void*, size_t);
      return __libc_realloc(ptr, size);
    }
  }
  if (ptr == NULL) {
    return malloc(size);
  }
  if (size == 0) {
    free(ptr);
    return NULL;
  }

  void* new_ptr = malloc(size);
  if (!new_ptr) return NULL;
  // NOTE: this copies `size` bytes from the old block; when growing, that can read past the
  // old allocation unless its usable size already covers `size` (the old size is not tracked here).
  memcpy(new_ptr, ptr, size);
  free(ptr);
  return new_ptr;
}

#endif  // HAKMEM_FORCE_LIBC_ALLOC_BUILD
#endif  // HAK_WRAPPERS_INC_H