hakmem/core/box/hak_wrappers.inc.h
Moe Charm (CI) a32d0fafd4 Two-Speed Optimization Part 2: Remove atomic trace counters from hot path
Performance improvements:
- lock incl instructions completely removed from malloc/free hot paths
- Cache misses reduced from 24.4% → 13.4% of cycles
- Throughput: 85M → 89.12M ops/sec (+4.8%)
- Cycles/op: 48.8 → 48.25 (-1.1%)

Changes in core/box/hak_wrappers.inc.h:
- malloc: Guard g_wrap_malloc_trace_count atomic with #if !HAKMEM_BUILD_RELEASE
- free: Guard g_wrap_free_trace_count and g_free_wrapper_calls with same guard

Debug builds retain full instrumentation via HAK_TRACE.
Release builds execute completely clean hot paths without atomic operations.
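
A condensed view of the guard, as it appears in the malloc wrapper below (release builds compile the block out entirely):

#if !HAKMEM_BUILD_RELEASE
static _Atomic int g_wrap_malloc_trace_count = 0;
if (atomic_fetch_add_explicit(&g_wrap_malloc_trace_count, 1, memory_order_relaxed) < 256) {
    HAK_TRACE("[wrap_malloc_enter]\n");  /* trace only the first 256 calls */
}
#endif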

Verified via:
- perf report: lock incl instructions gone
- perf stat: cycles/op reduced, cache miss % improved
- objdump: 0 lock instructions in hot paths

Next: Inline unified_cache_refill for an additional 3-4 cycles/op improvement

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 19:20:44 +09:00

// hak_wrappers.inc.h — malloc/free/calloc/realloc wrappers (LD_PRELOAD-aware)
#ifndef HAK_WRAPPERS_INC_H
#define HAK_WRAPPERS_INC_H
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
void* malloc(size_t size) {
extern void* __libc_malloc(size_t);
return __libc_malloc(size);
}
void free(void* ptr) {
if (!ptr) return;
extern void __libc_free(void*);
__libc_free(ptr);
}
void* calloc(size_t nmemb, size_t size) {
extern void* __libc_calloc(size_t, size_t);
return __libc_calloc(nmemb, size);
}
void* realloc(void* ptr, size_t size) {
extern void* __libc_realloc(void*, size_t);
return __libc_realloc(ptr, size);
}
#else
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_classifier.h" // Box FG: pointer classification (header/reg)
#include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid)
#include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification (Tiny fast alloc)
#include "tiny_alloc_gate_box.h" // Tiny Alloc Gatekeeper Box (BASE/USER+Bridge 入口)
#include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination
#include "wrapper_env_box.h" // Wrapper env cache (step trace / LD safe / free trace)
#include "../hakmem_internal.h" // AllocHeader helpers for diagnostics
#include "../hakmem_super_registry.h" // Superslab lookup for diagnostics
#include "../superslab/superslab_inline.h" // slab_index_for, capacity
#include <sys/mman.h> // mincore for safe mapping checks
#include <unistd.h> // write for diagnostics
#include <string.h> // strlen for diagnostics
// malloc wrapper - intercepts system malloc() calls
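// Per-thread (TLS) statistics for the malloc fast/slow path split; plain uint64_t per
// thread, so updating them never touches a shared, atomic cache line.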
__thread uint64_t g_malloc_total_calls = 0;
__thread uint64_t g_malloc_tiny_size_match = 0;
__thread uint64_t g_malloc_fast_path_tried = 0;
__thread uint64_t g_malloc_fast_path_null = 0;
__thread uint64_t g_malloc_slow_path = 0;
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
// CRITICAL FIX (BUG #10): Use cached g_jemalloc_loaded instead of calling hak_jemalloc_loaded()
// The function call version triggers infinite recursion: malloc → hak_jemalloc_loaded → dlopen → malloc
extern int g_jemalloc_loaded; // Cached during hak_init_impl(), defined in hakmem.c
// Global malloc call counter for debugging (exposed for validation code)
// Defined here, accessed from tls_sll_box.h for corruption detection
_Atomic uint64_t malloc_count = 0;
// Lightweight fallback diagnostics (enabled with HAKMEM_WRAP_DIAG=1)
typedef enum {
FB_INIT_WAIT_FAIL = 0,
FB_INIT_LD_WAIT_FAIL,
FB_FORCE_LIBC,
FB_LD_SAFE,
FB_JEMALLOC_BLOCK,
FB_LOCKDEPTH,
FB_NOT_OWNED,
FB_OTHER,
FB_REASON_COUNT
} wrapper_fb_reason_t;
static _Atomic uint64_t g_fb_counts[FB_REASON_COUNT];
static _Atomic int g_fb_log_count[FB_REASON_COUNT];
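// Record a fallback-to-libc event. The per-reason counter is always bumped; with
// HAKMEM_WRAP_DIAG=1 at most 4 messages per reason are written straight to fd 2
// (raw write() avoids re-entering malloc from inside the wrappers).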
static inline void wrapper_record_fallback(wrapper_fb_reason_t reason, const char* msg) {
atomic_fetch_add_explicit(&g_fb_counts[reason], 1, memory_order_relaxed);
const wrapper_env_cfg_t* wcfg = wrapper_env_cfg();
if (__builtin_expect(wcfg->wrap_diag, 0)) {
int n = atomic_fetch_add_explicit(&g_fb_log_count[reason], 1, memory_order_relaxed);
if (n < 4 && msg) {
write(2, msg, strlen(msg));
}
}
}
void* malloc(size_t size) {
#ifndef NDEBUG
uint64_t count = atomic_fetch_add(&malloc_count, 1);
#endif
#if !HAKMEM_BUILD_RELEASE
// Debug-only trace counter: in release builds this atomic increment
// is disabled to avoid hot-path cache misses and contention.
static _Atomic int g_wrap_malloc_trace_count = 0;
if (atomic_fetch_add_explicit(&g_wrap_malloc_trace_count, 1, memory_order_relaxed) < 256) {
HAK_TRACE("[wrap_malloc_enter]\n");
}
#endif
// NDEBUG: malloc_count increment disabled - removes 27.55% bottleneck
// Phase 20-2: BenchFast mode (structural ceiling measurement)
// WARNING: Bypasses ALL safety checks - benchmark only!
// IMPORTANT: Do NOT use BenchFast during preallocation/init to avoid recursion.
// Phase 8-TLS-Fix: Use atomic_load for cross-thread safety
if (__builtin_expect(!atomic_load(&g_bench_fast_init_in_progress) && bench_fast_enabled(), 0)) {
if (size <= 1024) { // Tiny range
return bench_fast_alloc(size);
}
// Fallback to normal path for large allocations
}
// DEBUG BAILOUT DISABLED - Testing full path
// if (__builtin_expect(count >= 14270 && count <= 14285, 0)) {
// extern void* __libc_malloc(size_t);
// fprintf(stderr, "[MALLOC_WRAPPER] count=%lu size=%zu - BAILOUT TO LIBC!\n", count, size);
// fflush(stderr);
// return __libc_malloc(size);
// }
// CRITICAL FIX (BUG #7): Increment lock depth FIRST, before ANY libc calls
// This prevents infinite recursion when getenv/fprintf/dlopen call malloc
g_hakmem_lock_depth++;
// Debug step trace for 33KB: gated by env HAKMEM_STEP_TRACE (default: OFF)
const wrapper_env_cfg_t* wcfg = wrapper_env_cfg();
if (wcfg->step_trace && size == 33000) write(2, "STEP:1 Lock++\n", 14);
// Guard against recursion during initialization
int init_wait = hak_init_wait_for_ready();
if (__builtin_expect(init_wait <= 0, 0)) {
wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc malloc: init_wait\n");
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (size == 33000) write(2, "RET:Initializing\n", 17);
return __libc_malloc(size);
}
// Now safe to call getenv/fprintf/dlopen (will use __libc_malloc if needed)
extern int g_sfc_debug;
static _Atomic int debug_count = 0;
if (__builtin_expect(g_sfc_debug, 0) && debug_count < 100) {
int n = atomic_fetch_add(&debug_count, 1);
if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
}
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc malloc: force_libc\n");
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (wcfg->step_trace && size == 33000) write(2, "RET:ForceLibc\n", 14);
return __libc_malloc(size);
}
if (wcfg->step_trace && size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24);
int ld_mode = hak_ld_env_mode();
if (ld_mode) {
if (wcfg->step_trace && size == 33000) write(2, "STEP:3 LD Mode\n", 15);
// BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback
// Only fallback if jemalloc is ACTUALLY loaded (> 0)
if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc malloc: jemalloc block\n");
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (wcfg->step_trace && size == 33000) write(2, "RET:Jemalloc\n", 13);
return __libc_malloc(size);
}
if (!g_initialized) { hak_init(); }
int ld_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(ld_init_wait <= 0, 0)) {
wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc malloc: ld init_wait\n");
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (wcfg->step_trace && size == 33000) write(2, "RET:Init2\n", 10);
return __libc_malloc(size);
}
// Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
if (wcfg->ld_safe_mode >= 2) {
wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc malloc: ld_safe\n");
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (wcfg->step_trace && size == 33000) write(2, "RET:LDSafe\n", 11);
return __libc_malloc(size);
}
}
if (wcfg->step_trace && size == 33000) write(2, "STEP:4 LD Check passed\n", 23);
// Phase 26: CRITICAL - Ensure initialization before fast path
// (fast path bypasses hak_alloc_at, so we need to init here)
if (!g_initialized) hak_init();
// Phase 26: Front Gate Unification (Tiny fast path)
// Placed AFTER all safety checks (lock depth, initializing, LD_SAFE, jemalloc)
// Bypasses: hak_alloc_at routing (236 lines) + wrapper diagnostics + tiny overhead
// Target: +10-15% performance (11.35M → 12.5-13.5M ops/s)
// ENV: HAKMEM_FRONT_GATE_UNIFIED=1 to enable (default: OFF)
// Phase 4-Step3: Use config macro for compile-time optimization
// Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
if (wcfg->step_trace && size == 33000) write(2, "STEP:5 Unified Gate check\n", 26);
if (size <= tiny_get_max_size()) {
if (wcfg->step_trace && size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24);
// Tiny Alloc Gate Box: thin wrapper around malloc_tiny_fast()
// (with diagnostics OFF, behavior and cost are unchanged from the previous path)
void* ptr = tiny_alloc_gate_fast(size);
if (__builtin_expect(ptr != NULL, 1)) {
g_hakmem_lock_depth--;
if (wcfg->step_trace && size == 33000) write(2, "RET:TinyFast\n", 13);
return ptr;
}
// Unified Cache miss → fallback to normal path (hak_alloc_at)
}
}
if (wcfg->step_trace && size == 33000) write(2, "STEP:6 All checks passed\n", 25);
#if !HAKMEM_BUILD_RELEASE && !defined(NDEBUG) /* 'count' is only defined when NDEBUG is not set */
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu calling hak_alloc_at\n", count);
fflush(stderr);
}
#endif
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
#if !HAKMEM_BUILD_RELEASE && !defined(NDEBUG)
if (count > 14250 && count < 14280 && size <= 1024) {
fprintf(stderr, "[MALLOC_WRAPPER] count=%lu hak_alloc_at returned %p\n", count, ptr);
fflush(stderr);
}
#endif
g_hakmem_lock_depth--;
return ptr;
}
void free(void* ptr) {
#if !HAKMEM_BUILD_RELEASE
// Debug-only trace counters; disabled in release to keep free() hot path
// free of atomic increments.
static _Atomic int g_wrap_free_trace_count = 0;
if (atomic_fetch_add_explicit(&g_wrap_free_trace_count, 1, memory_order_relaxed) < 256) {
HAK_TRACE("[wrap_free_enter]\n");
}
atomic_fetch_add_explicit(&g_free_wrapper_calls, 1, memory_order_relaxed);
#endif
if (!ptr) return;
// Phase 20-2: BenchFast mode (structural ceiling measurement)
// WARNING: Bypasses ALL safety checks - benchmark only!
if (__builtin_expect(bench_fast_enabled(), 0)) {
// Trust header magic to identify Tiny allocations
#if HAKMEM_TINY_HEADER_CLASSIDX
uint8_t header = *((uint8_t*)ptr - 1);
if ((header & 0xf0) == 0xa0) { // Tiny header magic (0xa0-0xa7)
bench_fast_free(ptr);
return;
}
#endif
// Fallback to normal path for non-Tiny or no-header mode
}
const wrapper_env_cfg_t* wcfg = wrapper_env_cfg();
// Phase 26: Front Gate Unification (Tiny free fast path)
// Placed AFTER BenchFast check, BEFORE expensive classify_ptr()
// Bypasses: hak_free_at routing + wrapper overhead + classification
// Target: +10-15% performance (pairs with malloc_tiny_fast)
// ENV: HAKMEM_FRONT_GATE_UNIFIED=1 to enable (default: OFF)
// Phase 4-Step3: Use config macro for compile-time optimization
// Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
int freed = free_tiny_fast(ptr);
if (__builtin_expect(freed, 1)) {
return; // Success (pushed to Unified Cache)
}
// Unified Cache full OR invalid header → fallback to normal path
}
do {
static int on = -1;
if (on == -1) { const char* e = getenv("HAKMEM_FREE_WRAP_TRACE"); on = (e && *e && *e != '0') ? 1 : 0; }
if (on) { fprintf(stderr, "[WRAP_FREE_ENTER] ptr=%p depth=%d init=%d\n", ptr, g_hakmem_lock_depth, g_initializing); }
} while (0);
#if !HAKMEM_BUILD_RELEASE
// Debug safety: guard obviously invalid tiny integers to avoid libc crash and collect trace
if ((uintptr_t)ptr < 4096) {
ptr_trace_dump_now("wrap_small_ptr");
fprintf(stderr, "[FREE_SMALL_PTR] ignore ptr=%p (likely header-corruption sentinel)\n", ptr);
return;
}
#endif
// Classify pointer BEFORE early libc fallbacks to avoid misrouting Tiny pointers
// This is safe: classifier uses header probe and registry; does not allocate.
int is_hakmem_owned = 0;
{
ptr_classification_t c = classify_ptr(ptr);
switch (c.kind) {
case PTR_KIND_TINY_HEADER:
case PTR_KIND_TINY_HEADERLESS:
case PTR_KIND_POOL_TLS:
case PTR_KIND_MID_LARGE: // FIX: Include Mid-Large (mmap/ACE) pointers
is_hakmem_owned = 1; break;
default: break;
}
}
if (!is_hakmem_owned) {
// Failsafe: Mid registry lookup catches headerless/corrupted Mid allocations
if (hak_pool_mid_lookup(ptr, NULL)) {
is_hakmem_owned = 1;
}
}
if (is_hakmem_owned) {
// Route to hak_free_at even if lock_depth > 0; use ptr_trace only, to keep logging suppressed
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
}
// Front Gate libc bypass detection (quiet in release)
static _Atomic uint64_t fg_libc_bypass_count = 0;
if (g_hakmem_lock_depth > 0) {
#if !HAKMEM_BUILD_RELEASE
uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
if (count < 10) {
fprintf(stderr, "[FG_LIBC_BYPASS] lockdepth=%d count=%llu ptr=%p\n", g_hakmem_lock_depth, (unsigned long long)count, ptr);
}
#else
(void)fg_libc_bypass_count;
#endif
// Safety: If this is a HAKMEM-owned header allocation, free raw correctly
do {
void* raw = (char*)ptr - HEADER_SIZE;
int safe_same_page = (((uintptr_t)ptr & 0xFFFu) >= HEADER_SIZE);
if (!safe_same_page) {
if (!hak_is_memory_readable(raw)) break;
}
AllocHeader* hdr = (AllocHeader*)raw;
if (hdr->magic == HAKMEM_MAGIC) {
// Dispatch based on allocation method
if (hdr->method == ALLOC_METHOD_MALLOC) {
extern void __libc_free(void*);
ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_malloc");
__libc_free(raw);
return;
} else if (hdr->method == ALLOC_METHOD_MMAP) {
ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_mmap");
hkm_sys_munmap(raw, hdr->size);
return;
}
}
} while (0);
// Unknown pointer or non-HAKMEM: fall back to libc free(ptr)
extern void __libc_free(void*);
ptr_trace_dump_now("wrap_libc_lockdepth");
wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc free: lockdepth\n");
__libc_free(ptr);
return;
}
int free_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(free_init_wait <= 0, 0)) {
wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc free: init_wait\n");
#if !HAKMEM_BUILD_RELEASE
uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
if (count < 10) {
fprintf(stderr, "[FG_LIBC_BYPASS] init=%d count=%llu ptr=%p\n", g_initializing, (unsigned long long)count, ptr);
}
#endif
extern void __libc_free(void*);
ptr_trace_dump_now("wrap_libc_init");
__libc_free(ptr);
return;
}
if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_force"); __libc_free(ptr); return; }
if (hak_ld_env_mode()) {
// BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback
if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_jemalloc"); __libc_free(ptr); return; }
if (!g_initialized) { hak_init(); }
int free_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(free_ld_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc free: ld init_wait\n"); extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; }
}
// Phase 15: Box Separation - Domain check to distinguish hakmem vs external pointers
// CRITICAL: Prevent BenchMeta (slots[]) from entering CoreAlloc (hak_free_at)
// Strategy: Check 1-byte header at ptr-1 for HEADER_MAGIC (0xa0/0xb0)
// - If hakmem Tiny allocation → route to hak_free_at()
// - Otherwise → delegate to __libc_free() (external/BenchMeta)
//
// Safety: Only check header if ptr is NOT page-aligned (ptr-1 is safe to read)
uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
if (offset_in_page > 0) {
// Not page-aligned, safe to check ptr-1
uint8_t header = *((uint8_t*)ptr - 1);
if ((header & 0xF0) == 0xA0) {
// Tiny header byte → require a registered Superslab to avoid misclassification
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
}
// Not registered as a Superslab → not managed by hakmem. Do not pass it to libc free either; ignore it (guards against stale working-set garbage).
return;
} else if ((header & 0xF0) == 0xB0) {
// Pool TLS header (if enabled) — no registry check needed
#ifdef HAKMEM_POOL_TLS_PHASE1
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
#endif
}
// No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.)
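// Optional diagnostics (HAKMEM_WRAP_DIAG=1): before handing the pointer back to libc,
// report what the Superslab registry / AllocHeader probe can tell us about it.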
if (__builtin_expect(wcfg->wrap_diag, 0)) {
SuperSlab* ss = hak_super_lookup(ptr);
int slab_idx = -1;
int meta_cls = -1;
int alloc_method = -1;
if (__builtin_expect(ss && ss->magic == SUPERSLAB_MAGIC, 0)) {
slab_idx = slab_index_for(ss, (void*)((uint8_t*)ptr - 1));
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
meta_cls = ss->slabs[slab_idx].class_idx;
}
} else if (offset_in_page >= HEADER_SIZE) {
AllocHeader* ah = hak_header_from_user(ptr);
if (hak_header_validate(ah)) {
alloc_method = ah->method;
}
}
fprintf(stderr,
"[WRAP_FREE_NOT_OWNED] ptr=%p hdr=0x%02x off=0x%lx lockdepth=%d init=%d ss=%p slab=%d meta_cls=%d alloc_method=%d\n",
ptr,
header,
(unsigned long)offset_in_page,
g_hakmem_lock_depth,
g_initializing,
(void*)ss,
slab_idx,
meta_cls,
alloc_method);
}
// Self-heal: if this looks like a SuperSlab (magic matches) but registry lookup failed,
// re-register on the fly and route to hakmem free to avoid libc abort.
{
SuperSlab* ss_guess = (SuperSlab*)((uintptr_t)ptr & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
long page_sz = sysconf(_SC_PAGESIZE);
unsigned char mincore_vec = 0;
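// Probe the page containing ss_guess with mincore() so the ->magic read below cannot
// fault if that address range is unmapped.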
int mapped = (page_sz > 0) &&
(mincore((void*)((uintptr_t)ss_guess & ~(uintptr_t)(page_sz - 1)),
(size_t)page_sz,
&mincore_vec) == 0);
if (mapped && ss_guess->magic == SUPERSLAB_MAGIC) {
hak_super_register((uintptr_t)ss_guess, ss_guess); // idempotent if already registered
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
}
}
extern void __libc_free(void*);
ptr_trace_dump_now("wrap_libc_external_nomag");
wrapper_record_fallback(FB_NOT_OWNED, "[wrap] libc free: not_owned\n");
__libc_free(ptr);
return;
}
// Page-aligned pointer → cannot safely check header, use full classification
// (This includes Pool/Mid/L25 allocations which may be page-aligned)
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
}
void* calloc(size_t nmemb, size_t size) {
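// NOTE: unlike the malloc/free wrappers above, this trace counter is not release-guarded;
// only the malloc/free hot paths had their atomics removed.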
static _Atomic int g_wrap_calloc_trace_count = 0;
if (atomic_fetch_add_explicit(&g_wrap_calloc_trace_count, 1, memory_order_relaxed) < 128) {
HAK_TRACE("[wrap_calloc_enter]\n");
}
// CRITICAL FIX (BUG #8): Increment lock depth FIRST, before ANY libc calls
g_hakmem_lock_depth++;
// Early check for recursion (lock depth already incremented by outer call)
if (g_hakmem_lock_depth > 1) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc calloc: lockdepth\n");
return __libc_calloc(nmemb, size);
}
int calloc_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(calloc_init_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc calloc: init_wait\n");
return __libc_calloc(nmemb, size);
}
// Overflow check
if (size != 0 && nmemb > (SIZE_MAX / size)) {
g_hakmem_lock_depth--;
errno = ENOMEM;
return NULL;
}
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
return __libc_calloc(nmemb, size);
}
int ld_mode = hak_ld_env_mode();
if (ld_mode) {
// BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback
if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc calloc: jemalloc block\n");
return __libc_calloc(nmemb, size);
}
if (!g_initialized) { hak_init(); }
int calloc_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(calloc_ld_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc calloc: ld init_wait\n");
return __libc_calloc(nmemb, size);
}
// TODO: share the cached HAKMEM_LD_SAFE value (wrapper_env_cfg / malloc path) instead of
// duplicating it; for now the getenv caching is repeated here with a function-local static.
static _Atomic int ld_safe_mode_calloc = -1;
if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
const char* lds = getenv("HAKMEM_LD_SAFE");
ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
}
size_t total = nmemb * size;
if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
if (ld_safe_mode_calloc >= 2) wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc calloc: ld_safe\n");
return __libc_calloc(nmemb, size);
}
}
size_t total_size = nmemb * size;
void* ptr = hak_alloc_at(total_size, HAK_CALLSITE());
if (ptr) { memset(ptr, 0, total_size); }
g_hakmem_lock_depth--;
return ptr;
}
void* realloc(void* ptr, size_t size) {
static _Atomic int g_wrap_realloc_trace_count = 0;
if (atomic_fetch_add_explicit(&g_wrap_realloc_trace_count, 1, memory_order_relaxed) < 128) {
HAK_TRACE("[wrap_realloc_enter]\n");
}
if (g_hakmem_lock_depth > 0) { wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc realloc: lockdepth\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
int realloc_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(realloc_init_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc realloc: init_wait\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
if (__builtin_expect(hak_force_libc_alloc(), 0)) { wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc realloc: force_libc\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
int ld_mode = hak_ld_env_mode();
if (ld_mode) {
// BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback
if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc realloc: jemalloc block\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
if (!g_initialized) { hak_init(); }
int realloc_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(realloc_ld_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc realloc: ld init_wait\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
}
if (ptr == NULL) { return malloc(size); }
if (size == 0) { free(ptr); return NULL; }
void* new_ptr = malloc(size);
if (!new_ptr) return NULL;
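// NOTE: copies `size` bytes from the old block. When growing, this can read past the end
// of the old allocation, since the old usable size is not tracked at this layer.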
memcpy(new_ptr, ptr, size);
free(ptr);
return new_ptr;
}
#endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD
#endif // HAK_WRAPPERS_INC_H