// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept

#define _GNU_SOURCE  // For mincore, madvise on Linux

#include "hakmem.h"
#include "hakmem_config.h"         // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h"       // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h"       // NEW: BigCache Box
#include "hakmem_pool.h"           // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h"       // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h"         // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h"        // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h"      // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h"            // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h"     // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h"           // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h"        // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h"         // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h"            // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h"      // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h"          // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h"            // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h"          // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h"            // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h"          // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h"           // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h"        // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h"    // NEW Phase ACE: Metrics tracking (inline helpers)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
#include <pthread.h>   // Phase 6.15: Threading primitives (recursion guard only)
#include <errno.h>     // calloc overflow handling

// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>
// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8  // Linux MADV_FREE
#endif
#endif

// ============================================================================
// Configuration
// ============================================================================

#define MAX_SITES 256      // Hash table size (power of 2)
#define SAMPLING_RATE 1    // Sample ALL (PoC demo: no sampling)
#define HASH_MASK (MAX_SITES - 1)

// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h

// ============================================================================
// Global State
// ============================================================================

// NEW Phase ACE: Adaptive Control Engine
static struct hkm_ace_controller g_ace_controller;

static int g_initialized = 0;
static int g_strict_free = 0;  // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0;    // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)

// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed
43.96% of CPU time! static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc // Statistics static uint64_t g_malloc_count = 0; // Used for optimization stats display // Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick) static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure) // Phase 6.15 P0.3: EVO Sampling Control (environment variable) static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1< 0; // Simple and correct! } // Initialization guard static int g_initializing = 0; int hak_is_initializing(void) { return g_initializing; } // ============================================================================ // Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations // ============================================================================ // Forward declarations for Phase 6 fast path variants // Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc) #ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE extern void* hak_tiny_alloc_ultra_simple(size_t size); extern void hak_tiny_free_ultra_simple(void* ptr); #endif // Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc) #ifdef HAKMEM_TINY_PHASE6_METADATA extern void* hak_tiny_alloc_metadata(size_t size); extern void hak_tiny_free_metadata(void* ptr); #endif // Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR extern void* hak_tiny_alloc_fast_wrapper(size_t size); extern void hak_tiny_free_fast_wrapper(void* ptr); #endif static void hak_flush_tiny_exit(void) { // Best-effort: flush Tiny magazines at process exit if (g_flush_tiny_on_exit) { hak_tiny_magazine_flush_all(); hak_tiny_trim(); } if (g_ultra_debug_on_exit) { hak_tiny_ultra_debug_dump(); } // Path debug dump (optional): HAKMEM_TINY_PATH_DEBUG=1 hak_tiny_path_debug_dump(); // Extended counters (optional): HAKMEM_TINY_COUNTERS_DUMP=1 extern void hak_tiny_debug_counters_dump(void); hak_tiny_debug_counters_dump(); } // ============================================================================ // KPI Measurement (for UCB1) - NEW! // ============================================================================ #ifdef __linux__ // Latency histogram (simple buckets for P50/P95/P99) #define LATENCY_BUCKETS 100 static uint64_t g_latency_histogram[LATENCY_BUCKETS]; static uint64_t g_latency_samples = 0; // Baseline page faults (at init) static uint64_t g_baseline_soft_pf = 0; static uint64_t g_baseline_hard_pf = 0; static uint64_t g_baseline_rss_kb = 0; // Get page faults from /proc/self/stat static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf) { FILE* f = fopen("/proc/self/stat", "r"); if (!f) { *soft_pf = 0; *hard_pf = 0; return; } // Format: pid (comm) state ... minflt cminflt majflt cmajflt ... // Fields: 1 2 3 ... 
10(minflt) 11(cminflt) 12(majflt) 13(cmajflt) unsigned long minflt = 0, majflt = 0; unsigned long dummy; char comm[256], state; (void)fscanf(f, "%lu %s %c %lu %lu %lu %lu %lu %lu %lu %lu %lu", &dummy, comm, &state, &dummy, &dummy, &dummy, &dummy, &dummy, &dummy, &minflt, &dummy, &majflt); fclose(f); *soft_pf = minflt; *hard_pf = majflt; } // Get RSS from /proc/self/statm (in KB) static uint64_t get_rss_kb(void) { FILE* f = fopen("/proc/self/statm", "r"); if (!f) return 0; // Format: size resident shared text lib data dt // We want 'resident' (field 2) in pages unsigned long size, resident; (void)fscanf(f, "%lu %lu", &size, &resident); fclose(f); long page_size = sysconf(_SC_PAGESIZE); return (resident * page_size) / 1024; // Convert to KB } // NOTE: Latency measurement functions (currently unused, for future use) /* static inline uint64_t measure_latency_ns(void (*func)(void*), void* arg) { struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); func(arg); // Execute function clock_gettime(CLOCK_MONOTONIC, &end); uint64_t ns = (end.tv_sec - start.tv_sec) * 1000000000ULL + (end.tv_nsec - start.tv_nsec); return ns; } static void record_latency(uint64_t ns) { // Bucket: 0-10ns, 10-20ns, ..., 990-1000ns, 1000+ns size_t bucket = ns / 10; if (bucket >= LATENCY_BUCKETS) bucket = LATENCY_BUCKETS - 1; g_latency_histogram[bucket]++; g_latency_samples++; } */ // Calculate percentile from histogram static uint64_t calculate_percentile(double percentile) { if (g_latency_samples == 0) return 0; uint64_t target = (uint64_t)(g_latency_samples * percentile); uint64_t cumulative = 0; for (size_t i = 0; i < LATENCY_BUCKETS; i++) { cumulative += g_latency_histogram[i]; if (cumulative >= target) { return i * 10; // Return bucket midpoint (ns) } } return (LATENCY_BUCKETS - 1) * 10; } // Implement hak_get_kpi() void hak_get_kpi(hak_kpi_t* out) { memset(out, 0, sizeof(hak_kpi_t)); // Latency (from histogram) out->p50_alloc_ns = calculate_percentile(0.50); out->p95_alloc_ns = calculate_percentile(0.95); out->p99_alloc_ns = calculate_percentile(0.99); // Page Faults (delta from baseline) uint64_t soft_pf, hard_pf; get_page_faults(&soft_pf, &hard_pf); out->soft_page_faults = soft_pf - g_baseline_soft_pf; out->hard_page_faults = hard_pf - g_baseline_hard_pf; // RSS (delta from baseline, in MB) uint64_t rss_kb = get_rss_kb(); int64_t rss_delta_kb = (int64_t)rss_kb - (int64_t)g_baseline_rss_kb; out->rss_delta_mb = rss_delta_kb / 1024; } #else // Non-Linux: stub implementation void hak_get_kpi(hak_kpi_t* out) { memset(out, 0, sizeof(hak_kpi_t)); } #endif // ============================================================================ // Internal Helpers // ============================================================================ // Phase 6.8: All legacy profiling functions removed // - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy() // Replaced by ELO-based allocation (hakmem_elo.c) // ============================================================================ // BigCache eviction callback // ============================================================================ // BigCache eviction callback (called when cache is full and needs to evict) static void bigcache_free_callback(void* ptr, size_t size) { (void)size; // Not used if (!ptr) return; // Get raw pointer and header void* raw = (char*)ptr - HEADER_SIZE; AllocHeader* hdr = (AllocHeader*)raw; // Verify magic before accessing method field if (hdr->magic != HAKMEM_MAGIC) { fprintf(stderr, "[hakmem] BigCache 
eviction: invalid magic, fallback to free()\n"); free(raw); return; } // Dispatch based on allocation method switch (hdr->method) { case ALLOC_METHOD_MALLOC: free(raw); break; case ALLOC_METHOD_MMAP: // Cold eviction: route through batch for large blocks // This completes Phase 6.3 architecture #ifdef __linux__ if (hdr->size >= BATCH_MIN_SIZE) { // Large blocks: use batch (deferred munmap + TLB optimization) hak_batch_add(raw, hdr->size); } else { // Small blocks: direct munmap (not worth batching) // Phase 6.11.1: Try whale cache first if (hkm_whale_put(raw, hdr->size) != 0) { // Whale cache full or not a whale: munmap madvise(raw, hdr->size, MADV_FREE); // Best-effort hkm_sys_munmap(raw, hdr->size); } // else: Successfully cached in whale cache (no munmap!) } #else free(raw); // Fallback (should not happen) #endif break; default: fprintf(stderr, "[hakmem] BigCache eviction: unknown method %d\n", hdr->method); free(raw); // Fallback break; } } // ============================================================================ // Public API // ============================================================================ // Thread-safe one-time initialization static void hak_init_impl(void); static pthread_once_t g_init_once = PTHREAD_ONCE_INIT; void hak_init(void) { (void)pthread_once(&g_init_once, hak_init_impl); } static void hak_init_impl(void) { g_initializing = 1; // Phase 6.X P0 FIX (2025-10-24): Initialize Box 3 (Syscall Layer) FIRST! // This MUST be called before ANY allocation (Tiny/Mid/Large/Learner) // dlsym() initializes function pointers to real libc (bypasses LD_PRELOAD) hkm_syscall_init(); // NEW Phase 6.11.1: Initialize debug timing hkm_timing_init(); // NEW Phase 6.11.1: Initialize whale fast-path cache hkm_whale_init(); // NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style) mid_mt_init(); // NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy) hak_config_init(); // Phase 6.16: Initialize FrozenPolicy (SACS-3) hkm_policy_init(); // Phase 6.15 P0.3: Configure EVO sampling from environment variable // HAKMEM_EVO_SAMPLE: 0=disabled (default), N=sample every 2^N calls // Example: HAKMEM_EVO_SAMPLE=10 → sample every 1024 calls // HAKMEM_EVO_SAMPLE=16 → sample every 65536 calls char* evo_sample_str = getenv("HAKMEM_EVO_SAMPLE"); if (evo_sample_str && atoi(evo_sample_str) > 0) { int freq = atoi(evo_sample_str); if (freq >= 64) { fprintf(stderr, "[hakmem] Warning: HAKMEM_EVO_SAMPLE=%d too large, using 63\n", freq); freq = 63; } g_evo_sample_mask = (1ULL << freq) - 1; HAKMEM_LOG("EVO sampling enabled: every 2^%d = %llu calls\n", freq, (unsigned long long)(g_evo_sample_mask + 1)); } else { g_evo_sample_mask = 0; // Disabled by default HAKMEM_LOG("EVO sampling disabled (HAKMEM_EVO_SAMPLE not set or 0)\n"); } #ifdef __linux__ // Record baseline KPIs memset(g_latency_histogram, 0, sizeof(g_latency_histogram)); g_latency_samples = 0; get_page_faults(&g_baseline_soft_pf, &g_baseline_hard_pf); g_baseline_rss_kb = get_rss_kb(); HAKMEM_LOG("Baseline: soft_pf=%lu, hard_pf=%lu, rss=%lu KB\n", (unsigned long)g_baseline_soft_pf, (unsigned long)g_baseline_hard_pf, (unsigned long)g_baseline_rss_kb); #endif HAKMEM_LOG("Initialized (PoC version)\n"); HAKMEM_LOG("Sampling rate: 1/%d\n", SAMPLING_RATE); HAKMEM_LOG("Max sites: %d\n", MAX_SITES); // Bench preset: Tiny-only (disable non-essential subsystems) { char* bt = getenv("HAKMEM_BENCH_TINY_ONLY"); if (bt && atoi(bt) != 0) { g_bench_tiny_only = 1; } } // Under LD_PRELOAD, enforce safer 
defaults for Tiny path unless overridden { char* ldpre = getenv("LD_PRELOAD"); if (ldpre && strstr(ldpre, "libhakmem.so")) { g_ldpreload_mode = 1; // Default LD-safe mode if not set: 1 (Tiny-only) char* lds = getenv("HAKMEM_LD_SAFE"); if (lds) { /* NOP used in wrappers */ } else { setenv("HAKMEM_LD_SAFE", "1", 0); } if (!getenv("HAKMEM_TINY_TLS_SLL")) { setenv("HAKMEM_TINY_TLS_SLL", "0", 0); // disable TLS SLL by default } if (!getenv("HAKMEM_TINY_USE_SUPERSLAB")) { setenv("HAKMEM_TINY_USE_SUPERSLAB", "0", 0); // disable SuperSlab path by default } } } // Runtime safety toggle char* safe_free_env = getenv("HAKMEM_SAFE_FREE"); if (safe_free_env && atoi(safe_free_env) != 0) { g_strict_free = 1; HAKMEM_LOG("Strict free safety enabled (HAKMEM_SAFE_FREE=1)\n"); } else { // Heuristic: if loaded via LD_PRELOAD, enable strict free by default char* ldpre = getenv("LD_PRELOAD"); if (ldpre && strstr(ldpre, "libhakmem.so")) { g_ldpreload_mode = 1; g_strict_free = 1; HAKMEM_LOG("Strict free safety auto-enabled under LD_PRELOAD\n"); } } // Invalid free logging toggle (default off to avoid spam under LD_PRELOAD) char* invlog = getenv("HAKMEM_INVALID_FREE_LOG"); if (invlog && atoi(invlog) != 0) { g_invalid_free_log = 1; HAKMEM_LOG("Invalid free logging enabled (HAKMEM_INVALID_FREE_LOG=1)\n"); } // Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead // Perf showed getenv() on hot path consumed 43.96% CPU time (26.41% strcmp + 17.55% getenv) char* inv = getenv("HAKMEM_INVALID_FREE"); if (inv && strcmp(inv, "fallback") == 0) { g_invalid_free_mode = 0; // fallback mode: route invalid frees to libc HAKMEM_LOG("Invalid free mode: fallback to libc (HAKMEM_INVALID_FREE=fallback)\n"); } else { // Under LD_PRELOAD, prefer safety: default to fallback unless explicitly overridden char* ldpre = getenv("LD_PRELOAD"); if (ldpre && strstr(ldpre, "libhakmem.so")) { g_ldpreload_mode = 1; g_invalid_free_mode = 0; HAKMEM_LOG("Invalid free mode: fallback to libc (auto under LD_PRELOAD)\n"); } else { g_invalid_free_mode = 1; // default: skip invalid-free check HAKMEM_LOG("Invalid free mode: skip check (default)\n"); } } // NEW Phase 6.8: Feature-gated initialization (check g_hakem_config flags) if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) { hak_pool_init(); } // NEW Phase 6.13: L2.5 LargePool (64KB-1MB allocations) hak_l25_pool_init(); if (!g_bench_tiny_only && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE)) { hak_bigcache_init(); hak_bigcache_set_free_callback(bigcache_free_callback); } if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) { hak_elo_init(); // Phase 6.11.4 P0-2: Initialize cached strategy to default (strategy 0) atomic_store(&g_cached_strategy_id, 0); } if (!g_bench_tiny_only && HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE)) { hak_batch_init(); } if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_EVOLUTION)) { hak_evo_init(); } if (!g_bench_tiny_only) { // Phase 6.16: Initialize ACE stats (sampling) – default off hkm_ace_stats_init(); // Phase 6.16: Initialize sampling profiler – default off hkm_prof_init(); // Size histogram sampling (optional) hkm_size_hist_init(); } if (!g_bench_tiny_only) { // Start CAP learner (optional, env-gated) hkm_learner_init(); } // NEW Phase 6.10: Site Rules (MVP: always ON) // MT note: default disabled unless HAKMEM_SITE_RULES=1 char* sr_env = getenv("HAKMEM_SITE_RULES"); g_site_rules_enabled = (sr_env && atoi(sr_env) != 0); if (!g_bench_tiny_only && g_site_rules_enabled) { hak_site_rules_init(); } // NEW Phase 6.12: Tiny Pool (≤1KB 
allocations) hak_tiny_init(); // Env: optional Tiny flush on exit (memory efficiency evaluation) { char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT"); if (tf && atoi(tf) != 0) { g_flush_tiny_on_exit = 1; } char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG"); if (ud && atoi(ud) != 0) { g_ultra_debug_on_exit = 1; } // Register exit hook if any of the debug/flush toggles are on // or when path debug is requested. if (g_flush_tiny_on_exit || g_ultra_debug_on_exit || getenv("HAKMEM_TINY_PATH_DEBUG")) { atexit(hak_flush_tiny_exit); } } // NEW Phase ACE: Initialize Adaptive Control Engine hkm_ace_controller_init(&g_ace_controller); if (g_ace_controller.enabled) { hkm_ace_controller_start(&g_ace_controller); HAKMEM_LOG("ACE Learning Layer enabled and started\n"); } g_initializing = 0; // Publish that initialization is complete atomic_thread_fence(memory_order_seq_cst); g_initialized = 1; } void hak_shutdown(void) { if (!g_initialized) return; // NEW Phase ACE: Shutdown Adaptive Control Engine FIRST (before other subsystems) hkm_ace_controller_destroy(&g_ace_controller); if (!g_bench_tiny_only) { printf("[hakmem] Shutting down...\n"); hak_print_stats(); } // NEW Phase 6.9: Shutdown L2 Pool if (!g_bench_tiny_only) hak_pool_shutdown(); // NEW Phase 6.13: Shutdown L2.5 LargePool if (!g_bench_tiny_only) hak_l25_pool_shutdown(); // NEW: Shutdown BigCache Box if (!g_bench_tiny_only) hak_bigcache_shutdown(); // NEW Phase 6.2: Shutdown ELO Strategy Selection if (!g_bench_tiny_only) hak_elo_shutdown(); // NEW Phase 6.3: Shutdown madvise Batching if (!g_bench_tiny_only) hak_batch_shutdown(); // NEW Phase 6.10: Shutdown Site Rules if (!g_bench_tiny_only) hak_site_rules_shutdown(); // NEW Phase 6.12: Print Tiny Pool statistics if (!g_bench_tiny_only) hak_tiny_print_stats(); // NEW Phase 6.11.1: Print whale cache statistics if (!g_bench_tiny_only) { hkm_whale_dump_stats(); // NEW Phase 6.11.1: Shutdown whale cache hkm_whale_shutdown(); } // NEW Phase 6.11.1: Shutdown debug timing (must be last!) 
if (!g_bench_tiny_only) hkm_timing_shutdown(); // Phase 6.16: Dump sampling profiler if (!g_bench_tiny_only) hkm_prof_shutdown(); // Stop learner thread if (!g_bench_tiny_only) hkm_learner_shutdown(); // Stop Tiny background components (e.g., Intelligence Engine) hak_tiny_shutdown(); g_initialized = 0; } // Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%) __attribute__((always_inline)) inline void* hak_alloc_at(size_t size, hak_callsite_t site) { #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t0); // Profiling (build-time gated) #endif if (!g_initialized) hak_init(); // ======================================================================== // Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path) // ======================================================================== #ifdef HAKMEM_TINY_FAST_PATH if (size <= TINY_FAST_THRESHOLD) { // Ultra-simple TLS cache pop (bypasses Magazine/SuperSlab) extern void* tiny_fast_alloc(size_t); extern void tiny_fast_init(void); extern __thread int g_tiny_fast_initialized; if (__builtin_expect(!g_tiny_fast_initialized, 0)) { tiny_fast_init(); } void* ptr = tiny_fast_alloc(size); if (ptr) return ptr; // Fall through to slow path on failure } #endif // ======================================================================== uintptr_t site_id = (uintptr_t)site; // Phase 6.12: Tiny Pool fast-path (≤1KB allocations) // Priority: highest for tiny allocations (most frequent) if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) { #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_tiny); #endif void* tiny_ptr = NULL; #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR // Phase 6-1.7: Box Theory Refactoring (3-4 instruction fast path) tiny_ptr = hak_tiny_alloc_fast_wrapper(size); #elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE) // Phase 6-1.5: Ultra Simple (alignment guessing) tiny_ptr = hak_tiny_alloc_ultra_simple(size); #elif defined(HAKMEM_TINY_PHASE6_METADATA) // Phase 6-1.6: Metadata header tiny_ptr = hak_tiny_alloc_metadata(size); #else // Default: Standard Tiny path tiny_ptr = hak_tiny_alloc(size); #endif #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_TINY_ALLOC, t_tiny); #endif if (tiny_ptr) { // NEW Phase ACE: Track allocation for learning hkm_ace_track_alloc(); // Tiny Pool hit! Return immediately (no header needed) return tiny_ptr; } // DEBUG: Tiny Pool returned NULL - fallback to other paths static int log_count = 0; if (log_count < 3) { fprintf(stderr, "[DEBUG] tiny_alloc(%zu) returned NULL, falling back\n", size); log_count++; } // Tiny Pool miss: fallback to other paths below } // Record size histogram (sampling) — moved after Tiny fast-path to // keep hottest path minimal. Tiny hits skip histogram to reduce overhead. hkm_size_hist_record(size); // Phase Hybrid: Mid Range MT fast-path (8-32KB allocations) // Priority: second highest (after Tiny Pool) // Uses mimalloc-style per-thread segments for optimal MT performance if (__builtin_expect(mid_is_in_range(size), 0)) { #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_mid); #endif void* mid_ptr = mid_mt_alloc(size); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_POOL_GET, t_mid); #endif if (mid_ptr) { // Mid MT hit! 
Return immediately (no header, lock-free) return mid_ptr; } // Mid MT miss: fallback to other paths below (should be rare) } // Phase 6.11.4 P0-1 & P0-2: Compile-time guard + cached strategy update // Phase 6.15 P0.3: Restored with environment variable control (default disabled) #if HAKMEM_FEATURE_EVOLUTION // Only sample if enabled via HAKMEM_EVO_SAMPLE environment variable if (g_evo_sample_mask > 0) { static _Atomic uint64_t tick_counter = 0; if ((atomic_fetch_add(&tick_counter, 1) & g_evo_sample_mask) == 0) { struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); uint64_t now_ns = now.tv_sec * 1000000000ULL + now.tv_nsec; // P0-2: Update cached strategy when window closes if (hak_evo_tick(now_ns)) { // Window closed, update cached strategy int new_strategy = hak_elo_select_strategy(); atomic_store(&g_cached_strategy_id, new_strategy); } } } #endif // Phase 6.11.4 P0-2: Always use cached strategy (LEARN/FROZEN/CANARY all use same path) size_t threshold; if (HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) { // ELO enabled: use cached strategy (updated by hak_evo_tick) int strategy_id = atomic_load(&g_cached_strategy_id); threshold = hak_elo_get_threshold(strategy_id); } else { // ELO disabled: use default threshold (2MB - mimalloc's large threshold) threshold = 2097152; // 2MB } // Phase SACS-3: BigCache only for very large blocks (>= threshold) if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && size >= threshold) { void* cached_ptr = NULL; #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_bc); #endif if (hak_bigcache_try_get(size, site_id, &cached_ptr)) { #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc); #endif // Cache hit! Return immediately return cached_ptr; } #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc); #endif } // Phase SACS-3: No Site Rules in tier selection (size-only decision) // Phase 6.16 SACS-3: L1 via ACE unified path if (size > TINY_MAX_SIZE && size < threshold) { const FrozenPolicy* pol = hkm_policy_get(); #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_ace); #endif void* l1 = hkm_ace_alloc(size, site_id, pol); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_POOL_GET, t_ace); #endif if (l1) return l1; } // Phase SACS-3: For < threshold, prefer malloc; for >= threshold prefer mmap void* ptr; if (size >= threshold) { // Large allocation (L2): use mmap (enables batch madvise) #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_mmap); #endif ptr = hak_alloc_mmap_impl(size); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_SYSCALL_MMAP, t_mmap); #endif } else { // Small/medium allocation (L0/L1): use malloc (faster for <2MB) #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_malloc); #endif ptr = hak_alloc_malloc_impl(size); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_FALLBACK_MALLOC, t_malloc); #endif } if (!ptr) return NULL; // NEW Phase 6.5: Record allocation size for distribution signature (gated) if (g_evo_sample_mask > 0) { hak_evo_record_size(size); } // NEW: Set alloc_site and class_bytes in header (for BigCache Phase 2) AllocHeader* hdr = (AllocHeader*)((char*)ptr - HEADER_SIZE); // Verify magic (fail-fast if header corrupted) if (hdr->magic != HAKMEM_MAGIC) { fprintf(stderr, "[hakmem] ERROR: Invalid magic in allocated header!\n"); return ptr; // Return anyway, but log error } // Set allocation site (for per-site cache reuse) hdr->alloc_site = site_id; // Set size class for caching (L2 only → threshold class) if (size >= threshold) { hdr->class_bytes = threshold; // cacheable at L2 threshold } else { hdr->class_bytes = 0; // Not cacheable } #if HAKMEM_DEBUG_TIMING 
HKM_TIME_END(HKM_CAT_HAK_ALLOC, t0); // Profiling (build-time gated) #endif return ptr; } // Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%) // Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining #ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR __attribute__((always_inline)) inline #endif void hak_free_at(void* ptr, size_t size, hak_callsite_t site) { #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t0); // Profiling (build-time gated) #endif (void)site; // Not used yet (will be used in BigCache Phase 2) (void)size; // Size stored in header if (!ptr) { #if HAKMEM_DEBUG_TIMING #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_HAK_FREE, t0); #endif #endif return; } // OPTIMIZATION PHASE 2+1 (2025-11-01): Check Tiny Pool FIRST // Phase 2: Ultra-fast owner_slab with TLS range check (1-2 cycles negative lookup) // Phase 1: Reorder to avoid Mid MT mutex overhead for Tiny allocations (90% of mixed workload) // // Target: +12-13% improvement (16.24 → 18.4-18.6 M ops/sec) // - Tiny allocations (90%): Skip Mid MT mutex entirely → ~12% improvement // - Mid allocations (10%): Fast negative lookup from owner_slab → minimal overhead TinySlab* tiny_slab = hak_tiny_owner_slab(ptr); if (tiny_slab) { #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR // Phase 6-1.7: Box Theory Refactoring (2-3 instruction fast path) // Box 6 handles both same-thread (fast) and cross-thread (remote) internally hak_tiny_free_fast_wrapper(ptr); return; #elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE) // Phase 6-1.5: Only use ultra-simple free on same-thread pointers. // Cross-thread frees must go through the full tiny free path // to ensure proper remote-queue handling and slab reuse. pthread_t self_pt = pthread_self(); if (__builtin_expect(pthread_equal(tiny_slab->owner_tid, self_pt), 1)) { hak_tiny_free_ultra_simple(ptr); return; } #elif defined(HAKMEM_TINY_PHASE6_METADATA) // Phase 6-1.6: Metadata header hak_tiny_free_metadata(ptr); return; #endif // Fallback: full tiny free (handles cross-thread case correctly) hak_tiny_free(ptr); return; } // Phase Hybrid: Mid Range MT check (8-32KB, headerless) { size_t mid_block_size = 0; int mid_class_idx = 0; // First check if ptr is in current thread's segment (fast path) for (int i = 0; i < MID_NUM_CLASSES; i++) { MidThreadSegment* seg = &g_mid_segments[i]; if (seg->chunk_base && ptr >= seg->chunk_base && ptr < seg->end) { *(void**)ptr = seg->free_list; seg->free_list = ptr; seg->used_count--; return; } } // Not in current thread's segment - try registry (mutex + binary search) if (mid_registry_lookup(ptr, &mid_block_size, &mid_class_idx)) { mid_mt_free(ptr, mid_block_size); return; } } // DISABLED: SuperSlab Registry lookup causes false positives // Problem: L25 allocations aligned to 1MB boundary are misidentified as SuperSlabs // causing crashes when checking magic number on unmapped/invalid memory // TODO: Fix SuperSlab registry to avoid false positives (descriptor-based check?) #if 0 SuperSlab* ss = hak_super_lookup(ptr); if (ss) { hak_tiny_free(ptr); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_HAK_FREE, t0); #endif return; } #endif // Mid Pool headerless fast route: use page descriptor before header read { extern int hak_pool_mid_lookup(void* ptr, size_t* out_size); extern void hak_pool_free_fast(void* ptr, uintptr_t site_id); size_t mid_sz = 0; if (hak_pool_mid_lookup(ptr, &mid_sz)) { // For Mid, header read is unnecessary; free directly via pool. 
hak_pool_free_fast(ptr, (uintptr_t)site); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_HAK_FREE, t0); #endif return; } } // L2.5 headerless route: use page descriptor before header read { extern int hak_l25_lookup(void* ptr, size_t* out_size); extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id); size_t l25_sz = 0; if (hak_l25_lookup(ptr, &l25_sz)) { // Stats (optional): count as large free hkm_ace_stat_large_free(); hak_l25_pool_free_fast(ptr, (uintptr_t)site); #if HAKMEM_DEBUG_TIMING HKM_TIME_END(HKM_CAT_HAK_FREE, t0); #endif return; } } // NEW Phase 6.5: Measure free latency (start timing) // Gate by EVO sampling mask to avoid per-op overhead when disabled int _do_evo = (g_evo_sample_mask > 0); struct timespec start_time, end_time; if (_do_evo) { clock_gettime(CLOCK_MONOTONIC, &start_time); } // Helper macro to record latency before returning (build-time gated timing) #if HAKMEM_DEBUG_TIMING #define RECORD_FREE_LATENCY() do { \ if (_do_evo) { \ clock_gettime(CLOCK_MONOTONIC, &end_time); \ uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \ (end_time.tv_nsec - start_time.tv_nsec); \ hak_evo_record_latency((double)ns); \ if (hak_evo_is_canary()) { \ hak_evo_record_canary_result(0, (double)ns); \ } \ } \ HKM_TIME_END(HKM_CAT_HAK_FREE, t0); \ } while(0) #else #define RECORD_FREE_LATENCY() do { \ if (_do_evo) { \ clock_gettime(CLOCK_MONOTONIC, &end_time); \ uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \ (end_time.tv_nsec - start_time.tv_nsec); \ hak_evo_record_latency((double)ns); \ if (hak_evo_is_canary()) { \ hak_evo_record_canary_result(0, (double)ns); \ } \ } \ } while(0) #endif // Get raw pointer (before header) void* raw = (char*)ptr - HEADER_SIZE; #ifdef __linux__ if (g_strict_free) { // Safety: ensure header address is mapped before touching it (optional) long _ps = sysconf(_SC_PAGESIZE); void* _pg = (void*)((uintptr_t)raw & ~((uintptr_t)_ps - 1)); unsigned char _vec; if (mincore(_pg, (size_t)_ps, &_vec) != 0) { // Not a valid mapped region → fallback directly to libc free extern void __libc_free(void*); __libc_free(ptr); RECORD_FREE_LATENCY(); return; } } #endif // Read header AllocHeader* hdr = (AllocHeader*)raw; // NEW: Verify magic (fail-fast if corrupted or not from hakmem) if (hdr->magic != HAKMEM_MAGIC) { if (g_invalid_free_log) { fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X) - possible corruption or non-hakmem pointer\n", hdr->magic, HAKMEM_MAGIC); } // Phase 7.4: Use cached mode (eliminates 44% CPU overhead from getenv on hot path!) // OLD CODE (44% CPU time!): const char* inv = getenv("HAKMEM_INVALID_FREE"); // if (inv && strcmp(inv, "fallback") == 0) mode_skip = 0; int mode_skip = g_invalid_free_mode; // 1 = skip, 0 = fallback to libc if (mode_skip) { // Skip freeing unknown pointer to avoid abort (possible mmap region). Log only. RECORD_FREE_LATENCY(); return; } else { fprintf(stderr, "[hakmem] Attempting fallback to system free()...\n"); extern void __libc_free(void*); __libc_free(ptr); RECORD_FREE_LATENCY(); return; } } // Phase SACS-3: BigCache put only for L2 (class_bytes >= 2MB) if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->class_bytes >= 2097152) { // Pass actual allocated size (hdr->size), not class_bytes! 
        // This prevents buffer overflow when BigCache returns undersized blocks
        if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) {
            RECORD_FREE_LATENCY();
            return;  // Successfully cached, skip actual free
        }
    }

    // Phase 6.9.1: Pool allocations are now handled via header method
    // (no separate detection needed, just dispatch on method)

    // Dispatch to correct free function
    switch (hdr->method) {
        case ALLOC_METHOD_POOL:
            // Phase 6.9.1: Pool allocation - return to pool
            if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
                // Stats: record free in ACE L1 Mid
                hkm_ace_stat_mid_free();
                hak_pool_free(ptr, hdr->size, hdr->alloc_site);
            } else {
                // Pool disabled, shouldn't happen (fail-fast)
                fprintf(stderr, "[hakmem] ERROR: POOL allocation but POOL feature disabled!\n");
            }
            RECORD_FREE_LATENCY();
            return;

        case ALLOC_METHOD_L25_POOL:
            // Phase 6.13: L2.5 Pool allocation - return to pool
            hkm_ace_stat_large_free();
            hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
            RECORD_FREE_LATENCY();
            return;

        case ALLOC_METHOD_MALLOC:
            free(raw);
            break;

        case ALLOC_METHOD_MMAP:
            // Phase 6.4 P1: Apply free policy (Hot/Warm/Cold)
            if (g_hakem_config.free_policy == FREE_POLICY_KEEP) {
                // KEEP: do nothing (retain the VA mapping, no madvise either)
                RECORD_FREE_LATENCY();
                return;
            } else if (g_hakem_config.free_policy == FREE_POLICY_ADAPTIVE) {
                // ADAPTIVE: classify as Hot/Warm/Cold
                FreeThermal thermal = hak_classify_thermal(hdr->size);
                switch (thermal) {
                    case FREE_THERMAL_HOT:
                        // HOT (< 1MB): do nothing (will be reused soon)
                        RECORD_FREE_LATENCY();
                        return;
                    case FREE_THERMAL_WARM:
                        // WARM (1-2MB): MADV_FREE (no munmap, return only the physical pages)
#ifdef __linux__
                        madvise(raw, hdr->size, MADV_FREE);
#endif
                        RECORD_FREE_LATENCY();
                        return;
                    case FREE_THERMAL_COLD:
                        // COLD (>= 2MB): batch (Phase 6.8: feature-gated)
                        if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                            hak_batch_add(raw, hdr->size);
                            RECORD_FREE_LATENCY();
                            return;
                        }
                        // Small blocks: immediate munmap
#ifdef __linux__
                        // Phase 6.11.1: Try whale cache first
                        if (hkm_whale_put(raw, hdr->size) != 0) {
                            hkm_sys_munmap(raw, hdr->size);
                        }
#else
                        free(raw);
#endif
                        break;
                }
            } else {
                // BATCH (default): Phase 6.8 feature-gated
                // - Keep VA mapped for reuse (mimalloc strategy)
                // - Only MADV_FREE on batch flush (release physical pages)
                // - munmap happens on cold eviction only
                if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                    hak_batch_add(raw, hdr->size);
                    RECORD_FREE_LATENCY();
                    return;
                }
                // Small blocks: immediate munmap (not worth batching)
#ifdef __linux__
                // Phase 6.11.1: Try whale cache first
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    hkm_sys_munmap(raw, hdr->size);
                }
#else
                free(raw);
#endif
            }
            break;

        default:
            fprintf(stderr, "[hakmem] ERROR: Unknown allocation method: %d\n", hdr->method);
            break;
    }

    // Record latency for all paths that reach here
    RECORD_FREE_LATENCY();
#undef RECORD_FREE_LATENCY
}

void hak_print_stats(void) {
    printf("\n========================================\n");
    printf("hakmem ELO-based Profiling Statistics\n");
    printf("========================================\n");
    printf("\nOptimization Stats:\n");
    printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
    hak_elo_print_leaderboard();
    printf("========================================\n\n");
}

// ============================================================================
// Phase 6.15 P0: Standard C Library Wrappers (for LD_PRELOAD)
// ============================================================================

#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
void* malloc(size_t size) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } void free(void* ptr) { if (!ptr) return; extern void __libc_free(void*); __libc_free(ptr); } void* calloc(size_t nmemb, size_t size) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } void* realloc(void* ptr, size_t size) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } #else // malloc wrapper - intercepts system malloc() calls void* malloc(size_t size) { // ======================================================================== // Phase 6-4: ULTRA-FAST PATH (Option A optimization) // Priority: initialized + tiny size → direct to fast cache (2-3 branches) // Expected hit rate: 95%+ for tiny allocations // ======================================================================== #ifdef HAKMEM_TINY_FAST_PATH // Branch 1+2: initialized check + size check (combined for branch prediction) if (__builtin_expect(g_initialized && size <= TINY_FAST_THRESHOLD, 1)) { extern void* tiny_fast_alloc(size_t); extern void tiny_fast_init(void); extern __thread int g_tiny_fast_initialized; // Branch 3: init check (rarely taken) if (__builtin_expect(!g_tiny_fast_initialized, 0)) { tiny_fast_init(); } // Fast path: TLS cache pop (3-4 instructions inside tiny_fast_alloc) void* ptr = tiny_fast_alloc(size); if (__builtin_expect(ptr != NULL, 1)) { return ptr; // 🚀 FAST PATH HIT: 3 branches total! } // Fall through to slow path on cache miss } #endif // ======================================================================== // ======================================================================== // SLOW PATH: All guard checks (for non-tiny, uninitialized, or special cases) // ======================================================================== // Recursion guard: if we're inside the allocator already, fall back to libc if (g_hakmem_lock_depth > 0) { // Nested call detected - fallback to system malloc extern void* __libc_malloc(size_t); return __libc_malloc(size); } // Initialization guard: during hak_init() bootstrap, use libc directly if (__builtin_expect(g_initializing != 0, 0)) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } // LD safe modes: 1=tiny-only, 2=pass-through // Determine LD_PRELOAD mode early (before hak_init) to avoid misrouting int ld_mode = hak_ld_env_mode(); if (ld_mode) { // Avoid mixing with jemalloc-managed programs (e.g., redis) if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } // Before hakmem initialization completes, always delegate to libc if (!g_initialized || g_initializing) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } const char* lds = getenv("HAKMEM_LD_SAFE"); int mode = (lds ? 
atoi(lds) : 1); if (mode >= 2 || size > TINY_MAX_SIZE) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } } // First-level call: enter allocator (no global lock) g_hakmem_lock_depth++; void* ptr = hak_alloc_at(size, HAK_CALLSITE()); g_hakmem_lock_depth--; return ptr; } // free wrapper - intercepts system free() calls void free(void* ptr) { if (!ptr) return; // NULL check // ======================================================================== // Phase 6-4: ULTRA-FAST PATH (Option A optimization) // Priority: initialized → direct to fast free path (1-2 branches) // Expected hit rate: 95%+ for tiny allocations // ======================================================================== // Branch 1: initialized check (fast path for common case) if (__builtin_expect(g_initialized, 1)) { // Fast path: normal operation, no special handling needed // Phase 6 Fast Path variants (when enabled) #ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE g_hakmem_lock_depth++; hak_tiny_free_ultra_simple(ptr); g_hakmem_lock_depth--; return; #elif defined(HAKMEM_TINY_PHASE6_METADATA) g_hakmem_lock_depth++; hak_tiny_free_metadata(ptr); g_hakmem_lock_depth--; return; #else // Default fast path g_hakmem_lock_depth++; hak_free_at(ptr, 0, HAK_CALLSITE()); g_hakmem_lock_depth--; return; #endif } // ======================================================================== // SLOW PATH: All guard checks (for uninitialized or special cases) // ======================================================================== // Recursion guard: if we're inside the allocator already, fall back to libc if (g_hakmem_lock_depth > 0) { // Nested call detected - fallback to system free extern void __libc_free(void*); __libc_free(ptr); return; } if (__builtin_expect(g_initializing != 0, 0)) { extern void __libc_free(void*); __libc_free(ptr); return; } if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void __libc_free(void*); __libc_free(ptr); return; } // In LD_PRELOAD mode, before hakmem initialization completes, always delegate { if (hak_ld_env_mode()) { if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void __libc_free(void*); __libc_free(ptr); return; } if (!g_initialized || g_initializing) { extern void __libc_free(void*); __libc_free(ptr); return; } } } // Fallback (should not reach here in normal case) g_hakmem_lock_depth++; hak_free_at(ptr, 0, HAK_CALLSITE()); g_hakmem_lock_depth--; } // calloc wrapper - intercepts system calloc() calls void* calloc(size_t nmemb, size_t size) { // Recursion guard if (g_hakmem_lock_depth > 0) { // Nested call detected - fallback to system calloc extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (__builtin_expect(g_initializing != 0, 0)) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } // Overflow check before any multiplication if (size != 0 && nmemb > (SIZE_MAX / size)) { errno = ENOMEM; return NULL; } if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } // Determine LD_PRELOAD mode early (before hak_init) int ld_mode = hak_ld_env_mode(); if (ld_mode) { if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (!g_initialized || g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } const char* lds = getenv("HAKMEM_LD_SAFE"); int mode = (lds ? 
atoi(lds) : 1); size_t total = nmemb * size; // safe: overflow checked above if (mode >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } } g_hakmem_lock_depth++; size_t total_size = nmemb * size; // safe: overflow checked above void* ptr = hak_alloc_at(total_size, HAK_CALLSITE()); if (ptr) { memset(ptr, 0, total_size); // calloc zeros memory } g_hakmem_lock_depth--; return ptr; } // realloc wrapper - intercepts system realloc() calls void* realloc(void* ptr, size_t size) { // Recursion guard if (g_hakmem_lock_depth > 0) { // Nested call detected - fallback to system realloc extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } if (__builtin_expect(g_initializing != 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } // Determine LD_PRELOAD mode early (before hak_init) int ld_mode = hak_ld_env_mode(); if (ld_mode) { if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } if (!g_initialized || g_initializing) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } const char* lds = getenv("HAKMEM_LD_SAFE"); int mode = (lds ? atoi(lds) : 1); // Pass-through mode, or resizing beyond Tiny range → route to libc if (mode >= 2 || size > TINY_MAX_SIZE) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } // Tiny-only safe mode: if the existing pointer is NOT Tiny-managed, // do not touch it — delegate to libc to avoid header mismatches. if (ptr != NULL && !hak_tiny_is_managed(ptr)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } } g_hakmem_lock_depth++; void* new_ptr = NULL; if (!ptr) { // realloc(NULL, size) = malloc(size) new_ptr = hak_alloc_at(size, HAK_CALLSITE()); } else if (size == 0) { // realloc(ptr, 0) = free(ptr) hak_free_at(ptr, 0, HAK_CALLSITE()); new_ptr = NULL; } else { // Allocate new block new_ptr = hak_alloc_at(size, HAK_CALLSITE()); if (new_ptr) { // Get old size from header void* raw = (char*)ptr - HEADER_SIZE; AllocHeader* hdr = (AllocHeader*)raw; if (hdr->magic == HAKMEM_MAGIC) { size_t old_size = hdr->size - HEADER_SIZE; // User-visible size size_t copy_size = (old_size < size) ? old_size : size; memcpy(new_ptr, ptr, copy_size); } else { // Invalid header, copy what we can (best effort) memcpy(new_ptr, ptr, size); } // Free old block hak_free_at(ptr, 0, HAK_CALLSITE()); } } g_hakmem_lock_depth--; return new_ptr; } #endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD
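
// ----------------------------------------------------------------------------
// Illustrative usage sketch (kept in a comment, not compiled): how a client
// could drive the explicit call-site API used throughout this file when
// linking against hakmem directly instead of relying on the LD_PRELOAD
// wrappers above. It assumes hakmem.h declares hak_init(), hak_alloc_at(),
// hak_free_at(), hak_get_kpi(), hak_print_stats(), hak_shutdown(), the
// hak_kpi_t struct and the HAK_CALLSITE() macro exactly as they are used in
// this file; the buffer size and the printed KPI fields below are examples
// only, not part of the library contract.
/*
#include <stdio.h>
#include "hakmem.h"

int main(void) {
    hak_init();                                      // one-time init (pthread_once inside)

    void* buf = hak_alloc_at(4096, HAK_CALLSITE());  // call-site tagged allocation
    if (buf) {
        // ... use buf ...
        hak_free_at(buf, 4096, HAK_CALLSITE());      // size hint; header carries the real size
    }

    hak_kpi_t kpi;
    hak_get_kpi(&kpi);                               // latency / page-fault / RSS deltas
    printf("p50 alloc: %llu ns, soft faults: %llu\n",
           (unsigned long long)kpi.p50_alloc_ns,
           (unsigned long long)kpi.soft_page_faults);

    hak_print_stats();
    hak_shutdown();
    return 0;
}
*/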