### Changes:
- **Removed duplicate fast paths**: Disabled HAKMEM_TINY_FAST_PATH in:
  - malloc() entry point (line ~1257)
  - hak_alloc_at() helper (line ~682)
- **Unified to Box Theory**: All tiny allocations now use Box Theory's hak_tiny_alloc_fast_wrapper() at line ~712 (HAKMEM_TINY_PHASE6_BOX_REFACTOR)

### Rationale:
- Previous implementation had **2 fast path checks** (double overhead)
- Box Theory (tiny_alloc_fast.inc.h) provides an optimized 3-4 instruction path
- CLAUDE.md claims +64% (debug) and +150% (production) with Box Theory
- Attempt to eliminate redundant checks and unify to a single fast path

### Performance Results:
⚠️ **REGRESSION** - Performance decreased:
```
Baseline (old tiny_fast_alloc): 1.68M ops/s
Box Theory (unified):           1.35M ops/s (-20%)
System malloc:                  8.08M ops/s (reference)
```

### Status:
🔬 **EXPERIMENTAL** - This commit documents the attempt but shows a regression. Possible issues:
1. Box Theory may need additional tuning (env vars alone are not sufficient)
2. Refill backend may be slower than the old implementation
3. TLS freelist initialization overhead
4. Missing optimizations in the Box Theory integration

### Next Steps:
- Profile to identify why Box Theory is slower
- Compare refill efficiency: old vs Box Theory
- Check if TLS SLL variables are properly initialized
- Consider reverting if the root cause is not found

Related: LARSON_PERFORMANCE_ANALYSIS_2025_11_05.md, CLAUDE.md Phase 6-1.7
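The net effect of the change is that the tiny-allocation fast path is selected once, at compile time, inside hak_alloc_at()/hak_free_at(). The sketch below is a minimal illustration of that `#ifdef` chain; the extern declarations mirror the ones in this file, while the helper name `tiny_alloc_dispatch` is hypothetical and exists only to show the dispatch shape.

```c
/* Minimal sketch of the unified tiny fast-path selection (mirrors hak_alloc_at()
 * below). Exactly one variant is compiled in; Box Theory wins when
 * HAKMEM_TINY_PHASE6_BOX_REFACTOR is defined. Illustrative only. */
#include <stddef.h>

extern void* hak_tiny_alloc_fast_wrapper(size_t size); /* Phase 6-1.7: Box Theory */
extern void* hak_tiny_alloc_ultra_simple(size_t size); /* Phase 6-1.5 */
extern void* hak_tiny_alloc_metadata(size_t size);     /* Phase 6-1.6 */
extern void* hak_tiny_alloc(size_t size);              /* default Tiny path */

static inline void* tiny_alloc_dispatch(size_t size) {  /* hypothetical helper */
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
    return hak_tiny_alloc_fast_wrapper(size);   /* 3-4 instruction fast path */
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
    return hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
    return hak_tiny_alloc_metadata(size);
#else
    return hak_tiny_alloc(size);
#endif
}
```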
// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept

#define _GNU_SOURCE // For mincore, madvise on Linux

#include "hakmem.h"
#include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h" // NEW: BigCache Box
#include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h" // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
#include <pthread.h> // Phase 6.15: Threading primitives (recursion guard only)
#include <errno.h> // calloc overflow handling

// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>

// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8 // Linux MADV_FREE
#endif
#endif

// ============================================================================
// Configuration
// ============================================================================

#define MAX_SITES 256 // Hash table size (power of 2)
#define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling)
#define HASH_MASK (MAX_SITES - 1)

// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h


// ============================================================================
// Global State
// ============================================================================

// NEW Phase ACE: Adaptive Control Engine
static struct hkm_ace_controller g_ace_controller;

static int g_initialized = 0;
static int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time!
static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc

// Statistics
static uint64_t g_malloc_count = 0; // Used for optimization stats display

// Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick)
static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure)

// Phase 6.15 P0.3: EVO Sampling Control (environment variable)
static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls

// Phase 6.15 P1: Site Rules enable (env: HAKMEM_SITE_RULES=1 to enable)
static int g_site_rules_enabled = 0; // default off to avoid contention in MT
static int g_bench_tiny_only = 0; // bench preset: Tiny-only fast path
int g_ldpreload_mode = 0; // 1 when running via LD_PRELOAD=libhakmem.so
static int g_flush_tiny_on_exit = 0; // HAKMEM_TINY_FLUSH_ON_EXIT=1
static int g_ultra_debug_on_exit = 0; // HAKMEM_TINY_ULTRA_DEBUG=1
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
static int g_ldpre_env_cached = -1; // -1 = unknown, 0/1 cached
static inline int hak_ld_env_mode(void) {
    if (g_ldpre_env_cached < 0) {
        const char* ldpre = getenv("LD_PRELOAD");
        g_ldpre_env_cached = (ldpre && strstr(ldpre, "libhakmem.so")) ? 1 : 0;
    }
    return g_ldpre_env_cached;
}

// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
static int g_force_libc_alloc = 1;
#else
static int g_force_libc_alloc = -1; // 1=force libc, 0=use hakmem, -1=uninitialized
#endif
static inline int hak_force_libc_alloc(void) {
    if (g_force_libc_alloc < 0) {
        const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
        if (force && *force) {
            g_force_libc_alloc = (atoi(force) != 0);
        } else {
            const char* wrap = getenv("HAKMEM_WRAP_TINY");
            if (wrap && *wrap && atoi(wrap) == 0) {
                g_force_libc_alloc = 1;
            } else {
                g_force_libc_alloc = 0;
            }
        }
    }
    return g_force_libc_alloc;
}

// LD_PRELOAD safety: avoid interposing when jemalloc is present
static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
static int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached
static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded < 0) {
        void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
        if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
        g_jemalloc_loaded = (h != NULL) ? 1 : 0;
        if (h) dlclose(h);
    }
    return g_jemalloc_loaded;
}
static inline int hak_ld_block_jemalloc(void) {
    if (g_ld_block_jemalloc < 0) {
        const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
        g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0);
    }
    return g_ld_block_jemalloc;
}

// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
// Box Theory - Layer 1 (API Layer):
// This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
// Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
static __thread int g_hakmem_lock_depth = 0; // 0 = outermost call

int hak_in_wrapper(void) {
    return g_hakmem_lock_depth > 0; // Simple and correct!
}

// Initialization guard
static int g_initializing = 0;
int hak_is_initializing(void) { return g_initializing; }

// ============================================================================
// Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations
// ============================================================================
// Forward declarations for Phase 6 fast path variants
// Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
extern void* hak_tiny_alloc_ultra_simple(size_t size);
extern void hak_tiny_free_ultra_simple(void* ptr);
#endif

// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif

// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
extern void hak_tiny_free_fast_wrapper(void* ptr);
#endif

static void hak_flush_tiny_exit(void) {
    // Best-effort: flush Tiny magazines at process exit
    if (g_flush_tiny_on_exit) {
        hak_tiny_magazine_flush_all();
        hak_tiny_trim();
    }
    if (g_ultra_debug_on_exit) {
        hak_tiny_ultra_debug_dump();
    }
    // Path debug dump (optional): HAKMEM_TINY_PATH_DEBUG=1
    hak_tiny_path_debug_dump();
    // Extended counters (optional): HAKMEM_TINY_COUNTERS_DUMP=1
    extern void hak_tiny_debug_counters_dump(void);
    hak_tiny_debug_counters_dump();
}

// ============================================================================
// KPI Measurement (for UCB1) - NEW!
// ============================================================================

#ifdef __linux__
// Latency histogram (simple buckets for P50/P95/P99)
#define LATENCY_BUCKETS 100
static uint64_t g_latency_histogram[LATENCY_BUCKETS];
static uint64_t g_latency_samples = 0;

// Baseline page faults (at init)
static uint64_t g_baseline_soft_pf = 0;
static uint64_t g_baseline_hard_pf = 0;
static uint64_t g_baseline_rss_kb = 0;

// Get page faults from /proc/self/stat
static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf) {
    FILE* f = fopen("/proc/self/stat", "r");
    if (!f) {
        *soft_pf = 0;
        *hard_pf = 0;
        return;
    }

    // Format: pid (comm) state ... minflt cminflt majflt cmajflt ...
    // Fields: 1 2 3 ... 10(minflt) 11(cminflt) 12(majflt) 13(cmajflt)
    unsigned long minflt = 0, majflt = 0;
    unsigned long dummy;
    char comm[256], state;

    (void)fscanf(f, "%lu %s %c %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                 &dummy, comm, &state, &dummy, &dummy, &dummy, &dummy, &dummy,
                 &dummy, &minflt, &dummy, &majflt);

    fclose(f);

    *soft_pf = minflt;
    *hard_pf = majflt;
}

// Get RSS from /proc/self/statm (in KB)
static uint64_t get_rss_kb(void) {
    FILE* f = fopen("/proc/self/statm", "r");
    if (!f) return 0;

    // Format: size resident shared text lib data dt
    // We want 'resident' (field 2) in pages
    unsigned long size, resident;
    (void)fscanf(f, "%lu %lu", &size, &resident);
    fclose(f);

    long page_size = sysconf(_SC_PAGESIZE);
    return (resident * page_size) / 1024; // Convert to KB
}

// NOTE: Latency measurement functions (currently unused, for future use)
/*
static inline uint64_t measure_latency_ns(void (*func)(void*), void* arg) {
    struct timespec start, end;
    clock_gettime(CLOCK_MONOTONIC, &start);

    func(arg); // Execute function

    clock_gettime(CLOCK_MONOTONIC, &end);

    uint64_t ns = (end.tv_sec - start.tv_sec) * 1000000000ULL +
                  (end.tv_nsec - start.tv_nsec);
    return ns;
}

static void record_latency(uint64_t ns) {
    // Bucket: 0-10ns, 10-20ns, ..., 990-1000ns, 1000+ns
    size_t bucket = ns / 10;
    if (bucket >= LATENCY_BUCKETS) bucket = LATENCY_BUCKETS - 1;

    g_latency_histogram[bucket]++;
    g_latency_samples++;
}
*/

// Calculate percentile from histogram
static uint64_t calculate_percentile(double percentile) {
    if (g_latency_samples == 0) return 0;

    uint64_t target = (uint64_t)(g_latency_samples * percentile);
    uint64_t cumulative = 0;

    for (size_t i = 0; i < LATENCY_BUCKETS; i++) {
        cumulative += g_latency_histogram[i];
        if (cumulative >= target) {
            return i * 10; // Return bucket lower bound (ns)
        }
    }

    return (LATENCY_BUCKETS - 1) * 10;
}

// Implement hak_get_kpi()
void hak_get_kpi(hak_kpi_t* out) {
    memset(out, 0, sizeof(hak_kpi_t));

    // Latency (from histogram)
    out->p50_alloc_ns = calculate_percentile(0.50);
    out->p95_alloc_ns = calculate_percentile(0.95);
    out->p99_alloc_ns = calculate_percentile(0.99);

    // Page Faults (delta from baseline)
    uint64_t soft_pf, hard_pf;
    get_page_faults(&soft_pf, &hard_pf);
    out->soft_page_faults = soft_pf - g_baseline_soft_pf;
    out->hard_page_faults = hard_pf - g_baseline_hard_pf;

    // RSS (delta from baseline, in MB)
    uint64_t rss_kb = get_rss_kb();
    int64_t rss_delta_kb = (int64_t)rss_kb - (int64_t)g_baseline_rss_kb;
    out->rss_delta_mb = rss_delta_kb / 1024;
}

#else
// Non-Linux: stub implementation
void hak_get_kpi(hak_kpi_t* out) {
    memset(out, 0, sizeof(hak_kpi_t));
}
#endif

// ============================================================================
// Internal Helpers
// ============================================================================

// Phase 6.8: All legacy profiling functions removed
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
// Replaced by ELO-based allocation (hakmem_elo.c)

// ============================================================================
// BigCache eviction callback
// ============================================================================

// BigCache eviction callback (called when cache is full and needs to evict)
static void bigcache_free_callback(void* ptr, size_t size) {
    (void)size; // Not used
    if (!ptr) return;

    // Get raw pointer and header
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;

    // Verify magic before accessing method field
    if (hdr->magic != HAKMEM_MAGIC) {
        fprintf(stderr, "[hakmem] BigCache eviction: invalid magic, fallback to free()\n");
        free(raw);
        return;
    }

    // Dispatch based on allocation method
    switch (hdr->method) {
    case ALLOC_METHOD_MALLOC:
        free(raw);
        break;

    case ALLOC_METHOD_MMAP:
        // Cold eviction: route through batch for large blocks
        // This completes Phase 6.3 architecture
#ifdef __linux__
        if (hdr->size >= BATCH_MIN_SIZE) {
            // Large blocks: use batch (deferred munmap + TLB optimization)
            hak_batch_add(raw, hdr->size);
        } else {
            // Small blocks: direct munmap (not worth batching)
            // Phase 6.11.1: Try whale cache first
            if (hkm_whale_put(raw, hdr->size) != 0) {
                // Whale cache full or not a whale: munmap
                madvise(raw, hdr->size, MADV_FREE); // Best-effort
                hkm_sys_munmap(raw, hdr->size);
            }
            // else: Successfully cached in whale cache (no munmap!)
        }
#else
        free(raw); // Fallback (should not happen)
#endif
        break;

    default:
        fprintf(stderr, "[hakmem] BigCache eviction: unknown method %d\n", hdr->method);
        free(raw); // Fallback
        break;
    }
}

// ============================================================================
// Public API
// ============================================================================

// Thread-safe one-time initialization
static void hak_init_impl(void);
static pthread_once_t g_init_once = PTHREAD_ONCE_INIT;

void hak_init(void) {
    (void)pthread_once(&g_init_once, hak_init_impl);
}

static void hak_init_impl(void) {
    g_initializing = 1;

    // Phase 6.X P0 FIX (2025-10-24): Initialize Box 3 (Syscall Layer) FIRST!
    // This MUST be called before ANY allocation (Tiny/Mid/Large/Learner)
    // dlsym() initializes function pointers to real libc (bypasses LD_PRELOAD)
    hkm_syscall_init();

    // NEW Phase 6.11.1: Initialize debug timing
    hkm_timing_init();

    // NEW Phase 6.11.1: Initialize whale fast-path cache
    hkm_whale_init();

    // NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style)
    mid_mt_init();

    // NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy)
    hak_config_init();

    // Phase 6.16: Initialize FrozenPolicy (SACS-3)
    hkm_policy_init();

    // Phase 6.15 P0.3: Configure EVO sampling from environment variable
    // HAKMEM_EVO_SAMPLE: 0=disabled (default), N=sample every 2^N calls
    // Example: HAKMEM_EVO_SAMPLE=10 → sample every 1024 calls
    //          HAKMEM_EVO_SAMPLE=16 → sample every 65536 calls
    char* evo_sample_str = getenv("HAKMEM_EVO_SAMPLE");
    if (evo_sample_str && atoi(evo_sample_str) > 0) {
        int freq = atoi(evo_sample_str);
        if (freq >= 64) {
            fprintf(stderr, "[hakmem] Warning: HAKMEM_EVO_SAMPLE=%d too large, using 63\n", freq);
            freq = 63;
        }
        g_evo_sample_mask = (1ULL << freq) - 1;
        HAKMEM_LOG("EVO sampling enabled: every 2^%d = %llu calls\n",
                   freq, (unsigned long long)(g_evo_sample_mask + 1));
    } else {
        g_evo_sample_mask = 0; // Disabled by default
        HAKMEM_LOG("EVO sampling disabled (HAKMEM_EVO_SAMPLE not set or 0)\n");
    }

#ifdef __linux__
    // Record baseline KPIs
    memset(g_latency_histogram, 0, sizeof(g_latency_histogram));
    g_latency_samples = 0;

    get_page_faults(&g_baseline_soft_pf, &g_baseline_hard_pf);
    g_baseline_rss_kb = get_rss_kb();

    HAKMEM_LOG("Baseline: soft_pf=%lu, hard_pf=%lu, rss=%lu KB\n",
               (unsigned long)g_baseline_soft_pf,
               (unsigned long)g_baseline_hard_pf,
               (unsigned long)g_baseline_rss_kb);
#endif

    HAKMEM_LOG("Initialized (PoC version)\n");
    HAKMEM_LOG("Sampling rate: 1/%d\n", SAMPLING_RATE);
    HAKMEM_LOG("Max sites: %d\n", MAX_SITES);

    // Bench preset: Tiny-only (disable non-essential subsystems)
    {
        char* bt = getenv("HAKMEM_BENCH_TINY_ONLY");
        if (bt && atoi(bt) != 0) {
            g_bench_tiny_only = 1;
        }
    }

    // Under LD_PRELOAD, enforce safer defaults for Tiny path unless overridden
    {
        char* ldpre = getenv("LD_PRELOAD");
        if (ldpre && strstr(ldpre, "libhakmem.so")) {
            g_ldpreload_mode = 1;
            // Default LD-safe mode if not set: 1 (Tiny-only)
            char* lds = getenv("HAKMEM_LD_SAFE");
            if (lds) { /* NOP used in wrappers */ } else { setenv("HAKMEM_LD_SAFE", "1", 0); }
            if (!getenv("HAKMEM_TINY_TLS_SLL")) {
                setenv("HAKMEM_TINY_TLS_SLL", "0", 0); // disable TLS SLL by default
            }
            if (!getenv("HAKMEM_TINY_USE_SUPERSLAB")) {
                setenv("HAKMEM_TINY_USE_SUPERSLAB", "0", 0); // disable SuperSlab path by default
            }
        }
    }

    // Runtime safety toggle
    char* safe_free_env = getenv("HAKMEM_SAFE_FREE");
    if (safe_free_env && atoi(safe_free_env) != 0) {
        g_strict_free = 1;
        HAKMEM_LOG("Strict free safety enabled (HAKMEM_SAFE_FREE=1)\n");
    } else {
        // Heuristic: if loaded via LD_PRELOAD, enable strict free by default
        char* ldpre = getenv("LD_PRELOAD");
        if (ldpre && strstr(ldpre, "libhakmem.so")) {
            g_ldpreload_mode = 1;
            g_strict_free = 1;
            HAKMEM_LOG("Strict free safety auto-enabled under LD_PRELOAD\n");
        }
    }

    // Invalid free logging toggle (default off to avoid spam under LD_PRELOAD)
    char* invlog = getenv("HAKMEM_INVALID_FREE_LOG");
    if (invlog && atoi(invlog) != 0) {
        g_invalid_free_log = 1;
        HAKMEM_LOG("Invalid free logging enabled (HAKMEM_INVALID_FREE_LOG=1)\n");
    }

    // Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead
    // Perf showed getenv() on hot path consumed 43.96% CPU time (26.41% strcmp + 17.55% getenv)
    char* inv = getenv("HAKMEM_INVALID_FREE");
    if (inv && strcmp(inv, "fallback") == 0) {
        g_invalid_free_mode = 0; // fallback mode: route invalid frees to libc
        HAKMEM_LOG("Invalid free mode: fallback to libc (HAKMEM_INVALID_FREE=fallback)\n");
    } else {
        // Under LD_PRELOAD, prefer safety: default to fallback unless explicitly overridden
        char* ldpre = getenv("LD_PRELOAD");
        if (ldpre && strstr(ldpre, "libhakmem.so")) {
            g_ldpreload_mode = 1;
            g_invalid_free_mode = 0;
            HAKMEM_LOG("Invalid free mode: fallback to libc (auto under LD_PRELOAD)\n");
        } else {
            g_invalid_free_mode = 1; // default: skip invalid-free check
            HAKMEM_LOG("Invalid free mode: skip check (default)\n");
        }
    }

    // NEW Phase 6.8: Feature-gated initialization (check g_hakem_config flags)
    if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
        hak_pool_init();
    }

    // NEW Phase 6.13: L2.5 LargePool (64KB-1MB allocations)
    hak_l25_pool_init();

    if (!g_bench_tiny_only && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE)) {
        hak_bigcache_init();
        hak_bigcache_set_free_callback(bigcache_free_callback);
    }

    if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
        hak_elo_init();
        // Phase 6.11.4 P0-2: Initialize cached strategy to default (strategy 0)
        atomic_store(&g_cached_strategy_id, 0);
    }

    if (!g_bench_tiny_only && HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE)) {
        hak_batch_init();
    }

    if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_EVOLUTION)) {
        hak_evo_init();
    }

    if (!g_bench_tiny_only) {
        // Phase 6.16: Initialize ACE stats (sampling) – default off
        hkm_ace_stats_init();
        // Phase 6.16: Initialize sampling profiler – default off
        hkm_prof_init();
        // Size histogram sampling (optional)
        hkm_size_hist_init();
    }

    if (!g_bench_tiny_only) {
        // Start CAP learner (optional, env-gated)
        hkm_learner_init();
    }

    // NEW Phase 6.10: Site Rules (MVP: always ON)
    // MT note: default disabled unless HAKMEM_SITE_RULES=1
    char* sr_env = getenv("HAKMEM_SITE_RULES");
    g_site_rules_enabled = (sr_env && atoi(sr_env) != 0);
    if (!g_bench_tiny_only && g_site_rules_enabled) {
        hak_site_rules_init();
    }

    // NEW Phase 6.12: Tiny Pool (≤1KB allocations)
    hak_tiny_init();

    // Env: optional Tiny flush on exit (memory efficiency evaluation)
    {
        char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT");
        if (tf && atoi(tf) != 0) {
            g_flush_tiny_on_exit = 1;
        }
        char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG");
        if (ud && atoi(ud) != 0) {
            g_ultra_debug_on_exit = 1;
        }
        // Register exit hook if any of the debug/flush toggles are on
        // or when path debug is requested.
        if (g_flush_tiny_on_exit || g_ultra_debug_on_exit || getenv("HAKMEM_TINY_PATH_DEBUG")) {
            atexit(hak_flush_tiny_exit);
        }
    }

    // NEW Phase ACE: Initialize Adaptive Control Engine
    hkm_ace_controller_init(&g_ace_controller);
    if (g_ace_controller.enabled) {
        hkm_ace_controller_start(&g_ace_controller);
        HAKMEM_LOG("ACE Learning Layer enabled and started\n");
    }

    g_initializing = 0;
    // Publish that initialization is complete
    atomic_thread_fence(memory_order_seq_cst);
    g_initialized = 1;
}

void hak_shutdown(void) {
    if (!g_initialized) return;

    // NEW Phase ACE: Shutdown Adaptive Control Engine FIRST (before other subsystems)
    hkm_ace_controller_destroy(&g_ace_controller);

    if (!g_bench_tiny_only) {
        printf("[hakmem] Shutting down...\n");
        hak_print_stats();
    }

    // NEW Phase 6.9: Shutdown L2 Pool
    if (!g_bench_tiny_only) hak_pool_shutdown();

    // NEW Phase 6.13: Shutdown L2.5 LargePool
    if (!g_bench_tiny_only) hak_l25_pool_shutdown();

    // NEW: Shutdown BigCache Box
    if (!g_bench_tiny_only) hak_bigcache_shutdown();

    // NEW Phase 6.2: Shutdown ELO Strategy Selection
    if (!g_bench_tiny_only) hak_elo_shutdown();

    // NEW Phase 6.3: Shutdown madvise Batching
    if (!g_bench_tiny_only) hak_batch_shutdown();

    // NEW Phase 6.10: Shutdown Site Rules
    if (!g_bench_tiny_only) hak_site_rules_shutdown();

    // NEW Phase 6.12: Print Tiny Pool statistics
    if (!g_bench_tiny_only) hak_tiny_print_stats();

    // NEW Phase 6.11.1: Print whale cache statistics
    if (!g_bench_tiny_only) {
        hkm_whale_dump_stats();
        // NEW Phase 6.11.1: Shutdown whale cache
        hkm_whale_shutdown();
    }

    // NEW Phase 6.11.1: Shutdown debug timing (must be last!)
    if (!g_bench_tiny_only) hkm_timing_shutdown();

    // Phase 6.16: Dump sampling profiler
    if (!g_bench_tiny_only) hkm_prof_shutdown();

    // Stop learner thread
    if (!g_bench_tiny_only) hkm_learner_shutdown();

    // Stop Tiny background components (e.g., Intelligence Engine)
    hak_tiny_shutdown();

    g_initialized = 0;
}

// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
__attribute__((always_inline))
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0); // Profiling (build-time gated)
#endif

    if (!g_initialized) hak_init();

    // ========================================================================
    // Phase 6-3: Tiny Fast Path - DISABLED (using Box Theory instead at line ~712)
    // Reason: Avoid double fast path overhead
    // Box Theory (HAKMEM_TINY_PHASE6_BOX_REFACTOR) provides optimized 3-4 instruction path
    // ========================================================================

    uintptr_t site_id = (uintptr_t)site;

    // Phase 6.12: Tiny Pool fast-path (≤1KB allocations)
    // Priority: highest for tiny allocations (most frequent)
    if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_tiny);
#endif
        void* tiny_ptr = NULL;

#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
        // Phase 6-1.7: Box Theory Refactoring (3-4 instruction fast path)
        tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
        // Phase 6-1.5: Ultra Simple (alignment guessing)
        tiny_ptr = hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
        // Phase 6-1.6: Metadata header
        tiny_ptr = hak_tiny_alloc_metadata(size);
#else
        // Default: Standard Tiny path
        tiny_ptr = hak_tiny_alloc(size);
#endif

#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_TINY_ALLOC, t_tiny);
#endif
        if (tiny_ptr) {
            // NEW Phase ACE: Track allocation for learning
            hkm_ace_track_alloc();
            // Tiny Pool hit! Return immediately (no header needed)
            return tiny_ptr;
        }
        // DEBUG: Tiny Pool returned NULL - fallback to other paths
        static int log_count = 0;
        if (log_count < 3) {
            fprintf(stderr, "[DEBUG] tiny_alloc(%zu) returned NULL, falling back\n", size);
            log_count++;
        }
        // Tiny Pool miss: fallback to other paths below
    }

    // Record size histogram (sampling) — moved after Tiny fast-path to
    // keep hottest path minimal. Tiny hits skip histogram to reduce overhead.
    hkm_size_hist_record(size);

    // Phase Hybrid: Mid Range MT fast-path (8-32KB allocations)
    // Priority: second highest (after Tiny Pool)
    // Uses mimalloc-style per-thread segments for optimal MT performance
    if (__builtin_expect(mid_is_in_range(size), 0)) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_mid);
#endif
        void* mid_ptr = mid_mt_alloc(size);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_POOL_GET, t_mid);
#endif
        if (mid_ptr) {
            // Mid MT hit! Return immediately (no header, lock-free)
            return mid_ptr;
        }
        // Mid MT miss: fallback to other paths below (should be rare)
    }

    // Phase 6.11.4 P0-1 & P0-2: Compile-time guard + cached strategy update
    // Phase 6.15 P0.3: Restored with environment variable control (default disabled)
#if HAKMEM_FEATURE_EVOLUTION
    // Only sample if enabled via HAKMEM_EVO_SAMPLE environment variable
    if (g_evo_sample_mask > 0) {
        static _Atomic uint64_t tick_counter = 0;
        if ((atomic_fetch_add(&tick_counter, 1) & g_evo_sample_mask) == 0) {
            struct timespec now;
            clock_gettime(CLOCK_MONOTONIC, &now);
            uint64_t now_ns = now.tv_sec * 1000000000ULL + now.tv_nsec;

            // P0-2: Update cached strategy when window closes
            if (hak_evo_tick(now_ns)) {
                // Window closed, update cached strategy
                int new_strategy = hak_elo_select_strategy();
                atomic_store(&g_cached_strategy_id, new_strategy);
            }
        }
    }
#endif

    // Phase 6.11.4 P0-2: Always use cached strategy (LEARN/FROZEN/CANARY all use same path)
    size_t threshold;

    if (HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
        // ELO enabled: use cached strategy (updated by hak_evo_tick)
        int strategy_id = atomic_load(&g_cached_strategy_id);
        threshold = hak_elo_get_threshold(strategy_id);
    } else {
        // ELO disabled: use default threshold (2MB - mimalloc's large threshold)
        threshold = 2097152; // 2MB
    }

    // Phase SACS-3: BigCache only for very large blocks (>= threshold)
    if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && size >= threshold) {
        void* cached_ptr = NULL;
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_bc);
#endif
        if (hak_bigcache_try_get(size, site_id, &cached_ptr)) {
#if HAKMEM_DEBUG_TIMING
            HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
#endif
            // Cache hit! Return immediately
            return cached_ptr;
        }
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
#endif
    }

    // Phase SACS-3: No Site Rules in tier selection (size-only decision)

    // Phase 6.16 SACS-3: L1 via ACE unified path
    if (size > TINY_MAX_SIZE && size < threshold) {
        const FrozenPolicy* pol = hkm_policy_get();
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_ace);
#endif
        void* l1 = hkm_ace_alloc(size, site_id, pol);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_POOL_GET, t_ace);
#endif
        if (l1) return l1;
    }

    // Phase SACS-3: For < threshold, prefer malloc; for >= threshold prefer mmap
    void* ptr;
    if (size >= threshold) {
        // Large allocation (L2): use mmap (enables batch madvise)
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_mmap);
#endif
        ptr = hak_alloc_mmap_impl(size);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_SYSCALL_MMAP, t_mmap);
#endif
    } else {
        // Small/medium allocation (L0/L1): use malloc (faster for <2MB)
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_malloc);
#endif
        ptr = hak_alloc_malloc_impl(size);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_FALLBACK_MALLOC, t_malloc);
#endif
    }

    if (!ptr) return NULL;

    // NEW Phase 6.5: Record allocation size for distribution signature (gated)
    if (g_evo_sample_mask > 0) {
        hak_evo_record_size(size);
    }

    // NEW: Set alloc_site and class_bytes in header (for BigCache Phase 2)
    AllocHeader* hdr = (AllocHeader*)((char*)ptr - HEADER_SIZE);

    // Verify magic (fail-fast if header corrupted)
    if (hdr->magic != HAKMEM_MAGIC) {
        fprintf(stderr, "[hakmem] ERROR: Invalid magic in allocated header!\n");
        return ptr; // Return anyway, but log error
    }

    // Set allocation site (for per-site cache reuse)
    hdr->alloc_site = site_id;

    // Set size class for caching (L2 only → threshold class)
    if (size >= threshold) {
        hdr->class_bytes = threshold; // cacheable at L2 threshold
    } else {
        hdr->class_bytes = 0; // Not cacheable
    }

#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_ALLOC, t0); // Profiling (build-time gated)
#endif
    return ptr;
}

// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0); // Profiling (build-time gated)
#endif

    (void)site; // Not used yet (will be used in BigCache Phase 2)
    (void)size; // Size stored in header

    if (!ptr) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }

    // OPTIMIZATION PHASE 2+1 (2025-11-01): Check Tiny Pool FIRST
    // Phase 2: Ultra-fast owner_slab with TLS range check (1-2 cycles negative lookup)
    // Phase 1: Reorder to avoid Mid MT mutex overhead for Tiny allocations (90% of mixed workload)
    //
    // Target: +12-13% improvement (16.24 → 18.4-18.6 M ops/sec)
    // - Tiny allocations (90%): Skip Mid MT mutex entirely → ~12% improvement
    // - Mid allocations (10%): Fast negative lookup from owner_slab → minimal overhead
    TinySlab* tiny_slab = hak_tiny_owner_slab(ptr);
    if (tiny_slab) {
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
        // Phase 6-1.7: Box Theory Refactoring (2-3 instruction fast path)
        // Box 6 handles both same-thread (fast) and cross-thread (remote) internally
        hak_tiny_free_fast_wrapper(ptr);
        return;
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
        // Phase 6-1.5: Only use ultra-simple free on same-thread pointers.
        // Cross-thread frees must go through the full tiny free path
        // to ensure proper remote-queue handling and slab reuse.
        pthread_t self_pt = pthread_self();
        if (__builtin_expect(pthread_equal(tiny_slab->owner_tid, self_pt), 1)) {
            hak_tiny_free_ultra_simple(ptr);
            return;
        }
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
        // Phase 6-1.6: Metadata header
        hak_tiny_free_metadata(ptr);
        return;
#endif
        // Fallback: full tiny free (handles cross-thread case correctly)
        hak_tiny_free(ptr);
        return;
    }

    // Phase Hybrid: Mid Range MT check (8-32KB, headerless)
    {
        size_t mid_block_size = 0;
        int mid_class_idx = 0;

        // First check if ptr is in current thread's segment (fast path)
        for (int i = 0; i < MID_NUM_CLASSES; i++) {
            MidThreadSegment* seg = &g_mid_segments[i];
            if (seg->chunk_base && ptr >= seg->chunk_base && ptr < seg->end) {
                *(void**)ptr = seg->free_list;
                seg->free_list = ptr;
                seg->used_count--;
                return;
            }
        }

        // Not in current thread's segment - try registry (mutex + binary search)
        if (mid_registry_lookup(ptr, &mid_block_size, &mid_class_idx)) {
            mid_mt_free(ptr, mid_block_size);
            return;
        }
    }

    // DISABLED: SuperSlab Registry lookup causes false positives
    // Problem: L25 allocations aligned to 1MB boundary are misidentified as SuperSlabs
    // causing crashes when checking magic number on unmapped/invalid memory
    // TODO: Fix SuperSlab registry to avoid false positives (descriptor-based check?)
#if 0
    SuperSlab* ss = hak_super_lookup(ptr);
    if (ss) {
        hak_tiny_free(ptr);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }
#endif

    // Mid Pool headerless fast route: use page descriptor before header read
    {
        extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
        extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t mid_sz = 0;
        if (hak_pool_mid_lookup(ptr, &mid_sz)) {
            // For Mid, header read is unnecessary; free directly via pool.
            hak_pool_free_fast(ptr, (uintptr_t)site);
#if HAKMEM_DEBUG_TIMING
            HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
            return;
        }
    }

    // L2.5 headerless route: use page descriptor before header read
    {
        extern int hak_l25_lookup(void* ptr, size_t* out_size);
        extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t l25_sz = 0;
        if (hak_l25_lookup(ptr, &l25_sz)) {
            // Stats (optional): count as large free
            hkm_ace_stat_large_free();
            hak_l25_pool_free_fast(ptr, (uintptr_t)site);
#if HAKMEM_DEBUG_TIMING
            HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
            return;
        }
    }

    // NEW Phase 6.5: Measure free latency (start timing)
    // Gate by EVO sampling mask to avoid per-op overhead when disabled
    int _do_evo = (g_evo_sample_mask > 0);
    struct timespec start_time, end_time;
    if (_do_evo) {
        clock_gettime(CLOCK_MONOTONIC, &start_time);
    }

    // Helper macro to record latency before returning (build-time gated timing)
#if HAKMEM_DEBUG_TIMING
#define RECORD_FREE_LATENCY() do { \
        if (_do_evo) { \
            clock_gettime(CLOCK_MONOTONIC, &end_time); \
            uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \
                          (end_time.tv_nsec - start_time.tv_nsec); \
            hak_evo_record_latency((double)ns); \
            if (hak_evo_is_canary()) { \
                hak_evo_record_canary_result(0, (double)ns); \
            } \
        } \
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0); \
    } while(0)
#else
#define RECORD_FREE_LATENCY() do { \
        if (_do_evo) { \
            clock_gettime(CLOCK_MONOTONIC, &end_time); \
            uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \
                          (end_time.tv_nsec - start_time.tv_nsec); \
            hak_evo_record_latency((double)ns); \
            if (hak_evo_is_canary()) { \
                hak_evo_record_canary_result(0, (double)ns); \
            } \
        } \
    } while(0)
#endif

    // Get raw pointer (before header)
    void* raw = (char*)ptr - HEADER_SIZE;

#ifdef __linux__
    if (g_strict_free) {
        // Safety: ensure header address is mapped before touching it (optional)
        long _ps = sysconf(_SC_PAGESIZE);
        void* _pg = (void*)((uintptr_t)raw & ~((uintptr_t)_ps - 1));
        unsigned char _vec;
        if (mincore(_pg, (size_t)_ps, &_vec) != 0) {
            // Not a valid mapped region → fallback directly to libc free
            extern void __libc_free(void*);
            __libc_free(ptr);
            RECORD_FREE_LATENCY();
            return;
        }
    }
#endif

    // Read header
    AllocHeader* hdr = (AllocHeader*)raw;

    // NEW: Verify magic (fail-fast if corrupted or not from hakmem)
    if (hdr->magic != HAKMEM_MAGIC) {
        if (g_invalid_free_log) {
            fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X) - possible corruption or non-hakmem pointer\n",
                    hdr->magic, HAKMEM_MAGIC);
        }
        // Phase 7.4: Use cached mode (eliminates 44% CPU overhead from getenv on hot path!)
        // OLD CODE (44% CPU time!): const char* inv = getenv("HAKMEM_INVALID_FREE");
        //                           if (inv && strcmp(inv, "fallback") == 0) mode_skip = 0;
        int mode_skip = g_invalid_free_mode; // 1 = skip, 0 = fallback to libc
        if (mode_skip) {
            // Skip freeing unknown pointer to avoid abort (possible mmap region). Log only.
            RECORD_FREE_LATENCY();
            return;
        } else {
            fprintf(stderr, "[hakmem] Attempting fallback to system free()...\n");
            extern void __libc_free(void*);
            __libc_free(ptr);
            RECORD_FREE_LATENCY();
            return;
        }
    }

    // Phase SACS-3: BigCache put only for L2 (class_bytes >= 2MB)
    if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->class_bytes >= 2097152) {
        // Pass actual allocated size (hdr->size), not class_bytes!
        // This prevents buffer overflow when BigCache returns undersized blocks
        if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) {
            RECORD_FREE_LATENCY();
            return; // Successfully cached, skip actual free
        }
    }

    // Phase 6.9.1: Pool allocations are now handled via header method
    // (no separate detection needed, just dispatch on method)

    // Dispatch to correct free function
    switch (hdr->method) {
    case ALLOC_METHOD_POOL:
        // Phase 6.9.1: Pool allocation - return to pool
        if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
            // Stats: record free in ACE L1 Mid
            hkm_ace_stat_mid_free();
            hak_pool_free(ptr, hdr->size, hdr->alloc_site);
        } else {
            // Pool disabled, shouldn't happen (fail-fast)
            fprintf(stderr, "[hakmem] ERROR: POOL allocation but POOL feature disabled!\n");
        }
        RECORD_FREE_LATENCY();
        return;

    case ALLOC_METHOD_L25_POOL:
        // Phase 6.13: L2.5 Pool allocation - return to pool
        hkm_ace_stat_large_free();
        hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
        RECORD_FREE_LATENCY();
        return;

    case ALLOC_METHOD_MALLOC:
        free(raw);
        break;

    case ALLOC_METHOD_MMAP:
        // Phase 6.4 P1: Apply free policy (Hot/Warm/Cold)
        if (g_hakem_config.free_policy == FREE_POLICY_KEEP) {
            // KEEP: do nothing (retain the VA; no madvise either)
            RECORD_FREE_LATENCY();
            return;
        } else if (g_hakem_config.free_policy == FREE_POLICY_ADAPTIVE) {
            // ADAPTIVE: classify the block as Hot/Warm/Cold
            FreeThermal thermal = hak_classify_thermal(hdr->size);

            switch (thermal) {
            case FREE_THERMAL_HOT:
                // HOT (< 1MB): do nothing (likely to be reused soon)
                RECORD_FREE_LATENCY();
                return;

            case FREE_THERMAL_WARM:
                // WARM (1-2MB): MADV_FREE (no munmap; only return physical pages)
#ifdef __linux__
                madvise(raw, hdr->size, MADV_FREE);
#endif
                RECORD_FREE_LATENCY();
                return;

            case FREE_THERMAL_COLD:
                // COLD (>= 2MB): batch (Phase 6.8: feature-gated)
                if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                    hak_batch_add(raw, hdr->size);
                    RECORD_FREE_LATENCY();
                    return;
                }
                // Small blocks: immediate munmap
#ifdef __linux__
                // Phase 6.11.1: Try whale cache first
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    hkm_sys_munmap(raw, hdr->size);
                }
#else
                free(raw);
#endif
                break;
            }
        } else {
            // BATCH (default): Phase 6.8 feature-gated
            // - Keep VA mapped for reuse (mimalloc strategy)
            // - Only MADV_FREE on batch flush (release physical pages)
            // - munmap happens on cold eviction only
            if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                hak_batch_add(raw, hdr->size);
                RECORD_FREE_LATENCY();
                return;
            }

            // Small blocks: immediate munmap (not worth batching)
#ifdef __linux__
            // Phase 6.11.1: Try whale cache first
            if (hkm_whale_put(raw, hdr->size) != 0) {
                hkm_sys_munmap(raw, hdr->size);
            }
#else
            free(raw);
#endif
        }
        break;

    default:
        fprintf(stderr, "[hakmem] ERROR: Unknown allocation method: %d\n", hdr->method);
        break;
    }

    // Record latency for all paths that reach here
    RECORD_FREE_LATENCY();

#undef RECORD_FREE_LATENCY
}


void hak_print_stats(void) {
    printf("\n========================================\n");
    printf("hakmem ELO-based Profiling Statistics\n");
    printf("========================================\n");

    printf("\nOptimization Stats:\n");
    printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);

    hak_elo_print_leaderboard();

    printf("========================================\n\n");
}

// ============================================================================
// Phase 6.15 P0: Standard C Library Wrappers (for LD_PRELOAD)
// ============================================================================

#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD

// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
void* malloc(size_t size) {
    extern void* __libc_malloc(size_t);
    return __libc_malloc(size);
}

void free(void* ptr) {
    if (!ptr) return;
    extern void __libc_free(void*);
    __libc_free(ptr);
}

void* calloc(size_t nmemb, size_t size) {
    extern void* __libc_calloc(size_t, size_t);
    return __libc_calloc(nmemb, size);
}

void* realloc(void* ptr, size_t size) {
    extern void* __libc_realloc(void*, size_t);
    return __libc_realloc(ptr, size);
}

#else

// malloc wrapper - intercepts system malloc() calls
// Debug counters for malloc routing (Phase 6-6 analysis)
__thread uint64_t g_malloc_total_calls = 0;
__thread uint64_t g_malloc_tiny_size_match = 0;
__thread uint64_t g_malloc_fast_path_tried = 0;
__thread uint64_t g_malloc_fast_path_null = 0;
__thread uint64_t g_malloc_slow_path = 0;

void* malloc(size_t size) {
    g_malloc_total_calls++;

    // ========================================================================
    // Phase 6-5: ULTRA-FAST PATH FIRST (mimalloc/tcache style)
    // Phase 6-1.7: Box Theory Integration - Simplified path
    // ========================================================================
    // NOTE: Disabled duplicate HAKMEM_TINY_FAST_PATH here to avoid double overhead.
    // Box Theory handles fast path in hak_alloc_at() at line ~712.
    // This eliminates redundant checks and allows Box Theory's optimized path.
    // ========================================================================
    // SLOW PATH: All guards moved here (only executed on fast path miss)
    // ========================================================================
    g_malloc_slow_path++;

    // Recursion guard: if we're inside the allocator already, fall back to libc
    if (g_hakmem_lock_depth > 0) {
        // Nested call detected - fallback to system malloc
        extern void* __libc_malloc(size_t);
        return __libc_malloc(size);
    }

    // Initialization guard: during hak_init() bootstrap, use libc directly
    if (__builtin_expect(g_initializing != 0, 0)) {
        extern void* __libc_malloc(size_t);
        return __libc_malloc(size);
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        extern void* __libc_malloc(size_t);
        return __libc_malloc(size);
    }

    // LD safe modes: 1=tiny-only, 2=pass-through
    // Determine LD_PRELOAD mode early (before hak_init) to avoid misrouting
    int ld_mode = hak_ld_env_mode();
    if (ld_mode) {
        // Avoid mixing with jemalloc-managed programs (e.g., redis)
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            extern void* __libc_malloc(size_t);
            return __libc_malloc(size);
        }
        // Before hakmem initialization completes, always delegate to libc
        if (!g_initialized || g_initializing) {
            extern void* __libc_malloc(size_t);
            return __libc_malloc(size);
        }
        const char* lds = getenv("HAKMEM_LD_SAFE");
        int mode = (lds ? atoi(lds) : 1);
        if (mode >= 2 || size > TINY_MAX_SIZE) {
            extern void* __libc_malloc(size_t);
            return __libc_malloc(size);
        }
    }

    // First-level call: enter allocator (no global lock)
    g_hakmem_lock_depth++;
    void* ptr = hak_alloc_at(size, HAK_CALLSITE());
    g_hakmem_lock_depth--;
    return ptr;
}

// free wrapper - intercepts system free() calls
void free(void* ptr) {
    if (!ptr) return; // NULL check

    // Recursion guard: if we're inside the allocator already, fall back to libc
    if (g_hakmem_lock_depth > 0) {
        // Nested call detected - fallback to system free
        extern void __libc_free(void*);
        __libc_free(ptr);
        return;
    }

    if (__builtin_expect(g_initializing != 0, 0)) {
        extern void __libc_free(void*);
        __libc_free(ptr);
        return;
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        extern void __libc_free(void*);
        __libc_free(ptr);
        return;
    }

    // In LD_PRELOAD mode, before hakmem initialization completes, always delegate
    {
        if (hak_ld_env_mode()) {
            if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
                extern void __libc_free(void*);
                __libc_free(ptr);
                return;
            }
            if (!g_initialized || g_initializing) {
                extern void __libc_free(void*);
                __libc_free(ptr);
                return;
            }
        }
    }

    // ========================================================================
    // Phase 6 Fast Path: Ultra-Simple Free (when enabled)
    // ========================================================================
    // This bypasses free.part.0 complexity (38.43% overhead in perf analysis)
    // - free.part.0: 15.83% → eliminated!
    // - mid_lookup: 9.55% → eliminated for tiny!
    // - pthread locks: 8.81% → eliminated!
    // Two variants:
    //   Phase 6-1.5: Alignment guessing (3-4 instructions, 235 M ops/sec)
    //   Phase 6-1.6: Metadata header (1-2 instructions, ~480 M ops/sec expected)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
    g_hakmem_lock_depth++;
    hak_tiny_free_ultra_simple(ptr);
    g_hakmem_lock_depth--;
    return;
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
    g_hakmem_lock_depth++;
    hak_tiny_free_metadata(ptr);
    g_hakmem_lock_depth--;
    return;
#endif
    // ========================================================================

    g_hakmem_lock_depth++;
    hak_free_at(ptr, 0, HAK_CALLSITE());
    g_hakmem_lock_depth--;
}

// calloc wrapper - intercepts system calloc() calls
void* calloc(size_t nmemb, size_t size) {
    // Recursion guard
    if (g_hakmem_lock_depth > 0) {
        // Nested call detected - fallback to system calloc
        extern void* __libc_calloc(size_t, size_t);
        return __libc_calloc(nmemb, size);
    }

    if (__builtin_expect(g_initializing != 0, 0)) {
        extern void* __libc_calloc(size_t, size_t);
        return __libc_calloc(nmemb, size);
    }

    // Overflow check before any multiplication
    if (size != 0 && nmemb > (SIZE_MAX / size)) {
        errno = ENOMEM;
        return NULL;
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        extern void* __libc_calloc(size_t, size_t);
        return __libc_calloc(nmemb, size);
    }

    // Determine LD_PRELOAD mode early (before hak_init)
    int ld_mode = hak_ld_env_mode();
    if (ld_mode) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            extern void* __libc_calloc(size_t, size_t);
            return __libc_calloc(nmemb, size);
        }
        if (!g_initialized || g_initializing) {
            extern void* __libc_calloc(size_t, size_t);
            return __libc_calloc(nmemb, size);
        }
        const char* lds = getenv("HAKMEM_LD_SAFE");
        int mode = (lds ? atoi(lds) : 1);
        size_t total = nmemb * size; // safe: overflow checked above
        if (mode >= 2 || total > TINY_MAX_SIZE) {
            extern void* __libc_calloc(size_t, size_t);
            return __libc_calloc(nmemb, size);
        }
    }

    g_hakmem_lock_depth++;
    size_t total_size = nmemb * size; // safe: overflow checked above
    void* ptr = hak_alloc_at(total_size, HAK_CALLSITE());

    if (ptr) {
        memset(ptr, 0, total_size); // calloc zeros memory
    }

    g_hakmem_lock_depth--;
    return ptr;
}

// realloc wrapper - intercepts system realloc() calls
void* realloc(void* ptr, size_t size) {
    // Recursion guard
    if (g_hakmem_lock_depth > 0) {
        // Nested call detected - fallback to system realloc
        extern void* __libc_realloc(void*, size_t);
        return __libc_realloc(ptr, size);
    }

    if (__builtin_expect(g_initializing != 0, 0)) {
        extern void* __libc_realloc(void*, size_t);
        return __libc_realloc(ptr, size);
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        extern void* __libc_realloc(void*, size_t);
        return __libc_realloc(ptr, size);
    }

    // Determine LD_PRELOAD mode early (before hak_init)
    int ld_mode = hak_ld_env_mode();
    if (ld_mode) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            extern void* __libc_realloc(void*, size_t);
            return __libc_realloc(ptr, size);
        }
        if (!g_initialized || g_initializing) {
            extern void* __libc_realloc(void*, size_t);
            return __libc_realloc(ptr, size);
        }
        const char* lds = getenv("HAKMEM_LD_SAFE");
        int mode = (lds ? atoi(lds) : 1);
        // Pass-through mode, or resizing beyond Tiny range → route to libc
        if (mode >= 2 || size > TINY_MAX_SIZE) {
            extern void* __libc_realloc(void*, size_t);
            return __libc_realloc(ptr, size);
        }
        // Tiny-only safe mode: if the existing pointer is NOT Tiny-managed,
        // do not touch it — delegate to libc to avoid header mismatches.
        if (ptr != NULL && !hak_tiny_is_managed(ptr)) {
            extern void* __libc_realloc(void*, size_t);
            return __libc_realloc(ptr, size);
        }
    }

    g_hakmem_lock_depth++;
    void* new_ptr = NULL;

    if (!ptr) {
        // realloc(NULL, size) = malloc(size)
        new_ptr = hak_alloc_at(size, HAK_CALLSITE());
    } else if (size == 0) {
        // realloc(ptr, 0) = free(ptr)
        hak_free_at(ptr, 0, HAK_CALLSITE());
        new_ptr = NULL;
    } else {
        // Allocate new block
        new_ptr = hak_alloc_at(size, HAK_CALLSITE());

        if (new_ptr) {
            // Get old size from header
            void* raw = (char*)ptr - HEADER_SIZE;
            AllocHeader* hdr = (AllocHeader*)raw;

            if (hdr->magic == HAKMEM_MAGIC) {
                size_t old_size = hdr->size - HEADER_SIZE; // User-visible size
                size_t copy_size = (old_size < size) ? old_size : size;
                memcpy(new_ptr, ptr, copy_size);
            } else {
                // Invalid header, copy what we can (best effort)
                memcpy(new_ptr, ptr, size);
            }

            // Free old block
            hak_free_at(ptr, 0, HAK_CALLSITE());
        }
    }

    g_hakmem_lock_depth--;
    return new_ptr;
}

#endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD