Two optimizations to improve Larson benchmark performance:

1. **Option A: Fast Path Priority** (core/hakmem.c)
   - Move the HAKMEM_TINY_FAST_PATH check before all guard checks
   - Reduce the malloc() fast path from 8+ branches to 3 branches
   - Results: +42% ST, -20% MT (mixed results)

2. **LUT Optimization** (core/tiny_fastcache.h)
   - Replace the 11-branch linear search with an O(1) lookup table
   - Use size_to_class_lut[size >> 3] for fast size-to-class mapping
   - Results: +24% MT, -24% ST (MT-optimized tradeoff)

Benchmark results (Larson, 2 s, 8-128 B, 1024 chunks):
- Original: ST 0.498 M ops/s, MT 1.502 M ops/s
- LUT version: ST 0.377 M ops/s, MT 1.856 M ops/s

Analysis:
- ST regression: with a single thread the branch predictor learns the linear search pattern, so the search was already cheap and the LUT only adds a memory load
- MT improvement: the LUT avoids branch mispredictions after context switches
- Recommendation: keep the LUT for multi-threaded workloads

Related: LARSON_PERFORMANCE_ANALYSIS_2025_11_05.md
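For illustration, a minimal sketch of the size-to-class LUT idea described above. This is not the actual core/tiny_fastcache.h code: the class sizes, the `FC_*` names, and the `(size - 1) >> 3` indexing are assumptions made for the example (the commit note itself indexes with `size >> 3`).

```c
#include <stddef.h>
#include <stdint.h>

#define FC_NUM_CLASSES 11      /* assumed: matches the "11-branch" search */
#define FC_MAX_SIZE    1024    /* assumed tiny fast-path limit */

/* Illustrative, non-uniform class sizes (not hakmem's real table). */
static const uint16_t fc_class_size[FC_NUM_CLASSES] =
    {16, 32, 48, 64, 96, 128, 192, 256, 384, 512, 1024};

/* One entry per 8-byte bucket: 128 one-byte entries. */
static uint8_t size_to_class_lut[FC_MAX_SIZE / 8];

static void fc_lut_init(void) {
    int cls = 0;
    for (size_t i = 0; i < FC_MAX_SIZE / 8; i++) {
        size_t bucket_max = (i + 1) * 8;   /* largest size mapping to entry i */
        while (bucket_max > fc_class_size[cls]) cls++;
        size_to_class_lut[i] = (uint8_t)cls;
    }
}

/* Before: one compare/branch per class, mispredict-prone after context switches. */
static int fc_size_to_class_linear(size_t size) {
    for (int i = 0; i < FC_NUM_CLASSES; i++)
        if (size <= fc_class_size[i]) return i;
    return -1;  /* beyond the tiny fast path */
}

/* After: a single table load replaces the branch chain. */
static int fc_size_to_class_lut(size_t size) {
    if (size == 0 || size > FC_MAX_SIZE) return -1;
    return size_to_class_lut[(size - 1) >> 3];
}
```

The point is that the allocation fast path can go from a request size to a free-list index with one table load instead of walking a branch chain; avoiding those mispredictions is what the MT numbers above credit the LUT for.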
// hakmem.c - Minimal PoC Implementation
|
||
// Purpose: Verify call-site profiling concept
|
||
|
||
#define _GNU_SOURCE // For mincore, madvise on Linux
|
||
|
||
#include "hakmem.h"
|
||
#include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration
|
||
#include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers
|
||
#include "hakmem_bigcache.h" // NEW: BigCache Box
|
||
#include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
|
||
#include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
|
||
#include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3)
|
||
#include "hakmem_learner.h" // NEW: CAP auto-tuner (background)
|
||
#include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path)
|
||
#include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1)
|
||
#include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing
|
||
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
|
||
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
|
||
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
|
||
#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
|
||
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
|
||
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
|
||
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)
|
||
#include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3)
|
||
#include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5)
|
||
#include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing
|
||
#include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers
|
||
#include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
|
||
#include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler
|
||
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
|
||
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
|
||
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <stdio.h>
|
||
#include <time.h>
|
||
#include <dlfcn.h>
|
||
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
|
||
#include <pthread.h> // Phase 6.15: Threading primitives (recursion guard only)
|
||
#include <errno.h> // calloc overflow handling
|
||
|
||
// For mmap (Linux)
|
||
#ifdef __linux__
|
||
#include <sys/mman.h>
|
||
#include <unistd.h>
|
||
|
||
// MADV_FREE support (Linux kernel 4.5+)
|
||
#ifndef MADV_FREE
|
||
#define MADV_FREE 8 // Linux MADV_FREE
|
||
#endif
|
||
#endif
|
||
|
||
// ============================================================================
|
||
// Configuration
|
||
// ============================================================================
|
||
|
||
#define MAX_SITES 256 // Hash table size (power of 2)
|
||
#define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling)
|
||
#define HASH_MASK (MAX_SITES - 1)
|
||
|
||
// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
|
||
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
|
||
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h
|
||
|
||
|
||
// ============================================================================
|
||
// Global State
|
||
// ============================================================================
|
||
|
||
// NEW Phase ACE: Adaptive Control Engine
|
||
static struct hkm_ace_controller g_ace_controller;
|
||
|
||
static int g_initialized = 0;
|
||
static int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
|
||
int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
|
||
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
|
||
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time!
|
||
static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc
|
||
|
||
// Statistics
|
||
static uint64_t g_malloc_count = 0; // Used for optimization stats display
|
||
|
||
// Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick)
|
||
static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure)
|
||
|
||
// Phase 6.15 P0.3: EVO Sampling Control (environment variable)
|
||
static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls
|
||
|
||
// Phase 6.15 P1: Site Rules enable (env: HAKMEM_SITE_RULES=1 to enable)
|
||
static int g_site_rules_enabled = 0; // default off to avoid contention in MT
|
||
static int g_bench_tiny_only = 0; // bench preset: Tiny-only fast path
|
||
int g_ldpreload_mode = 0; // 1 when running via LD_PRELOAD=libhakmem.so
|
||
static int g_flush_tiny_on_exit = 0; // HAKMEM_TINY_FLUSH_ON_EXIT=1
|
||
static int g_ultra_debug_on_exit = 0; // HAKMEM_TINY_ULTRA_DEBUG=1
|
||
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
|
||
static int g_ldpre_env_cached = -1; // -1 = unknown, 0/1 cached
|
||
static inline int hak_ld_env_mode(void) {
|
||
if (g_ldpre_env_cached < 0) {
|
||
const char* ldpre = getenv("LD_PRELOAD");
|
||
g_ldpre_env_cached = (ldpre && strstr(ldpre, "libhakmem.so")) ? 1 : 0;
|
||
}
|
||
return g_ldpre_env_cached;
|
||
}
|
||
|
||
// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
|
||
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
|
||
static int g_force_libc_alloc = 1;
|
||
#else
|
||
static int g_force_libc_alloc = -1; // 1=force libc, 0=use hakmem, -1=uninitialized
|
||
#endif
|
||
static inline int hak_force_libc_alloc(void) {
|
||
if (g_force_libc_alloc < 0) {
|
||
const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
|
||
if (force && *force) {
|
||
g_force_libc_alloc = (atoi(force) != 0);
|
||
} else {
|
||
const char* wrap = getenv("HAKMEM_WRAP_TINY");
|
||
if (wrap && *wrap && atoi(wrap) == 0) {
|
||
g_force_libc_alloc = 1;
|
||
} else {
|
||
g_force_libc_alloc = 0;
|
||
}
|
||
}
|
||
}
|
||
return g_force_libc_alloc;
|
||
}
|
||
|
||
// LD_PRELOAD safety: avoid interposing when jemalloc is present
|
||
static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
|
||
static int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached
|
||
static inline int hak_jemalloc_loaded(void) {
|
||
if (g_jemalloc_loaded < 0) {
|
||
void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
|
||
if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
|
||
g_jemalloc_loaded = (h != NULL) ? 1 : 0;
|
||
if (h) dlclose(h);
|
||
}
|
||
return g_jemalloc_loaded;
|
||
}
|
||
static inline int hak_ld_block_jemalloc(void) {
|
||
if (g_ld_block_jemalloc < 0) {
|
||
const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
|
||
g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0);
|
||
}
|
||
return g_ld_block_jemalloc;
|
||
}
|
||
|
||
// ============================================================================
|
||
// Phase 6.15 P1: Remove global lock; keep recursion guard only
|
||
// ---------------------------------------------------------------------------
|
||
// We no longer serialize all allocations with a single global mutex.
|
||
// Instead, each submodule is responsible for its own fine‑grained locking.
|
||
// We keep a per‑thread recursion guard so that internal use of malloc/free
|
||
// within the allocator routes to libc (avoids infinite recursion).
|
||
//
|
||
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
|
||
// Box Theory - Layer 1 (API Layer):
|
||
// This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
|
||
// Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
|
||
static __thread int g_hakmem_lock_depth = 0; // 0 = outermost call
|
||
|
||
int hak_in_wrapper(void) {
|
||
return g_hakmem_lock_depth > 0; // Simple and correct!
|
||
}
|
||
|
||
// Initialization guard
|
||
static int g_initializing = 0;
|
||
int hak_is_initializing(void) { return g_initializing; }
|
||
|
||
// ============================================================================
|
||
// Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations
|
||
// ============================================================================
|
||
// Forward declarations for Phase 6 fast path variants
|
||
// Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc)
|
||
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
|
||
extern void* hak_tiny_alloc_ultra_simple(size_t size);
|
||
extern void hak_tiny_free_ultra_simple(void* ptr);
|
||
#endif
|
||
|
||
// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
|
||
#ifdef HAKMEM_TINY_PHASE6_METADATA
|
||
extern void* hak_tiny_alloc_metadata(size_t size);
|
||
extern void hak_tiny_free_metadata(void* ptr);
|
||
#endif
|
||
|
||
// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
|
||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
|
||
extern void hak_tiny_free_fast_wrapper(void* ptr);
|
||
#endif
|
||
|
||
static void hak_flush_tiny_exit(void) {
|
||
// Best-effort: flush Tiny magazines at process exit
|
||
if (g_flush_tiny_on_exit) {
|
||
hak_tiny_magazine_flush_all();
|
||
hak_tiny_trim();
|
||
}
|
||
if (g_ultra_debug_on_exit) {
|
||
hak_tiny_ultra_debug_dump();
|
||
}
|
||
// Path debug dump (optional): HAKMEM_TINY_PATH_DEBUG=1
|
||
hak_tiny_path_debug_dump();
|
||
// Extended counters (optional): HAKMEM_TINY_COUNTERS_DUMP=1
|
||
extern void hak_tiny_debug_counters_dump(void);
|
||
hak_tiny_debug_counters_dump();
|
||
}
|
||
|
||
// ============================================================================
|
||
// KPI Measurement (for UCB1) - NEW!
|
||
// ============================================================================
|
||
|
||
#ifdef __linux__
|
||
// Latency histogram (simple buckets for P50/P95/P99)
|
||
#define LATENCY_BUCKETS 100
|
||
static uint64_t g_latency_histogram[LATENCY_BUCKETS];
|
||
static uint64_t g_latency_samples = 0;
|
||
|
||
// Baseline page faults (at init)
|
||
static uint64_t g_baseline_soft_pf = 0;
|
||
static uint64_t g_baseline_hard_pf = 0;
|
||
static uint64_t g_baseline_rss_kb = 0;
|
||
|
||
// Get page faults from /proc/self/stat
|
||
static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf) {
|
||
FILE* f = fopen("/proc/self/stat", "r");
|
||
if (!f) {
|
||
*soft_pf = 0;
|
||
*hard_pf = 0;
|
||
return;
|
||
}
|
||
|
||
// Format: pid (comm) state ... minflt cminflt majflt cmajflt ...
|
||
// Fields: 1 2 3 ... 10(minflt) 11(cminflt) 12(majflt) 13(cmajflt)
|
||
unsigned long minflt = 0, majflt = 0;
|
||
unsigned long dummy;
|
||
char comm[256], state;
|
||
|
||
(void)fscanf(f, "%lu %s %c %lu %lu %lu %lu %lu %lu %lu %lu %lu",
|
||
&dummy, comm, &state, &dummy, &dummy, &dummy, &dummy, &dummy,
|
||
&dummy, &minflt, &dummy, &majflt);
|
||
|
||
fclose(f);
|
||
|
||
*soft_pf = minflt;
|
||
*hard_pf = majflt;
|
||
}
|
||
|
||
// Get RSS from /proc/self/statm (in KB)
|
||
static uint64_t get_rss_kb(void) {
|
||
FILE* f = fopen("/proc/self/statm", "r");
|
||
if (!f) return 0;
|
||
|
||
// Format: size resident shared text lib data dt
|
||
// We want 'resident' (field 2) in pages
|
||
unsigned long size, resident;
|
||
(void)fscanf(f, "%lu %lu", &size, &resident);
|
||
fclose(f);
|
||
|
||
long page_size = sysconf(_SC_PAGESIZE);
|
||
return (resident * page_size) / 1024; // Convert to KB
|
||
}
|
||
|
||
// NOTE: Latency measurement functions (currently unused, for future use)
|
||
/*
|
||
static inline uint64_t measure_latency_ns(void (*func)(void*), void* arg) {
|
||
struct timespec start, end;
|
||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||
|
||
func(arg); // Execute function
|
||
|
||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||
|
||
uint64_t ns = (end.tv_sec - start.tv_sec) * 1000000000ULL +
|
||
(end.tv_nsec - start.tv_nsec);
|
||
return ns;
|
||
}
|
||
|
||
static void record_latency(uint64_t ns) {
|
||
// Bucket: 0-10ns, 10-20ns, ..., 990-1000ns, 1000+ns
|
||
size_t bucket = ns / 10;
|
||
if (bucket >= LATENCY_BUCKETS) bucket = LATENCY_BUCKETS - 1;
|
||
|
||
g_latency_histogram[bucket]++;
|
||
g_latency_samples++;
|
||
}
|
||
*/
|
||
|
||
// Calculate percentile from histogram
|
||
static uint64_t calculate_percentile(double percentile) {
|
||
if (g_latency_samples == 0) return 0;
|
||
|
||
uint64_t target = (uint64_t)(g_latency_samples * percentile);
|
||
uint64_t cumulative = 0;
|
||
|
||
for (size_t i = 0; i < LATENCY_BUCKETS; i++) {
|
||
cumulative += g_latency_histogram[i];
|
||
if (cumulative >= target) {
|
||
return i * 10; // Return bucket midpoint (ns)
|
||
}
|
||
}
|
||
|
||
return (LATENCY_BUCKETS - 1) * 10;
|
||
}
|
||
|
||
// Implement hak_get_kpi()
|
||
void hak_get_kpi(hak_kpi_t* out) {
|
||
memset(out, 0, sizeof(hak_kpi_t));
|
||
|
||
// Latency (from histogram)
|
||
out->p50_alloc_ns = calculate_percentile(0.50);
|
||
out->p95_alloc_ns = calculate_percentile(0.95);
|
||
out->p99_alloc_ns = calculate_percentile(0.99);
|
||
|
||
// Page Faults (delta from baseline)
|
||
uint64_t soft_pf, hard_pf;
|
||
get_page_faults(&soft_pf, &hard_pf);
|
||
out->soft_page_faults = soft_pf - g_baseline_soft_pf;
|
||
out->hard_page_faults = hard_pf - g_baseline_hard_pf;
|
||
|
||
// RSS (delta from baseline, in MB)
|
||
uint64_t rss_kb = get_rss_kb();
|
||
int64_t rss_delta_kb = (int64_t)rss_kb - (int64_t)g_baseline_rss_kb;
|
||
out->rss_delta_mb = rss_delta_kb / 1024;
|
||
}
|
||
|
||
#else
|
||
// Non-Linux: stub implementation
|
||
void hak_get_kpi(hak_kpi_t* out) {
|
||
memset(out, 0, sizeof(hak_kpi_t));
|
||
}
|
||
#endif
|
||
|
||
// ============================================================================
|
||
// Internal Helpers
|
||
// ============================================================================
|
||
|
||
// Phase 6.8: All legacy profiling functions removed
|
||
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
|
||
// Replaced by ELO-based allocation (hakmem_elo.c)
|
||
|
||
// ============================================================================
|
||
// BigCache eviction callback
|
||
// ============================================================================
|
||
|
||
// BigCache eviction callback (called when cache is full and needs to evict)
|
||
static void bigcache_free_callback(void* ptr, size_t size) {
|
||
(void)size; // Not used
|
||
if (!ptr) return;
|
||
|
||
// Get raw pointer and header
|
||
void* raw = (char*)ptr - HEADER_SIZE;
|
||
AllocHeader* hdr = (AllocHeader*)raw;
|
||
|
||
// Verify magic before accessing method field
|
||
if (hdr->magic != HAKMEM_MAGIC) {
|
||
fprintf(stderr, "[hakmem] BigCache eviction: invalid magic, fallback to free()\n");
|
||
free(raw);
|
||
return;
|
||
}
|
||
|
||
// Dispatch based on allocation method
|
||
switch (hdr->method) {
|
||
case ALLOC_METHOD_MALLOC:
|
||
free(raw);
|
||
break;
|
||
|
||
case ALLOC_METHOD_MMAP:
|
||
// Cold eviction: route through batch for large blocks
|
||
// This completes Phase 6.3 architecture
|
||
#ifdef __linux__
|
||
if (hdr->size >= BATCH_MIN_SIZE) {
|
||
// Large blocks: use batch (deferred munmap + TLB optimization)
|
||
hak_batch_add(raw, hdr->size);
|
||
} else {
|
||
// Small blocks: direct munmap (not worth batching)
|
||
// Phase 6.11.1: Try whale cache first
|
||
if (hkm_whale_put(raw, hdr->size) != 0) {
|
||
// Whale cache full or not a whale: munmap
|
||
madvise(raw, hdr->size, MADV_FREE); // Best-effort
|
||
hkm_sys_munmap(raw, hdr->size);
|
||
}
|
||
// else: Successfully cached in whale cache (no munmap!)
|
||
}
|
||
#else
|
||
free(raw); // Fallback (should not happen)
|
||
#endif
|
||
break;
|
||
|
||
default:
|
||
fprintf(stderr, "[hakmem] BigCache eviction: unknown method %d\n", hdr->method);
|
||
free(raw); // Fallback
|
||
break;
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// Public API
|
||
// ============================================================================
|
||
|
||
// Thread-safe one-time initialization
|
||
static void hak_init_impl(void);
|
||
static pthread_once_t g_init_once = PTHREAD_ONCE_INIT;
|
||
|
||
void hak_init(void) {
|
||
(void)pthread_once(&g_init_once, hak_init_impl);
|
||
}
|
||
|
||
static void hak_init_impl(void) {
|
||
g_initializing = 1;
|
||
|
||
// Phase 6.X P0 FIX (2025-10-24): Initialize Box 3 (Syscall Layer) FIRST!
|
||
// This MUST be called before ANY allocation (Tiny/Mid/Large/Learner)
|
||
// dlsym() initializes function pointers to real libc (bypasses LD_PRELOAD)
|
||
hkm_syscall_init();
|
||
|
||
// NEW Phase 6.11.1: Initialize debug timing
|
||
hkm_timing_init();
|
||
|
||
// NEW Phase 6.11.1: Initialize whale fast-path cache
|
||
hkm_whale_init();
|
||
|
||
// NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style)
|
||
mid_mt_init();
|
||
|
||
// NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy)
|
||
hak_config_init();
|
||
|
||
// Phase 6.16: Initialize FrozenPolicy (SACS-3)
|
||
hkm_policy_init();
|
||
|
||
// Phase 6.15 P0.3: Configure EVO sampling from environment variable
|
||
// HAKMEM_EVO_SAMPLE: 0=disabled (default), N=sample every 2^N calls
|
||
// Example: HAKMEM_EVO_SAMPLE=10 → sample every 1024 calls
|
||
// HAKMEM_EVO_SAMPLE=16 → sample every 65536 calls
|
||
char* evo_sample_str = getenv("HAKMEM_EVO_SAMPLE");
|
||
if (evo_sample_str && atoi(evo_sample_str) > 0) {
|
||
int freq = atoi(evo_sample_str);
|
||
if (freq >= 64) {
|
||
fprintf(stderr, "[hakmem] Warning: HAKMEM_EVO_SAMPLE=%d too large, using 63\n", freq);
|
||
freq = 63;
|
||
}
|
||
g_evo_sample_mask = (1ULL << freq) - 1;
|
||
HAKMEM_LOG("EVO sampling enabled: every 2^%d = %llu calls\n",
|
||
freq, (unsigned long long)(g_evo_sample_mask + 1));
|
||
} else {
|
||
g_evo_sample_mask = 0; // Disabled by default
|
||
HAKMEM_LOG("EVO sampling disabled (HAKMEM_EVO_SAMPLE not set or 0)\n");
|
||
}
|
||
|
||
#ifdef __linux__
|
||
// Record baseline KPIs
|
||
memset(g_latency_histogram, 0, sizeof(g_latency_histogram));
|
||
g_latency_samples = 0;
|
||
|
||
get_page_faults(&g_baseline_soft_pf, &g_baseline_hard_pf);
|
||
g_baseline_rss_kb = get_rss_kb();
|
||
|
||
HAKMEM_LOG("Baseline: soft_pf=%lu, hard_pf=%lu, rss=%lu KB\n",
|
||
(unsigned long)g_baseline_soft_pf,
|
||
(unsigned long)g_baseline_hard_pf,
|
||
(unsigned long)g_baseline_rss_kb);
|
||
#endif
|
||
|
||
HAKMEM_LOG("Initialized (PoC version)\n");
|
||
HAKMEM_LOG("Sampling rate: 1/%d\n", SAMPLING_RATE);
|
||
HAKMEM_LOG("Max sites: %d\n", MAX_SITES);
|
||
|
||
// Bench preset: Tiny-only (disable non-essential subsystems)
|
||
{
|
||
char* bt = getenv("HAKMEM_BENCH_TINY_ONLY");
|
||
if (bt && atoi(bt) != 0) {
|
||
g_bench_tiny_only = 1;
|
||
}
|
||
}
|
||
|
||
// Under LD_PRELOAD, enforce safer defaults for Tiny path unless overridden
|
||
{
|
||
char* ldpre = getenv("LD_PRELOAD");
|
||
if (ldpre && strstr(ldpre, "libhakmem.so")) {
|
||
g_ldpreload_mode = 1;
|
||
// Default LD-safe mode if not set: 1 (Tiny-only)
|
||
char* lds = getenv("HAKMEM_LD_SAFE");
|
||
if (!lds) { setenv("HAKMEM_LD_SAFE", "1", 0); } // wrappers read HAKMEM_LD_SAFE at call time; keep any explicit value
|
||
if (!getenv("HAKMEM_TINY_TLS_SLL")) {
|
||
setenv("HAKMEM_TINY_TLS_SLL", "0", 0); // disable TLS SLL by default
|
||
}
|
||
if (!getenv("HAKMEM_TINY_USE_SUPERSLAB")) {
|
||
setenv("HAKMEM_TINY_USE_SUPERSLAB", "0", 0); // disable SuperSlab path by default
|
||
}
|
||
}
|
||
}
|
||
|
||
// Runtime safety toggle
|
||
char* safe_free_env = getenv("HAKMEM_SAFE_FREE");
|
||
if (safe_free_env && atoi(safe_free_env) != 0) {
|
||
g_strict_free = 1;
|
||
HAKMEM_LOG("Strict free safety enabled (HAKMEM_SAFE_FREE=1)\n");
|
||
} else {
|
||
// Heuristic: if loaded via LD_PRELOAD, enable strict free by default
|
||
char* ldpre = getenv("LD_PRELOAD");
|
||
if (ldpre && strstr(ldpre, "libhakmem.so")) {
|
||
g_ldpreload_mode = 1;
|
||
g_strict_free = 1;
|
||
HAKMEM_LOG("Strict free safety auto-enabled under LD_PRELOAD\n");
|
||
}
|
||
}
|
||
|
||
// Invalid free logging toggle (default off to avoid spam under LD_PRELOAD)
|
||
char* invlog = getenv("HAKMEM_INVALID_FREE_LOG");
|
||
if (invlog && atoi(invlog) != 0) {
|
||
g_invalid_free_log = 1;
|
||
HAKMEM_LOG("Invalid free logging enabled (HAKMEM_INVALID_FREE_LOG=1)\n");
|
||
}
|
||
|
||
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead
|
||
// Perf showed getenv() on hot path consumed 43.96% CPU time (26.41% strcmp + 17.55% getenv)
|
||
char* inv = getenv("HAKMEM_INVALID_FREE");
|
||
if (inv && strcmp(inv, "fallback") == 0) {
|
||
g_invalid_free_mode = 0; // fallback mode: route invalid frees to libc
|
||
HAKMEM_LOG("Invalid free mode: fallback to libc (HAKMEM_INVALID_FREE=fallback)\n");
|
||
} else {
|
||
// Under LD_PRELOAD, prefer safety: default to fallback unless explicitly overridden
|
||
char* ldpre = getenv("LD_PRELOAD");
|
||
if (ldpre && strstr(ldpre, "libhakmem.so")) {
|
||
g_ldpreload_mode = 1;
|
||
g_invalid_free_mode = 0;
|
||
HAKMEM_LOG("Invalid free mode: fallback to libc (auto under LD_PRELOAD)\n");
|
||
} else {
|
||
g_invalid_free_mode = 1; // default: skip invalid-free check
|
||
HAKMEM_LOG("Invalid free mode: skip check (default)\n");
|
||
}
|
||
}
|
||
|
||
// NEW Phase 6.8: Feature-gated initialization (check g_hakem_config flags)
|
||
if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
|
||
hak_pool_init();
|
||
}
|
||
|
||
// NEW Phase 6.13: L2.5 LargePool (64KB-1MB allocations)
|
||
hak_l25_pool_init();
|
||
|
||
if (!g_bench_tiny_only && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE)) {
|
||
hak_bigcache_init();
|
||
hak_bigcache_set_free_callback(bigcache_free_callback);
|
||
}
|
||
|
||
if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
|
||
hak_elo_init();
|
||
// Phase 6.11.4 P0-2: Initialize cached strategy to default (strategy 0)
|
||
atomic_store(&g_cached_strategy_id, 0);
|
||
}
|
||
|
||
if (!g_bench_tiny_only && HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE)) {
|
||
hak_batch_init();
|
||
}
|
||
|
||
if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_EVOLUTION)) {
|
||
hak_evo_init();
|
||
}
|
||
|
||
if (!g_bench_tiny_only) {
|
||
// Phase 6.16: Initialize ACE stats (sampling) – default off
|
||
hkm_ace_stats_init();
|
||
// Phase 6.16: Initialize sampling profiler – default off
|
||
hkm_prof_init();
|
||
// Size histogram sampling (optional)
|
||
hkm_size_hist_init();
|
||
}
|
||
|
||
if (!g_bench_tiny_only) {
|
||
// Start CAP learner (optional, env-gated)
|
||
hkm_learner_init();
|
||
}
|
||
|
||
// NEW Phase 6.10: Site Rules
// MT note: disabled by default (avoids contention); enable with HAKMEM_SITE_RULES=1
|
||
char* sr_env = getenv("HAKMEM_SITE_RULES");
|
||
g_site_rules_enabled = (sr_env && atoi(sr_env) != 0);
|
||
if (!g_bench_tiny_only && g_site_rules_enabled) {
|
||
hak_site_rules_init();
|
||
}
|
||
|
||
// NEW Phase 6.12: Tiny Pool (≤1KB allocations)
|
||
hak_tiny_init();
|
||
|
||
// Env: optional Tiny flush on exit (memory efficiency evaluation)
|
||
{
|
||
char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT");
|
||
if (tf && atoi(tf) != 0) {
|
||
g_flush_tiny_on_exit = 1;
|
||
}
|
||
char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG");
|
||
if (ud && atoi(ud) != 0) {
|
||
g_ultra_debug_on_exit = 1;
|
||
}
|
||
// Register exit hook if any of the debug/flush toggles are on
|
||
// or when path debug is requested.
|
||
if (g_flush_tiny_on_exit || g_ultra_debug_on_exit || getenv("HAKMEM_TINY_PATH_DEBUG")) {
|
||
atexit(hak_flush_tiny_exit);
|
||
}
|
||
}
|
||
|
||
// NEW Phase ACE: Initialize Adaptive Control Engine
|
||
hkm_ace_controller_init(&g_ace_controller);
|
||
if (g_ace_controller.enabled) {
|
||
hkm_ace_controller_start(&g_ace_controller);
|
||
HAKMEM_LOG("ACE Learning Layer enabled and started\n");
|
||
}
|
||
|
||
g_initializing = 0;
|
||
// Publish that initialization is complete
|
||
atomic_thread_fence(memory_order_seq_cst);
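// Note: g_initialized is a plain int, not atomic; the fence only orders the prior init
// writes before the flag store. A racy reader that still sees 0 simply re-enters
// hak_init(), which pthread_once() makes harmless.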
|
||
g_initialized = 1;
|
||
}
|
||
|
||
void hak_shutdown(void) {
|
||
if (!g_initialized) return;
|
||
|
||
// NEW Phase ACE: Shutdown Adaptive Control Engine FIRST (before other subsystems)
|
||
hkm_ace_controller_destroy(&g_ace_controller);
|
||
|
||
if (!g_bench_tiny_only) {
|
||
printf("[hakmem] Shutting down...\n");
|
||
hak_print_stats();
|
||
}
|
||
|
||
// NEW Phase 6.9: Shutdown L2 Pool
|
||
if (!g_bench_tiny_only) hak_pool_shutdown();
|
||
|
||
// NEW Phase 6.13: Shutdown L2.5 LargePool
|
||
if (!g_bench_tiny_only) hak_l25_pool_shutdown();
|
||
|
||
// NEW: Shutdown BigCache Box
|
||
if (!g_bench_tiny_only) hak_bigcache_shutdown();
|
||
|
||
// NEW Phase 6.2: Shutdown ELO Strategy Selection
|
||
if (!g_bench_tiny_only) hak_elo_shutdown();
|
||
|
||
// NEW Phase 6.3: Shutdown madvise Batching
|
||
if (!g_bench_tiny_only) hak_batch_shutdown();
|
||
|
||
// NEW Phase 6.10: Shutdown Site Rules
|
||
if (!g_bench_tiny_only) hak_site_rules_shutdown();
|
||
|
||
// NEW Phase 6.12: Print Tiny Pool statistics
|
||
if (!g_bench_tiny_only) hak_tiny_print_stats();
|
||
|
||
// NEW Phase 6.11.1: Print whale cache statistics
|
||
if (!g_bench_tiny_only) {
|
||
hkm_whale_dump_stats();
|
||
// NEW Phase 6.11.1: Shutdown whale cache
|
||
hkm_whale_shutdown();
|
||
}
|
||
|
||
// NEW Phase 6.11.1: Shutdown debug timing (must be last!)
|
||
if (!g_bench_tiny_only) hkm_timing_shutdown();
|
||
|
||
// Phase 6.16: Dump sampling profiler
|
||
if (!g_bench_tiny_only) hkm_prof_shutdown();
|
||
|
||
// Stop learner thread
|
||
if (!g_bench_tiny_only) hkm_learner_shutdown();
|
||
|
||
// Stop Tiny background components (e.g., Intelligence Engine)
|
||
hak_tiny_shutdown();
|
||
|
||
g_initialized = 0;
|
||
}
|
||
|
||
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
|
||
__attribute__((always_inline))
|
||
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t0); // Profiling (build-time gated)
|
||
#endif
|
||
|
||
if (!g_initialized) hak_init();
|
||
|
||
// ========================================================================
|
||
// Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path)
|
||
// ========================================================================
|
||
#ifdef HAKMEM_TINY_FAST_PATH
|
||
if (size <= TINY_FAST_THRESHOLD) {
|
||
// Ultra-simple TLS cache pop (bypasses Magazine/SuperSlab)
|
||
extern void* tiny_fast_alloc(size_t);
|
||
extern void tiny_fast_init(void);
|
||
extern __thread int g_tiny_fast_initialized;
|
||
|
||
if (__builtin_expect(!g_tiny_fast_initialized, 0)) {
|
||
tiny_fast_init();
|
||
}
|
||
|
||
void* ptr = tiny_fast_alloc(size);
|
||
if (ptr) return ptr;
|
||
// Fall through to slow path on failure
|
||
}
|
||
#endif
|
||
// ========================================================================
|
||
|
||
uintptr_t site_id = (uintptr_t)site;
|
||
|
||
// Phase 6.12: Tiny Pool fast-path (≤1KB allocations)
|
||
// Priority: highest for tiny allocations (most frequent)
|
||
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_tiny);
|
||
#endif
|
||
void* tiny_ptr = NULL;
|
||
|
||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
// Phase 6-1.7: Box Theory Refactoring (3-4 instruction fast path)
|
||
tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
|
||
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
|
||
// Phase 6-1.5: Ultra Simple (alignment guessing)
|
||
tiny_ptr = hak_tiny_alloc_ultra_simple(size);
|
||
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
|
||
// Phase 6-1.6: Metadata header
|
||
tiny_ptr = hak_tiny_alloc_metadata(size);
|
||
#else
|
||
// Default: Standard Tiny path
|
||
tiny_ptr = hak_tiny_alloc(size);
|
||
#endif
|
||
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_TINY_ALLOC, t_tiny);
|
||
#endif
|
||
if (tiny_ptr) {
|
||
// NEW Phase ACE: Track allocation for learning
|
||
hkm_ace_track_alloc();
|
||
// Tiny Pool hit! Return immediately (no header needed)
|
||
return tiny_ptr;
|
||
}
|
||
// DEBUG: Tiny Pool returned NULL - fallback to other paths
|
||
static int log_count = 0;
|
||
if (log_count < 3) {
|
||
fprintf(stderr, "[DEBUG] tiny_alloc(%zu) returned NULL, falling back\n", size);
|
||
log_count++;
|
||
}
|
||
// Tiny Pool miss: fallback to other paths below
|
||
}
|
||
|
||
// Record size histogram (sampling) — moved after Tiny fast-path to
|
||
// keep hottest path minimal. Tiny hits skip histogram to reduce overhead.
|
||
hkm_size_hist_record(size);
|
||
|
||
// Phase Hybrid: Mid Range MT fast-path (8-32KB allocations)
|
||
// Priority: second highest (after Tiny Pool)
|
||
// Uses mimalloc-style per-thread segments for optimal MT performance
|
||
if (__builtin_expect(mid_is_in_range(size), 0)) {
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_mid);
|
||
#endif
|
||
void* mid_ptr = mid_mt_alloc(size);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_POOL_GET, t_mid);
|
||
#endif
|
||
if (mid_ptr) {
|
||
// Mid MT hit! Return immediately (no header, lock-free)
|
||
return mid_ptr;
|
||
}
|
||
// Mid MT miss: fallback to other paths below (should be rare)
|
||
}
|
||
|
||
// Phase 6.11.4 P0-1 & P0-2: Compile-time guard + cached strategy update
|
||
// Phase 6.15 P0.3: Restored with environment variable control (default disabled)
|
||
#if HAKMEM_FEATURE_EVOLUTION
|
||
// Only sample if enabled via HAKMEM_EVO_SAMPLE environment variable
|
||
if (g_evo_sample_mask > 0) {
|
||
static _Atomic uint64_t tick_counter = 0;
|
||
if ((atomic_fetch_add(&tick_counter, 1) & g_evo_sample_mask) == 0) {
|
||
struct timespec now;
|
||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||
uint64_t now_ns = now.tv_sec * 1000000000ULL + now.tv_nsec;
|
||
|
||
// P0-2: Update cached strategy when window closes
|
||
if (hak_evo_tick(now_ns)) {
|
||
// Window closed, update cached strategy
|
||
int new_strategy = hak_elo_select_strategy();
|
||
atomic_store(&g_cached_strategy_id, new_strategy);
|
||
}
|
||
}
|
||
}
|
||
#endif
|
||
|
||
// Phase 6.11.4 P0-2: Always use cached strategy (LEARN/FROZEN/CANARY all use same path)
|
||
size_t threshold;
|
||
|
||
if (HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
|
||
// ELO enabled: use cached strategy (updated by hak_evo_tick)
|
||
int strategy_id = atomic_load(&g_cached_strategy_id);
|
||
threshold = hak_elo_get_threshold(strategy_id);
|
||
} else {
|
||
// ELO disabled: use default threshold (2MB - mimalloc's large threshold)
|
||
threshold = 2097152; // 2MB
|
||
}
|
||
|
||
// Phase SACS-3: BigCache only for very large blocks (>= threshold)
|
||
if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && size >= threshold) {
|
||
void* cached_ptr = NULL;
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_bc);
|
||
#endif
|
||
if (hak_bigcache_try_get(size, site_id, &cached_ptr)) {
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
|
||
#endif
|
||
// Cache hit! Return immediately
|
||
return cached_ptr;
|
||
}
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
|
||
#endif
|
||
}
|
||
|
||
// Phase SACS-3: No Site Rules in tier selection (size-only decision)
|
||
|
||
// Phase 6.16 SACS-3: L1 via ACE unified path
|
||
if (size > TINY_MAX_SIZE && size < threshold) {
|
||
const FrozenPolicy* pol = hkm_policy_get();
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_ace);
|
||
#endif
|
||
void* l1 = hkm_ace_alloc(size, site_id, pol);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_POOL_GET, t_ace);
|
||
#endif
|
||
if (l1) return l1;
|
||
}
|
||
|
||
// Phase SACS-3: For < threshold, prefer malloc; for >= threshold prefer mmap
|
||
void* ptr;
|
||
if (size >= threshold) {
|
||
// Large allocation (L2): use mmap (enables batch madvise)
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_mmap);
|
||
#endif
|
||
ptr = hak_alloc_mmap_impl(size);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_SYSCALL_MMAP, t_mmap);
|
||
#endif
|
||
} else {
|
||
// Small/medium allocation (L0/L1): use malloc (faster for <2MB)
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t_malloc);
|
||
#endif
|
||
ptr = hak_alloc_malloc_impl(size);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_FALLBACK_MALLOC, t_malloc);
|
||
#endif
|
||
}
|
||
|
||
if (!ptr) return NULL;
|
||
|
||
// NEW Phase 6.5: Record allocation size for distribution signature (gated)
|
||
if (g_evo_sample_mask > 0) {
|
||
hak_evo_record_size(size);
|
||
}
|
||
|
||
// NEW: Set alloc_site and class_bytes in header (for BigCache Phase 2)
|
||
AllocHeader* hdr = (AllocHeader*)((char*)ptr - HEADER_SIZE);
|
||
|
||
// Verify magic (fail-fast if header corrupted)
|
||
if (hdr->magic != HAKMEM_MAGIC) {
|
||
fprintf(stderr, "[hakmem] ERROR: Invalid magic in allocated header!\n");
|
||
return ptr; // Return anyway, but log error
|
||
}
|
||
|
||
// Set allocation site (for per-site cache reuse)
|
||
hdr->alloc_site = site_id;
|
||
|
||
// Set size class for caching (L2 only → threshold class)
|
||
if (size >= threshold) {
|
||
hdr->class_bytes = threshold; // cacheable at L2 threshold
|
||
} else {
|
||
hdr->class_bytes = 0; // Not cacheable
|
||
}
|
||
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_HAK_ALLOC, t0); // Profiling (build-time gated)
|
||
#endif
|
||
return ptr;
|
||
}
|
||
|
||
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
|
||
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
|
||
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
__attribute__((always_inline))
|
||
inline
|
||
#endif
|
||
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_START(t0); // Profiling (build-time gated)
|
||
#endif
|
||
|
||
(void)site; // Not used yet (will be used in BigCache Phase 2)
|
||
(void)size; // Size stored in header
|
||
|
||
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
|
||
|
||
// OPTIMIZATION PHASE 2+1 (2025-11-01): Check Tiny Pool FIRST
|
||
// Phase 2: Ultra-fast owner_slab with TLS range check (1-2 cycles negative lookup)
|
||
// Phase 1: Reorder to avoid Mid MT mutex overhead for Tiny allocations (90% of mixed workload)
|
||
//
|
||
// Target: +12-13% improvement (16.24 → 18.4-18.6 M ops/sec)
|
||
// - Tiny allocations (90%): Skip Mid MT mutex entirely → ~12% improvement
|
||
// - Mid allocations (10%): Fast negative lookup from owner_slab → minimal overhead
|
||
TinySlab* tiny_slab = hak_tiny_owner_slab(ptr);
|
||
if (tiny_slab) {
|
||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
// Phase 6-1.7: Box Theory Refactoring (2-3 instruction fast path)
|
||
// Box 6 handles both same-thread (fast) and cross-thread (remote) internally
|
||
hak_tiny_free_fast_wrapper(ptr);
|
||
return;
|
||
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
|
||
// Phase 6-1.5: Only use ultra-simple free on same-thread pointers.
|
||
// Cross-thread frees must go through the full tiny free path
|
||
// to ensure proper remote-queue handling and slab reuse.
|
||
pthread_t self_pt = pthread_self();
|
||
if (__builtin_expect(pthread_equal(tiny_slab->owner_tid, self_pt), 1)) {
|
||
hak_tiny_free_ultra_simple(ptr);
|
||
return;
|
||
}
|
||
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
|
||
// Phase 6-1.6: Metadata header
|
||
hak_tiny_free_metadata(ptr);
|
||
return;
|
||
#endif
|
||
// Fallback: full tiny free (handles cross-thread case correctly)
|
||
hak_tiny_free(ptr);
|
||
return;
|
||
}
|
||
|
||
// Phase Hybrid: Mid Range MT check (8-32KB, headerless)
|
||
{
|
||
size_t mid_block_size = 0;
|
||
int mid_class_idx = 0;
|
||
|
||
// First check if ptr is in current thread's segment (fast path)
|
||
for (int i = 0; i < MID_NUM_CLASSES; i++) {
|
||
MidThreadSegment* seg = &g_mid_segments[i];
|
||
if (seg->chunk_base && ptr >= seg->chunk_base && ptr < seg->end) {
|
||
*(void**)ptr = seg->free_list;
|
||
seg->free_list = ptr;
|
||
seg->used_count--;
|
||
return;
|
||
}
|
||
}
|
||
|
||
// Not in current thread's segment - try registry (mutex + binary search)
|
||
if (mid_registry_lookup(ptr, &mid_block_size, &mid_class_idx)) {
|
||
mid_mt_free(ptr, mid_block_size);
|
||
return;
|
||
}
|
||
}
|
||
|
||
// DISABLED: SuperSlab Registry lookup causes false positives
|
||
// Problem: L25 allocations aligned to 1MB boundary are misidentified as SuperSlabs
|
||
// causing crashes when checking magic number on unmapped/invalid memory
|
||
// TODO: Fix SuperSlab registry to avoid false positives (descriptor-based check?)
|
||
#if 0
|
||
SuperSlab* ss = hak_super_lookup(ptr);
|
||
if (ss) {
|
||
hak_tiny_free(ptr);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
||
#endif
|
||
return;
|
||
}
|
||
#endif
|
||
|
||
// Mid Pool headerless fast route: use page descriptor before header read
|
||
{
|
||
extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
|
||
extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
|
||
size_t mid_sz = 0;
|
||
if (hak_pool_mid_lookup(ptr, &mid_sz)) {
|
||
// For Mid, header read is unnecessary; free directly via pool.
|
||
hak_pool_free_fast(ptr, (uintptr_t)site);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
||
#endif
|
||
return;
|
||
}
|
||
}
|
||
|
||
// L2.5 headerless route: use page descriptor before header read
|
||
{
|
||
extern int hak_l25_lookup(void* ptr, size_t* out_size);
|
||
extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
|
||
size_t l25_sz = 0;
|
||
if (hak_l25_lookup(ptr, &l25_sz)) {
|
||
// Stats (optional): count as large free
|
||
hkm_ace_stat_large_free();
|
||
hak_l25_pool_free_fast(ptr, (uintptr_t)site);
|
||
#if HAKMEM_DEBUG_TIMING
|
||
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
||
#endif
|
||
return;
|
||
}
|
||
}
|
||
|
||
// NEW Phase 6.5: Measure free latency (start timing)
|
||
// Gate by EVO sampling mask to avoid per-op overhead when disabled
|
||
int _do_evo = (g_evo_sample_mask > 0);
|
||
struct timespec start_time, end_time;
|
||
if (_do_evo) {
|
||
clock_gettime(CLOCK_MONOTONIC, &start_time);
|
||
}
|
||
|
||
// Helper macro to record latency before returning (build-time gated timing)
|
||
#if HAKMEM_DEBUG_TIMING
|
||
#define RECORD_FREE_LATENCY() do { \
|
||
if (_do_evo) { \
|
||
clock_gettime(CLOCK_MONOTONIC, &end_time); \
|
||
uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \
|
||
(end_time.tv_nsec - start_time.tv_nsec); \
|
||
hak_evo_record_latency((double)ns); \
|
||
if (hak_evo_is_canary()) { \
|
||
hak_evo_record_canary_result(0, (double)ns); \
|
||
} \
|
||
} \
|
||
HKM_TIME_END(HKM_CAT_HAK_FREE, t0); \
|
||
} while(0)
|
||
#else
|
||
#define RECORD_FREE_LATENCY() do { \
|
||
if (_do_evo) { \
|
||
clock_gettime(CLOCK_MONOTONIC, &end_time); \
|
||
uint64_t ns = (end_time.tv_sec - start_time.tv_sec) * 1000000000ULL + \
|
||
(end_time.tv_nsec - start_time.tv_nsec); \
|
||
hak_evo_record_latency((double)ns); \
|
||
if (hak_evo_is_canary()) { \
|
||
hak_evo_record_canary_result(0, (double)ns); \
|
||
} \
|
||
} \
|
||
} while(0)
|
||
#endif
|
||
|
||
// Get raw pointer (before header)
|
||
void* raw = (char*)ptr - HEADER_SIZE;
|
||
|
||
#ifdef __linux__
|
||
if (g_strict_free) {
|
||
// Safety: ensure header address is mapped before touching it (optional)
|
||
long _ps = sysconf(_SC_PAGESIZE);
|
||
void* _pg = (void*)((uintptr_t)raw & ~((uintptr_t)_ps - 1));
|
||
unsigned char _vec;
|
||
if (mincore(_pg, (size_t)_ps, &_vec) != 0) {
|
||
// Not a valid mapped region → fallback directly to libc free
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
}
|
||
}
|
||
#endif
|
||
|
||
// Read header
|
||
AllocHeader* hdr = (AllocHeader*)raw;
|
||
|
||
// NEW: Verify magic (fail-fast if corrupted or not from hakmem)
|
||
if (hdr->magic != HAKMEM_MAGIC) {
|
||
if (g_invalid_free_log) {
|
||
fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X) - possible corruption or non-hakmem pointer\n",
|
||
hdr->magic, HAKMEM_MAGIC);
|
||
}
|
||
// Phase 7.4: Use cached mode (eliminates 44% CPU overhead from getenv on hot path!)
|
||
// OLD CODE (44% CPU time!): const char* inv = getenv("HAKMEM_INVALID_FREE");
|
||
// if (inv && strcmp(inv, "fallback") == 0) mode_skip = 0;
|
||
int mode_skip = g_invalid_free_mode; // 1 = skip, 0 = fallback to libc
|
||
if (mode_skip) {
|
||
// Skip freeing unknown pointer to avoid abort (possible mmap region). Log only.
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
} else {
|
||
fprintf(stderr, "[hakmem] Attempting fallback to system free()...\n");
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
}
|
||
}
|
||
|
||
// Phase SACS-3: BigCache put only for L2 (class_bytes >= 2MB)
|
||
if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->class_bytes >= 2097152) {
|
||
// Pass actual allocated size (hdr->size), not class_bytes!
|
||
// This prevents buffer overflow when BigCache returns undersized blocks
|
||
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) {
|
||
RECORD_FREE_LATENCY();
|
||
return; // Successfully cached, skip actual free
|
||
}
|
||
}
|
||
|
||
// Phase 6.9.1: Pool allocations are now handled via header method
|
||
// (no separate detection needed, just dispatch on method)
|
||
|
||
// Dispatch to correct free function
|
||
switch (hdr->method) {
|
||
case ALLOC_METHOD_POOL:
|
||
// Phase 6.9.1: Pool allocation - return to pool
|
||
if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
|
||
// Stats: record free in ACE L1 Mid
|
||
hkm_ace_stat_mid_free();
|
||
hak_pool_free(ptr, hdr->size, hdr->alloc_site);
|
||
} else {
|
||
// Pool disabled, shouldn't happen (fail-fast)
|
||
fprintf(stderr, "[hakmem] ERROR: POOL allocation but POOL feature disabled!\\n");
|
||
}
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
|
||
case ALLOC_METHOD_L25_POOL:
|
||
// Phase 6.13: L2.5 Pool allocation - return to pool
|
||
hkm_ace_stat_large_free();
|
||
hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
|
||
case ALLOC_METHOD_MALLOC:
|
||
free(raw);
|
||
break;
|
||
|
||
case ALLOC_METHOD_MMAP:
|
||
// Phase 6.4 P1: Apply free policy (Hot/Warm/Cold)
|
||
if (g_hakem_config.free_policy == FREE_POLICY_KEEP) {
|
||
// KEEP: do nothing (retain the VA mapping, no madvise either)
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
} else if (g_hakem_config.free_policy == FREE_POLICY_ADAPTIVE) {
|
||
// ADAPTIVE: classify the block as Hot/Warm/Cold
|
||
FreeThermal thermal = hak_classify_thermal(hdr->size);
|
||
|
||
switch (thermal) {
|
||
case FREE_THERMAL_HOT:
|
||
// HOT (< 1MB): do nothing (likely to be reused soon)
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
|
||
case FREE_THERMAL_WARM:
|
||
// WARM (1-2MB): MADV_FREE (no munmap; only physical pages are returned)
|
||
#ifdef __linux__
|
||
madvise(raw, hdr->size, MADV_FREE);
|
||
#endif
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
|
||
case FREE_THERMAL_COLD:
|
||
// COLD (>= 2MB): batch (Phase 6.8: feature-gated)
|
||
if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
|
||
hak_batch_add(raw, hdr->size);
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
}
|
||
// Small blocks: immediate munmap
|
||
#ifdef __linux__
|
||
// Phase 6.11.1: Try whale cache first
|
||
if (hkm_whale_put(raw, hdr->size) != 0) {
|
||
hkm_sys_munmap(raw, hdr->size);
|
||
}
|
||
#else
|
||
free(raw);
|
||
#endif
|
||
break;
|
||
}
|
||
} else {
|
||
// BATCH (default): Phase 6.8 feature-gated
|
||
// - Keep VA mapped for reuse (mimalloc strategy)
|
||
// - Only MADV_FREE on batch flush (release physical pages)
|
||
// - munmap happens on cold eviction only
|
||
if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
|
||
hak_batch_add(raw, hdr->size);
|
||
RECORD_FREE_LATENCY();
|
||
return;
|
||
}
|
||
|
||
// Small blocks: immediate munmap (not worth batching)
|
||
#ifdef __linux__
|
||
// Phase 6.11.1: Try whale cache first
|
||
if (hkm_whale_put(raw, hdr->size) != 0) {
|
||
hkm_sys_munmap(raw, hdr->size);
|
||
}
|
||
#else
|
||
free(raw);
|
||
#endif
|
||
}
|
||
break;
|
||
|
||
default:
|
||
fprintf(stderr, "[hakmem] ERROR: Unknown allocation method: %d\n", hdr->method);
|
||
break;
|
||
}
|
||
|
||
// Record latency for all paths that reach here
|
||
RECORD_FREE_LATENCY();
|
||
|
||
#undef RECORD_FREE_LATENCY
|
||
}
|
||
|
||
|
||
void hak_print_stats(void) {
|
||
printf("\n========================================\n");
|
||
printf("hakmem ELO-based Profiling Statistics\n");
|
||
printf("========================================\n");
|
||
|
||
printf("\nOptimization Stats:\n");
|
||
printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
|
||
|
||
hak_elo_print_leaderboard();
|
||
|
||
printf("========================================\n\n");
|
||
}
|
||
|
||
// ============================================================================
|
||
// Phase 6.15 P0: Standard C Library Wrappers (for LD_PRELOAD)
|
||
// ============================================================================
|
||
|
||
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
|
||
|
||
// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
|
||
void* malloc(size_t size) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
|
||
void free(void* ptr) {
|
||
if (!ptr) return;
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
}
|
||
|
||
void* calloc(size_t nmemb, size_t size) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
|
||
void* realloc(void* ptr, size_t size) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
|
||
#else
|
||
|
||
// malloc wrapper - intercepts system malloc() calls
|
||
void* malloc(size_t size) {
|
||
// Recursion guard: if we're inside the allocator already, fall back to libc
|
||
if (g_hakmem_lock_depth > 0) {
|
||
// Nested call detected - fallback to system malloc
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
|
||
// Initialization guard: during hak_init() bootstrap, use libc directly
|
||
if (__builtin_expect(g_initializing != 0, 0)) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
|
||
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
|
||
// LD safe modes: 1=tiny-only, 2=pass-through
|
||
// Determine LD_PRELOAD mode early (before hak_init) to avoid misrouting
|
||
int ld_mode = hak_ld_env_mode();
|
||
if (ld_mode) {
|
||
// Avoid mixing with jemalloc-managed programs (e.g., redis)
|
||
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
// Before hakmem initialization completes, always delegate to libc
|
||
if (!g_initialized || g_initializing) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
const char* lds = getenv("HAKMEM_LD_SAFE");
|
||
int mode = (lds ? atoi(lds) : 1);
|
||
if (mode >= 2 || size > TINY_MAX_SIZE) {
|
||
extern void* __libc_malloc(size_t);
|
||
return __libc_malloc(size);
|
||
}
|
||
}
|
||
|
||
// ========================================================================
|
||
// Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path)
|
||
// ========================================================================
|
||
#ifdef HAKMEM_TINY_FAST_PATH
|
||
if (size <= TINY_FAST_THRESHOLD) {
|
||
// Ultra-simple TLS cache pop (bypasses Magazine/SuperSlab)
|
||
extern void* tiny_fast_alloc(size_t);
|
||
extern void tiny_fast_init(void);
|
||
extern __thread int g_tiny_fast_initialized;
|
||
|
||
if (__builtin_expect(!g_tiny_fast_initialized, 0)) {
|
||
tiny_fast_init();
|
||
}
|
||
|
||
void* ptr = tiny_fast_alloc(size);
|
||
if (ptr) return ptr;
|
||
// Fall through to slow path on failure
|
||
}
|
||
#endif
|
||
// ========================================================================
|
||
|
||
// First-level call: enter allocator (no global lock)
|
||
g_hakmem_lock_depth++;
|
||
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
|
||
g_hakmem_lock_depth--;
|
||
return ptr;
|
||
}
|
||
|
||
// free wrapper - intercepts system free() calls
|
||
void free(void* ptr) {
|
||
if (!ptr) return; // NULL check
|
||
|
||
// Recursion guard: if we're inside the allocator already, fall back to libc
|
||
if (g_hakmem_lock_depth > 0) {
|
||
// Nested call detected - fallback to system free
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
|
||
if (__builtin_expect(g_initializing != 0, 0)) {
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
|
||
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
|
||
// In LD_PRELOAD mode, before hakmem initialization completes, always delegate
|
||
{
|
||
if (hak_ld_env_mode()) {
|
||
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
if (!g_initialized || g_initializing) {
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
|
||
// ========================================================================
|
||
// Phase 6 Fast Path: Ultra-Simple Free (when enabled)
|
||
// ========================================================================
|
||
// This bypasses free.part.0 complexity (38.43% overhead in perf analysis)
|
||
// - free.part.0: 15.83% → eliminated!
|
||
// - mid_lookup: 9.55% → eliminated for tiny!
|
||
// - pthread locks: 8.81% → eliminated!
|
||
// Two variants:
|
||
// Phase 6-1.5: Alignment guessing (3-4 instructions, 235 M ops/sec)
|
||
// Phase 6-1.6: Metadata header (1-2 instructions, ~480 M ops/sec expected)
|
||
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
|
||
g_hakmem_lock_depth++;
|
||
hak_tiny_free_ultra_simple(ptr);
|
||
g_hakmem_lock_depth--;
|
||
return;
|
||
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
|
||
g_hakmem_lock_depth++;
|
||
hak_tiny_free_metadata(ptr);
|
||
g_hakmem_lock_depth--;
|
||
return;
|
||
#endif
|
||
// ========================================================================
|
||
|
||
g_hakmem_lock_depth++;
|
||
hak_free_at(ptr, 0, HAK_CALLSITE());
|
||
g_hakmem_lock_depth--;
|
||
}
|
||
|
||
// calloc wrapper - intercepts system calloc() calls
|
||
void* calloc(size_t nmemb, size_t size) {
|
||
// Recursion guard
|
||
if (g_hakmem_lock_depth > 0) {
|
||
// Nested call detected - fallback to system calloc
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
|
||
if (__builtin_expect(g_initializing != 0, 0)) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
|
||
// Overflow check before any multiplication
|
||
if (size != 0 && nmemb > (SIZE_MAX / size)) {
|
||
errno = ENOMEM;
|
||
return NULL;
|
||
}
|
||
|
||
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
|
||
// Determine LD_PRELOAD mode early (before hak_init)
|
||
int ld_mode = hak_ld_env_mode();
|
||
if (ld_mode) {
|
||
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
if (!g_initialized || g_initializing) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
const char* lds = getenv("HAKMEM_LD_SAFE");
|
||
int mode = (lds ? atoi(lds) : 1);
|
||
size_t total = nmemb * size; // safe: overflow checked above
|
||
if (mode >= 2 || total > TINY_MAX_SIZE) {
|
||
extern void* __libc_calloc(size_t, size_t);
|
||
return __libc_calloc(nmemb, size);
|
||
}
|
||
}
|
||
|
||
g_hakmem_lock_depth++;
|
||
size_t total_size = nmemb * size; // safe: overflow checked above
|
||
void* ptr = hak_alloc_at(total_size, HAK_CALLSITE());
|
||
|
||
if (ptr) {
|
||
memset(ptr, 0, total_size); // calloc zeros memory
|
||
}
|
||
|
||
g_hakmem_lock_depth--;
|
||
return ptr;
|
||
}
|
||
|
||
// realloc wrapper - intercepts system realloc() calls
|
||
void* realloc(void* ptr, size_t size) {
|
||
// Recursion guard
|
||
if (g_hakmem_lock_depth > 0) {
|
||
// Nested call detected - fallback to system realloc
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
|
||
if (__builtin_expect(g_initializing != 0, 0)) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
|
||
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
|
||
// Determine LD_PRELOAD mode early (before hak_init)
|
||
int ld_mode = hak_ld_env_mode();
|
||
if (ld_mode) {
|
||
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
if (!g_initialized || g_initializing) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
const char* lds = getenv("HAKMEM_LD_SAFE");
|
||
int mode = (lds ? atoi(lds) : 1);
|
||
// Pass-through mode, or resizing beyond Tiny range → route to libc
|
||
if (mode >= 2 || size > TINY_MAX_SIZE) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
// Tiny-only safe mode: if the existing pointer is NOT Tiny-managed,
|
||
// do not touch it — delegate to libc to avoid header mismatches.
|
||
if (ptr != NULL && !hak_tiny_is_managed(ptr)) {
|
||
extern void* __libc_realloc(void*, size_t);
|
||
return __libc_realloc(ptr, size);
|
||
}
|
||
}
|
||
|
||
g_hakmem_lock_depth++;
|
||
void* new_ptr = NULL;
|
||
|
||
if (!ptr) {
|
||
// realloc(NULL, size) = malloc(size)
|
||
new_ptr = hak_alloc_at(size, HAK_CALLSITE());
|
||
} else if (size == 0) {
|
||
// realloc(ptr, 0) = free(ptr)
|
||
hak_free_at(ptr, 0, HAK_CALLSITE());
|
||
new_ptr = NULL;
|
||
} else {
|
||
// Allocate new block
|
||
new_ptr = hak_alloc_at(size, HAK_CALLSITE());
|
||
|
||
if (new_ptr) {
|
||
// Get old size from header
|
||
void* raw = (char*)ptr - HEADER_SIZE;
|
||
AllocHeader* hdr = (AllocHeader*)raw;
|
||
|
||
if (hdr->magic == HAKMEM_MAGIC) {
|
||
size_t old_size = hdr->size - HEADER_SIZE; // User-visible size
|
||
size_t copy_size = (old_size < size) ? old_size : size;
|
||
memcpy(new_ptr, ptr, copy_size);
|
||
} else {
|
||
// Invalid header: best-effort copy of 'size' bytes (may over-read if the old block was smaller)
|
||
memcpy(new_ptr, ptr, size);
|
||
}
|
||
|
||
// Free old block
|
||
hak_free_at(ptr, 0, HAK_CALLSITE());
|
||
}
|
||
}
|
||
|
||
g_hakmem_lock_depth--;
|
||
return new_ptr;
|
||
}
|
||
|
||
#endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD
|