**Problem**: 4T Larson crashed 100% of the time with "free(): invalid pointer".

**Root Causes** (6 bugs found via Task Agent ultrathink):

1. **Invalid magic fallback** (`hak_free_api.inc.h:87`)
   - When `hdr->magic != HAKMEM_MAGIC`, ptr came from LIBC (no header)
   - Was calling `free(raw)` where `raw = ptr - HEADER_SIZE` (garbage!)
   - Fixed: Use `__libc_free(ptr)` instead
2. **BigCache eviction** (`hakmem.c:230`)
   - Same issue: invalid magic means LIBC allocation
   - Fixed: Use `__libc_free(ptr)` directly
3. **Malloc wrapper recursion** (`hakmem_internal.h:209`)
   - `hak_alloc_malloc_impl()` called `malloc()` → wrapper recursion
   - Fixed: Use `__libc_malloc()` directly
4. **ALLOC_METHOD_MALLOC free** (`hak_free_api.inc.h:106`)
   - Was calling `free(raw)` → wrapper recursion
   - Fixed: Use `__libc_free(raw)` directly
5. **fopen/fclose crash** (`hakmem_tiny_superslab.c:131`)
   - `log_superslab_oom_once()` used `fopen()` → FILE buffer allocated via the wrapper
   - `fclose()` calls `__libc_free()` on the HAKMEM-allocated buffer → crash
   - Fixed: Wrap with `g_hakmem_lock_depth++/--` to force the LIBC path
6. **g_hakmem_lock_depth visibility** (`hakmem.c:163`)
   - Was `static`, but is needed by hakmem_tiny_superslab.c
   - Fixed: Remove `static` keyword

**Result**: 4T Larson success rate improved 0% → 80% (8/10 runs) ✅

**Remaining**: 20% crash rate still needs investigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
311 lines
13 KiB
C
311 lines
13 KiB
C
// hakmem.c - Minimal PoC Implementation
|
||
// Purpose: Verify call-site profiling concept
|
||
|
||
#define _GNU_SOURCE  // For mincore, madvise on Linux

#include "hakmem.h"
#include "hakmem_config.h"          // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h"        // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h"        // NEW: BigCache Box
#include "hakmem_pool.h"            // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h"        // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h"          // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h"         // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h"       // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h"             // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h"      // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h"            // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h"  // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h"         // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h"          // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h"  // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h"             // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h"       // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h"           // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h"             // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h"           // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h"             // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h"           // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h"            // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h"         // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h"  // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h"     // NEW Phase ACE: Metrics tracking (inline helpers)

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>     // uint64_t for the statistics / sampling globals
#include <time.h>
#include <dlfcn.h>
#include <stdatomic.h>  // NEW Phase 6.5: For atomic tick counter
#include <pthread.h>    // Phase 6.15: Threading primitives (recursion guard only)
#include <errno.h>      // calloc overflow handling

// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>

// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8  // Linux MADV_FREE
#endif
#endif
|
||
|
||
// ============================================================================
|
||
// Configuration
|
||
// ============================================================================
|
||
|
||
#define MAX_SITES 256              // Hash table size (must be a power of 2)
#define SAMPLING_RATE 1            // Sample ALL (PoC demo: no sampling)
#define HASH_MASK (MAX_SITES - 1)  // MAX_SITES - 1: an all-ones bit mask only when MAX_SITES is a power of 2

// HASH_MASK is derived by subtraction, so a non-power-of-two MAX_SITES would
// silently produce a mask with holes. Fail the build instead.
_Static_assert(MAX_SITES > 0 && (MAX_SITES & (MAX_SITES - 1)) == 0,
               "MAX_SITES must be a power of two");
|
||
|
||
// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
|
||
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
|
||
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h
|
||
|
||
|
||
// ============================================================================
|
||
// Global State
|
||
// ============================================================================
|
||
|
||
// NEW Phase ACE: Adaptive Control Engine
|
||
static struct hkm_ace_controller g_ace_controller;
|
||
|
||
static int g_initialized = 0;
static int g_strict_free = 0;    // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0;      // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time!
static int g_invalid_free_mode = 1;  // 1 = skip invalid-free check (default), 0 = fallback to libc

// Statistics
static uint64_t g_malloc_count = 0;  // Used for optimization stats display

// Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick)
static _Atomic int g_cached_strategy_id = 0;  // Cached strategy ID (updated every window closure)

// Phase 6.15 P0.3: EVO Sampling Control (environment variable)
static uint64_t g_evo_sample_mask = 0;  // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls

// Phase 6.15 P1: Site Rules enable (env: HAKMEM_SITE_RULES=1 to enable)
static int g_site_rules_enabled = 0;   // default off to avoid contention in MT
static int g_bench_tiny_only = 0;      // bench preset: Tiny-only fast path
int g_ldpreload_mode = 0;              // 1 when running via LD_PRELOAD=libhakmem.so
static int g_flush_tiny_on_exit = 0;   // HAKMEM_TINY_FLUSH_ON_EXIT=1
static int g_ultra_debug_on_exit = 0;  // HAKMEM_TINY_ULTRA_DEBUG=1
// Debug: count free() wrapper entries to confirm free routing (optional)
_Atomic uint64_t g_free_wrapper_calls = 0;
|
||
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
static int g_ldpre_env_cached = -1;  // -1 = unknown, 0/1 cached

/*
 * Report whether the LD_PRELOAD environment variable mentions libhakmem.so.
 *
 * The environment is read only while g_ldpre_env_cached is still negative;
 * every later call returns the stored answer.
 *
 * Returns 1 if LD_PRELOAD is set and contains the substring "libhakmem.so",
 * otherwise 0.
 */
static inline int hak_ld_env_mode(void) {
    if (g_ldpre_env_cached >= 0) {
        return g_ldpre_env_cached;  // already probed
    }

    const char* preload = getenv("LD_PRELOAD");
    int mentions_hakmem = (preload != NULL) && (strstr(preload, "libhakmem.so") != NULL);

    g_ldpre_env_cached = mentions_hakmem ? 1 : 0;
    return g_ldpre_env_cached;
}
|
||
|
||
// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
static int g_force_libc_alloc = 1;
#else
static int g_force_libc_alloc = -1;  // 1=force libc, 0=use hakmem, -1=uninitialized
#endif

/*
 * Decide whether allocations must be routed to libc.
 *
 * The decision is computed once (while g_force_libc_alloc is negative) and
 * then cached:
 *   1. HAKMEM_FORCE_LIBC_ALLOC set and non-empty -> atoi(value) != 0
 *   2. otherwise, HAKMEM_WRAP_TINY set, non-empty and atoi(value) == 0 -> 1
 *   3. otherwise -> 0
 * When built with HAKMEM_FORCE_LIBC_ALLOC_BUILD the cache starts at 1, so the
 * environment is never consulted.
 *
 * Returns 1 to force libc, 0 to use hakmem.
 */
static inline int hak_force_libc_alloc(void) {
    if (g_force_libc_alloc >= 0) {
        return g_force_libc_alloc;  // cached (or compile-time forced)
    }

    const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
    if (force && *force) {
        g_force_libc_alloc = (atoi(force) != 0);
        return g_force_libc_alloc;
    }

    // No explicit override: an explicit HAKMEM_WRAP_TINY=0 also selects libc.
    const char* wrap = getenv("HAKMEM_WRAP_TINY");
    g_force_libc_alloc = (wrap && *wrap && atoi(wrap) == 0) ? 1 : 0;
    return g_force_libc_alloc;
}
|
||
|
||
// LD_PRELOAD safety: avoid interposing when jemalloc is present
static int g_ld_block_jemalloc = -1;  // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
static int g_jemalloc_loaded = -1;    // -1 unknown, 0/1 cached

/*
 * Report whether a jemalloc shared object is already loaded in this process.
 *
 * Probes "libjemalloc.so.2" and then "libjemalloc.so.1" with RTLD_NOLOAD.
 * Any handle obtained is closed again. The answer is cached in
 * g_jemalloc_loaded after the first call.
 *
 * Returns 1 if either probe yields a handle, otherwise 0.
 */
static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded >= 0) {
        return g_jemalloc_loaded;
    }

    static const char* const sonames[] = { "libjemalloc.so.2", "libjemalloc.so.1" };
    void* handle = NULL;
    for (size_t i = 0; i < sizeof sonames / sizeof sonames[0] && handle == NULL; i++) {
        handle = dlopen(sonames[i], RTLD_NOLOAD | RTLD_NOW);
    }

    g_jemalloc_loaded = (handle != NULL) ? 1 : 0;
    if (handle != NULL) {
        dlclose(handle);  // release the handle the probe returned
    }
    return g_jemalloc_loaded;
}
|
||
static inline int hak_ld_block_jemalloc(void) {
|
||
if (g_ld_block_jemalloc < 0) {
|
||
const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
|
||
g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0);
|
||
}
|
||
return g_ld_block_jemalloc;
|
||
}
|
||
|
||
// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
// Box Theory - Layer 1 (API Layer):
//   This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
//   Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
// NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix)
__thread int g_hakmem_lock_depth = 0;  // 0 = outermost call

/*
 * Returns 1 when the calling thread's g_hakmem_lock_depth is greater than
 * zero, otherwise 0.
 */
int hak_in_wrapper(void) {
    return g_hakmem_lock_depth > 0;
}
|
||
|
||
// Initialization guard
static int g_initializing = 0;

/* Returns the current value of the file-local g_initializing flag. */
int hak_is_initializing(void) {
    return g_initializing;
}
|
||
|
||
// ============================================================================
// Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations
// ============================================================================
// Forward declarations for Phase 6 fast path variants. Each pair is only
// declared when its build macro is defined.

// Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
extern void* hak_tiny_alloc_ultra_simple(size_t size);
extern void hak_tiny_free_ultra_simple(void* ptr);
#endif

// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif

// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
extern void hak_tiny_free_fast_wrapper(void* ptr);
#endif
|
||
|
||
#include "box/hak_exit_debug.inc.h"
|
||
|
||
// ============================================================================
|
||
// KPI Measurement (for UCB1) - NEW!
|
||
// ============================================================================
|
||
|
||
#include "box/hak_kpi_util.inc.h"
|
||
|
||
// ============================================================================
|
||
// Internal Helpers
|
||
// ============================================================================
|
||
|
||
// Phase 6.8: All legacy profiling functions removed
|
||
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
|
||
// Replaced by ELO-based allocation (hakmem_elo.c)
|
||
|
||
// ============================================================================
|
||
// BigCache eviction callback
|
||
// ============================================================================
|
||
|
||
// BigCache eviction callback (called when cache is full and needs to evict)
|
||
static void bigcache_free_callback(void* ptr, size_t size) {
|
||
(void)size; // Not used
|
||
if (!ptr) return;
|
||
|
||
// Get raw pointer and header
|
||
void* raw = (char*)ptr - HEADER_SIZE;
|
||
AllocHeader* hdr = (AllocHeader*)raw;
|
||
|
||
// Verify magic before accessing method field
|
||
if (hdr->magic != HAKMEM_MAGIC) {
|
||
fprintf(stderr, "[hakmem] BigCache eviction: invalid magic, fallback to free()\n");
|
||
// CRITICAL FIX: When magic is invalid, allocation came from LIBC (NO header)
|
||
// Therefore ptr IS the allocated address, not raw (ptr - HEADER_SIZE)
|
||
// MUST use __libc_free to avoid infinite recursion through free() wrapper
|
||
extern void __libc_free(void*);
|
||
__libc_free(ptr);
|
||
return;
|
||
}
|
||
|
||
// Dispatch based on allocation method
|
||
switch (hdr->method) {
|
||
case ALLOC_METHOD_MALLOC:
|
||
free(raw);
|
||
break;
|
||
|
||
case ALLOC_METHOD_MMAP:
|
||
// Cold eviction: route through batch for large blocks
|
||
// This completes Phase 6.3 architecture
|
||
#ifdef __linux__
|
||
if (hdr->size >= BATCH_MIN_SIZE) {
|
||
// Large blocks: use batch (deferred munmap + TLB optimization)
|
||
hak_batch_add(raw, hdr->size);
|
||
} else {
|
||
// Small blocks: direct munmap (not worth batching)
|
||
// Phase 6.11.1: Try whale cache first
|
||
if (hkm_whale_put(raw, hdr->size) != 0) {
|
||
// Whale cache full or not a whale: munmap
|
||
madvise(raw, hdr->size, MADV_FREE); // Best-effort
|
||
hkm_sys_munmap(raw, hdr->size);
|
||
}
|
||
// else: Successfully cached in whale cache (no munmap!)
|
||
}
|
||
#else
|
||
free(raw); // Fallback (should not happen)
|
||
#endif
|
||
break;
|
||
|
||
default:
|
||
fprintf(stderr, "[hakmem] BigCache eviction: unknown method %d\n", hdr->method);
|
||
free(raw); // Fallback
|
||
break;
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// Public API
|
||
// ============================================================================
|
||
|
||
// Thread-safe one-time initialization
|
||
#include "box/hak_core_init.inc.h"
|
||
|
||
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// The attribute is completed by the first declaration in the included file,
// so no other declaration may be placed between it and the #include.
// NOTE(review): unlike the hak_free_at() include, no `inline` keyword
// accompanies always_inline here - confirm this is intended.
__attribute__((always_inline))
// The body of hak_alloc_at() lives in the box include
#include "box/hak_alloc_api.inc.h"
|
||
|
||
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
// The specifiers below are completed by the first declaration in the included
// file, so no other declaration may be placed between them and the #include.
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
// The body of hak_free_at() lives in the box include
#include "box/hak_free_api.inc.h"
|
||
|
||
|
||
void hak_print_stats(void) {
|
||
printf("\n========================================\n");
|
||
printf("hakmem ELO-based Profiling Statistics\n");
|
||
printf("========================================\n");
|
||
|
||
printf("\nOptimization Stats:\n");
|
||
printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
|
||
|
||
hak_elo_print_leaderboard();
|
||
|
||
printf("========================================\n\n");
|
||
}
|
||
|
||
// ============================================================================
|
||
// Standard C Library Wrappers (LD_PRELOAD) — boxed include
|
||
// ============================================================================
|
||
#include "box/hak_wrappers.inc.h"
|
||
|
||
// (wrappers moved to box/hak_wrappers.inc.h)
|