// hakmem.c - Minimal PoC Implementation // Purpose: Verify call-site profiling concept #include #include "hakmem.h" #include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration #include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers #include "hakmem_bigcache.h" // NEW: BigCache Box #include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB) #include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB) #include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3) #include "hakmem_learner.h" // NEW: CAP auto-tuner (background) #include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path) #include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1) #include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing #include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB) #include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool #include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style) #include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style) #include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination) #include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2) #include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings) #include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3) #include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5) #include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing #include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers #include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB) #include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler #include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc) #include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine #include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers) #include "box/bench_fast_box.h" // NEW Phase 20-2: BenchFast Mode (structural ceiling measurement) #include #include #include #include #include #include // NEW Phase 6.5: For atomic tick counter #include // Phase 6.15: Threading primitives (recursion guard only) #include // calloc overflow handling #include #ifdef __GLIBC__ #include #endif #include "ptr_trace.h" // For mmap (Linux) #ifdef __linux__ #include #include // MADV_FREE support (Linux kernel 4.5+) #ifndef MADV_FREE #define MADV_FREE 8 // Linux MADV_FREE #endif // Optional early SIGSEGV handler (runs at load if env toggled) static void hakmem_sigsegv_handler_early(int sig) { #ifdef __GLIBC__ void* bt[64]; int n = backtrace(bt, 64); fprintf(stderr, "\n[HAKMEM][EARLY SIGSEGV] backtrace (%d frames)\n", n); backtrace_symbols_fd(bt, n, fileno(stderr)); #else (void)sig; fprintf(stderr, "\n[HAKMEM][EARLY SIGSEGV]\n"); #endif // Dump pointer trace ring if available ptr_trace_dump_now("signal"); } __attribute__((constructor)) static void hakmem_ctor_install_segv(void) { const char* dbg = getenv("HAKMEM_DEBUG_SEGV"); if (dbg && atoi(dbg) != 0) { #if !HAKMEM_BUILD_RELEASE fprintf(stderr, "[HAKMEM][EARLY] installing SIGSEGV handler\n"); #endif struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_flags = SA_RESETHAND; sa.sa_handler = hakmem_sigsegv_handler_early; sigaction(SIGSEGV, &sa, NULL); // Also handle SIGBUS (common for alignment/unmapped) and SIGABRT (glibc free invalid) sigaction(SIGBUS, &sa, NULL); sigaction(SIGABRT, &sa, NULL); } } #endif // ============================================================================ // Configuration // ============================================================================ #define MAX_SITES 256 // Hash table size (power of 2) #define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling) #define HASH_MASK (MAX_SITES - 1) // Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h // Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h // Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h // ============================================================================ // Global State // ============================================================================ // NEW Phase ACE: Adaptive Control Engine static struct hkm_ace_controller g_ace_controller; static int g_initialized = 0; static int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible) // Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path) // Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time! static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc // Statistics static uint64_t g_malloc_count = 0; // Used for optimization stats display // Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick) static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure) // Phase 6.15 P0.3: EVO Sampling Control (environment variable) static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1< dlsym -> malloc recursion before TLS is ready. if (!g_initialized) { const char* init_only = getenv("HAKMEM_FORCE_LIBC_ALLOC_INIT"); if (init_only && atoi(init_only) != 0) { return 1; } } if (g_force_libc_alloc < 0) { const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC"); if (force && *force) { g_force_libc_alloc = (atoi(force) != 0); } else { const char* wrap = getenv("HAKMEM_WRAP_TINY"); if (wrap && *wrap && atoi(wrap) == 0) { g_force_libc_alloc = 1; } else { g_force_libc_alloc = 0; } } } return g_force_libc_alloc; } // LD_PRELOAD safety: avoid interposing when jemalloc is present static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1) static int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached static inline int hak_jemalloc_loaded(void) { if (g_jemalloc_loaded < 0) { void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW); if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW); g_jemalloc_loaded = (h != NULL) ? 1 : 0; if (h) dlclose(h); } return g_jemalloc_loaded; } static inline int hak_ld_block_jemalloc(void) { if (g_ld_block_jemalloc < 0) { const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC"); g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0); } return g_ld_block_jemalloc; } // ============================================================================ // Phase 6.15 P1: Remove global lock; keep recursion guard only // --------------------------------------------------------------------------- // We no longer serialize all allocations with a single global mutex. // Instead, each submodule is responsible for its own fine‑grained locking. // We keep a per‑thread recursion guard so that internal use of malloc/free // within the allocator routes to libc (avoids infinite recursion). // // Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check // Box Theory - Layer 1 (API Layer): // This guard protects against LD_PRELOAD recursion (Box 1 → Box 1) // Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!) // NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix) __thread int g_hakmem_lock_depth = 0; // 0 = outermost call int hak_in_wrapper(void) { return g_hakmem_lock_depth > 0; // Simple and correct! } // Initialization guard static int g_initializing = 0; int hak_is_initializing(void) { return g_initializing; } // ============================================================================ // Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations // ============================================================================ // Forward declarations for Phase 6 fast path variants // Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc) #ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE extern void* hak_tiny_alloc_ultra_simple(size_t size); extern void hak_tiny_free_ultra_simple(void* ptr); #endif // Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc) #ifdef HAKMEM_TINY_PHASE6_METADATA extern void* hak_tiny_alloc_metadata(size_t size); extern void hak_tiny_free_metadata(void* ptr); #endif // Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations #ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR extern void* hak_tiny_alloc_fast_wrapper(size_t size); extern void hak_tiny_free_fast_wrapper(void* ptr); #endif #include "box/hak_exit_debug.inc.h" // ============================================================================ // KPI Measurement (for UCB1) - NEW! // ============================================================================ #include "box/hak_kpi_util.inc.h" // ============================================================================ // Internal Helpers // ============================================================================ // Phase 6.8: All legacy profiling functions removed // - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy() // Replaced by ELO-based allocation (hakmem_elo.c) // ============================================================================ // BigCache eviction callback // ============================================================================ // BigCache eviction callback (called when cache is full and needs to evict) static void bigcache_free_callback(void* ptr, size_t size) { (void)size; // Not used if (!ptr) return; // Get raw pointer and header void* raw = (char*)ptr - HEADER_SIZE; AllocHeader* hdr = (AllocHeader*)raw; // Verify magic before accessing method field if (hdr->magic != HAKMEM_MAGIC) { HAKMEM_LOG("BigCache eviction: invalid magic, fallback to free()\n"); // CRITICAL FIX: When magic is invalid, allocation came from LIBC (NO header) // Therefore ptr IS the allocated address, not raw (ptr - HEADER_SIZE) // MUST use __libc_free to avoid infinite recursion through free() wrapper extern void __libc_free(void*); ptr_trace_dump_now("bigcache_libc_free_invalid_magic"); __libc_free(ptr); return; } // Dispatch based on allocation method switch (hdr->method) { case ALLOC_METHOD_MALLOC: free(raw); break; case ALLOC_METHOD_MMAP: // Cold eviction: route through batch for large blocks // This completes Phase 6.3 architecture #ifdef __linux__ if (hdr->size >= BATCH_MIN_SIZE) { // Large blocks: use batch (deferred munmap + TLB optimization) hak_batch_add(raw, hdr->size); } else { // Small blocks: direct munmap (not worth batching) // Phase 6.11.1: Try whale cache first if (hkm_whale_put(raw, hdr->size) != 0) { // Whale cache full or not a whale: munmap madvise(raw, hdr->size, MADV_FREE); // Best-effort hkm_sys_munmap(raw, hdr->size); } // else: Successfully cached in whale cache (no munmap!) } #else free(raw); // Fallback (should not happen) #endif break; default: HAKMEM_LOG("BigCache eviction: unknown method %d\n", hdr->method); free(raw); // Fallback break; } } // ============================================================================ // Public API // ============================================================================ // Thread-safe one-time initialization #include "box/hak_core_init.inc.h" // Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%) __attribute__((always_inline)) // hak_alloc_at() 本体は箱へ #include "box/hak_alloc_api.inc.h" // Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%) // Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining #ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR __attribute__((always_inline)) inline #endif // hak_free_at() 本体は箱へ #include "box/hak_free_api.inc.h" void hak_print_stats(void) { printf("\n========================================\n"); printf("hakmem ELO-based Profiling Statistics\n"); printf("========================================\n"); printf("\nOptimization Stats:\n"); printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count); hak_elo_print_leaderboard(); printf("========================================\n\n"); } // ============================================================================ // Standard C Library Wrappers (LD_PRELOAD) — boxed include // ============================================================================ #include "box/hak_wrappers.inc.h" // (wrappers moved to box/hak_wrappers.inc.h)