// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept

#include "hakmem.h"
#include "hakmem_config.h"          // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h"        // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h"        // NEW: BigCache Box
#include "hakmem_pool.h"            // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h"        // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h"          // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h"         // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h"       // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h"             // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h"      // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h"            // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h"  // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h"         // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_super_registry.h"  // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h"             // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h"       // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h"           // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h"             // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h"           // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h"             // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h"           // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h"            // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h"         // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h"  // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h"     // NEW Phase ACE: Metrics tracking (inline helpers)
#include "box/bench_fast_box.h"     // NEW Phase 20-2: BenchFast Mode (structural ceiling measurement)
#include "hakmem_env_cache.h"       // NEW Priority-2: ENV Variable Cache (eliminate hot-path getenv)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <signal.h>
#include <unistd.h>
#include <dlfcn.h>
#include <stdatomic.h>   // NEW Phase 6.5: For atomic tick counter
#include <pthread.h>     // Phase 6.15: Threading primitives (recursion guard only)
#include <sched.h>       // Yield during init wait
#include <errno.h>       // calloc overflow handling
#ifdef __GLIBC__
#include <malloc.h>
#endif
#include "ptr_trace.h"

// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>    // MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8      // Linux MADV_FREE
#endif
#endif // __linux__

// Optional early SIGSEGV handler (runs at load if env toggled)
static void hakmem_sigsegv_handler_early(int sig) {
    (void)sig;
    const char* msg = "\n[HAKMEM] Segmentation Fault (Early Init)\n";
    ssize_t written = write(2, msg, 42);
    (void)written;
    abort();
}

// Extern debug helper
#if !HAKMEM_BUILD_RELEASE
extern void tiny_debug_dump_last_push(int cls);
#endif

// Global variables moved out of static scope to resolve dependency issues
int g_initialized = 0;
int g_strict_free = 0;                 // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0;            // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
int g_invalid_free_mode = 1;           // 1 = skip invalid-free check (default), 0 = fallback to libc
_Atomic int g_cached_strategy_id = 0;  // Cached strategy ID (updated every window closure)
uint64_t g_evo_sample_mask = 0;        // 0 = disabled (default), (1<<N)-1 = sampling mask
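
// Illustrative sketch only (hypothetical helper, not part of the original init
// path, which lives in the boxed init includes): how the runtime env toggles
// documented above would typically be parsed once at startup.
__attribute__((unused)) static void hak_read_env_toggles_sketch(void) {
    const char* e;
    if ((e = getenv("HAKMEM_SAFE_FREE")) && atoi(e) != 0)        g_strict_free = 1;
    if ((e = getenv("HAKMEM_INVALID_FREE_LOG")) && atoi(e) != 0) g_invalid_free_log = 1;
}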
// Force-libc-alloc escape hatch (env controlled)
static int g_force_libc_alloc = -1;       // HAKMEM_FORCE_LIBC_ALLOC (-1 = not parsed yet)
static int g_force_libc_alloc_init = -1;  // HAKMEM_FORCE_LIBC_ALLOC_INIT (-1 = not parsed yet)

static inline int hak_force_libc_alloc(void) {
    // Parsed lazily and cached to avoid getenv -> dlsym -> malloc recursion before TLS is ready.
    if (!g_initialized) {
        if (g_force_libc_alloc_init < 0) {
            const char* init_only = getenv("HAKMEM_FORCE_LIBC_ALLOC_INIT");
            g_force_libc_alloc_init = (init_only && atoi(init_only) != 0) ? 1 : 0;
        }
        if (g_force_libc_alloc_init) {
            return 1;
        }
    }
    if (g_force_libc_alloc < 0) {
        const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
        if (force && *force) {
            g_force_libc_alloc = (atoi(force) != 0);
        } else {
            const char* wrap = getenv("HAKMEM_WRAP_TINY");
            if (wrap && *wrap && atoi(wrap) == 0) {
                g_force_libc_alloc = 1;
            } else {
                g_force_libc_alloc = 0;
            }
        }
    }
    return g_force_libc_alloc;
}

// LD_PRELOAD safety: avoid interposing when jemalloc is present
static int g_jemalloc_loaded = -1;     // -1 = unknown, 0/1 = cached dlopen probe result
static int g_ld_block_jemalloc = -1;   // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)

static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded < 0) {
        void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
        if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
        g_jemalloc_loaded = (h != NULL) ? 1 : 0;
        if (h) dlclose(h);
    }
    return g_jemalloc_loaded;
}

static inline int hak_ld_block_jemalloc(void) {
    if (g_ld_block_jemalloc < 0) {
        const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
        g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0);
    }
    return g_ld_block_jemalloc;
}

// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check.
// Box Theory - Layer 1 (API Layer):
//   This guard protects against LD_PRELOAD recursion (Box 1 → Box 1).
//   Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
// NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix).
__thread int g_hakmem_lock_depth = 0;  // 0 = outermost call

int hak_in_wrapper(void) {
    return g_hakmem_lock_depth > 0;  // Simple and correct!
}

// Initialization guard
int hak_is_initializing(void) {
    return atomic_load_explicit(&g_initializing, memory_order_acquire);
}

// Wait helper for non-init threads to avoid libc fallback during the init window
static inline int hak_init_wait_for_ready(void) {
    if (__builtin_expect(!atomic_load_explicit(&g_initializing, memory_order_acquire), 1)) {
        return 1;  // Ready
    }
    pthread_t self = pthread_self();
    if (pthread_equal(self, g_init_thread)) {
        return 0;  // We are the init thread; caller should take the existing fallback path
    }
    // No timeout: block until init completes to avoid libc fallback on other threads.
    for (int i = 0; atomic_load_explicit(&g_initializing, memory_order_acquire); ++i) {
#if defined(__x86_64__) || defined(__i386__)
        if (i < 1024) {
            __asm__ __volatile__("pause" ::: "memory");
        } else
#endif
        {
            sched_yield();
        }
    }
    return 1;  // Init completed
}

// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif
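
// Illustrative sketch only (not compiled): the intended use of the recursion
// guard above, in the shape of an LD_PRELOAD wrapper. The real wrappers live in
// box/hak_wrappers.inc.h (included at the bottom of this file); the name
// hak_alloc_internal() and the exact fallback policy here are assumptions.
#if 0
void* malloc(size_t size) {
    if (g_hakmem_lock_depth > 0) {
        // Re-entered from inside the allocator (Box 1 → Box 1): route to libc
        // via the dlsym-resolved Box 3 entry point to avoid infinite recursion.
        return hkm_libc_malloc(size);
    }
    g_hakmem_lock_depth++;
    void* p = hak_alloc_internal(size);
    g_hakmem_lock_depth--;
    return p;
}
#endif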
// ============================================================================
// KPI Measurement (for UCB1) - NEW!
// ============================================================================
// NOTE: hak_kpi_util.inc.h is now included earlier (before hak_core_init.inc.h)
// to resolve the dependency on g_latency_histogram and related variables.

// ============================================================================
// Internal Helpers
// ============================================================================

// Phase 6.8: All legacy profiling functions removed:
//   hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
// Replaced by ELO-based allocation (hakmem_elo.c).

// ============================================================================
// BigCache eviction callback
// ============================================================================

// Called when the cache is full and needs to evict an entry.
static void bigcache_free_callback(void* ptr, size_t size) {
    (void)size;  // Not used
    if (!ptr) return;

    // Recover the raw allocation and its header
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    extern void __libc_free(void*);

    // Verify magic before accessing the method field
    if (hdr->magic != HAKMEM_MAGIC) {
        HAKMEM_LOG("BigCache eviction: invalid magic, fallback to free()\n");
        // CRITICAL FIX: When the magic is invalid, the allocation came from libc (no header),
        // so ptr IS the allocated address, not raw (ptr - HEADER_SIZE).
        // MUST use __libc_free to avoid infinite recursion through the free() wrapper.
        ptr_trace_dump_now("bigcache_libc_free_invalid_magic");
        __libc_free(ptr);
        return;
    }

    // Dispatch based on allocation method
    switch (hdr->method) {
        case ALLOC_METHOD_MALLOC:
            __libc_free(raw);
            break;

        case ALLOC_METHOD_MMAP:
            // Cold eviction: route large blocks through the madvise batch.
            // This completes the Phase 6.3 architecture.
#ifdef __linux__
            if (hdr->size >= BATCH_MIN_SIZE) {
                // Large blocks: use batch (deferred munmap + TLB optimization)
                hak_batch_add(raw, hdr->size);
            } else {
                // Small blocks: direct munmap (not worth batching)
                // Phase 6.11.1: Try the whale cache first
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    // Whale cache full or not a whale: release the mapping
                    madvise(raw, hdr->size, MADV_FREE);  // Best-effort
                    hkm_sys_munmap(raw, hdr->size);
                }
                // else: successfully cached in the whale cache (no munmap!)
            }
#else
            __libc_free(raw);  // Fallback (should not happen)
#endif
            break;

        default:
            HAKMEM_LOG("BigCache eviction: unknown method %d\n", hdr->method);
            __libc_free(raw);  // Fallback
            break;
    }
}
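
// For reference, a minimal sketch of the header fields the eviction callback
// above relies on. The authoritative definition lives in the project headers
// (hakmem.h / hakmem_internal.h); field order, widths, and any extra fields
// there may differ from this sketch.
//
//   typedef struct {
//       uint32_t magic;    // HAKMEM_MAGIC when the block carries a HAKMEM header
//       uint32_t method;   // ALLOC_METHOD_MALLOC or ALLOC_METHOD_MMAP
//       size_t   size;     // length used for hak_batch_add / madvise / munmap
//   } AllocHeader;         // stored at (user_ptr - HEADER_SIZE)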
// ============================================================================
// Public API
// ============================================================================

// Thread-safe one-time initialization
// (Now included earlier)

// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// (Now included earlier)

// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline)) inline
#endif
// hak_free_at() body moved to its box
// (Now included earlier)

void hak_print_stats(void) {
    printf("\n========================================\n");
    printf("hakmem ELO-based Profiling Statistics\n");
    printf("========================================\n");
    printf("\nOptimization Stats:\n");
    printf("  malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
    hak_elo_print_leaderboard();
    printf("========================================\n\n");
}

// ============================================================================
// Standard C Library Wrappers (LD_PRELOAD) — boxed include
// ============================================================================
#include "box/hak_wrappers.inc.h"
// (wrappers moved to box/hak_wrappers.inc.h)
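
// Example usage (the shared-library name below is an assumption; the env
// toggles are the ones documented above):
//
//   LD_PRELOAD=./libhakmem.so ./your_app                              # interpose malloc/free
//   LD_PRELOAD=./libhakmem.so HAKMEM_SAFE_FREE=1 \
//       HAKMEM_INVALID_FREE_LOG=1 ./your_app                          # extra free() safety + logging
//   HAKMEM_FORCE_LIBC_ALLOC=1 LD_PRELOAD=./libhakmem.so ./your_app    # bypass HAKMEM, route to libc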