Files
hakmem/core/hakmem.c
Moe Charm (CI) 510cf338f3 MID-V3-6: hakmem.c integration (box modularization)
Integrate MID/Pool v3 into hakmem.c main allocation path using
box modularization pattern.

Changes:
- core/hakmem.c: Include MID v3 headers
- core/box/hak_alloc_api.inc.h: Add v3 allocation gate
  - C6 (145-256B) and C7 (769-1024B) size classes
  - ENV opt-in via HAKMEM_MID_V3_ENABLED + HAKMEM_MID_V3_CLASSES
  - Priority: v6 > v3 > v4 > pool
- core/box/hak_free_api.inc.h: Add v3 free path
  - RegionIdBox lookup based ownership check
- Makefile: Add core/mid_hotbox_v3.o to TINY_BENCH_OBJS_BASE

ENV controls (default OFF):
  HAKMEM_MID_V3_ENABLED=1
  HAKMEM_MID_V3_CLASSES=0x40  (C6)
  HAKMEM_MID_V3_CLASSES=0x80  (C7)
  HAKMEM_MID_V3_DEBUG=1

Verified with bench_mid_large_mt_hakmem (7-9M ops/s, no crashes)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 01:04:55 +09:00

425 lines
17 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept
#include <stdatomic.h>
#include "hakmem.h"
#include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h" // NEW: BigCache Box
#include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h" // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
#include "box/bench_fast_box.h" // NEW Phase 20-2: BenchFast Mode (structural ceiling measurement)
#include "hakmem_env_cache.h" // NEW Priority-2: ENV Variable Cache (eliminate hot-path getenv)
#include "box/mid_hotbox_v3_box.h" // NEW Phase MID-V3: Mid/Pool HotBox v3 (256B-1KB)
#include "box/mid_hotbox_v3_env_box.h" // NEW Phase MID-V3: ENV gate for v3
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>
#include <link.h>
#include <math.h>
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
#include <pthread.h> // Phase 6.15: Threading primitives (recursion guard only)
#include <sched.h> // Yield during init wait
#include <errno.h> // calloc overflow handling
#include <signal.h>
#ifdef __GLIBC__
#include <execinfo.h>
#endif
#include "ptr_trace.h"
// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>
// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8 // Linux MADV_FREE
#endif
// Optional early SIGSEGV handler (runs at load if env toggled)
// Despite the name, hakmem_ctor_install_segv() also installs this handler for
// SIGBUS and SIGABRT. It writes a fixed banner to stderr and aborts.
// Only write() and abort() are called here.
static void hakmem_sigsegv_handler_early(int sig) {
    static const char banner[] = "\n[HAKMEM] Segmentation Fault (Early Init)\n";
    (void)sig;
    // sizeof(banner) - 1 == 42: the whole banner without its trailing NUL.
    ssize_t rc = write(STDERR_FILENO, banner, sizeof(banner) - 1);
    (void)rc;  // Nothing useful can be done if the write fails.
    abort();
}
// Extern debug helper
#if !HAKMEM_BUILD_RELEASE
extern void tiny_debug_dump_last_push(int cls);
#endif
// Global variables moved out of static scope to resolve dependency issues
// Zero until initialization has finished; hak_force_libc_alloc() treats zero as
// "early start / init still running". The writer is not visible in this file
// (presumably box/hak_core_init.inc.h — TODO confirm).
int g_initialized = 0;
int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc
_Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure)
uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls
int g_site_rules_enabled = 0; // default off to avoid contention in MT
int g_bench_tiny_only = 0; // bench preset: Tiny-only fast path
// State for the ACE controller declared in hakmem_ace_controller.h (not used directly in this file's visible code).
struct hkm_ace_controller g_ace_controller;
// Non-zero while initialization is in progress; read with acquire loads by
// hak_is_initializing() and hak_init_wait_for_ready().
_Atomic int g_initializing = 0;
// Thread performing initialization; hak_init_wait_for_ready() compares it with
// pthread_self() so the init thread never waits on itself.
pthread_t g_init_thread;
int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached
// Forward declarations for internal functions used in init/callback
static void bigcache_free_callback(void* ptr, size_t size);
// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
extern void hak_tiny_free_fast_wrapper(void* ptr);
#endif
// KPI utils forward declarations
static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf);
static uint64_t get_rss_kb(void);
// KPI measurement helpers - MUST be included before hak_core_init.inc.h
#include "box/hak_kpi_util.inc.h"
#include "box/hak_core_init.inc.h"
#include "box/hak_alloc_api.inc.h"
#include "box/hak_free_api.inc.h"
// Load-time constructor. When the HAKMEM_DEBUG_SEGV environment variable
// parses (via atoi) to a non-zero value, install hakmem_sigsegv_handler_early()
// for SIGSEGV, SIGBUS and SIGABRT. SA_RESETHAND restores the default
// disposition on first delivery, so the abort() inside the handler is not
// caught by the handler again.
__attribute__((constructor)) static void hakmem_ctor_install_segv(void) {
    HAK_TRACE("[ctor1_hakmem_ctor_install_segv]\n");

    const char* toggle = getenv("HAKMEM_DEBUG_SEGV");
    if (toggle == NULL || atoi(toggle) == 0) {
        return;  // Feature not requested: leave signal dispositions alone.
    }

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[HAKMEM][EARLY] installing SIGSEGV handler\n");
#endif

    struct sigaction action;
    memset(&action, 0, sizeof(action));
    action.sa_handler = hakmem_sigsegv_handler_early;
    action.sa_flags = SA_RESETHAND;

    // SIGSEGV first, then SIGBUS (alignment/unmapped access) and SIGABRT
    // (e.g. glibc aborting on an invalid free).
    static const int trapped_signals[] = { SIGSEGV, SIGBUS, SIGABRT };
    for (size_t i = 0; i < sizeof(trapped_signals) / sizeof(trapped_signals[0]); ++i) {
        sigaction(trapped_signals[i], &action, NULL);
    }
}
#endif
// ============================================================================
// Configuration
// ============================================================================
// NOTE(review): no use of these three macros is visible in this file; the
// legacy site-profiling helpers (hash_site() etc.) are documented below as
// removed. Confirm that no included .inc.h still needs them before deleting.
#define MAX_SITES 256 // Hash table size (power of 2)
#define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling)
// Works as an index mask only because MAX_SITES is a power of two.
#define HASH_MASK (MAX_SITES - 1)
// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h
// ============================================================================
// Global State
// ============================================================================
// Priority-2 Refactoring: ENV cache (eliminate ~2000 getenv syscalls/sec from hot paths)
// Presumably populated by hakmem_env_cache_init(), which hak_env_cache_ctor()
// calls at load time — TODO confirm in hakmem_env_cache.h.
HakEnvCache g_hak_env_cache;
// Statistics
// NOTE(review): printed by hak_print_stats(); no increment is visible in this file.
static uint64_t g_malloc_count = 0; // Used for optimization stats display
int g_ldpreload_mode = 0; // 1 when running via LD_PRELOAD=libhakmem.so
// Debug: count free() wrapper entries to confirm free routing (optional)
_Atomic uint64_t g_free_wrapper_calls = 0;
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
// Written by hak_ld_env_init(), read by hak_ld_env_mode().
static int g_ldpre_env_cached = -1; // -1 = unknown, 0/1 cached
// Cached libc force flags
// -1 = not yet read from the environment, 0/1 = cached result.
static int g_force_libc_alloc_init = -1; // HAKMEM_FORCE_LIBC_ALLOC_INIT
// Resolve, at most once, whether LD_PRELOAD mentions "libhakmem.so" and cache
// the answer in g_ldpre_env_cached (1 = present, 0 = absent or LD_PRELOAD unset).
// Calls after the first are no-ops.
static inline void hak_ld_env_init(void) {
    if (g_ldpre_env_cached >= 0) {
        return;  // Already resolved.
    }

    const char* preload_list = getenv("LD_PRELOAD");
    int preloaded = 0;
    if (preload_list != NULL && strstr(preload_list, "libhakmem.so") != NULL) {
        preloaded = 1;
    }
    g_ldpre_env_cached = preloaded;
}
// Load-time constructor: runs hak_ld_env_init() so the LD_PRELOAD detection
// result is already cached when hak_ld_env_mode() is first consulted.
__attribute__((constructor))
static void hak_ld_env_ctor(void) {
HAK_TRACE("[ctor2_hak_ld_env_ctor]\n");
hak_ld_env_init();
}
// Priority-2 Refactoring: Initialize ENV cache at library load time (eliminate ~2000 syscalls/sec)
// Load-time constructor: its only work is the call to hakmem_env_cache_init()
// (declared in hakmem_env_cache.h).
__attribute__((constructor))
static void hak_env_cache_ctor(void) {
HAK_TRACE("[ctor3_hak_env_cache_ctor]\n");
hakmem_env_cache_init();
}
// Returns the cached LD_PRELOAD detection result without touching the
// environment: 1 if LD_PRELOAD mentioned libhakmem.so, 0 if it did not, and -1
// if hak_ld_env_init() has not run yet.
static inline int hak_ld_env_mode(void) {
return g_ldpre_env_cached;
}
// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
// With HAKMEM_FORCE_LIBC_ALLOC_BUILD defined the flag starts at 1, so the
// "< 0" lazy-initialization checks that consult the environment never fire for it.
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
static int g_force_libc_alloc = 1;
#else
static int g_force_libc_alloc = -1; // 1=force libc, 0=use hakmem, -1=uninitialized
#endif
// Resolve the "force libc" decision from the environment.
//   - HAKMEM_FORCE_LIBC_ALLOC, when set and non-empty, decides on its own:
//     a value that atoi() parses as non-zero forces libc.
//   - Otherwise HAKMEM_WRAP_TINY, when set, non-empty and parsing as 0, forces libc.
// Returns 1 to force libc, 0 to use hakmem.
// Shared by the load-time constructor and the lazy path so both apply the same rules.
static inline int hak_force_libc_env_resolve(void) {
    const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
    if (force && *force) {
        return atoi(force) != 0;
    }
    const char* wrap = getenv("HAKMEM_WRAP_TINY");
    return (wrap && *wrap && atoi(wrap) == 0) ? 1 : 0;
}

// Resolve HAKMEM_FORCE_LIBC_ALLOC_INIT: returns 1 when the variable is set and
// parses (atoi) as non-zero, 0 otherwise.
static inline int hak_force_libc_init_env_resolve(void) {
    const char* init_only = getenv("HAKMEM_FORCE_LIBC_ALLOC_INIT");
    return (init_only && atoi(init_only) != 0) ? 1 : 0;
}

// Load-time constructor: cache FORCE_LIBC and WRAP_TINY so hot paths do not
// call getenv. In HAKMEM_FORCE_LIBC_ALLOC_BUILD builds g_force_libc_alloc is
// fixed at 1 and only the init-only flag is cleared here.
__attribute__((constructor))
static void hak_force_libc_ctor(void) {
    HAK_TRACE("[ctor4_hak_force_libc_ctor]\n");
#ifndef HAKMEM_FORCE_LIBC_ALLOC_BUILD
    if (g_force_libc_alloc < 0) {
        g_force_libc_alloc = hak_force_libc_env_resolve();
    }
    if (g_force_libc_alloc_init < 0) {
        g_force_libc_alloc_init = hak_force_libc_init_env_resolve();
    }
#else
    g_force_libc_alloc_init = 0;
#endif
}

// Returns non-zero when allocation requests must be routed to libc.
//
// While g_initialized is still zero, HAKMEM_FORCE_LIBC_ALLOC_INIT can force
// libc until init completes; this avoids sanitizer -> dlsym -> malloc
// recursion before TLS is ready. After that the cached g_force_libc_alloc
// decision is returned. Both flags are resolved lazily here in case this
// function is reached before hak_force_libc_ctor() has run.
static inline int hak_force_libc_alloc(void) {
    if (!g_initialized) {
        if (g_force_libc_alloc_init < 0) {
            g_force_libc_alloc_init = hak_force_libc_init_env_resolve();
        }
        if (g_force_libc_alloc_init) {
            return 1;
        }
    }
    if (g_force_libc_alloc < 0) {
        g_force_libc_alloc = hak_force_libc_env_resolve();
    }
    return g_force_libc_alloc;
}
// LD_PRELOAD safety: avoid interposing when jemalloc is present
static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
// Reports whether a jemalloc shared object is already loaded in this process.
// The probe uses dlopen() with RTLD_NOLOAD, so it never loads the library
// itself; it tries "libjemalloc.so.2" first, then "libjemalloc.so.1".
// The answer is cached in g_jemalloc_loaded (1 = loaded, 0 = not loaded).
static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded >= 0) {
        return g_jemalloc_loaded;  // Cached from an earlier probe.
    }

    void* handle = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
    if (handle == NULL) {
        handle = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
    }

    if (handle != NULL) {
        g_jemalloc_loaded = 1;
        dlclose(handle);  // Drop the extra reference taken by the probe.
    } else {
        g_jemalloc_loaded = 0;
    }
    return g_jemalloc_loaded;
}
// Returns the cached value of the HAKMEM_LD_BLOCK_JEMALLOC setting.
// An unset variable means 1 (enabled); otherwise the result is 1 when the
// value parses (atoi) as non-zero and 0 when it parses as zero.
static inline int hak_ld_block_jemalloc(void) {
    if (g_ld_block_jemalloc >= 0) {
        return g_ld_block_jemalloc;  // Already resolved.
    }

    const char* setting = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
    if (setting == NULL) {
        g_ld_block_jemalloc = 1;
    } else {
        g_ld_block_jemalloc = (atoi(setting) != 0);
    }
    return g_ld_block_jemalloc;
}
// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
// Box Theory - Layer 1 (API Layer):
// This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
// Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
// NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix)
__thread int g_hakmem_lock_depth = 0; // 0 = outermost call
// Returns non-zero when the calling thread's g_hakmem_lock_depth is above zero,
// i.e. the thread is already inside an allocator entry point (recursion guard).
int hak_in_wrapper(void) {
return g_hakmem_lock_depth > 0; // Simple and correct!
}
// Initialization guard
// Returns the current value of g_initializing using an acquire load;
// hak_init_wait_for_ready() treats non-zero as "initialization still in progress".
int hak_is_initializing(void) { return atomic_load_explicit(&g_initializing, memory_order_acquire); }
// Wait helper for non-init threads to avoid libc fallback during init window
// Block a non-init thread until allocator initialization has finished.
//
// Returns:
//   1 - initialization is not (or no longer) in progress; the caller may proceed.
//   0 - the caller IS the init thread; it must take its existing fallback path
//       instead of waiting on itself.
//
// There is deliberately no timeout: other threads wait rather than fall back
// to libc. On x86 the first 1024 polls use the `pause` instruction, after which
// every poll yields the CPU with sched_yield(); other architectures always yield.
static inline int hak_init_wait_for_ready(void) {
    if (__builtin_expect(!atomic_load_explicit(&g_initializing, memory_order_acquire), 1)) {
        return 1; // Ready
    }
    if (pthread_equal(pthread_self(), g_init_thread)) {
        return 0; // We are the init thread; caller should take the existing fallback path
    }
#if defined(__x86_64__) || defined(__i386__)
    // Bounded countdown rather than an ever-growing counter: the wait loop has
    // no timeout, so an int incremented on every poll could eventually overflow
    // (undefined behavior for signed integers).
    int pause_budget = 1024;
#endif
    while (atomic_load_explicit(&g_initializing, memory_order_acquire)) {
#if defined(__x86_64__) || defined(__i386__)
        if (pause_budget > 0) {
            --pause_budget;
            __asm__ __volatile__("pause" ::: "memory");
            continue;
        }
#endif
        sched_yield();
    }
    return 1; // Init completed
}
// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif
// ============================================================================
// KPI Measurement (for UCB1) - NEW!
// ============================================================================
// NOTE: hak_kpi_util.inc.h is now included earlier (before hak_core_init.inc.h)
// to resolve dependency on g_latency_histogram and related variables
// ============================================================================
// Internal Helpers
// ============================================================================
// Phase 6.8: All legacy profiling functions removed
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
// Replaced by ELO-based allocation (hakmem_elo.c)
// ============================================================================
// BigCache eviction callback
// ============================================================================
// BigCache eviction callback (called when cache is full and needs to evict)
// BigCache eviction callback: releases a block that BigCache is dropping.
//
// ptr  - user pointer of the evicted block; NULL is ignored.
// size - unused; the size stored in the block's AllocHeader is used instead.
//
// The AllocHeader is expected HEADER_SIZE bytes before ptr. If its magic does
// not match, the block is treated as a header-less libc allocation and ptr
// itself is released. Otherwise the release path is chosen by hdr->method.
// __libc_free() is called directly throughout so the free() wrapper is never
// re-entered.
static void bigcache_free_callback(void* ptr, size_t size) {
    (void)size; // Not used
    if (!ptr) return;

    extern void __libc_free(void*);

    // Get raw pointer and header
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;

    // Verify magic before accessing method field
    if (hdr->magic != HAKMEM_MAGIC) {
        HAKMEM_LOG("BigCache eviction: invalid magic, fallback to free()\n");
        // When magic is invalid, the allocation came from libc (no header), so
        // ptr IS the allocated address, not raw (ptr - HEADER_SIZE).
        ptr_trace_dump_now("bigcache_libc_free_invalid_magic");
        __libc_free(ptr);
        return;
    }

    // Dispatch based on allocation method
    switch (hdr->method) {
    case ALLOC_METHOD_MALLOC:
        __libc_free(raw);
        break;
    case ALLOC_METHOD_MMAP: {
        // Cold eviction: route through batch for large blocks
#ifdef __linux__
        // Read the size ONCE, before any madvise(): the header lives inside the
        // mapping, and after MADV_FREE the kernel may discard that page at any
        // time, making a later read of hdr->size return 0 and the munmap fail.
        // Assumes AllocHeader.size converts losslessly to size_t — TODO confirm.
        const size_t map_size = hdr->size;
        if (map_size >= BATCH_MIN_SIZE) {
            // Large blocks: use batch (deferred munmap + TLB optimization)
            hak_batch_add(raw, map_size);
        } else if (hkm_whale_put(raw, map_size) != 0) {
            // Small blocks: not worth batching. The whale cache refused the
            // block (non-zero return), so unmap it directly.
            madvise(raw, map_size, MADV_FREE); // Best-effort
            hkm_sys_munmap(raw, map_size);
        }
        // else: successfully cached in whale cache (no munmap!)
#else
        __libc_free(raw); // Fallback (should not happen)
#endif
        break;
    }
    default:
        HAKMEM_LOG("BigCache eviction: unknown method %d\n", hdr->method);
        __libc_free(raw); // Fallback
        break;
    }
}
// ============================================================================
// Public API
// ============================================================================
// Thread-safe one-time initialization
// (Now included earlier)
// Phase 9.1 / Phase 6-1.7 history: the force-inlined entry points that used to
// be defined here (including the body of hak_free_at()) now live in the box
// headers included earlier in this file. The leftover
// `__attribute__((always_inline)) inline` stub was removed: with those bodies
// gone it attached to hak_print_stats() below, a public function that must
// keep an ordinary external definition.

// Print the profiling summary to stdout: a banner, the value of
// g_malloc_count, and the ELO leaderboard via hak_elo_print_leaderboard().
void hak_print_stats(void) {
    printf("\n========================================\n");
    printf("hakmem ELO-based Profiling Statistics\n");
    printf("========================================\n");
    printf("\nOptimization Stats:\n");
    printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
    hak_elo_print_leaderboard();
    printf("========================================\n\n");
}
// ============================================================================
// Standard C Library Wrappers (LD_PRELOAD) — boxed include
// ============================================================================
#include "box/hak_wrappers.inc.h"
// (wrappers moved to box/hak_wrappers.inc.h)