Files
hakmem/core/box/hak_core_init.inc.h
Moe Charm (CI) 707056b765 feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓

Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
  Result: +180-280% improvement, 85-146% of System malloc

Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)

Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
  Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
  Result: 50% → 95% stability (19/20 4T success)

Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
  Files: core/tiny_adaptive_sizing.c/h (new)

Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
  Files: core/hakmem_bigcache.c/h
  Expected: +10-20% cache hit rate

Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)

Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis

Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files

Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00

333 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_core_init.inc.h — Box: init/shutdown
#ifndef HAK_CORE_INIT_INC_H
#define HAK_CORE_INIT_INC_H
#include <signal.h>
#ifdef __GLIBC__
#include <execinfo.h>
#endif
#include "hakmem_phase7_config.h" // Phase 7 Task 3
// Debug-only SIGSEGV handler (gated by HAKMEM_DEBUG_SEGV)
static void hakmem_sigsegv_handler(int sig) {
#ifdef __GLIBC__
void* bt[64]; int n = backtrace(bt, 64);
fprintf(stderr, "\n[HAKMEM][SIGSEGV] dumping backtrace (%d frames)\n", n);
backtrace_symbols_fd(bt, n, fileno(stderr));
#else
(void)sig;
fprintf(stderr, "\n[HAKMEM][SIGSEGV] (execinfo unavailable)\n");
#endif
}
// Phase 7 Task 3: Pre-warm TLS cache helper
// Pre-allocate blocks to reduce first-allocation miss penalty
// Note: This function is defined later in hakmem.c after sll_refill_small_from_ss is available
// (moved out of header to avoid linkage issues)
static void hak_init_impl(void);
static pthread_once_t g_init_once = PTHREAD_ONCE_INIT;
void hak_init(void) {
(void)pthread_once(&g_init_once, hak_init_impl);
}
static void hak_init_impl(void) {
g_initializing = 1;
// Phase 6.X P0 FIX (2025-10-24): Initialize Box 3 (Syscall Layer) FIRST!
// This MUST be called before ANY allocation (Tiny/Mid/Large/Learner)
// dlsym() initializes function pointers to real libc (bypasses LD_PRELOAD)
hkm_syscall_init();
// CRITICAL FIX (BUG #10): Pre-detect jemalloc ONCE during init, not on hot path!
// This prevents infinite recursion: malloc → hak_jemalloc_loaded → dlopen → malloc → ...
// We protect dlopen's internal malloc calls with g_hakmem_lock_depth
extern int g_jemalloc_loaded; // Declared in hakmem.c
if (g_jemalloc_loaded < 0) {
void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
g_jemalloc_loaded = (h != NULL) ? 1 : 0;
if (h) dlclose(h);
if (g_jemalloc_loaded) {
HAKMEM_LOG("Detected jemalloc: will avoid interposing\n");
}
}
// Optional: one-shot SIGSEGV backtrace for early crash diagnosis
do {
const char* dbg = getenv("HAKMEM_DEBUG_SEGV");
if (dbg && atoi(dbg) != 0) {
struct sigaction sa; memset(&sa, 0, sizeof(sa));
sa.sa_flags = SA_RESETHAND;
sa.sa_handler = hakmem_sigsegv_handler;
sigaction(SIGSEGV, &sa, NULL);
}
} while (0);
// NEW Phase 6.11.1: Initialize debug timing
hkm_timing_init();
// NEW Phase 6.11.1: Initialize whale fast-path cache
hkm_whale_init();
// NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style)
mid_mt_init();
// NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy)
hak_config_init();
// Phase 6.16: Initialize FrozenPolicy (SACS-3)
hkm_policy_init();
// Phase 6.15 P0.3: Configure EVO sampling from environment variable
// HAKMEM_EVO_SAMPLE: 0=disabled (default), N=sample every 2^N calls
// Example: HAKMEM_EVO_SAMPLE=10 → sample every 1024 calls
// HAKMEM_EVO_SAMPLE=16 → sample every 65536 calls
char* evo_sample_str = getenv("HAKMEM_EVO_SAMPLE");
if (evo_sample_str && atoi(evo_sample_str) > 0) {
int freq = atoi(evo_sample_str);
if (freq >= 64) {
fprintf(stderr, "[hakmem] Warning: HAKMEM_EVO_SAMPLE=%d too large, using 63\n", freq);
freq = 63;
}
g_evo_sample_mask = (1ULL << freq) - 1;
HAKMEM_LOG("EVO sampling enabled: every 2^%d = %llu calls\n",
freq, (unsigned long long)(g_evo_sample_mask + 1));
} else {
g_evo_sample_mask = 0; // Disabled by default
HAKMEM_LOG("EVO sampling disabled (HAKMEM_EVO_SAMPLE not set or 0)\n");
}
#ifdef __linux__
// Record baseline KPIs
memset(g_latency_histogram, 0, sizeof(g_latency_histogram));
g_latency_samples = 0;
get_page_faults(&g_baseline_soft_pf, &g_baseline_hard_pf);
g_baseline_rss_kb = get_rss_kb();
HAKMEM_LOG("Baseline: soft_pf=%lu, hard_pf=%lu, rss=%lu KB\n",
(unsigned long)g_baseline_soft_pf,
(unsigned long)g_baseline_hard_pf,
(unsigned long)g_baseline_rss_kb);
#endif
HAKMEM_LOG("Initialized (PoC version)\n");
HAKMEM_LOG("Sampling rate: 1/%d\n", SAMPLING_RATE);
HAKMEM_LOG("Max sites: %d\n", MAX_SITES);
// Bench preset: Tiny-only (disable non-essential subsystems)
{
char* bt = getenv("HAKMEM_BENCH_TINY_ONLY");
if (bt && atoi(bt) != 0) {
g_bench_tiny_only = 1;
}
}
// Under LD_PRELOAD, enforce safer defaults for Tiny path unless overridden
{
char* ldpre = getenv("LD_PRELOAD");
if (ldpre && strstr(ldpre, "libhakmem.so")) {
g_ldpreload_mode = 1;
// Default LD-safe mode if not set: 1 (Tiny-only)
char* lds = getenv("HAKMEM_LD_SAFE");
if (lds) { /* NOP used in wrappers */ } else { setenv("HAKMEM_LD_SAFE", "1", 0); }
if (!getenv("HAKMEM_TINY_TLS_SLL")) {
setenv("HAKMEM_TINY_TLS_SLL", "0", 0); // disable TLS SLL by default
}
if (!getenv("HAKMEM_TINY_USE_SUPERSLAB")) {
setenv("HAKMEM_TINY_USE_SUPERSLAB", "0", 0); // disable SuperSlab path by default
}
}
}
// Runtime safety toggle
char* safe_free_env = getenv("HAKMEM_SAFE_FREE");
if (safe_free_env && atoi(safe_free_env) != 0) {
g_strict_free = 1;
HAKMEM_LOG("Strict free safety enabled (HAKMEM_SAFE_FREE=1)\n");
} else {
// Heuristic: if loaded via LD_PRELOAD, enable strict free by default
char* ldpre = getenv("LD_PRELOAD");
if (ldpre && strstr(ldpre, "libhakmem.so")) {
g_ldpreload_mode = 1;
g_strict_free = 1;
HAKMEM_LOG("Strict free safety auto-enabled under LD_PRELOAD\n");
}
}
// Invalid free logging toggle (default off to avoid spam under LD_PRELOAD)
char* invlog = getenv("HAKMEM_INVALID_FREE_LOG");
if (invlog && atoi(invlog) != 0) {
g_invalid_free_log = 1;
HAKMEM_LOG("Invalid free logging enabled (HAKMEM_INVALID_FREE_LOG=1)\n");
}
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead
// Perf showed getenv() on hot path consumed 43.96% CPU time (26.41% strcmp + 17.55% getenv)
char* inv = getenv("HAKMEM_INVALID_FREE");
if (inv && strcmp(inv, "fallback") == 0) {
g_invalid_free_mode = 0; // fallback mode: route invalid frees to libc
HAKMEM_LOG("Invalid free mode: fallback to libc (HAKMEM_INVALID_FREE=fallback)\n");
} else {
// Under LD_PRELOAD, prefer safety: default to fallback unless explicitly overridden
char* ldpre = getenv("LD_PRELOAD");
if (ldpre && strstr(ldpre, "libhakmem.so")) {
g_ldpreload_mode = 1;
g_invalid_free_mode = 0;
HAKMEM_LOG("Invalid free mode: fallback to libc (auto under LD_PRELOAD)\n");
} else {
g_invalid_free_mode = 1; // default: skip invalid-free check
HAKMEM_LOG("Invalid free mode: skip check (default)\n");
}
}
// NEW Phase 6.8: Feature-gated initialization (check g_hakem_config flags)
if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
hak_pool_init();
}
// NEW Phase 6.13: L2.5 LargePool (64KB-1MB allocations)
hak_l25_pool_init();
if (!g_bench_tiny_only && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE)) {
hak_bigcache_init();
hak_bigcache_set_free_callback(bigcache_free_callback);
}
if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
hak_elo_init();
// Phase 6.11.4 P0-2: Initialize cached strategy to default (strategy 0)
atomic_store(&g_cached_strategy_id, 0);
}
if (!g_bench_tiny_only && HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE)) {
hak_batch_init();
}
if (!g_bench_tiny_only && HAK_ENABLED_LEARNING(HAKMEM_FEATURE_EVOLUTION)) {
hak_evo_init();
}
if (!g_bench_tiny_only) {
// Phase 6.16: Initialize ACE stats (sampling) default off
hkm_ace_stats_init();
// Phase 6.16: Initialize sampling profiler default off
hkm_prof_init();
// Size histogram sampling (optional)
hkm_size_hist_init();
}
if (!g_bench_tiny_only) {
// Start CAP learner (optional, env-gated)
hkm_learner_init();
}
// NEW Phase 6.10: Site Rules (MVP: always ON)
// MT note: default disabled unless HAKMEM_SITE_RULES=1
char* sr_env = getenv("HAKMEM_SITE_RULES");
g_site_rules_enabled = (sr_env && atoi(sr_env) != 0);
if (!g_bench_tiny_only && g_site_rules_enabled) {
hak_site_rules_init();
}
// NEW Phase 6.12: Tiny Pool (≤1KB allocations)
hak_tiny_init();
// Env: optional Tiny flush on exit (memory efficiency evaluation)
{
char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT");
if (tf && atoi(tf) != 0) {
g_flush_tiny_on_exit = 1;
}
char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG");
if (ud && atoi(ud) != 0) {
g_ultra_debug_on_exit = 1;
}
// Register exit hook if any of the debug/flush toggles are on
// or when path debug is requested.
if (g_flush_tiny_on_exit || g_ultra_debug_on_exit || getenv("HAKMEM_TINY_PATH_DEBUG")) {
atexit(hak_flush_tiny_exit);
}
}
// NEW Phase ACE: Initialize Adaptive Control Engine
hkm_ace_controller_init(&g_ace_controller);
if (g_ace_controller.enabled) {
hkm_ace_controller_start(&g_ace_controller);
HAKMEM_LOG("ACE Learning Layer enabled and started\n");
}
// Phase 7 Task 3: Pre-warm TLS cache (reduce first-allocation miss penalty)
#if HAKMEM_TINY_PREWARM_TLS
// Forward declaration from hakmem_tiny.c
extern void hak_tiny_prewarm_tls_cache(void);
hak_tiny_prewarm_tls_cache();
HAKMEM_LOG("TLS cache pre-warmed for %d classes\n", TINY_NUM_CLASSES);
#endif
g_initializing = 0;
// Publish that initialization is complete
atomic_thread_fence(memory_order_seq_cst);
g_initialized = 1;
}
void hak_shutdown(void) {
if (!g_initialized) return;
// NEW Phase ACE: Shutdown Adaptive Control Engine FIRST (before other subsystems)
hkm_ace_controller_destroy(&g_ace_controller);
if (!g_bench_tiny_only) {
printf("[hakmem] Shutting down...\n");
hak_print_stats();
}
// NEW Phase 6.9: Shutdown L2 Pool
if (!g_bench_tiny_only) hak_pool_shutdown();
// NEW Phase 6.13: Shutdown L2.5 LargePool
if (!g_bench_tiny_only) hak_l25_pool_shutdown();
// NEW: Shutdown BigCache Box
if (!g_bench_tiny_only) hak_bigcache_shutdown();
// NEW Phase 6.2: Shutdown ELO Strategy Selection
if (!g_bench_tiny_only) hak_elo_shutdown();
// NEW Phase 6.3: Shutdown madvise Batching
if (!g_bench_tiny_only) hak_batch_shutdown();
// NEW Phase 6.10: Shutdown Site Rules
if (!g_bench_tiny_only) hak_site_rules_shutdown();
// NEW Phase 6.12: Print Tiny Pool statistics
if (!g_bench_tiny_only) hak_tiny_print_stats();
// NEW Phase 6.11.1: Print whale cache statistics
if (!g_bench_tiny_only) {
hkm_whale_dump_stats();
// NEW Phase 6.11.1: Shutdown whale cache
hkm_whale_shutdown();
}
// NEW Phase 6.11.1: Shutdown debug timing (must be last!)
if (!g_bench_tiny_only) hkm_timing_shutdown();
// Phase 6.16: Dump sampling profiler
if (!g_bench_tiny_only) hkm_prof_shutdown();
// Stop learner thread
if (!g_bench_tiny_only) hkm_learner_shutdown();
// Stop Tiny background components (e.g., Intelligence Engine)
hak_tiny_shutdown();
g_initialized = 0;
}
#endif // HAK_CORE_INIT_INC_H