Files
hakmem/core/hakmem.c
Moe Charm (CI) 4ef0171bc0 feat: Add ACE allocation failure tracing and debug hooks
This commit introduces a comprehensive tracing mechanism for allocation failures within the Adaptive Cache Engine (ACE) component. This feature allows for precise identification of the root cause for Out-Of-Memory (OOM) issues related to ACE allocations.

Key changes include:
- **ACE Tracing Implementation**:
  - Added  environment variable to enable/disable detailed logging of allocation failures.
  - Instrumented , , and  to distinguish between "Threshold" (size class mismatch), "Exhaustion" (pool depletion), and "MapFail" (OS memory allocation failure).
- **Build System Fixes**:
  - Corrected  to ensure  is properly linked into , resolving an  error.
- **LD_PRELOAD Wrapper Adjustments**:
  - Investigated and understood the  wrapper's behavior under , particularly its interaction with  and  checks.
  - Enabled debugging flags for  environment to prevent unintended fallbacks to 's  for non-tiny allocations, allowing comprehensive testing of the  allocator.
- **Debugging & Verification**:
  - Introduced temporary verbose logging to pinpoint execution flow issues within  interception and  routing. These temporary logs have been removed.
  - Created  to facilitate testing of the tracing features.

This feature will significantly aid in diagnosing and resolving allocation-related OOM issues in  by providing clear insights into the failure pathways.
2025-12-01 16:37:59 +09:00

354 lines
15 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept
#include <stdatomic.h>
#include "hakmem.h"
#include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h" // NEW: BigCache Box
#include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h" // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
#include "box/bench_fast_box.h" // NEW Phase 20-2: BenchFast Mode (structural ceiling measurement)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
#include <pthread.h> // Phase 6.15: Threading primitives (recursion guard only)
#include <errno.h> // calloc overflow handling
#include <signal.h>
#ifdef __GLIBC__
#include <execinfo.h>
#endif
#include "ptr_trace.h"
// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>
// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8 // Linux MADV_FREE
#endif
// Optional early crash handler (installed at load time when the env toggle is set).
// Writes a marker line to stderr -- preceded by a raw backtrace on glibc builds --
// and then asks the pointer-trace module to dump its ring.
//   sig: delivered signal number; not used for anything but silencing warnings.
static void hakmem_sigsegv_handler_early(int sig) {
#ifdef __GLIBC__
    void* frames[64];
    int depth = backtrace(frames, 64);
    fprintf(stderr, "\n[HAKMEM][EARLY SIGSEGV] backtrace (%d frames)\n", depth);
    backtrace_symbols_fd(frames, depth, fileno(stderr));
#else
    (void)sig;
    fprintf(stderr, "\n[HAKMEM][EARLY SIGSEGV]\n");
#endif
    // Dump pointer trace ring if available
    ptr_trace_dump_now("signal");
}
// Load-time hook. When HAKMEM_DEBUG_SEGV parses (via atoi) to a non-zero value,
// registers hakmem_sigsegv_handler_early for SIGSEGV, SIGBUS and SIGABRT.
// Does nothing when the variable is unset or evaluates to 0.
__attribute__((constructor)) static void hakmem_ctor_install_segv(void) {
    const char* toggle = getenv("HAKMEM_DEBUG_SEGV");
    if (toggle == NULL || atoi(toggle) == 0) {
        return;
    }
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[HAKMEM][EARLY] installing SIGSEGV handler\n");
#endif
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = hakmem_sigsegv_handler_early;
    // SA_RESETHAND: the disposition goes back to the default once the handler is entered.
    sa.sa_flags = SA_RESETHAND;
    sigaction(SIGSEGV, &sa, NULL);
    // Also handle SIGBUS (common for alignment/unmapped) and SIGABRT (glibc free invalid)
    sigaction(SIGBUS, &sa, NULL);
    sigaction(SIGABRT, &sa, NULL);
}
#endif
// ============================================================================
// Configuration
// ============================================================================
// Site hash table capacity. HASH_MASK below is derived as MAX_SITES - 1, which
// only works as an index mask while this value remains a power of two.
#define MAX_SITES 256 // Hash table size (power of 2)
#define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling)
// Index mask for a MAX_SITES-entry table (0xFF with the current size).
#define HASH_MASK (MAX_SITES - 1)
// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h
// ============================================================================
// Global State
// ============================================================================
// NEW Phase ACE: Adaptive Control Engine
static struct hkm_ace_controller g_ace_controller;
// Zero until initialization has completed; hak_force_libc_alloc() reads it to
// detect the early-start window.
// NOTE(review): presumably set by the init code pulled in from
// box/hak_core_init.inc.h -- not visible in this file, confirm there.
static int g_initialized = 0;
static int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time!
static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc
// Statistics
// g_malloc_count is printed by hak_print_stats() as "malloc() calls".
static uint64_t g_malloc_count = 0; // Used for optimization stats display
// Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick)
static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure)
// Phase 6.15 P0.3: EVO Sampling Control (environment variable)
static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls
// Phase 6.15 P1: Site Rules enable (env: HAKMEM_SITE_RULES=1 to enable)
static int g_site_rules_enabled = 0; // default off to avoid contention in MT
static int g_bench_tiny_only = 0; // bench preset: Tiny-only fast path
int g_ldpreload_mode = 0; // 1 when running via LD_PRELOAD=libhakmem.so
static int g_flush_tiny_on_exit = 0; // HAKMEM_TINY_FLUSH_ON_EXIT=1
static int g_ultra_debug_on_exit = 0; // HAKMEM_TINY_ULTRA_DEBUG=1
// Debug: count free() wrapper entries to confirm free routing (optional)
// Non-static, so other translation units can reference this counter.
_Atomic uint64_t g_free_wrapper_calls = 0;
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
static int g_ldpre_env_cached = -1; // -1 = unknown, 0/1 cached
// Returns 1 when the LD_PRELOAD environment variable contains "libhakmem.so",
// otherwise 0. The environment is consulted only while the cache is still
// negative; afterwards the stored answer is returned as-is.
static inline int hak_ld_env_mode(void) {
    if (g_ldpre_env_cached >= 0) {
        return g_ldpre_env_cached;
    }
    const char* preload = getenv("LD_PRELOAD");
    int detected = 0;
    if (preload != NULL && strstr(preload, "libhakmem.so") != NULL) {
        detected = 1;
    }
    g_ldpre_env_cached = detected;
    return detected;
}
// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
static int g_force_libc_alloc = 1;
#else
static int g_force_libc_alloc = -1; // 1=force libc, 0=use hakmem, -1=uninitialized
#endif
// Decides whether allocations must be routed to libc.
// Returns 1 to force libc, 0 to use hakmem.
// Resolution order:
//   1. While g_initialized is still 0, a non-zero HAKMEM_FORCE_LIBC_ALLOC_INIT
//      returns 1 without touching the cache (avoids sanitizer -> dlsym ->
//      malloc recursion before TLS is ready).
//   2. A previously cached decision (g_force_libc_alloc >= 0).
//   3. HAKMEM_FORCE_LIBC_ALLOC, when set and non-empty (non-zero => 1).
//   4. HAKMEM_WRAP_TINY set, non-empty and equal to 0 => 1; anything else => 0.
static inline int hak_force_libc_alloc(void) {
    if (!g_initialized) {
        const char* during_init = getenv("HAKMEM_FORCE_LIBC_ALLOC_INIT");
        if (during_init != NULL && atoi(during_init) != 0) {
            return 1;
        }
    }

    if (g_force_libc_alloc >= 0) {
        return g_force_libc_alloc;
    }

    const char* explicit_force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
    if (explicit_force != NULL && explicit_force[0] != '\0') {
        g_force_libc_alloc = (atoi(explicit_force) != 0);
        return g_force_libc_alloc;
    }

    const char* wrap_tiny = getenv("HAKMEM_WRAP_TINY");
    if (wrap_tiny != NULL && wrap_tiny[0] != '\0' && atoi(wrap_tiny) == 0) {
        g_force_libc_alloc = 1;
    } else {
        g_force_libc_alloc = 0;
    }
    return g_force_libc_alloc;
}
// LD_PRELOAD safety: avoid interposing when jemalloc is present
// Both values are lazily resolved caches: negative means "not evaluated yet";
// hak_ld_block_jemalloc() and hak_jemalloc_loaded() fill them in on first use.
static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
static int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached
// Reports whether a jemalloc shared object is already loaded in this process.
// Probes "libjemalloc.so.2" and then "libjemalloc.so.1" with RTLD_NOLOAD, so
// nothing new is loaded by the check. The answer is cached in
// g_jemalloc_loaded. Returns 1 if either library is resident, else 0.
static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded >= 0) {
        return g_jemalloc_loaded;
    }
    void* handle = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
    if (handle == NULL) {
        handle = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
    }
    if (handle == NULL) {
        g_jemalloc_loaded = 0;
        return g_jemalloc_loaded;
    }
    g_jemalloc_loaded = 1;
    // Drop the reference obtained by the probe.
    dlclose(handle);
    return g_jemalloc_loaded;
}
// Returns the cached HAKMEM_LD_BLOCK_JEMALLOC setting: 1 when the variable is
// unset (default) or parses to non-zero via atoi, 0 otherwise. The environment
// is read only while the cache is still negative.
static inline int hak_ld_block_jemalloc(void) {
    if (g_ld_block_jemalloc >= 0) {
        return g_ld_block_jemalloc;
    }
    const char* setting = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
    if (setting == NULL) {
        g_ld_block_jemalloc = 1;
    } else {
        g_ld_block_jemalloc = (atoi(setting) != 0);
    }
    return g_ld_block_jemalloc;
}
// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
// Box Theory - Layer 1 (API Layer):
// This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
// Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
// NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix)
__thread int g_hakmem_lock_depth = 0; // 0 = outermost call
// Returns 1 when the calling thread's g_hakmem_lock_depth is positive (i.e. the
// thread is already inside a guarded call), 0 otherwise.
int hak_in_wrapper(void) {
    if (g_hakmem_lock_depth > 0) {
        return 1;
    }
    return 0;
}
// Initialization guard
static int g_initializing = 0;
// Accessor that exposes the file-local g_initializing flag; returns its
// current value unchanged.
int hak_is_initializing(void) {
    const int in_progress = g_initializing;
    return in_progress;
}
// ============================================================================
// Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations
// ============================================================================
// Forward declarations for Phase 6 fast path variants
// Each pair below is declared only when its build macro is defined; the
// definitions live outside this file (file names per the comments below).
// Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
extern void* hak_tiny_alloc_ultra_simple(size_t size);
extern void hak_tiny_free_ultra_simple(void* ptr);
#endif
// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif
// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
// NOTE(review): the same macro also disables the always_inline prefix placed
// before the hak_free_api include further down in this file.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
extern void hak_tiny_free_fast_wrapper(void* ptr);
#endif
#include "box/hak_exit_debug.inc.h"
// ============================================================================
// KPI Measurement (for UCB1) - NEW!
// ============================================================================
#include "box/hak_kpi_util.inc.h"
// ============================================================================
// Internal Helpers
// ============================================================================
// Phase 6.8: All legacy profiling functions removed
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
// Replaced by ELO-based allocation (hakmem_elo.c)
// ============================================================================
// BigCache eviction callback
// ============================================================================
// BigCache eviction callback (called when cache is full and needs to evict).
//   ptr  - user pointer of the evicted block; NULL is ignored.
//   size - unused; the size recorded in the block header is used instead.
// The header located HEADER_SIZE bytes before ptr selects the release path:
//   - magic mismatch      : treated as a header-less libc block, freed via __libc_free(ptr)
//   - ALLOC_METHOD_MALLOC : __libc_free on the header address
//   - ALLOC_METHOD_MMAP   : batch / whale cache / munmap on Linux, __libc_free elsewhere
//   - anything else       : logged, then __libc_free on the header address
// __libc_free is used directly so the free() wrapper is never re-entered.
static void bigcache_free_callback(void* ptr, size_t size) {
    extern void __libc_free(void*);
    (void)size; // Not used
    if (ptr == NULL) {
        return;
    }

    // Header sits immediately in front of the user pointer.
    void* block_start = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)block_start;

    // Verify magic before trusting any other header field.
    if (hdr->magic != HAKMEM_MAGIC) {
        HAKMEM_LOG("BigCache eviction: invalid magic, fallback to free()\n");
        // Invalid magic means the allocation came from libc and has NO header,
        // so ptr itself (not ptr - HEADER_SIZE) is the address to release.
        ptr_trace_dump_now("bigcache_libc_free_invalid_magic");
        __libc_free(ptr);
        return;
    }

    // Dispatch based on allocation method
    switch (hdr->method) {
    case ALLOC_METHOD_MALLOC:
        __libc_free(block_start);
        break;
    case ALLOC_METHOD_MMAP:
        // Cold eviction: route through batch for large blocks
#ifdef __linux__
        if (hdr->size >= BATCH_MIN_SIZE) {
            // Large blocks: use batch (deferred munmap + TLB optimization)
            hak_batch_add(block_start, hdr->size);
        } else if (hkm_whale_put(block_start, hdr->size) != 0) {
            // Whale cache full or not a whale: release the mapping directly.
            madvise(block_start, hdr->size, MADV_FREE); // Best-effort
            hkm_sys_munmap(block_start, hdr->size);
        }
        // Otherwise the whale cache kept the block (no munmap).
#else
        __libc_free(block_start); // Fallback (should not happen)
#endif
        break;
    default:
        HAKMEM_LOG("BigCache eviction: unknown method %d\n", hdr->method);
        __libc_free(block_start); // Fallback
        break;
    }
}
// ============================================================================
// Public API
// ============================================================================
// Thread-safe one-time initialization
#include "box/hak_core_init.inc.h"
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
__attribute__((always_inline))
// The body of hak_alloc_at() lives in the box include below
#include "box/hak_alloc_api.inc.h"
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
// The body of hak_free_at() lives in the box include below
#include "box/hak_free_api.inc.h"
// Prints the profiling summary to stdout: a banner, the malloc() call counter
// (g_malloc_count) and the ELO leaderboard produced by hak_elo_print_leaderboard().
void hak_print_stats(void) {
    const unsigned long long malloc_calls = (unsigned long long)g_malloc_count;

    fputs("\n========================================\n", stdout);
    fputs("hakmem ELO-based Profiling Statistics\n", stdout);
    fputs("========================================\n", stdout);
    fputs("\nOptimization Stats:\n", stdout);
    printf(" malloc() calls: %llu\n", malloc_calls);
    hak_elo_print_leaderboard();
    fputs("========================================\n\n", stdout);
}
// ============================================================================
// Standard C Library Wrappers (LD_PRELOAD) — boxed include
// ============================================================================
#include "box/hak_wrappers.inc.h"
// (wrappers moved to box/hak_wrappers.inc.h)