hakmem/core/hakmem.c
Moe Charm (CI) 77ed72fcf6 Fix: LIBC/HAKMEM mixed allocation crashes (0% → 80% success)
**Problem**: 4T Larson crashed in 100% of runs with "free(): invalid pointer"

**Root Causes** (6 bugs found via Task Agent ultrathink):

1. **Invalid magic fallback** (`hak_free_api.inc.h:87`)
   - When `hdr->magic != HAKMEM_MAGIC`, ptr came from LIBC (no header)
   - Was calling `free(raw)` where `raw = ptr - HEADER_SIZE` (garbage!)
   - Fixed: Use `__libc_free(ptr)` instead (see the sketch after this list)

2. **BigCache eviction** (`hakmem.c:230`)
   - Same issue: invalid magic means LIBC allocation
   - Fixed: Use `__libc_free(ptr)` directly

3. **Malloc wrapper recursion** (`hakmem_internal.h:209`)
   - `hak_alloc_malloc_impl()` called `malloc()` → wrapper recursion
   - Fixed: Use `__libc_malloc()` directly

4. **ALLOC_METHOD_MALLOC free** (`hak_free_api.inc.h:106`)
   - Was calling `free(raw)` → wrapper recursion
   - Fixed: Use `__libc_free(raw)` directly

5. **fopen/fclose crash** (`hakmem_tiny_superslab.c:131`)
   - `log_superslab_oom_once()` used `fopen()`, so the FILE buffer was allocated through the HAKMEM malloc wrapper
   - `fclose()` then called `__libc_free()` on that HAKMEM-allocated buffer → crash
   - Fixed: Wrap with `g_hakmem_lock_depth++/--` to force LIBC path

6. **g_hakmem_lock_depth visibility** (`hakmem.c:163`)
   - Was `static`, needed by hakmem_tiny_superslab.c
   - Fixed: Remove `static` keyword
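
A minimal sketch of the pattern behind fixes 1–4: a header-tagging allocator that goes straight to glibc's `__libc_malloc`/`__libc_free` internally, and a free path that treats a magic mismatch as "no header, libc-owned pointer". The `DemoHeader`/`DEMO_MAGIC`/`demo_*` names are hypothetical and not HAKMEM's actual types; only `__libc_malloc`/`__libc_free` are real glibc symbols, so this is glibc/Linux-specific illustration, not the shipped code.

```c
#include <stddef.h>
#include <stdint.h>

extern void* __libc_malloc(size_t size); // glibc internal malloc (bypasses any interposed wrapper)
extern void  __libc_free(void* ptr);     // glibc internal free  (bypasses any interposed wrapper)

typedef struct {
    uint32_t magic;   // identifies blocks that carry this header
    size_t   size;    // user-visible size
} DemoHeader;

#define DEMO_MAGIC       0x48414B4Du
#define DEMO_HEADER_SIZE sizeof(DemoHeader)

// Allocation side: call __libc_malloc directly so an interposed malloc()
// wrapper can never recurse into itself (fix 3).
static void* demo_alloc(size_t size) {
    DemoHeader* hdr = (DemoHeader*)__libc_malloc(DEMO_HEADER_SIZE + size);
    if (!hdr) return NULL;
    hdr->magic = DEMO_MAGIC;
    hdr->size  = size;
    return (char*)hdr + DEMO_HEADER_SIZE;
}

// Free side: a magic mismatch means the pointer never had a header, so it is
// libc-owned and must be released as-is with __libc_free(ptr), never as
// free(ptr - HEADER_SIZE) (fixes 1, 2, 4).
static void demo_free(void* ptr) {
    if (!ptr) return;
    DemoHeader* hdr = (DemoHeader*)((char*)ptr - DEMO_HEADER_SIZE);
    if (hdr->magic != DEMO_MAGIC) {
        __libc_free(ptr);   // libc block: free the user pointer itself
        return;
    }
    __libc_free(hdr);       // header-carrying block: free from the raw base
}

int main(void) {
    void* a = demo_alloc(64);     // header-tagged block
    void* b = __libc_malloc(64);  // plain libc block (no header)
    demo_free(a);
    demo_free(b);                 // takes the magic-mismatch path
    return 0;
}
```

Note that the magic probe dereferences the bytes just in front of the user pointer; like the real allocator, the sketch relies on that region being readable, which is exactly why a mismatch must be answered with `__libc_free(ptr)` rather than freeing the computed `raw` address.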

**Result**: 4T Larson success rate improved from 0% to 80% (8/10 runs)

**Remaining**: 20% crash rate still needs investigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 02:48:20 +09:00


// hakmem.c - Minimal PoC Implementation
// Purpose: Verify call-site profiling concept
#define _GNU_SOURCE // For mincore, madvise on Linux
#include "hakmem.h"
#include "hakmem_config.h" // NEW Phase 6.8: Mode-based configuration
#include "hakmem_internal.h" // NEW Phase 6.8: Static inline helpers
#include "hakmem_bigcache.h" // NEW: BigCache Box
#include "hakmem_pool.h" // NEW Phase 6.9: L2 Hybrid Pool (2-32KiB)
#include "hakmem_l25_pool.h" // NEW Phase 6.13: L2.5 LargePool (64KB-1MB)
#include "hakmem_policy.h" // NEW Phase 6.16: FrozenPolicy (SACS-3)
#include "hakmem_learner.h" // NEW: CAP auto-tuner (background)
#include "hakmem_size_hist.h" // NEW: size histogram sampling (off hot path)
#include "hakmem_ace.h" // NEW Phase 6.16: ACE layer (L1)
#include "hakmem_site_rules.h" // NEW Phase 6.10: Site-Aware Cache Routing
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)
#include "hakmem_batch.h" // NEW: madvise Batching (Phase 6.3)
#include "hakmem_evo.h" // NEW: Learning Lifecycle (Phase 6.5)
#include "hakmem_debug.h" // NEW Phase 6.11.1: Debug Timing
#include "hakmem_sys.h" // NEW Phase 6.11.1: Syscall Wrappers
#include "hakmem_whale.h" // NEW Phase 6.11.1: Whale Fast-Path (≥2MB)
#include "hakmem_prof.h" // NEW Phase 6.16: Sampling profiler
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>
#include <stdatomic.h> // NEW Phase 6.5: For atomic tick counter
#include <pthread.h> // Phase 6.15: Threading primitives (recursion guard only)
#include <errno.h> // calloc overflow handling
// For mmap (Linux)
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>
// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8 // Linux MADV_FREE
#endif
#endif
// ============================================================================
// Configuration
// ============================================================================
#define MAX_SITES 256 // Hash table size (power of 2)
#define SAMPLING_RATE 1 // Sample ALL (PoC demo: no sampling)
#define HASH_MASK (MAX_SITES - 1)
// Phase 6.8: FREE_POLICY/FreePolicy moved to hakmem_config.h
// Phase 6.8: FreeThermal/THERMAL_* constants moved to hakmem_internal.h
// Phase 6.8: THP_POLICY/THPPolicy moved to hakmem_config.h
// ============================================================================
// Global State
// ============================================================================
// NEW Phase ACE: Adaptive Control Engine
static struct hkm_ace_controller g_ace_controller;
static int g_initialized = 0;
static int g_strict_free = 0; // runtime: HAKMEM_SAFE_FREE=1 enables extra safety checks
int g_invalid_free_log = 0; // runtime: HAKMEM_INVALID_FREE_LOG=1 to log invalid-free messages (extern visible)
// Phase 7.4: Cache HAKMEM_INVALID_FREE to eliminate 44% CPU overhead (getenv on hot path)
// Perf analysis showed getenv("HAKMEM_INVALID_FREE") consumed 43.96% of CPU time!
static int g_invalid_free_mode = 1; // 1 = skip invalid-free check (default), 0 = fallback to libc
// Statistics
static uint64_t g_malloc_count = 0; // Used for optimization stats display
// Phase 6.11.4 P0-2: Cached Strategy (atomic, updated by hak_evo_tick)
static _Atomic int g_cached_strategy_id = 0; // Cached strategy ID (updated every window closure)
// Phase 6.15 P0.3: EVO Sampling Control (environment variable)
static uint64_t g_evo_sample_mask = 0; // 0 = disabled (default), (1<<N)-1 = sample every 2^N calls
// Phase 6.15 P1: Site Rules enable (env: HAKMEM_SITE_RULES=1 to enable)
static int g_site_rules_enabled = 0; // default off to avoid contention in MT
static int g_bench_tiny_only = 0; // bench preset: Tiny-only fast path
int g_ldpreload_mode = 0; // 1 when running via LD_PRELOAD=libhakmem.so
static int g_flush_tiny_on_exit = 0; // HAKMEM_TINY_FLUSH_ON_EXIT=1
static int g_ultra_debug_on_exit = 0; // HAKMEM_TINY_ULTRA_DEBUG=1
// Debug: count free() wrapper entries to confirm free routing (optional)
_Atomic uint64_t g_free_wrapper_calls = 0;
// Cached LD_PRELOAD detection for wrapper hot paths (avoid getenv per call)
static int g_ldpre_env_cached = -1; // -1 = unknown, 0/1 cached
static inline int hak_ld_env_mode(void) {
    if (g_ldpre_env_cached < 0) {
        const char* ldpre = getenv("LD_PRELOAD");
        g_ldpre_env_cached = (ldpre && strstr(ldpre, "libhakmem.so")) ? 1 : 0;
    }
    return g_ldpre_env_cached;
}
// Sanitizer / guard rails: allow forcing libc allocator even when wrappers are linked
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
static int g_force_libc_alloc = 1;
#else
static int g_force_libc_alloc = -1; // 1=force libc, 0=use hakmem, -1=uninitialized
#endif
static inline int hak_force_libc_alloc(void) {
    if (g_force_libc_alloc < 0) {
        const char* force = getenv("HAKMEM_FORCE_LIBC_ALLOC");
        if (force && *force) {
            g_force_libc_alloc = (atoi(force) != 0);
        } else {
            const char* wrap = getenv("HAKMEM_WRAP_TINY");
            if (wrap && *wrap && atoi(wrap) == 0) {
                g_force_libc_alloc = 1;
            } else {
                g_force_libc_alloc = 0;
            }
        }
    }
    return g_force_libc_alloc;
}
// LD_PRELOAD safety: avoid interposing when jemalloc is present
static int g_ld_block_jemalloc = -1; // env: HAKMEM_LD_BLOCK_JEMALLOC (default 1)
static int g_jemalloc_loaded = -1; // -1 unknown, 0/1 cached
static inline int hak_jemalloc_loaded(void) {
    if (g_jemalloc_loaded < 0) {
        void* h = dlopen("libjemalloc.so.2", RTLD_NOLOAD | RTLD_NOW);
        if (!h) h = dlopen("libjemalloc.so.1", RTLD_NOLOAD | RTLD_NOW);
        g_jemalloc_loaded = (h != NULL) ? 1 : 0;
        if (h) dlclose(h);
    }
    return g_jemalloc_loaded;
}
static inline int hak_ld_block_jemalloc(void) {
    if (g_ld_block_jemalloc < 0) {
        const char* e = getenv("HAKMEM_LD_BLOCK_JEMALLOC");
        g_ld_block_jemalloc = (e == NULL) ? 1 : (atoi(e) != 0);
    }
    return g_ld_block_jemalloc;
}
// ============================================================================
// Phase 6.15 P1: Remove global lock; keep recursion guard only
// ---------------------------------------------------------------------------
// We no longer serialize all allocations with a single global mutex.
// Instead, each submodule is responsible for its own fine-grained locking.
// We keep a per-thread recursion guard so that internal use of malloc/free
// within the allocator routes to libc (avoids infinite recursion).
//
// Phase 6.X P0 FIX (2025-10-24): Reverted to simple g_hakmem_lock_depth check
// Box Theory - Layer 1 (API Layer):
// This guard protects against LD_PRELOAD recursion (Box 1 → Box 1)
// Box 2 (Core) → Box 3 (Syscall) uses hkm_libc_malloc() (dlsym, no guard needed!)
// NOTE: Removed 'static' to allow access from hakmem_tiny_superslab.c (fopen fix)
__thread int g_hakmem_lock_depth = 0; // 0 = outermost call
int hak_in_wrapper(void) {
    return g_hakmem_lock_depth > 0; // Simple and correct!
}
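/*
 * Usage sketch (illustrative, not part of this translation unit): per the
 * commit notes above, log_superslab_oom_once() in hakmem_tiny_superslab.c
 * wraps its fopen()/fclose() pair with this guard so the FILE buffer is both
 * allocated and freed on the libc path, avoiding a __libc_free() of a
 * HAKMEM-owned block:
 *
 *     g_hakmem_lock_depth++;        // wrapped malloc/free now route to libc
 *     FILE* f = fopen(path, "a");   // FILE buffer comes from __libc_malloc
 *     if (f) fclose(f);             // fclose releases it with __libc_free
 *     g_hakmem_lock_depth--;
 */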
// Initialization guard
static int g_initializing = 0;
int hak_is_initializing(void) { return g_initializing; }
// ============================================================================
// Phase 6-1.5: Ultra-Simple Fast Path Forward Declarations
// ============================================================================
// Forward declarations for Phase 6 fast path variants
// Phase 6-1.5: Alignment guessing (hakmem_tiny_ultra_simple.inc)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
extern void* hak_tiny_alloc_ultra_simple(size_t size);
extern void hak_tiny_free_ultra_simple(void* ptr);
#endif
// Phase 6-1.6: Metadata header (hakmem_tiny_metadata.inc)
#ifdef HAKMEM_TINY_PHASE6_METADATA
extern void* hak_tiny_alloc_metadata(size_t size);
extern void hak_tiny_free_metadata(void* ptr);
#endif
// Phase 6-1.7: Box Theory Refactoring - Wrapper function declarations
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
extern void* hak_tiny_alloc_fast_wrapper(size_t size);
extern void hak_tiny_free_fast_wrapper(void* ptr);
#endif
#include "box/hak_exit_debug.inc.h"
// ============================================================================
// KPI Measurement (for UCB1) - NEW!
// ============================================================================
#include "box/hak_kpi_util.inc.h"
// ============================================================================
// Internal Helpers
// ============================================================================
// Phase 6.8: All legacy profiling functions removed
// - hash_site(), get_site_profile(), infer_policy(), record_alloc(), allocate_with_policy()
// Replaced by ELO-based allocation (hakmem_elo.c)
// ============================================================================
// BigCache eviction callback
// ============================================================================
// BigCache eviction callback (called when cache is full and needs to evict)
static void bigcache_free_callback(void* ptr, size_t size) {
    (void)size; // Not used
    if (!ptr) return;
    // Get raw pointer and header
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    // Verify magic before accessing method field
    if (hdr->magic != HAKMEM_MAGIC) {
        fprintf(stderr, "[hakmem] BigCache eviction: invalid magic, fallback to free()\n");
        // CRITICAL FIX: When magic is invalid, allocation came from LIBC (NO header)
        // Therefore ptr IS the allocated address, not raw (ptr - HEADER_SIZE)
        // MUST use __libc_free to avoid infinite recursion through free() wrapper
        extern void __libc_free(void*);
        __libc_free(ptr);
        return;
    }
    // Dispatch based on allocation method
    switch (hdr->method) {
        case ALLOC_METHOD_MALLOC:
            free(raw);
            break;
        case ALLOC_METHOD_MMAP:
            // Cold eviction: route through batch for large blocks
            // This completes Phase 6.3 architecture
#ifdef __linux__
            if (hdr->size >= BATCH_MIN_SIZE) {
                // Large blocks: use batch (deferred munmap + TLB optimization)
                hak_batch_add(raw, hdr->size);
            } else {
                // Small blocks: direct munmap (not worth batching)
                // Phase 6.11.1: Try whale cache first
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    // Whale cache full or not a whale: munmap
                    madvise(raw, hdr->size, MADV_FREE); // Best-effort
                    hkm_sys_munmap(raw, hdr->size);
                }
                // else: Successfully cached in whale cache (no munmap!)
            }
#else
            free(raw); // Fallback (should not happen)
#endif
            break;
        default:
            fprintf(stderr, "[hakmem] BigCache eviction: unknown method %d\n", hdr->method);
            free(raw); // Fallback
            break;
    }
}
// ============================================================================
// Public API
// ============================================================================
// Thread-safe one-time initialization
#include "box/hak_core_init.inc.h"
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
__attribute__((always_inline))
// hak_alloc_at() body lives in the box include below
#include "box/hak_alloc_api.inc.h"
// Phase 9.1: Force inline for performance (reduces call overhead by ~30-40%)
// Phase 6-1.7: Disable inline for box refactor to avoid recursive inlining
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
// hak_free_at() body lives in the box include below
#include "box/hak_free_api.inc.h"
void hak_print_stats(void) {
    printf("\n========================================\n");
    printf("hakmem ELO-based Profiling Statistics\n");
    printf("========================================\n");
    printf("\nOptimization Stats:\n");
    printf(" malloc() calls: %llu\n", (unsigned long long)g_malloc_count);
    hak_elo_print_leaderboard();
    printf("========================================\n\n");
}
// ============================================================================
// Standard C Library Wrappers (LD_PRELOAD) — boxed include
// ============================================================================
#include "box/hak_wrappers.inc.h"
// (wrappers moved to box/hak_wrappers.inc.h)