Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) 84f5034e45 Phase 68: PGO training set diversification (seed/WS expansion)
Changes:
- scripts/box/pgo_fast_profile_config.sh: Expanded WS patterns (3→5) and seeds (1→3)
  for reduced overfitting and better production workload representativeness
- PERFORMANCE_TARGETS_SCORECARD.md: Phase 68 baseline promoted (61.614M = 50.93%)
- CURRENT_TASK.md: Phase 68 marked complete, Phase 67a (layout tax forensics) set Active

Results:
- 10-run verification: +1.19% vs Phase 66 baseline (GO, >+1.0% threshold)
- M1 milestone: 50.93% of mimalloc (target 50%, exceeded by +0.93pp)
- Stability: 10-run mean/median with <2.1% CV

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-17 21:08:17 +09:00

432 lines
18 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
// Phase 15: Box Separation - One-way routing (FG → Domain boxes → ExternalGuard)
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "../hakmem_trace_master.h" // Unified trace control (HAKMEM_TRACE + per-feature ENV)
#include "front_gate_v2.h" // Phase 15: Box FG V2 - 1-byte header classification
#include "external_guard_box.h" // Phase 15: Box ExternalGuard - mincore (ENV controlled)
#include "fg_tiny_gate_box.h" // Tiny gate guard box (Superslab check)
#include "tiny_free_gate_box.h" // Tiny Free Gatekeeper Box (USER→Fast Path 境界)
#include "free_dispatch_stats_box.h" // Phase FREE-DISPATCHER-OPT-1: free dispatcher stats
#include "region_id_v6_box.h" // Phase MID-V3: RegionIdBox for ownership lookup
#include "mid_hotbox_v3_box.h" // Phase MID-V3: Mid/Pool HotBox v3 types
#include "mid_hotbox_v3_env_box.h" // Phase MID-V3: ENV gate for v3
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
#include "mid_large_config_box.h" // Phase 5-Step3: Compile-time config for Mid/Large
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
// Reports whether free-route tracing is enabled.
// The result of hak_trace_check() is cached in a function-local static;
// the value -1 marks "not queried yet" (and is re-queried if returned).
static inline int hak_free_route_trace_on(void) {
    static int s_route_trace_state = -1;
    if (__builtin_expect(s_route_trace_state != -1, 1)) {
        return s_route_trace_state;
    }
    // Unified trace: HAKMEM_FREE_ROUTE_TRACE or HAKMEM_TRACE=free
    s_route_trace_state = hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free");
    return s_route_trace_state;
}
// Returns the address of the route-trace line budget.
// The budget starts at 32 (only the first 32 frees are logged); callers
// decrement it through the returned pointer.
static inline int* hak_free_route_budget_ptr(void) {
    static int s_route_lines_left = 32;
    int* const budget_slot = &s_route_lines_left;
    return budget_slot;
}
// Debug-build route logger: prints one "[FREE_ROUTE]" line to stderr for the
// given tag/pointer, but only while tracing is on and the shared line budget
// is still positive. Each printed line consumes one unit of budget.
static inline void hak_free_route_log(const char* tag, void* p) {
    if (hak_free_route_trace_on()) {
        int* lines_left = hak_free_route_budget_ptr();
        if (*lines_left > 0) {
            *lines_left -= 1;
            fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
        }
    }
}
#else
// Release-build stub: route logging is compiled out, both arguments are ignored.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)p;
    (void)tag;
}
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
// Reports whether the invalid-magic request trace is enabled.
// The result of hak_trace_check() is cached in a function-local static;
// the value -1 marks "not queried yet" (and is re-queried if returned).
static inline int hak_super_reg_reqtrace_on(void) {
    static int s_reqtrace_state = -1;
    if (__builtin_expect(s_reqtrace_state != -1, 1)) {
        return s_reqtrace_state;
    }
    // Unified trace: HAKMEM_SUPER_REG_REQTRACE or HAKMEM_TRACE=registry
    s_reqtrace_state = hak_trace_check("HAKMEM_SUPER_REG_REQTRACE", "registry");
    return s_reqtrace_state;
}
// Returns the address of the request-trace budget.
// The budget starts at 16 (only the first 16 occurrences are dumped);
// callers decrement it through the returned pointer.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int s_reqtrace_left = 16;
    int* const budget_slot = &s_reqtrace_left;
    return budget_slot;
}
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
if (!hak_super_reg_reqtrace_on()) return;
int* b = hak_super_reg_reqtrace_budget_ptr();
if (*b <= 0) return;
(*b)--;
uintptr_t p = (uintptr_t)ptr;
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
unsigned long long mg20 = 0, mg21 = 0;
// Best-effort reads (may be unmapped; wrap in volatile access)
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
fprintf(stderr,
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
ptr, (void*)s20, mg20, (void*)s21, mg21);
}
// hak_free_at: route a user pointer to the backend that should release it.
//
// Order of attempts, as implemented below:
//   1. Optional bench-only fast path (env HAKMEM_BENCH_FAST_FRONT; compiled
//      out when HAKMEM_BENCH_MINIMAL is set).
//   2. NULL pointer: return without doing anything.
//   3. Front-gate classification (fg_classify_domain + fg_tiny_gate):
//        TINY -> tiny_free_gate_try_fast() / hak_tiny_free()
//        POOL -> pool_free() when HAKMEM_POOL_TLS_PHASE1 is defined
//   4. Everything else: MID-V3 region lookup, Mid pool lookup, L25 lookup,
//      SuperSlab registry lookup, and finally AllocHeader-based dispatch.
//
// Parameters:
//   ptr  - user pointer to release; NULL is accepted and ignored.
//   size - not used by this function (explicitly voided).
//   site - call-site id, forwarded to the Mid/L25 pool free functions.
//
// When the AllocHeader magic does not match HAKMEM_MAGIC, diagnostics are
// written to stderr; the pointer is then skipped if the front gate flagged it
// as misclassified, otherwise the process aborts.
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
    // Phase FREE-DISPATCHER-OPT-1: Total call counter (at function entry)
    FREE_DISPATCH_STAT_INC(total_calls);
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0);
#endif
    static _Atomic int g_hak_free_at_trace = 0;
    if (atomic_fetch_add_explicit(&g_hak_free_at_trace, 1, memory_order_relaxed) < 128) {
        HAK_TRACE("[hak_free_at_enter]\n");
    }
    (void)site; (void)size;
    int fg_misclass = 0; // Set when FG said Tiny but registry rejects

    // Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
    static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
    if (__builtin_expect(free_trace_en == -1, 0)) {
        // Unified trace: HAKMEM_FREE_WRAP_TRACE or HAKMEM_TRACE=free
        free_trace_en = hak_trace_check("HAKMEM_FREE_WRAP_TRACE", "free");
    }
    if (free_trace_en) {
        int n = atomic_fetch_add(&free_trace_count, 1);
        if (n < 8) {
            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
        }
    }
#endif

    // Bench-only ultra-short path: try header-based tiny fast free first
    // Enable with: HAKMEM_BENCH_FAST_FRONT=1
    // Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
#if !HAKMEM_BENCH_MINIMAL
    {
        static int g_bench_fast_front = -1;
        if (__builtin_expect(g_bench_fast_front == -1, 0)) {
            const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
            g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
        }
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
                HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
                return;
            }
        }
#endif
    }
#endif

    if (!ptr) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }

    // ========== Phase 15: Box FG V2 Classification ==========
    // One-way routing: FG → Domain boxes → ExternalGuard
    // Box FG V2: Ultra-fast 1-byte header classification (no mincore, no registry)
    fg_classification_t fg = fg_classify_domain(ptr);
    hak_free_route_log(fg_domain_name(fg.domain), ptr);

    // Fail-Fast: a Tiny classification requires Superslab registration;
    // when it is missing the gate hands back a MIDCAND classification (boxed).
    fg_tiny_gate_result_t fg_guard = fg_tiny_gate(ptr, fg);
    fg = fg_guard.fg;
    fg_misclass = fg_guard.misclassified;

    // Phase FREE-DISPATCHER-OPT-1: Domain classification counters
    if (__builtin_expect(free_dispatch_stats_enabled(), 0)) {
        switch (fg.domain) {
            case FG_DOMAIN_TINY:
                g_free_dispatch_stats.domain_tiny++;
                break;
            case FG_DOMAIN_MIDCAND:
            case FG_DOMAIN_POOL:
                g_free_dispatch_stats.domain_mid++;
                break;
            case FG_DOMAIN_EXTERNAL:
                g_free_dispatch_stats.domain_large++;
                break;
        }
    }

    switch (fg.domain) {
        case FG_DOMAIN_TINY: {
            // Phase FREE-FRONT-V3-2: v3 snapshot routing (optional, default OFF)
            // Optimized: No tiny_route_for_class() calls, no redundant ENV checks
            // Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
#if HAKMEM_TINY_HEADER_CLASSIDX && !HAKMEM_BENCH_MINIMAL
            {
                // Check if v3 snapshot routing is enabled (cached)
                static int g_v3_enabled = -1;
                if (__builtin_expect(g_v3_enabled == -1, 0)) {
                    // For now, v3 snapshot routing is DISABLED by default (experimental)
                    // Phase v3-2 infrastructure is ready but not yet integrated
                    g_v3_enabled = 0; // TODO: Enable when ready: free_front_v3_enabled() ? 1 : 0;
                }
                // Note: v3 snapshot path currently disabled (Phase v3-2 infrastructure only)
                // When enabled, it would consolidate free routing logic and remove redundant
                // ENV checks from the hot path. For now, use legacy routing below.
                (void)g_v3_enabled; // Suppress unused variable warning
            }
#endif
            // Legacy path (default when v3 is OFF)
            // Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx)
#if HAKMEM_TINY_HEADER_CLASSIDX
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
                hak_free_v2_track_fast();
#endif
                goto done;
            }
#if !HAKMEM_BUILD_RELEASE
            hak_free_v2_track_slow();
#endif
#endif
            hak_tiny_free(ptr);
            goto done;
        }
        // FG_DOMAIN_POOL must be emitted exactly once: either as the Pool TLS
        // handler or as a plain fall-through label. Emitting both would be a
        // duplicate case label (compile error) when HAKMEM_POOL_TLS_PHASE1 is set.
#ifdef HAKMEM_POOL_TLS_PHASE1
        case FG_DOMAIN_POOL: {
            // Pool TLS: 8KB-52KB allocations with 1-byte header (0xb0 | class_idx)
            pool_free(ptr);
            goto done;
        }
#else
        case FG_DOMAIN_POOL:
#endif
        case FG_DOMAIN_MIDCAND:
        case FG_DOMAIN_EXTERNAL:
            // Fall through to registry lookup + AllocHeader dispatch
            break;
    }

    // ========== Slow Path: 16-byte AllocHeader Dispatch ==========
    // Handle Mid/Large allocations (malloc/mmap/Pool/L25)
    // Note: All Tiny allocations (C0-C7) already handled by Front Gate above
    // ========== Mid/L25/Tiny Registry Lookup (Headerless) ==========
    // MIDCAND: Could be Mid/Large/C7, needs registry lookup
    // Phase FREE-DISPATCH-SSOT: Single Source of Truth for region lookup
    // ENV: HAKMEM_FREE_DISPATCH_SSOT (default: 0 for backward compat, 1 for optimized)
    // Problem: Old code did region_id_lookup TWICE in MID-V3 path (once inside mid_hot_v3_free, once after)
    // Fix: Do lookup ONCE at top, dispatch based on kind
    static int g_free_dispatch_ssot = -1;
    if (__builtin_expect(g_free_dispatch_ssot == -1, 0)) {
        const char* env = getenv("HAKMEM_FREE_DISPATCH_SSOT");
        g_free_dispatch_ssot = (env && *env == '1') ? 1 : 0;
    }
#if !HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
    if (g_free_dispatch_ssot && __builtin_expect(mid_v3_enabled(), 0)) {
        // SSOT=1: Single lookup, then dispatch
        extern RegionLookupV6 region_id_lookup_cached_v6(void* ptr);
        RegionLookupV6 lk = region_id_lookup_cached_v6(ptr);
        if (lk.kind == REGION_KIND_MID_V3) {
            // Owned by MID-V3: call free handler directly (no internal lookup)
            // Note: We pass the pre-looked-up info implicitly via TLS cache
            mid_hot_v3_free(ptr);
            if (mid_v3_debug_enabled()) {
                static _Atomic int free_log_count = 0;
                if (atomic_fetch_add(&free_log_count, 1) < 10) {
                    fprintf(stderr, "[MID_V3] Free SSOT: ptr=%p\n", ptr);
                }
            }
            goto done;
        }
        // Not MID-V3: fall through to other dispatch paths below
    } else if (__builtin_expect(mid_v3_enabled(), 0)) {
        // SSOT=0: Legacy double-lookup path (for A/B comparison)
        // RegionIdBox lookup to check if v3 owns this pointer
        // mid_hot_v3_free() will check internally and return early if not owned
        mid_hot_v3_free(ptr);
        // Check if v3 actually owned it by doing a quick verification
        // For safety, check ownership explicitly before continuing
        // This prevents double-free if v3 handled it
        extern RegionLookupV6 region_id_lookup_v6(void* ptr);
        RegionLookupV6 lk = region_id_lookup_v6(ptr);
        if (lk.kind == REGION_KIND_MID_V3) {
            if (mid_v3_debug_enabled()) {
                static _Atomic int free_log_count = 0;
                if (atomic_fetch_add(&free_log_count, 1) < 10) {
                    fprintf(stderr, "[MID_V3] Free: ptr=%p\n", ptr);
                }
            }
            goto done;
        }
    }
#endif

    {
        extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
        extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t mid_sz = 0;
        if (hak_pool_mid_lookup(ptr, &mid_sz)) {
            hak_free_route_log("mid_hit", ptr);
            hak_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }
    {
        extern int hak_l25_lookup(void* ptr, size_t* out_size);
        extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t l25_sz = 0;
        if (hak_l25_lookup(ptr, &l25_sz)) {
            hak_free_route_log("l25_hit", ptr);
            hkm_ace_stat_large_free();
            hak_l25_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }

    // PHASE 15: C7 (1KB headerless) registry lookup
    // Box FG V2 cannot classify C7 (no header), so use registry
    {
        SuperSlab* ss = hak_super_lookup(ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            hak_free_route_log("tiny_c7_registry", ptr);
            hak_tiny_free(ptr);
            goto done;
        }
    }

    // Raw header dispatch (mmap/malloc/BigCache, etc.)
    {
        // Declared once for the whole block. A declaration placed directly
        // after a `case` label is not valid C before C23, so it must not live
        // inside the switch arms below.
        extern void __libc_free(void*);

        void* raw = (char*)ptr - HEADER_SIZE;
        // Phase 3 (2025-11-29): mincore() completely removed
        //
        // History:
        // - Phase 9: Originally used mincore() syscall to verify memory accessibility
        // - 2025-11-14: Added DISABLE_MINCORE flag for performance (+10.3% improvement)
        // - Phase 1b/2: Registry-based validation provides sufficient safety
        // - Phase 3: Dead code removal - mincore no longer needed
        //
        // Safety: Trust internal metadata (registry/headers/FrontGate classification)
        // - SuperSlab registry validates all Tiny allocations (Phase 1b/2)
        // - Headers validate Mid/Large allocations
        // - FrontGate classifier routes external allocations correctly
        //
        // The header is dereferenced without any mapping probe; the former
        // always-false "is_mapped" fallback branch has been removed.
        AllocHeader* hdr = (AllocHeader*)raw;
        if (hdr->magic != HAKMEM_MAGIC) {
            // CRITICAL FIX (2025-11-07): Invalid magic could mean:
            // 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
            // 2. Libc allocation from mixed environment
            // 3. Double-free or corrupted pointer
            if (g_invalid_free_log) {
                fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
            }
            // Budgeted request-trace to help diagnose SS registry lookups
            hak_super_reg_reqtrace_dump(ptr);
            // Fail-fast diagnostics: never hand bad headers to Tiny or libc silently
            SuperSlab* ss_diag = hak_super_lookup(ptr);
            int slab_diag = ss_diag ? slab_index_for(ss_diag, ptr) : -1;
            fprintf(stderr,
                    "[INVALID_MAGIC_FREE] ptr=%p magic=0x%X mode=%d ss=%p slab=%d\n",
                    ptr, hdr->magic, g_invalid_free_mode, (void*)ss_diag, slab_diag);
            tiny_guard_on_invalid(ptr, hdr->magic);
            // If the front gate flagged this pointer as a misclassified Tiny
            // header miss, skip the free entirely instead of releasing it.
            if (fg_misclass) {
                fprintf(stderr, "[FREE_MISCLASS_SKIP] ptr=%p hdr=0x%x (ignored to avoid corruption)\n",
                        ptr, hdr->magic);
                goto done; // leak-safe skip: not our allocation
            }
            // Never route invalid headers into Tiny; fail-fast by default
            // NOTE(review): both modes end in abort(); the "Skipping free"
            // warning text below does not match that — confirm intended policy.
            if (g_invalid_free_mode) {
                static int leak_warn = 0;
                if (!leak_warn) {
                    fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
                    leak_warn = 1;
                }
                abort();
            } else {
                ptr_trace_dump_now("free_api_invalid_magic_failfast");
                abort();
            }
        }

        // Phase 5-Step3: Use Mid/Large Config Box (compile-time constant in PGO mode)
        if (MID_LARGE_BIGCACHE_ENABLED && hdr->class_bytes >= 2097152) {
            if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
        }
        {
            // Optional BigCache for 512 KiB <= size < 2 MiB, gated by env HAKMEM_BIGCACHE_L25
            static int g_bc_l25_en_free = -1;
            if (g_bc_l25_en_free == -1) {
                const char* e = getenv("HAKMEM_BIGCACHE_L25");
                g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0;
            }
            if (g_bc_l25_en_free && MID_LARGE_BIGCACHE_ENABLED && hdr->size >= 524288 && hdr->size < 2097152) {
                if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
            }
        }

        switch (hdr->method) {
            case ALLOC_METHOD_POOL:
                if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
                    hkm_ace_stat_mid_free();
                    hak_pool_free(ptr, hdr->size, hdr->alloc_site);
                    goto done;
                }
                break;
            case ALLOC_METHOD_L25_POOL:
                hkm_ace_stat_large_free();
                hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
                goto done;
            case ALLOC_METHOD_MALLOC:
                // CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
                // Using free(raw) would go through wrapper → infinite recursion
                hak_free_route_log("malloc_hdr", ptr);
                ptr_trace_dump_now("free_api_libc_malloc_hdr");
                fprintf(stderr, "[FREE_LIBC_HDR] raw=%p user=%p size=%zu method=%d magic=0x%X\n",
                        raw, ptr, hdr->size, (int)hdr->method, hdr->magic);
                __libc_free(raw);
                break;
            case ALLOC_METHOD_MMAP:
#ifdef __linux__
                if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                    hak_batch_add(raw, hdr->size);
                    goto done;
                }
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    hkm_sys_munmap(raw, hdr->size);
                }
#else
                // CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
                ptr_trace_dump_now("free_api_libc_mmap_other");
                __libc_free(raw);
#endif
                break;
            default:
                HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method);
                break;
        }
    }

done:
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
    return;
}
#endif // HAK_FREE_API_INC_H