Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) 118c0e4857 Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。

**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)

**計測結果**:

Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)

C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)

**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討

**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00

370 lines
15 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
// Phase 15: Box Separation - One-way routing (FG → Domain boxes → ExternalGuard)
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "../hakmem_trace_master.h" // Unified trace control (HAKMEM_TRACE + per-feature ENV)
#include "front_gate_v2.h" // Phase 15: Box FG V2 - 1-byte header classification
#include "external_guard_box.h" // Phase 15: Box ExternalGuard - mincore (ENV controlled)
#include "fg_tiny_gate_box.h" // Tiny gate guard box (Superslab check)
#include "tiny_free_gate_box.h" // Tiny Free Gatekeeper Box (USER→Fast Path 境界)
#include "free_dispatch_stats_box.h" // Phase FREE-DISPATCHER-OPT-1: free dispatcher stats
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
#include "mid_large_config_box.h" // Phase 5-Step3: Compile-time config for Mid/Large
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
// Reports whether free-route tracing is switched on.
// The answer is obtained once from
// hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free") and then cached in a
// function-local static; -1 is the "not resolved yet" marker.
static inline int hak_free_route_trace_on(void) {
    static int cached_state = -1;
    if (__builtin_expect(cached_state != -1, 1)) {
        return cached_state;  // already resolved on an earlier call
    }
    cached_state = hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free");
    return cached_state;
}
// Returns the address of the counter that limits how many route-trace lines
// may still be printed. The counter starts at 32; callers decrement it
// through the returned pointer.
static inline int* hak_free_route_budget_ptr(void) {
    static int remaining_lines = 32;
    int* const budget_slot = &remaining_lines;
    return budget_slot;
}
// Emits one "[FREE_ROUTE]" line on stderr for the given pointer.
//   tag - label printed before the pointer
//   p   - pointer being freed (printed with %p)
// Nothing is printed when tracing is off or once the budget returned by
// hak_free_route_budget_ptr() has dropped to zero or below; each printed line
// consumes one unit of that budget.
static inline void hak_free_route_log(const char* tag, void* p) {
    if (hak_free_route_trace_on()) {
        int* remaining = hak_free_route_budget_ptr();
        if (*remaining > 0) {
            *remaining -= 1;
            fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
        }
    }
}
#else
// Release-build stand-in for the route logger: accepts the same arguments and
// does nothing with them.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)p;
    (void)tag;
}
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
// Reports whether the SuperSlab-registry request trace is switched on.
// The answer is obtained once from
// hak_trace_check("HAKMEM_SUPER_REG_REQTRACE", "registry") and then cached in
// a function-local static; -1 is the "not resolved yet" marker.
static inline int hak_super_reg_reqtrace_on(void) {
    static int cached_state = -1;
    if (__builtin_expect(cached_state != -1, 1)) {
        return cached_state;  // already resolved on an earlier call
    }
    cached_state = hak_trace_check("HAKMEM_SUPER_REG_REQTRACE", "registry");
    return cached_state;
}
// Returns the address of the counter that limits how many request-trace dumps
// may still be printed. The counter starts at 16; callers decrement it
// through the returned pointer.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int remaining_dumps = 16;
    int* const budget_slot = &remaining_dumps;
    return budget_slot;
}
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
if (!hak_super_reg_reqtrace_on()) return;
int* b = hak_super_reg_reqtrace_budget_ptr();
if (*b <= 0) return;
(*b)--;
uintptr_t p = (uintptr_t)ptr;
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
unsigned long long mg20 = 0, mg21 = 0;
// Best-effort reads (may be unmapped; wrap in volatile access)
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
fprintf(stderr,
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
ptr, (void*)s20, mg20, (void*)s21, mg21);
}
// hak_free_at — route a pointer being freed to the allocator path that owns it.
//
// Parameters:
//   ptr  - user pointer to release; NULL returns immediately.
//   size - not used by this function.
//   site - call-site id; forwarded (as uintptr_t) to the Mid and L25 fast-free
//          calls.
//
// Routing order, first match wins:
//   1. Optional bench shortcut (HAKMEM_BENCH_FAST_FRONT=1) through
//      tiny_free_gate_try_fast().
//   2. fg_classify_domain() + fg_tiny_gate(): TINY goes to the tiny free path;
//      POOL goes to pool_free() when HAKMEM_POOL_TLS_PHASE1 is defined.
//   3. Registry lookups: hak_pool_mid_lookup(), hak_l25_lookup(),
//      hak_super_lookup().
//   4. The AllocHeader located HEADER_SIZE bytes before ptr: an invalid magic
//      prints diagnostics and aborts (or skips the free when the front gate
//      had misclassified the pointer); a valid header is offered to BigCache
//      and then dispatched on hdr->method.
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
    // Phase FREE-DISPATCHER-OPT-1: Total call counter (at function entry)
    FREE_DISPATCH_STAT_INC(total_calls);
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0);
#endif
    // Trace only the first 128 entries. The relaxed load in front of the
    // increment stops the counter from growing once the limit is reached, so
    // it can never wrap to a negative value and re-enable the trace, and the
    // steady state costs a load instead of an atomic read-modify-write.
    static _Atomic int g_hak_free_at_trace = 0;
    if (atomic_load_explicit(&g_hak_free_at_trace, memory_order_relaxed) < 128 &&
        atomic_fetch_add_explicit(&g_hak_free_at_trace, 1, memory_order_relaxed) < 128) {
        HAK_TRACE("[hak_free_at_enter]\n");
    }
    (void)site; (void)size;
    int fg_misclass = 0; // Set when FG said Tiny but registry rejects

    // Optional lightweight trace of early free calls (first 8 only)
#if !HAKMEM_BUILD_RELEASE
    static int free_trace_en = -1;
    static _Atomic int free_trace_count = 0;
    if (__builtin_expect(free_trace_en == -1, 0)) {
        // Unified trace: HAKMEM_FREE_WRAP_TRACE or HAKMEM_TRACE=free
        free_trace_en = hak_trace_check("HAKMEM_FREE_WRAP_TRACE", "free");
    }
    // Same saturation guard as above: stop incrementing once 8 lines are out.
    if (free_trace_en &&
        atomic_load_explicit(&free_trace_count, memory_order_relaxed) < 8) {
        int n = atomic_fetch_add(&free_trace_count, 1);
        if (n < 8) {
            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
        }
    }
#endif

    // Bench-only ultra-short path: try header-based tiny fast free first
    // Enable with: HAKMEM_BENCH_FAST_FRONT=1
    {
        static int g_bench_fast_front = -1;
        if (__builtin_expect(g_bench_fast_front == -1, 0)) {
            const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
            g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
        }
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
                HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
                return;
            }
        }
#endif
    }

    if (!ptr) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }

    // ========== Phase 15: Box FG V2 Classification ==========
    // One-way routing: FG → Domain boxes → ExternalGuard
    // Box FG V2: Ultra-fast 1-byte header classification (no mincore, no registry)
    fg_classification_t fg = fg_classify_domain(ptr);
    hak_free_route_log(fg_domain_name(fg.domain), ptr);

    // Fail-fast: a Tiny classification requires a registered Superslab; when
    // there is none the gate hands back a MIDCAND classification instead.
    fg_tiny_gate_result_t fg_guard = fg_tiny_gate(ptr, fg);
    fg = fg_guard.fg;
    fg_misclass = fg_guard.misclassified;

    // Phase FREE-DISPATCHER-OPT-1: Domain classification counters
    if (__builtin_expect(free_dispatch_stats_enabled(), 0)) {
        switch (fg.domain) {
            case FG_DOMAIN_TINY:
                g_free_dispatch_stats.domain_tiny++;
                break;
            case FG_DOMAIN_MIDCAND:
            case FG_DOMAIN_POOL:
                g_free_dispatch_stats.domain_mid++;
                break;
            case FG_DOMAIN_EXTERNAL:
                g_free_dispatch_stats.domain_large++;
                break;
        }
    }

    switch (fg.domain) {
        case FG_DOMAIN_TINY: {
            // TODO (Phase FREE-FRONT-V3-2): v3 snapshot routing is not wired in
            // here yet; once ready it is meant to be gated on
            // free_front_v3_enabled(). Until then the legacy path below is the
            // only Tiny route.

            // Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx)
#if HAKMEM_TINY_HEADER_CLASSIDX
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
                hak_free_v2_track_fast();
#endif
                goto done;
            }
#if !HAKMEM_BUILD_RELEASE
            hak_free_v2_track_slow();
#endif
#endif
            hak_tiny_free(ptr);
            goto done;
        }
        // FG_DOMAIN_POOL must appear exactly once in this switch: either as the
        // Pool TLS handler or as part of the fall-through group, never both
        // (a duplicate case label does not compile).
#ifdef HAKMEM_POOL_TLS_PHASE1
        case FG_DOMAIN_POOL: {
            // Pool TLS: 8KB-52KB allocations with 1-byte header (0xb0 | class_idx)
            pool_free(ptr);
            goto done;
        }
#else
        case FG_DOMAIN_POOL:
#endif
        case FG_DOMAIN_MIDCAND:
        case FG_DOMAIN_EXTERNAL:
            // Continue with registry lookup + AllocHeader dispatch below
            break;
    }

    // ========== Slow Path: 16-byte AllocHeader Dispatch ==========
    // Handle Mid/Large allocations (malloc/mmap/Pool/L25)
    // Note: All Tiny allocations (C0-C7) already handled by Front Gate above

    // ========== Mid/L25/Tiny Registry Lookup (Headerless) ==========
    // MIDCAND: Could be Mid/Large/C7, needs registry lookup
    {
        extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
        extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t mid_sz = 0;
        if (hak_pool_mid_lookup(ptr, &mid_sz)) {
            hak_free_route_log("mid_hit", ptr);
            hak_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }
    {
        extern int hak_l25_lookup(void* ptr, size_t* out_size);
        extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t l25_sz = 0;
        if (hak_l25_lookup(ptr, &l25_sz)) {
            hak_free_route_log("l25_hit", ptr);
            hkm_ace_stat_large_free();
            hak_l25_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }

    // PHASE 15: C7 (1KB headerless) registry lookup
    // Box FG V2 cannot classify C7 (no header), so use registry
    {
        SuperSlab* ss = hak_super_lookup(ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            hak_free_route_log("tiny_c7_registry", ptr);
            hak_tiny_free(ptr);
            goto done;
        }
    }

    // Raw header dispatch (mmap / malloc / BigCache, etc.)
    {
        // Declared once at the top of the block: a declaration placed directly
        // after a `case` label is not valid C11.
        extern void __libc_free(void*);

        void* raw = (char*)ptr - HEADER_SIZE;

        // History: an earlier phase probed the header with mincore() before
        // reading it; Phase 3 (2025-11-29) removed that check. The header is
        // now dereferenced directly, relying on the classification and
        // registry lookups above.
        AllocHeader* hdr = (AllocHeader*)raw;

        if (hdr->magic != HAKMEM_MAGIC) {
            // CRITICAL FIX (2025-11-07): Invalid magic could mean:
            // 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
            // 2. Libc allocation from mixed environment
            // 3. Double-free or corrupted pointer
            if (g_invalid_free_log) {
                fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
            }
            // Budgeted request-trace to help diagnose SS registry lookups
            hak_super_reg_reqtrace_dump(ptr);

            // Fail-fast diagnostics: never hand bad headers to Tiny or libc silently
            SuperSlab* ss_diag = hak_super_lookup(ptr);
            int slab_diag = ss_diag ? slab_index_for(ss_diag, ptr) : -1;
            fprintf(stderr,
                    "[INVALID_MAGIC_FREE] ptr=%p magic=0x%X mode=%d ss=%p slab=%d\n",
                    ptr, hdr->magic, g_invalid_free_mode, (void*)ss_diag, slab_diag);
            tiny_guard_on_invalid(ptr, hdr->magic);

            // The front gate classified this pointer as Tiny but the gate check
            // rejected it: skip the free entirely (deliberate leak) instead of
            // handing the pointer to any allocator.
            if (fg_misclass) {
                fprintf(stderr, "[FREE_MISCLASS_SKIP] ptr=%p hdr=0x%x (ignored to avoid corruption)\n",
                        ptr, hdr->magic);
                goto done; // leak-safe skip: not our allocation
            }

            // Never route invalid headers into Tiny; fail-fast.
            // NOTE(review): both modes end in abort(); the "Skipping free"
            // warning text suggests mode != 0 was once meant to return
            // instead — confirm the intended behavior.
            if (g_invalid_free_mode) {
                static int leak_warn = 0;
                if (!leak_warn) {
                    fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
                    leak_warn = 1;
                }
                abort();
            } else {
                ptr_trace_dump_now("free_api_invalid_magic_failfast");
                abort();
            }
        }

        // Phase 5-Step3: Use Mid/Large Config Box (compile-time constant in PGO mode)
        // Blocks whose class_bytes is at least 2 MiB are offered to BigCache first.
        if (MID_LARGE_BIGCACHE_ENABLED && hdr->class_bytes >= 2097152) {
            if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
        }
        {
            // Optional: also offer 512 KiB .. <2 MiB blocks to BigCache when
            // HAKMEM_BIGCACHE_L25 parses to a non-zero integer (cached after
            // the first check).
            static int g_bc_l25_en_free = -1;
            if (g_bc_l25_en_free == -1) {
                const char* e = getenv("HAKMEM_BIGCACHE_L25");
                g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0;
            }
            if (g_bc_l25_en_free && MID_LARGE_BIGCACHE_ENABLED &&
                hdr->size >= 524288 && hdr->size < 2097152) {
                if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
            }
        }

        switch (hdr->method) {
            case ALLOC_METHOD_POOL:
                if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
                    hkm_ace_stat_mid_free();
                    hak_pool_free(ptr, hdr->size, hdr->alloc_site);
                    goto done;
                }
                break;
            case ALLOC_METHOD_L25_POOL:
                hkm_ace_stat_large_free();
                hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
                goto done;
            case ALLOC_METHOD_MALLOC:
                // CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
                // Using free(raw) would go through wrapper → infinite recursion
                hak_free_route_log("malloc_hdr", ptr);
                ptr_trace_dump_now("free_api_libc_malloc_hdr");
                fprintf(stderr, "[FREE_LIBC_HDR] raw=%p user=%p size=%zu method=%d magic=0x%X\n",
                        raw, ptr, hdr->size, (int)hdr->method, hdr->magic);
                __libc_free(raw);
                break;
            case ALLOC_METHOD_MMAP:
#ifdef __linux__
                if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                    hak_batch_add(raw, hdr->size);
                    goto done;
                }
                // Unmap only when hkm_whale_put() returns non-zero.
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    hkm_sys_munmap(raw, hdr->size);
                }
#else
                // CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
                ptr_trace_dump_now("free_api_libc_mmap_other");
                __libc_free(raw);
#endif
                break;
            default:
                HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method);
                break;
        }
    }

done:
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
    return;
}
#endif // HAK_FREE_API_INC_H