Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

330 lines
13 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
// Phase 15: Box Separation - One-way routing (FG → Domain boxes → ExternalGuard)
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "../hakmem_trace_master.h" // Unified trace control (HAKMEM_TRACE + per-feature ENV)
#include "front_gate_v2.h" // Phase 15: Box FG V2 - 1-byte header classification
#include "external_guard_box.h" // Phase 15: Box ExternalGuard - mincore (ENV controlled)
#include "fg_tiny_gate_box.h" // Tiny gate guard box (Superslab check)
#include "tiny_free_gate_box.h" // Tiny Free Gatekeeper Box (USER→Fast Path 境界)
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
#include "mid_large_config_box.h" // Phase 5-Step3: Compile-time config for Mid/Large
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
// Reports whether free-route tracing is switched on.
// The flag is obtained from hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free")
// (unified trace: HAKMEM_FREE_ROUTE_TRACE or HAKMEM_TRACE=free) and kept in a
// function-local static; -1 marks "not evaluated yet".
static inline int hak_free_route_trace_on(void) {
    static int cached_flag = -1;
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }
    cached_flag = hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free");
    return cached_flag;
}
// Hands out the address of the counter that caps how many [FREE_ROUTE] lines
// are printed. The counter starts at 32 and is modified through the returned
// pointer.
static inline int* hak_free_route_budget_ptr(void) {
    static int lines_left = 32;
    int* const counter = &lines_left;
    return counter;
}
// Debug-build route logger: prints one "[FREE_ROUTE] <tag> ptr=<p>" line to
// stderr, but only while tracing is enabled and the shared line budget has
// not run out. Each printed line consumes one unit of the budget.
static inline void hak_free_route_log(const char* tag, void* p) {
    if (hak_free_route_trace_on()) {
        int* remaining = hak_free_route_budget_ptr();
        if (*remaining > 0) {
            *remaining -= 1;
            fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
        }
    }
}
#else
// Release-build stub: route logging is compiled out, both arguments are ignored.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)p;
    (void)tag;
}
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
// Reports whether the SuperSlab-registry request trace is switched on.
// The flag is obtained from hak_trace_check("HAKMEM_SUPER_REG_REQTRACE",
// "registry") (unified trace: HAKMEM_SUPER_REG_REQTRACE or
// HAKMEM_TRACE=registry) and kept in a function-local static; -1 marks
// "not evaluated yet".
static inline int hak_super_reg_reqtrace_on(void) {
    static int cached_flag = -1;
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }
    cached_flag = hak_trace_check("HAKMEM_SUPER_REG_REQTRACE", "registry");
    return cached_flag;
}
// Hands out the address of the counter that caps how many
// [SUPER_REG_REQTRACE] lines are printed. The counter starts at 16 and is
// modified through the returned pointer.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int dumps_left = 16;
    int* const counter = &dumps_left;
    return counter;
}
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
if (!hak_super_reg_reqtrace_on()) return;
int* b = hak_super_reg_reqtrace_budget_ptr();
if (*b <= 0) return;
(*b)--;
uintptr_t p = (uintptr_t)ptr;
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
unsigned long long mg20 = 0, mg21 = 0;
// Best-effort reads (may be unmapped; wrap in volatile access)
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
fprintf(stderr,
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
ptr, (void*)s20, mg20, (void*)s21, mg21);
}
// hak_free_at — free entry point: classify `ptr` and hand it to the allocator
// box that owns it.
//
// Routing order (first match wins):
//   1. Bench-only fast path (env HAKMEM_BENCH_FAST_FRONT, header builds only).
//   2. NULL pointer: nothing to do.
//   3. Front Gate V2 classification, checked by fg_tiny_gate():
//        TINY → tiny_free_gate_try_fast(), then hak_tiny_free()
//        POOL → pool_free() when built with HAKMEM_POOL_TLS_PHASE1
//        anything else continues below.
//   4. Registry lookups: Mid pool, L25 pool, SuperSlab (headerless Tiny).
//   5. AllocHeader located HEADER_SIZE bytes before `ptr`: BigCache first,
//      then dispatch on hdr->method.
//
// Parameters:
//   ptr  - user pointer to release; NULL is accepted and ignored.
//   size - not used by this function.
//   site - call-site id, forwarded to the Mid/L25 fast-free routines.
//
// If the AllocHeader magic is not HAKMEM_MAGIC, diagnostics are printed and
// the process aborts, except when the Front Gate had misclassified the
// pointer as Tiny, in which case the free is skipped.
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0);
#endif
    // Entry trace for the first 128 calls only.
    static _Atomic int g_hak_free_at_trace = 0;
    if (atomic_fetch_add_explicit(&g_hak_free_at_trace, 1, memory_order_relaxed) < 128) {
        HAK_TRACE("[hak_free_at_enter]\n");
    }
    (void)site; (void)size;
    int fg_misclass = 0; // Set when FG said Tiny but registry rejects

    // Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
    static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
    if (__builtin_expect(free_trace_en == -1, 0)) {
        // Unified trace: HAKMEM_FREE_WRAP_TRACE or HAKMEM_TRACE=free
        free_trace_en = hak_trace_check("HAKMEM_FREE_WRAP_TRACE", "free");
    }
    if (free_trace_en) {
        int n = atomic_fetch_add(&free_trace_count, 1);
        if (n < 8) {
            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
        }
    }
#endif

    // Bench-only ultra-short path: try header-based tiny fast free first
    // Enable with: HAKMEM_BENCH_FAST_FRONT=1
    {
        static int g_bench_fast_front = -1;
        if (__builtin_expect(g_bench_fast_front == -1, 0)) {
            const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
            g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
        }
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
                HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
                return;
            }
        }
#endif
    }

    if (!ptr) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }

    // ========== Phase 15: Box FG V2 Classification ==========
    // One-way routing: FG → Domain boxes → ExternalGuard
    // Box FG V2: Ultra-fast 1-byte header classification (no mincore, no registry)
    fg_classification_t fg = fg_classify_domain(ptr);
    hak_free_route_log(fg_domain_name(fg.domain), ptr);
    // Fail-fast: a Tiny verdict requires the pointer to be registered in a
    // SuperSlab; when it is not, the gate box hands back MIDCAND instead.
    fg_tiny_gate_result_t fg_guard = fg_tiny_gate(ptr, fg);
    fg = fg_guard.fg;
    fg_misclass = fg_guard.misclassified;

    switch (fg.domain) {
        case FG_DOMAIN_TINY: {
            // Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx)
#if HAKMEM_TINY_HEADER_CLASSIDX
            if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
                hak_free_v2_track_fast();
#endif
                goto done;
            }
#if !HAKMEM_BUILD_RELEASE
            hak_free_v2_track_slow();
#endif
#endif
            hak_tiny_free(ptr);
            goto done;
        }
#ifdef HAKMEM_POOL_TLS_PHASE1
        case FG_DOMAIN_POOL: {
            // Pool TLS: 8KB-52KB allocations with 1-byte header (0xb0 | class_idx)
            pool_free(ptr);
            goto done;
        }
#else
        // Without Pool TLS the POOL domain takes the slow path. This label
        // must only exist in this configuration: a second FG_DOMAIN_POOL
        // label next to the one above is a duplicate-case compile error.
        case FG_DOMAIN_POOL:
#endif
        case FG_DOMAIN_MIDCAND:
        case FG_DOMAIN_EXTERNAL:
            // Fall through to registry lookup + AllocHeader dispatch
            break;
    }

    // ========== Slow Path: 16-byte AllocHeader Dispatch ==========
    // Handle Mid/Large allocations (malloc/mmap/Pool/L25)
    // Note: All Tiny allocations (C0-C7) already handled by Front Gate above

    // ========== Mid/L25/Tiny Registry Lookup (Headerless) ==========
    // MIDCAND: Could be Mid/Large/C7, needs registry lookup
    {
        extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
        extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t mid_sz = 0;
        if (hak_pool_mid_lookup(ptr, &mid_sz)) {
            hak_free_route_log("mid_hit", ptr);
            hak_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }
    {
        extern int hak_l25_lookup(void* ptr, size_t* out_size);
        extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t l25_sz = 0;
        if (hak_l25_lookup(ptr, &l25_sz)) {
            hak_free_route_log("l25_hit", ptr);
            hkm_ace_stat_large_free();
            hak_l25_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }

    // PHASE 15: C7 (1KB headerless) registry lookup
    // Box FG V2 cannot classify C7 (no header), so use registry
    {
        SuperSlab* ss = hak_super_lookup(ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            hak_free_route_log("tiny_c7_registry", ptr);
            hak_tiny_free(ptr);
            goto done;
        }
    }

    // Raw header dispatch (mmap / malloc / BigCache, etc.)
    {
        // Declared once for every libc hand-off in this block. Keeping the
        // declaration out of the switch avoids placing a declaration directly
        // after a case label, which is not valid before C23.
        extern void __libc_free(void*);

        void* raw = (char*)ptr - HEADER_SIZE;

        // The header is dereferenced without an accessibility probe: the
        // former mincore() check was removed in Phase 3 (2025-11-29), relying
        // on the registry lookups and Front Gate classification above.
        AllocHeader* hdr = (AllocHeader*)raw;
        if (hdr->magic != HAKMEM_MAGIC) {
            // CRITICAL FIX (2025-11-07): Invalid magic could mean:
            // 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
            // 2. Libc allocation from mixed environment
            // 3. Double-free or corrupted pointer
            if (g_invalid_free_log) fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
            // Budgeted request-trace to help diagnose SS registry lookups
            hak_super_reg_reqtrace_dump(ptr);
            // Fail-fast diagnostics: never hand bad headers to Tiny or libc silently
            SuperSlab* ss_diag = hak_super_lookup(ptr);
            int slab_diag = ss_diag ? slab_index_for(ss_diag, ptr) : -1;
            fprintf(stderr,
                    "[INVALID_MAGIC_FREE] ptr=%p magic=0x%X mode=%d ss=%p slab=%d\n",
                    ptr, hdr->magic, g_invalid_free_mode, (void*)ss_diag, slab_diag);
            tiny_guard_on_invalid(ptr, hdr->magic);
            // A pointer the Front Gate misclassified as Tiny is skipped
            // (not freed, possibly leaked) rather than risk corrupting TLS.
            if (fg_misclass) {
                fprintf(stderr, "[FREE_MISCLASS_SKIP] ptr=%p hdr=0x%x (ignored to avoid corruption)\n",
                        ptr, hdr->magic);
                goto done; // leak-safe skip: not our allocation
            }
            // Never route invalid headers into Tiny.
            // NOTE(review): both modes end in abort(); the "Skipping free"
            // warning below suggests a non-zero mode was once meant to skip
            // and continue. Confirm which behavior is intended.
            if (g_invalid_free_mode) {
                static int leak_warn = 0;
                if (!leak_warn) {
                    fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
                    leak_warn = 1;
                }
                abort();
            } else {
                ptr_trace_dump_now("free_api_invalid_magic_failfast");
                abort();
            }
        }

        // Phase 5-Step3: Use Mid/Large Config Box (compile-time constant in PGO mode)
        if (MID_LARGE_BIGCACHE_ENABLED && hdr->class_bytes >= 2097152) {
            if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
        }
        {
            // Optional BigCache for 512 KiB .. 2 MiB blocks, enabled by a
            // non-zero HAKMEM_BIGCACHE_L25 (read once, cached in a static).
            static int g_bc_l25_en_free = -1;
            if (g_bc_l25_en_free == -1) {
                const char* e = getenv("HAKMEM_BIGCACHE_L25");
                g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0;
            }
            if (g_bc_l25_en_free && MID_LARGE_BIGCACHE_ENABLED && hdr->size >= 524288 && hdr->size < 2097152) {
                if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
            }
        }

        switch (hdr->method) {
            case ALLOC_METHOD_POOL:
                if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
                    hkm_ace_stat_mid_free();
                    hak_pool_free(ptr, hdr->size, hdr->alloc_site);
                    goto done;
                }
                break;
            case ALLOC_METHOD_L25_POOL:
                hkm_ace_stat_large_free();
                hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
                goto done;
            case ALLOC_METHOD_MALLOC:
                // CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
                // Using free(raw) would go through wrapper → infinite recursion
                hak_free_route_log("malloc_hdr", ptr);
                ptr_trace_dump_now("free_api_libc_malloc_hdr");
                fprintf(stderr, "[FREE_LIBC_HDR] raw=%p user=%p size=%zu method=%d magic=0x%X\n",
                        raw, ptr, hdr->size, (int)hdr->method, hdr->magic);
                __libc_free(raw);
                break;
            case ALLOC_METHOD_MMAP:
#ifdef __linux__
                if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                    hak_batch_add(raw, hdr->size);
                    goto done;
                }
                if (hkm_whale_put(raw, hdr->size) != 0) {
                    hkm_sys_munmap(raw, hdr->size);
                }
#else
                // CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
                ptr_trace_dump_now("free_api_libc_mmap_other");
                __libc_free(raw);
#endif
                break;
            default:
                HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method);
                break;
        }
    }

done:
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
    return;
}
#endif // HAK_FREE_API_INC_H