Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) 0546454168 WIP: Add TLS SLL validation and SuperSlab registry fallback
Diagnostic changes (suggested by ChatGPT) to address the TLS_SLL_HDR_RESET issue.
Current status: partial mitigation; the root cause remains unresolved.

Changes Applied:
1. SuperSlab Registry Fallback (hakmem_super_registry.h)
   - Added legacy table probe when hash map lookup misses
   - Prevents NULL returns for valid SuperSlabs during initialization
   - Status: Works, but may hide underlying registration issues

2. TLS SLL Push Validation (tls_sll_box.h)
   - Reject push if SuperSlab lookup returns NULL
   - Reject push if class_idx mismatch detected
   - Added [TLS_SLL_PUSH_NO_SS] diagnostic message
   - Status:  Prevents list corruption (defensive)

3. SuperSlab Allocation Class Fix (superslab_allocate.c)
   - Pass actual class_idx to sp_internal_allocate_superslab
   - Prevents dummy class=8 causing OOB access
   - Status:  Root cause fix for allocation path

4. Debug Output Additions
   - First 256 push/pop operations traced
   - First 4 mismatches logged with details
   - SuperSlab registration state logged
   - Status:  Diagnostic tool (not a fix)

5. TLS Hint Box Removed
   - Deleted ss_tls_hint_box.{c,h} (Phase 1 optimization)
   - Simplified to focus on stability first
   - Status:  Can be re-added after root cause fixed

Current Problem (REMAINS UNSOLVED):
- [TLS_SLL_HDR_RESET] still occurs after ~60 seconds of sh8bench
- The offending pointer is offset by 16 bytes from the expected address (class 1 → class 2 boundary)
- hak_super_lookup returns NULL for that pointer
- This suggests a use-after-free, a double free, or a pointer arithmetic error

Root Cause Analysis:
- Pattern: Pointer offset by +16 (one class 1 stride)
- Timing: Cumulative problem (appears after 60s, not immediately)
- Location: Header corruption detected during TLS SLL pop

Remaining Issues:
⚠️ Registry fallback is defensive (may hide registration bugs)
⚠️ Push validation prevents symptoms but not root cause
⚠️ 16-byte pointer offset source unidentified

Next Steps for Investigation:
1. Full pointer arithmetic audit (Magazine ⇔ TLS SLL paths)
2. Enhanced logging at HDR_RESET point:
   - Expected vs actual pointer value
   - Pointer provenance (where it came from)
   - Allocation trace for that block
3. Verify Headerless flag is OFF throughout build
4. Check for double-offset application in conversions

Technical Assessment:
- 60% root cause fixes (allocation class, validation)
- 40% defensive mitigation (registry fallback, push rejection)

Performance Impact:
- Registry fallback: +10-30 cycles on cold path (negligible)
- Push validation: +5-10 cycles per push (acceptable)
- Overall: < 2% performance impact estimated

Related Issues:
- Phase 1 TLS Hint Box removed temporarily
- Phase 2 Headerless blocked until stability achieved

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 20:42:28 +09:00

328 lines
13 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
// Phase 15: Box Separation - One-way routing (FG → Domain boxes → ExternalGuard)
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../tiny_free_fast_v2.inc.h" // Phase 7: Header-based ultra-fast free
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_v2.h" // Phase 15: Box FG V2 - 1-byte header classification
#include "external_guard_box.h" // Phase 15: Box ExternalGuard - mincore (ENV controlled)
#include "fg_tiny_gate_box.h" // Tiny gate guard box (Superslab check)
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
#include "mid_large_config_box.h" // Phase 5-Step3: Compile-time config for Mid/Large
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
// Reports whether free-route tracing is switched on.
// The HAKMEM_FREE_ROUTE_TRACE environment variable is consulted on the first
// call only; the answer is cached in a function-local static afterwards.
// Returns 1 when the variable is set, non-empty and does not start with '0';
// returns 0 otherwise.
static inline int hak_free_route_trace_on(void) {
    static int cached = -1;  // -1 means "environment not consulted yet"
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }
    const char* env = getenv("HAKMEM_FREE_ROUTE_TRACE");
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        cached = 1;
    } else {
        cached = 0;
    }
    return cached;
}
// Returns the address of the function-local counter that limits how many
// free-route trace lines may still be printed. The counter starts at 32 and
// callers decrement it through the returned pointer.
static inline int* hak_free_route_budget_ptr(void) {
    static int lines_left = 32;
    return &lines_left;
}
// Prints one "[FREE_ROUTE]" line to stderr for the given routing tag and
// pointer, provided tracing is enabled and the line budget is not exhausted.
// Each printed line consumes one unit of the budget.
//   tag - routing label, printed with %s
//   p   - pointer being freed, printed with %p
static inline void hak_free_route_log(const char* tag, void* p) {
    if (hak_free_route_trace_on()) {
        int* remaining = hak_free_route_budget_ptr();
        if (*remaining > 0) {
            *remaining -= 1;
            fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
        }
    }
}
#else
// Variant used when HAKMEM_BUILD_RELEASE is non-zero: route logging is a
// no-op. Both arguments are deliberately ignored.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)p;
    (void)tag;
}
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
// Reports whether the SuperSlab-registry request trace is switched on.
// The HAKMEM_SUPER_REG_REQTRACE environment variable is consulted on the first
// call only; the answer is cached in a function-local static afterwards.
// Returns 1 when the variable is set, non-empty and does not start with '0';
// returns 0 otherwise.
static inline int hak_super_reg_reqtrace_on(void) {
    static int cached = -1;  // -1 means "environment not consulted yet"
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }
    const char* env = getenv("HAKMEM_SUPER_REG_REQTRACE");
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        cached = 1;
    } else {
        cached = 0;
    }
    return cached;
}
// Returns the address of the function-local counter that limits how many
// request-trace dumps may still be printed. The counter starts at 16 and
// callers decrement it through the returned pointer.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int dumps_left = 16;
    return &dumps_left;
}
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
if (!hak_super_reg_reqtrace_on()) return;
int* b = hak_super_reg_reqtrace_budget_ptr();
if (*b <= 0) return;
(*b)--;
uintptr_t p = (uintptr_t)ptr;
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
unsigned long long mg20 = 0, mg21 = 0;
// Best-effort reads (may be unmapped; wrap in volatile access)
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
fprintf(stderr,
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
ptr, (void*)s20, mg20, (void*)s21, mg21);
}
// hak_free_at — route a pointer being freed to the allocator domain that owns it.
//
// Routing order, as implemented below:
//   1. Optional bench-only shortcut (env HAKMEM_BENCH_FAST_FRONT, compiled in
//      only with HAKMEM_TINY_HEADER_CLASSIDX): try hak_tiny_free_fast_v2().
//   2. NULL is a no-op.
//   3. Front-gate classification (fg_classify_domain + fg_tiny_gate):
//      TINY -> tiny free; POOL -> pool_free (only with HAKMEM_POOL_TLS_PHASE1).
//   4. Otherwise registry lookups, in order: Mid pool, L25 pool, SuperSlab.
//   5. Otherwise the AllocHeader located HEADER_SIZE bytes before `ptr` is
//      read. Large blocks may be handed to hak_bigcache_put(); everything else
//      is dispatched on hdr->method. A header whose magic is not HAKMEM_MAGIC
//      is logged, after which the call either returns without freeing (the
//      front gate flagged a misclassification) or abort()s.
//
// Parameters:
//   ptr  - pointer to free; NULL returns immediately.
//   size - not used by this function.
//   site - call-site id; forwarded to the Mid/L25 fast-free functions.
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0);
#endif
    // Trace only the first 128 entries. The relaxed load keeps the steady
    // state read-only: without it every free performs an atomic RMW on this
    // counter, and the signed counter would eventually wrap and re-open the
    // "< 128" window.
    static _Atomic int g_hak_free_at_trace = 0;
    if (atomic_load_explicit(&g_hak_free_at_trace, memory_order_relaxed) < 128 &&
        atomic_fetch_add_explicit(&g_hak_free_at_trace, 1, memory_order_relaxed) < 128) {
        HAK_TRACE("[hak_free_at_enter]\n");
    }
    (void)site; (void)size;
    int fg_misclass = 0; // Set when FG said Tiny but registry rejects

    // Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
    static int free_trace_en = -1;
    static _Atomic int free_trace_count = 0;
    if (__builtin_expect(free_trace_en == -1, 0)) {
        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
        free_trace_en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (free_trace_en) {
        int n = atomic_fetch_add(&free_trace_count, 1);
        if (n < 8) {
            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
        }
    }
#endif

    // Bench-only ultra-short path: try header-based tiny fast free first
    // Enable with: HAKMEM_BENCH_FAST_FRONT=1
    {
        static int g_bench_fast_front = -1;
        if (__builtin_expect(g_bench_fast_front == -1, 0)) {
            const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
            g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
        }
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
            if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
                HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
                return;
            }
        }
#endif
    }

    // free(NULL) is a no-op.
    if (!ptr) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
        return;
    }

    // ========== Phase 15: Box FG V2 Classification ==========
    // One-way routing: FG → Domain boxes → ExternalGuard
    // Box FG V2: Ultra-fast 1-byte header classification (no mincore, no registry)
    fg_classification_t fg = fg_classify_domain(ptr);
    hak_free_route_log(fg_domain_name(fg.domain), ptr);
    // Fail-Fast (translated from the original note): a Tiny classification
    // requires SuperSlab registration; without it the pointer is sent back to
    // MIDCAND (boxed). The gate's verdict replaces the raw classification.
    fg_tiny_gate_result_t fg_guard = fg_tiny_gate(ptr, fg);
    fg = fg_guard.fg;
    fg_misclass = fg_guard.misclassified;

    switch (fg.domain) {
    case FG_DOMAIN_TINY: {
        // Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx)
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
            hak_free_v2_track_fast();
#endif
            goto done;
        }
#if !HAKMEM_BUILD_RELEASE
        hak_free_v2_track_slow();
#endif
#endif
        hak_tiny_free(ptr);
        goto done;
    }
#ifdef HAKMEM_POOL_TLS_PHASE1
    case FG_DOMAIN_POOL: {
        // Pool TLS: 8KB-52KB allocations with 1-byte header (0xb0 | class_idx)
        pool_free(ptr);
        goto done;
    }
#endif
    case FG_DOMAIN_MIDCAND:
    case FG_DOMAIN_EXTERNAL:
        // Fall through to registry lookup + AllocHeader dispatch
        break;
    default:
        // Any domain without a case above takes the same slow path.
        break;
    }

    // ========== Slow Path: 16-byte AllocHeader Dispatch ==========
    // Handle Mid/Large allocations (malloc/mmap/Pool/L25)
    // Note: All Tiny allocations (C0-C7) already handled by Front Gate above

    // ========== Mid/L25/Tiny Registry Lookup (Headerless) ==========
    // MIDCAND: Could be Mid/Large/C7, needs registry lookup
    {
        extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
        extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t mid_sz = 0;
        if (hak_pool_mid_lookup(ptr, &mid_sz)) {
            hak_free_route_log("mid_hit", ptr);
            hak_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }
    {
        extern int hak_l25_lookup(void* ptr, size_t* out_size);
        extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
        size_t l25_sz = 0;
        if (hak_l25_lookup(ptr, &l25_sz)) {
            hak_free_route_log("l25_hit", ptr);
            hkm_ace_stat_large_free();
            hak_l25_pool_free_fast(ptr, (uintptr_t)site);
            goto done;
        }
    }

    // PHASE 15: C7 (1KB headerless) registry lookup
    // Box FG V2 cannot classify C7 (no header), so use registry
    {
        SuperSlab* ss = hak_super_lookup(ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            hak_free_route_log("tiny_c7_registry", ptr);
            hak_tiny_free(ptr);
            goto done;
        }
    }

    // Raw header dispatch (mmap/malloc/BigCache, etc.)
    {
        void* raw = (char*)ptr - HEADER_SIZE;

        // Phase 3 (2025-11-29): mincore() completely removed.
        // History recorded in this file:
        //   - Phase 9: mincore() syscall verified memory accessibility
        //   - 2025-11-14: DISABLE_MINCORE flag added for performance
        //   - Phase 1b/2: registry-based validation deemed sufficient
        //   - Phase 3: mincore no longer needed
        // The header is therefore dereferenced without a mapping check; the
        // leftover always-false "is_mapped" branch has been dropped.
        AllocHeader* hdr = (AllocHeader*)raw;
        if (hdr->magic != HAKMEM_MAGIC) {
            // CRITICAL FIX (2025-11-07): Invalid magic could mean:
            // 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
            // 2. Libc allocation from mixed environment
            // 3. Double-free or corrupted pointer
            if (g_invalid_free_log) {
                fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
            }
            // Budgeted request-trace to help diagnose SS registry lookups
            hak_super_reg_reqtrace_dump(ptr);
            // Fail-fast diagnostics: never hand bad headers to Tiny or libc silently
            SuperSlab* ss_diag = hak_super_lookup(ptr);
            int slab_diag = ss_diag ? slab_index_for(ss_diag, ptr) : -1;
            fprintf(stderr,
                    "[INVALID_MAGIC_FREE] ptr=%p magic=0x%X mode=%d ss=%p slab=%d\n",
                    ptr, hdr->magic, g_invalid_free_mode, (void*)ss_diag, slab_diag);
            tiny_guard_on_invalid(ptr, hdr->magic);
            // A pointer the front gate flagged as a misclassified Tiny is not
            // freed at all: the call returns and the block is left alone.
            if (fg_misclass) {
                fprintf(stderr, "[FREE_MISCLASS_SKIP] ptr=%p hdr=0x%x (ignored to avoid corruption)\n",
                        ptr, hdr->magic);
                goto done; // leak-safe skip: not our allocation
            }
            // Never route invalid headers into Tiny; fail-fast by default.
            // NOTE(review): both branches abort(); the "Skipping free" warning
            // below does not match that. Confirm whether mode != 0 was meant
            // to skip (goto done) instead of aborting.
            if (g_invalid_free_mode) {
                static int leak_warn = 0;
                if (!leak_warn) {
                    fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
                    leak_warn = 1;
                }
                abort();
            } else {
                ptr_trace_dump_now("free_api_invalid_magic_failfast");
                abort();
            }
        }

        // Phase 5-Step3: Use Mid/Large Config Box (compile-time constant in PGO mode)
        // 2097152 = 2 MiB.
        if (MID_LARGE_BIGCACHE_ENABLED && hdr->class_bytes >= 2097152) {
            if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
        }
        {
            // Optional BigCache use for [512 KiB, 2 MiB) blocks, enabled when
            // HAKMEM_BIGCACHE_L25 parses (via atoi) to a non-zero value.
            static int g_bc_l25_en_free = -1;
            if (g_bc_l25_en_free == -1) {
                const char* e = getenv("HAKMEM_BIGCACHE_L25");
                g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0;
            }
            if (g_bc_l25_en_free && MID_LARGE_BIGCACHE_ENABLED && hdr->size >= 524288 && hdr->size < 2097152) {
                if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
            }
        }

        // Declared once ahead of the switch: a declaration placed directly
        // after a case label is not valid before C23.
        extern void __libc_free(void*);

        switch (hdr->method) {
        case ALLOC_METHOD_POOL:
            // NOTE(review): when the POOL feature is disabled nothing is
            // released here; control simply leaves the switch.
            if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) {
                hkm_ace_stat_mid_free();
                hak_pool_free(ptr, hdr->size, hdr->alloc_site);
                goto done;
            }
            break;
        case ALLOC_METHOD_L25_POOL:
            hkm_ace_stat_large_free();
            hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site);
            goto done;
        case ALLOC_METHOD_MALLOC:
            // CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
            // Using free(raw) would go through wrapper → infinite recursion
            hak_free_route_log("malloc_hdr", ptr);
            ptr_trace_dump_now("free_api_libc_malloc_hdr");
            // NOTE(review): this line is printed on every malloc-method free,
            // in all build modes.
            fprintf(stderr, "[FREE_LIBC_HDR] raw=%p user=%p size=%zu method=%d magic=0x%X\n",
                    raw, ptr, hdr->size, (int)hdr->method, hdr->magic);
            __libc_free(raw);
            break;
        case ALLOC_METHOD_MMAP:
#ifdef __linux__
            if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) {
                hak_batch_add(raw, hdr->size);
                goto done;
            }
            // Unmap only when hkm_whale_put() returns non-zero.
            if (hkm_whale_put(raw, hdr->size) != 0) {
                hkm_sys_munmap(raw, hdr->size);
            }
#else
            // CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
            ptr_trace_dump_now("free_api_libc_mmap_other");
            __libc_free(raw);
#endif
            break;
        default:
            HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method);
            break;
        }
    }

done:
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
    return;
}
#endif // HAK_FREE_API_INC_H