Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) ccf604778c Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary

Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)

## New Modules

- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
  - Remote drain → Freelist → Carve priority
  - Header restoration for C1-C6 (NOT C0/C7)
  - ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN

- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition

## Allocation Path (core/tiny_alloc_fast.inc.h)

- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
  - Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
  - Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)

## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)

- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)

## Legacy Sealing

- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry

## ENV Controls

- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)

## Benchmarks (Front-Direct Enabled)

```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
     HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
     HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
     HAKMEM_TINY_BUMP_CHUNK=256

bench_random_mixed (16-1040B random, 200K iter):
  256 slots: 1.44M ops/s (STABLE, 0 SEGV)
  128 slots: 1.44M ops/s (STABLE, 0 SEGV)

bench_fixed_size (fixed size, 200K iter):
  256B: 4.06M ops/s (has debug logs, expected >10M without logs)
  128B: Similar (debug logs affect)
```

## Verification

- TRACE_RING test (10K iter): **0 SLL events** detected 
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV

## Next Steps

- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)

Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00

296 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../tiny_free_fast_v2.inc.h" // Phase 7: Header-based ultra-fast free
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_classifier.h" // Box FG: Centralized pointer classification
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
static inline int hak_free_route_trace_on(void) {
static int g_trace = -1;
if (__builtin_expect(g_trace == -1, 0)) {
const char* e = getenv("HAKMEM_FREE_ROUTE_TRACE");
g_trace = (e && *e && *e != '0') ? 1 : 0;
}
return g_trace;
}
static inline int* hak_free_route_budget_ptr(void) {
static int g_budget = 32; // first 32 frees only
return &g_budget;
}
static inline void hak_free_route_log(const char* tag, void* p) {
if (!hak_free_route_trace_on()) return;
int* budget = hak_free_route_budget_ptr();
if (*budget <= 0) return;
(*budget)--;
fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
}
#else
static inline void hak_free_route_log(const char* tag, void* p) { (void)tag; (void)p; }
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
static inline int hak_super_reg_reqtrace_on(void) {
static int g_on = -1;
if (__builtin_expect(g_on == -1, 0)) {
const char* e = getenv("HAKMEM_SUPER_REG_REQTRACE");
g_on = (e && *e && *e != '0') ? 1 : 0;
}
return g_on;
}
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
static int g_budget = 16; // trace first 16 occurrences
return &g_budget;
}
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
if (!hak_super_reg_reqtrace_on()) return;
int* b = hak_super_reg_reqtrace_budget_ptr();
if (*b <= 0) return;
(*b)--;
uintptr_t p = (uintptr_t)ptr;
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
unsigned long long mg20 = 0, mg21 = 0;
// Best-effort reads (may be unmapped; wrap in volatile access)
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
fprintf(stderr,
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
ptr, (void*)s20, mg20, (void*)s21, mg21);
}
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t0);
#endif
(void)site; (void)size;
// Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
if (__builtin_expect(free_trace_en == -1, 0)) {
const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
free_trace_en = (e && *e && *e != '0') ? 1 : 0;
}
if (free_trace_en) {
int n = atomic_fetch_add(&free_trace_count, 1);
if (n < 8) {
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}
}
#endif
// Bench-only ultra-short path: try header-based tiny fast free first
// Enable with: HAKMEM_BENCH_FAST_FRONT=1
{
static int g_bench_fast_front = -1;
if (__builtin_expect(g_bench_fast_front == -1, 0)) {
const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
}
#if HAKMEM_TINY_HEADER_CLASSIDX
if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
}
#endif
}
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
// ========== Box FG: Single Point of Classification ==========
// Classify pointer once using Front Gate (safe header probe + Registry fallback)
// This eliminates all scattered ptr-1 reads and centralizes classification logic
ptr_classification_t classification = classify_ptr(ptr);
// Route based on classification result
switch (classification.kind) {
case PTR_KIND_TINY_HEADER: {
// C0-C6: Has 1-byte header, class_idx already determined by Front Gate
// Fast path: Use class_idx directly without SuperSlab lookup
hak_free_route_log("tiny_header", ptr);
#if HAKMEM_TINY_HEADER_CLASSIDX
// Use ultra-fast free path with pre-determined class_idx
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_fast();
#endif
goto done;
}
// Fallback to slow path if TLS cache full
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_slow();
#endif
#endif
hak_tiny_free(ptr);
goto done;
}
case PTR_KIND_TINY_HEADERLESS: {
// C7: Headerless 1KB blocks, SuperSlab + slab_idx provided by Registry
// Medium path: Use Registry result, no header read needed
hak_free_route_log("tiny_headerless", ptr);
hak_tiny_free(ptr);
goto done;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
case PTR_KIND_POOL_TLS: {
// Pool TLS: 8KB-52KB allocations with 0xb0 magic
hak_free_route_log("pool_tls", ptr);
pool_free(ptr);
goto done;
}
#endif
case PTR_KIND_UNKNOWN:
default: {
// Not Tiny or Pool - check 16-byte AllocHeader (Mid/Large/malloc/mmap)
// This is the slow path for large allocations
break; // Fall through to header dispatch below
}
}
// ========== Slow Path: 16-byte AllocHeader Dispatch ==========
// Handle Mid/Large allocations (malloc/mmap/Pool/L25)
// Note: All Tiny allocations (C0-C7) already handled by Front Gate above
// Mid/L25 headerless経路
{
extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
size_t mid_sz = 0; if (hak_pool_mid_lookup(ptr, &mid_sz)) { hak_free_route_log("mid_hit", ptr); hak_pool_free_fast(ptr, (uintptr_t)site); goto done; }
}
{
extern int hak_l25_lookup(void* ptr, size_t* out_size);
extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
size_t l25_sz = 0; if (hak_l25_lookup(ptr, &l25_sz)) { hak_free_route_log("l25_hit", ptr); hkm_ace_stat_large_free(); hak_l25_pool_free_fast(ptr, (uintptr_t)site); goto done; }
}
// Raw header dispatchmmap/malloc/BigCacheなど
{
void* raw = (char*)ptr - HEADER_SIZE;
// CRITICAL FIX (2025-11-07): Check if memory is accessible before dereferencing
// This prevents SEGV when ptr has no header (Tiny alloc where SS lookup failed, or libc alloc)
if (!hak_is_memory_readable(raw)) {
// Memory not accessible, ptr likely has no header
hak_free_route_log("unmapped_header_fallback", ptr);
// In direct-link mode, try tiny_free (handles headerless Tiny allocs)
if (!g_ldpreload_mode && g_invalid_free_mode) {
hak_tiny_free(ptr);
goto done;
}
// LD_PRELOAD mode: route to libc (might be libc allocation)
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_invalid_hdr");
__libc_free(ptr);
goto done;
}
// Safe to dereference header now
AllocHeader* hdr = (AllocHeader*)raw;
if (hdr->magic != HAKMEM_MAGIC) {
// CRITICAL FIX (2025-11-07): Invalid magic could mean:
// 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
// 2. Libc allocation from mixed environment
// 3. Double-free or corrupted pointer
if (g_invalid_free_log) fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
// One-shot request-trace to help diagnose SS registry lookups
hak_super_reg_reqtrace_dump(ptr);
// In direct-link mode, try routing to tiny free as best-effort recovery
// This handles case #1 where SuperSlab lookup failed but allocation is valid
if (!g_ldpreload_mode && g_invalid_free_mode) {
// Attempt tiny free (will validate internally and handle gracefully if invalid)
hak_free_route_log("invalid_magic_tiny_recovery", ptr);
hak_tiny_free(ptr);
goto done;
}
// LD_PRELOAD mode or fallback mode: route to libc
// IMPORTANT: Use ptr (not raw), as NO header exists
if (g_invalid_free_mode) {
// Skip mode: leak memory (original behavior, but logged)
static int leak_warn = 0;
if (!leak_warn) {
fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
leak_warn = 1;
}
goto done;
} else {
// Fallback mode: route to libc
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_invalid_magic_fallback");
__libc_free(ptr); // Use ptr, not raw!
goto done;
}
}
if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->class_bytes >= 2097152) {
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
}
{
static int g_bc_l25_en_free = -1; if (g_bc_l25_en_free == -1) { const char* e = getenv("HAKMEM_BIGCACHE_L25"); g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0; }
if (g_bc_l25_en_free && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->size >= 524288 && hdr->size < 2097152) {
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
}
}
switch (hdr->method) {
case ALLOC_METHOD_POOL: if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) { hkm_ace_stat_mid_free(); hak_pool_free(ptr, hdr->size, hdr->alloc_site); goto done; } break;
case ALLOC_METHOD_L25_POOL: hkm_ace_stat_large_free(); hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site); goto done;
case ALLOC_METHOD_MALLOC:
// CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
// Using free(raw) would go through wrapper → infinite recursion
hak_free_route_log("malloc_hdr", ptr);
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_malloc_hdr");
__libc_free(raw);
break;
case ALLOC_METHOD_MMAP:
#ifdef __linux__
if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) { hak_batch_add(raw, hdr->size); goto done; }
if (hkm_whale_put(raw, hdr->size) != 0) { hkm_sys_munmap(raw, hdr->size); }
#else
// CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_mmap_other");
__libc_free(raw);
#endif
break;
default: HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method); break;
}
}
done:
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
#endif // HAK_FREE_API_INC_H