Files
hakmem/core/box/hak_free_api.inc.h
Moe Charm (CI) c6a2a6d38a Optimize mincore() with TLS page cache (Phase A optimization)
Problem:
- SEGV fix (696aa7c0b) added 1,591 mincore() syscalls (11.0% time)
- Performance regression: 9.38M → 563K ops/s (-94%)

Solution: TLS page cache for last-checked pages
- Cache s_last_page1/page2 → is_mapped (2 slots)
- Expected hit rate: 90-95% (temporal locality)
- Fallback: mincore() syscall on cache miss

Implementation:
- Fast path: if (page == s_last_page1) → reuse cached result
- Boundary handling: Check both pages if AllocHeader crosses page
- Thread-safe: __thread static variables (no locks)

Expected Impact:
- mincore() calls: 1,591 → ~100–150 (a 90–94% reduction)
- Throughput: 563K → 647K ops/s (+15% estimated)

Next: Task B-1 SuperSlab LRU/Prewarm investigation
2025-11-14 06:32:38 +09:00

344 lines
14 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hak_free_api.inc.h — Box: hak_free_at() implementation
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../tiny_free_fast_v2.inc.h" // Phase 7: Header-based ultra-fast free
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_classifier.h" // Box FG: Centralized pointer classification
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
// Optional route trace: print first N classification lines when enabled by env
#if !HAKMEM_BUILD_RELEASE
// Returns 1 when HAKMEM_FREE_ROUTE_TRACE is set to a non-empty value other
// than "0". The environment is consulted once; the verdict is cached in a
// function-local static for all later calls.
static inline int hak_free_route_trace_on(void) {
    static int g_trace = -1;
    if (__builtin_expect(g_trace == -1, 0)) {
        const char* env_val = getenv("HAKMEM_FREE_ROUTE_TRACE");
        int enabled = 0;
        if (env_val != NULL && env_val[0] != '\0' && env_val[0] != '0') {
            enabled = 1;
        }
        g_trace = enabled;
    }
    return g_trace;
}
// Returns the address of the shared route-trace budget. It starts at 32,
// so only the first 32 frees produce trace lines; callers decrement it.
static inline int* hak_free_route_budget_ptr(void) {
    static int s_remaining = 32;
    return &s_remaining;
}
// Emits one "[FREE_ROUTE]" line for this free, provided tracing is enabled
// via HAKMEM_FREE_ROUTE_TRACE and the shared budget is not yet exhausted.
static inline void hak_free_route_log(const char* tag, void* p) {
    if (hak_free_route_trace_on()) {
        int* remaining = hak_free_route_budget_ptr();
        if (*remaining > 0) {
            --(*remaining);
            fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
        }
    }
}
#else
// Release builds: route tracing is compiled out; this no-op stub keeps the
// call sites valid while silencing unused-parameter warnings.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)tag;
    (void)p;
}
#endif
// Optional: request-trace for invalid-magic cases (first N hits)
// Returns 1 when HAKMEM_SUPER_REG_REQTRACE enables invalid-magic request
// tracing (non-empty value other than "0"). Checked once per process.
static inline int hak_super_reg_reqtrace_on(void) {
    static int g_on = -1;
    if (__builtin_expect(g_on == -1, 0)) {
        const char* env_val = getenv("HAKMEM_SUPER_REG_REQTRACE");
        g_on = (env_val != NULL && env_val[0] != '\0' && env_val[0] != '0');
    }
    return g_on;
}
// Returns the address of the request-trace budget: only the first 16
// invalid-magic occurrences are dumped.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int s_remaining = 16;
    return &s_remaining;
}
// Dump diagnostics for an invalid-magic free: print the 1 MiB- and
// 2 MiB-aligned candidate SuperSlab bases for ptr together with their magic
// fields, to help diagnose SuperSlab registry lookup failures.
// Gated by HAKMEM_SUPER_REG_REQTRACE; limited to the first 16 occurrences.
//
// FIX: the previous code claimed "best-effort reads ... wrap in volatile
// access" but dereferenced s20/s21 unconditionally. An aligned-down base is
// frequently unmapped, so the diagnostic itself could SEGV — the very failure
// mode this file's mincore() checks exist to prevent. Probe the base page
// with mincore() before reading ->magic (mirrors the check in hak_free_at()).
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
    if (!hak_super_reg_reqtrace_on()) return;
    int* b = hak_super_reg_reqtrace_budget_ptr();
    if (*b <= 0) return;
    (*b)--;
    uintptr_t p = (uintptr_t)ptr;
    uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
    uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
    SuperSlab* s20 = (SuperSlab*)(p & ~m20);
    SuperSlab* s21 = (SuperSlab*)(p & ~m21);
    unsigned long long mg20 = 0, mg21 = 0;
#ifdef __linux__
    // Only read ->magic when the base page is actually mapped.
    // NOTE(review): assumes the magic field lies within the first page of the
    // SuperSlab — confirm against the SuperSlab struct layout.
    unsigned char vec;
    if (s20 && mincore((void*)s20, 1, &vec) == 0) {
        mg20 = (unsigned long long)s20->magic;
    }
    if (s21 && mincore((void*)s21, 1, &vec) == 0) {
        mg21 = (unsigned long long)s21->magic;
    }
#else
    mg20 = (unsigned long long)(s20 ? s20->magic : 0);
    mg21 = (unsigned long long)(s21 ? s21->magic : 0);
#endif
    fprintf(stderr,
            "[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
            ptr, (void*)s20, mg20, (void*)s21, mg21);
}
/*
 * hak_free_at() — central free entry point for the Box free API.
 *
 * ptr  : pointer being freed (NULL is a no-op, matching libc free()).
 * size : caller-supplied size hint — currently unused (see (void)size).
 * site : callsite token, forwarded as site_id to the pool/L25 free paths.
 *
 * Routing order, as implemented below:
 *   1. Optional bench-only fast front (HAKMEM_BENCH_FAST_FRONT=1):
 *      header-based tiny fast free before any classification.
 *   2. Front Gate classification (classify_ptr): tiny-with-header,
 *      tiny-headerless, pool-TLS, or unknown.
 *   3. Mid / L25 registry lookups for headerless medium/large blocks.
 *   4. 16-byte AllocHeader dispatch (malloc/mmap/pool/L25/BigCache), guarded
 *      by a mincore()-based mapping check with a 2-slot TLS page cache.
 */
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__attribute__((always_inline))
inline
#endif
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t0);
#endif
(void)site; (void)size;
// Optional lightweight trace of early free calls (first few only)
#if !HAKMEM_BUILD_RELEASE
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
if (__builtin_expect(free_trace_en == -1, 0)) {
const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
free_trace_en = (e && *e && *e != '0') ? 1 : 0;
}
if (free_trace_en) {
// Atomic counter: only the first 8 frees (across all threads) are printed.
int n = atomic_fetch_add(&free_trace_count, 1);
if (n < 8) {
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}
}
#endif
// Bench-only ultra-short path: try header-based tiny fast free first
// Enable with: HAKMEM_BENCH_FAST_FRONT=1
{
static int g_bench_fast_front = -1;
if (__builtin_expect(g_bench_fast_front == -1, 0)) {
const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
}
#if HAKMEM_TINY_HEADER_CLASSIDX
if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
}
#endif
}
// free(NULL) is a no-op, mirroring libc semantics.
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
// ========== Box FG: Single Point of Classification ==========
// Classify pointer once using Front Gate (safe header probe + Registry fallback)
// This eliminates all scattered ptr-1 reads and centralizes classification logic
ptr_classification_t classification = classify_ptr(ptr);
// Route based on classification result
switch (classification.kind) {
case PTR_KIND_TINY_HEADER: {
// C0-C6: Has 1-byte header, class_idx already determined by Front Gate
// Fast path: Use class_idx directly without SuperSlab lookup
hak_free_route_log("tiny_header", ptr);
#if HAKMEM_TINY_HEADER_CLASSIDX
// Use ultra-fast free path with pre-determined class_idx
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_fast();
#endif
goto done;
}
// Fallback to slow path if TLS cache full
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_slow();
#endif
#endif
hak_tiny_free(ptr);
goto done;
}
case PTR_KIND_TINY_HEADERLESS: {
// C7: Headerless 1KB blocks, SuperSlab + slab_idx provided by Registry
// Medium path: Use Registry result, no header read needed
hak_free_route_log("tiny_headerless", ptr);
hak_tiny_free(ptr);
goto done;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
case PTR_KIND_POOL_TLS: {
// Pool TLS: 8KB-52KB allocations with 0xb0 magic
hak_free_route_log("pool_tls", ptr);
pool_free(ptr);
goto done;
}
#endif
case PTR_KIND_UNKNOWN:
default: {
// Not Tiny or Pool - check 16-byte AllocHeader (Mid/Large/malloc/mmap)
// This is the slow path for large allocations
break; // Fall through to header dispatch below
}
}
// ========== Slow Path: 16-byte AllocHeader Dispatch ==========
// Handle Mid/Large allocations (malloc/mmap/Pool/L25)
// Note: All Tiny allocations (C0-C7) already handled by Front Gate above
// Mid/L25 headerless path: registry lookup, no header read required.
{
extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
size_t mid_sz = 0; if (hak_pool_mid_lookup(ptr, &mid_sz)) { hak_free_route_log("mid_hit", ptr); hak_pool_free_fast(ptr, (uintptr_t)site); goto done; }
}
{
extern int hak_l25_lookup(void* ptr, size_t* out_size);
extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
size_t l25_sz = 0; if (hak_l25_lookup(ptr, &l25_sz)) { hak_free_route_log("l25_hit", ptr); hkm_ace_stat_large_free(); hak_l25_pool_free_fast(ptr, (uintptr_t)site); goto done; }
}
// Raw header dispatch (mmap / malloc / BigCache, etc.)
{
void* raw = (char*)ptr - HEADER_SIZE;
// CRITICAL FIX (2025-11-14): Use real mincore() to check memory accessibility
// Phase 9 gutted hak_is_memory_readable() to always return 1 (unsafe!)
// We MUST verify memory is mapped before dereferencing AllocHeader.
//
// Step A (2025-11-14): TLS page cache to reduce mincore() frequency.
// - Cache last-checked pages in __thread statics.
// - Typical case: many frees on the same handful of pages → 90%+ cache hit.
// NOTE(review): a cached "mapped" verdict can go stale if the page is later
// unmapped (munmap elsewhere) and the pointer reused — acceptable for this
// best-effort probe? Confirm against the allocator's unmap paths.
int is_mapped = 0;
#ifdef __linux__
{
// TLS cache for page→is_mapped
static __thread void* s_last_page1 = NULL;
static __thread int s_last_page1_mapped = 0;
static __thread void* s_last_page2 = NULL;
static __thread int s_last_page2_mapped = 0;
unsigned char vec;
// 0xFFF mask: assumes 4 KiB pages (x86-64/aarch64 default page size).
void* page1 = (void*)((uintptr_t)raw & ~0xFFFUL);
void* page2 = (void*)(((uintptr_t)raw + sizeof(AllocHeader) - 1) & ~0xFFFUL);
// Fast path: reuse cached result for page1 when possible
if (page1 == s_last_page1) {
is_mapped = s_last_page1_mapped;
} else {
// mincore() returns 0 when the range is mapped, -1/ENOMEM otherwise.
is_mapped = (mincore(page1, 1, &vec) == 0);
s_last_page1 = page1;
s_last_page1_mapped = is_mapped;
}
// If header crosses page boundary, ensure second page is also mapped
if (is_mapped && page2 != page1) {
if (page2 == s_last_page2) {
if (!s_last_page2_mapped) {
is_mapped = 0;
}
} else {
int mapped2 = (mincore(page2, 1, &vec) == 0);
s_last_page2 = page2;
s_last_page2_mapped = mapped2;
if (!mapped2) {
is_mapped = 0;
}
}
}
}
#else
is_mapped = 1; // Assume mapped on non-Linux
#endif
if (!is_mapped) {
// Memory not accessible, ptr likely has no header
hak_free_route_log("unmapped_header_fallback", ptr);
// In direct-link mode, try tiny_free (handles headerless Tiny allocs)
if (!g_ldpreload_mode && g_invalid_free_mode) {
hak_tiny_free(ptr);
goto done;
}
// LD_PRELOAD mode: route to libc (might be libc allocation)
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_invalid_hdr");
__libc_free(ptr);
goto done;
}
// Safe to dereference header now
AllocHeader* hdr = (AllocHeader*)raw;
if (hdr->magic != HAKMEM_MAGIC) {
// CRITICAL FIX (2025-11-07): Invalid magic could mean:
// 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
// 2. Libc allocation from mixed environment
// 3. Double-free or corrupted pointer
if (g_invalid_free_log) fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
// One-shot request-trace to help diagnose SS registry lookups
hak_super_reg_reqtrace_dump(ptr);
// In direct-link mode, try routing to tiny free as best-effort recovery
// This handles case #1 where SuperSlab lookup failed but allocation is valid
if (!g_ldpreload_mode && g_invalid_free_mode) {
// Attempt tiny free (will validate internally and handle gracefully if invalid)
hak_free_route_log("invalid_magic_tiny_recovery", ptr);
hak_tiny_free(ptr);
goto done;
}
// LD_PRELOAD mode or fallback mode: route to libc
// IMPORTANT: Use ptr (not raw), as NO header exists
if (g_invalid_free_mode) {
// Skip mode: leak memory (original behavior, but logged)
static int leak_warn = 0;
if (!leak_warn) {
fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
leak_warn = 1;
}
goto done;
} else {
// Fallback mode: route to libc
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_invalid_magic_fallback");
__libc_free(ptr); // Use ptr, not raw!
goto done;
}
}
// BigCache: 2097152 bytes = 2 MiB — only blocks at least this large are cached.
if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->class_bytes >= 2097152) {
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
}
{
// Optional L25-range BigCache (512 KiB ≤ size < 2 MiB), env-gated.
static int g_bc_l25_en_free = -1; if (g_bc_l25_en_free == -1) { const char* e = getenv("HAKMEM_BIGCACHE_L25"); g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0; }
if (g_bc_l25_en_free && HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && hdr->size >= 524288 && hdr->size < 2097152) {
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
}
}
// Dispatch on the allocation method recorded in the header.
switch (hdr->method) {
case ALLOC_METHOD_POOL: if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) { hkm_ace_stat_mid_free(); hak_pool_free(ptr, hdr->size, hdr->alloc_site); goto done; } break;
case ALLOC_METHOD_L25_POOL: hkm_ace_stat_large_free(); hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site); goto done;
case ALLOC_METHOD_MALLOC:
// CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
// Using free(raw) would go through wrapper → infinite recursion
hak_free_route_log("malloc_hdr", ptr);
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_malloc_hdr");
__libc_free(raw);
break;
case ALLOC_METHOD_MMAP:
#ifdef __linux__
if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) { hak_batch_add(raw, hdr->size); goto done; }
// Whale cache first; fall back to a real munmap if it declines the block.
if (hkm_whale_put(raw, hdr->size) != 0) { hkm_sys_munmap(raw, hdr->size); }
#else
// CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
extern void __libc_free(void*);
ptr_trace_dump_now("free_api_libc_mmap_other");
__libc_free(raw);
#endif
break;
default: HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method); break;
}
}
done:
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
#endif
return;
}
#endif // HAK_FREE_API_INC_H