Box TLS-SLL + free boundary hardening: normalize C0–C6 pointers to their base (ptr-1) at the free boundary; route all caches/freelists through the base pointer; replace the remaining direct writes to g_tls_sll_head with the Box API (tls_sll_push/splice) in the refill/magazine/ultra paths; keep C7 excluded. Fixes the rbp=0xa0 free crash by preventing header overwrite and centralizing the TLS-SLL invariants.

This commit is contained in:
Moe Charm (CI)
2025-11-10 16:48:20 +09:00
parent 1b6624dec4
commit b09ba4d40d
26 changed files with 1079 additions and 354 deletions

View File

@ -4,6 +4,7 @@
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../tiny_free_fast_v2.inc.h" // Phase 7: Header-based ultra-fast free
#include "front_gate_classifier.h" // Box FG: Centralized pointer classification
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
@ -78,120 +79,62 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
return;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
// Phase 1: Try Pool TLS free FIRST for 8KB-52KB range
// CRITICAL: Must come before Phase 7 Tiny to avoid magic mismatch SEGV
// Pool TLS uses magic 0xb0, Tiny uses magic 0xa0 - must distinguish!
{
void* header_addr = (char*)ptr - 1;
// Safety vs performance trade-off:
// - If HAKMEM_TINY_SAFE_FREE=1 (strict), validate with mincore() always
// - Else (default), only validate on page-boundary risk to avoid syscall cost
#if HAKMEM_TINY_SAFE_FREE
if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
#else
uintptr_t off = (uintptr_t)header_addr & 0xFFF;
if (__builtin_expect(off == 0, 0)) {
if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
}
#endif
uint8_t header = *(uint8_t*)header_addr;
if ((header & 0xF0) == POOL_MAGIC) {
pool_free(ptr);
hak_free_route_log("pool_tls", ptr);
goto done;
}
// Not Pool TLS - fall through to other paths
}
skip_pool_tls:
#endif
// ========== Box FG: Single Point of Classification ==========
// Classify pointer once using Front Gate (safe header probe + Registry fallback)
// This eliminates all scattered ptr-1 reads and centralizes classification logic
ptr_classification_t classification = classify_ptr(ptr);
// Route based on classification result
switch (classification.kind) {
case PTR_KIND_TINY_HEADER: {
// C0-C6: Has 1-byte header, class_idx already determined by Front Gate
// Fast path: Use class_idx directly without SuperSlab lookup
hak_free_route_log("tiny_header", ptr);
#if HAKMEM_TINY_HEADER_CLASSIDX
// Phase 7: Dual-header dispatch (1-byte Tiny header OR 16-byte malloc/mmap header)
//
// Step 1: Try 1-byte Tiny header (fast path: 5-10 cycles)
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
hak_free_route_log("header_fast", ptr);
// Use ultra-fast free path with pre-determined class_idx
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_fast(); // Track hit rate in debug
hak_free_v2_track_fast();
#endif
goto done; // Success - done in 5-10 cycles! NO SuperSlab lookup!
}
// Step 2: Try 16-byte AllocHeader (malloc/mmap allocations)
// CRITICAL: Must check this BEFORE calling hak_tiny_free() to avoid silent failures!
{
void* raw = (char*)ptr - HEADER_SIZE;
// SAFETY: Check if raw header is accessible before dereferencing
// This prevents SEGV when malloc metadata is unmapped
//
// OPTIMIZATION: raw = ptr - HEADER_SIZE (16 bytes)
// Page boundary case: if ptr is in first 16 bytes of page, raw crosses page boundary
// Check: (ptr & 0xFFF) < HEADER_SIZE → raw might be on previous (unmapped) page
uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
if (__builtin_expect(offset_in_page < HEADER_SIZE, 0)) {
// Potential page boundary crossing - do safety check
if (!hak_is_memory_readable(raw)) {
goto slow_path_after_step2;
}
}
// Normal case (99.6%): raw is on same page as ptr (no mincore call!)
// Safe to dereference now
AllocHeader* hdr = (AllocHeader*)raw;
if (hdr->magic == HAKMEM_MAGIC) {
// Valid 16-byte header found (malloc/mmap allocation)
hak_free_route_log("header_16byte", ptr);
if (hdr->method == ALLOC_METHOD_MALLOC) {
// CRITICAL: raw was allocated with __libc_malloc, so free with __libc_free
extern void __libc_free(void*);
__libc_free(raw);
goto done;
}
// Handle other methods (mmap, etc) - continue to slow path below
}
slow_path_after_step2:;
}
// Fallback: Invalid header (non-tiny) or TLS cache full
// Fallback to slow path if TLS cache full
#if !HAKMEM_BUILD_RELEASE
hak_free_v2_track_slow();
hak_free_v2_track_slow();
#endif
#endif
hak_tiny_free(ptr);
goto done;
}
case PTR_KIND_TINY_HEADERLESS: {
// C7: Headerless 1KB blocks, SuperSlab + slab_idx provided by Registry
// Medium path: Use Registry result, no header read needed
hak_free_route_log("tiny_headerless", ptr);
hak_tiny_free(ptr);
goto done;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
case PTR_KIND_POOL_TLS: {
// Pool TLS: 8KB-52KB allocations with 0xb0 magic
hak_free_route_log("pool_tls", ptr);
pool_free(ptr);
goto done;
}
#endif
// SS-first free: ON by default
#if !HAKMEM_TINY_HEADER_CLASSIDX
// Only run SS-first if Phase 7 header-based free is not enabled
// (Phase 7 already does the SS lookup and handles SS allocations)
{
static int s_free_to_ss = -2;
if (s_free_to_ss == -2) {
const char* e = getenv("HAKMEM_TINY_FREE_TO_SS");
s_free_to_ss = (e && *e) ? ((*e!='0')?1:0) : 1;
}
if (s_free_to_ss) {
extern int g_use_superslab;
if (__builtin_expect(g_use_superslab != 0, 1)) {
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
int sidx = slab_index_for(ss, ptr);
int cap = ss_slabs_capacity(ss);
if (__builtin_expect(sidx >= 0 && sidx < cap, 1)) { hak_free_route_log("ss_hit", ptr); hak_tiny_free(ptr); goto done; }
}
// FIX: Removed dangerous "guess loop" (lines 92-95)
// The loop dereferenced unmapped memory causing SEGV
// If registry lookup fails, allocation is not from SuperSlab
}
case PTR_KIND_UNKNOWN:
default: {
// Not Tiny or Pool - check 16-byte AllocHeader (Mid/Large/malloc/mmap)
// This is the slow path for large allocations
break; // Fall through to header dispatch below
}
}
#endif
// ========== Slow Path: 16-byte AllocHeader Dispatch ==========
// Handle Mid/Large allocations (malloc/mmap/Pool/L25)
// Note: All Tiny allocations (C0-C7) already handled by Front Gate above
// Mid/L25 headerless path
{