Box TLS-SLL + free boundary hardening: normalize C0–C6 to base (ptr-1) at free boundary; route all caches/freelists via base; replace remaining g_tls_sll_head direct writes with Box API (tls_sll_push/splice) in refill/magazine/ultra; keep C7 excluded. Fixes rbp=0xa0 free crash by preventing header overwrite and centralizing TLS-SLL invariants.

This commit is contained in:
Moe Charm (CI)
2025-11-10 16:48:20 +09:00
parent 1b6624dec4
commit b09ba4d40d
26 changed files with 1079 additions and 354 deletions

View File

@ -18,6 +18,7 @@
#include "tiny_alloc_fast_sfc.inc.h" // Box 5-NEW: SFC Layer
#include "tiny_region_id.h" // Phase 7: Header-based class_idx lookup
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive sizing
#include "box/tls_sll_box.h" // Box TLS-SLL: C7-safe push/pop/splice
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
#include "box/front_gate_box.h"
#endif
@ -164,6 +165,14 @@ extern int g_sfc_enabled;
//
// Expected: 3-4 instructions on SFC hit, 6-8 on SLL hit
static inline void* tiny_alloc_fast_pop(int class_idx) {
// CRITICAL: C7 (1KB) is headerless - delegate to slow path completely
// Reason: Fast path uses SLL which stores next pointer in user data area
// C7's headerless design is incompatible with fast path assumptions
// Solution: Force C7 to use slow path for both alloc and free
if (__builtin_expect(class_idx == 7, 0)) {
return NULL; // Force slow path
}
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
void* out = NULL;
if (front_gate_try_pop(class_idx, &out)) {
@ -207,46 +216,15 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
// Box Boundary: Layer 1 - pop the head of the TLS SLL freelist (can be disabled via env)
extern int g_tls_sll_enable; // set at init via HAKMEM_TINY_TLS_SLL
if (__builtin_expect(g_tls_sll_enable, 1)) {
void* head = g_tls_sll_head[class_idx];
if (__builtin_expect(head != NULL, 1)) {
// CORRUPTION DEBUG: Validate TLS SLL head before popping
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
size_t blk = g_tiny_class_sizes[class_idx];
// Check alignment (must be multiple of block size)
if (((uintptr_t)head % blk) != 0) {
fprintf(stderr, "[TLS_SLL_CORRUPT] cls=%d head=%p misaligned (blk=%zu offset=%zu)\n",
class_idx, head, blk, (uintptr_t)head % blk);
fprintf(stderr, "[TLS_SLL_CORRUPT] TLS freelist head is corrupted!\n");
abort();
}
}
// Use Box TLS-SLL API (C7-safe pop)
// CRITICAL: Pop FIRST, do NOT read g_tls_sll_head directly (race condition!)
// Reading head before pop causes stale read → rbp=0xa0 SEGV
void* head = NULL;
if (tls_sll_pop(class_idx, &head)) {
// Front Gate: SLL hit (fast path 3 instructions)
extern unsigned long long g_front_sll_hit[];
g_front_sll_hit[class_idx]++;
// CORRUPTION DEBUG: Validate next pointer before updating head
void* next = *(void**)head;
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
size_t blk = g_tiny_class_sizes[class_idx];
if (next != NULL && ((uintptr_t)next % blk) != 0) {
fprintf(stderr, "[ALLOC_POP_CORRUPT] Reading next from head=%p got corrupted next=%p!\n",
head, next);
fprintf(stderr, "[ALLOC_POP_CORRUPT] cls=%d blk=%zu next_offset=%zu (expected 0)\n",
class_idx, blk, (uintptr_t)next % blk);
fprintf(stderr, "[ALLOC_POP_CORRUPT] TLS SLL head block was corrupted (use-after-free/double-free)!\n");
abort();
}
fprintf(stderr, "[ALLOC_POP] cls=%d head=%p next=%p\n", class_idx, head, next);
}
g_tls_sll_head[class_idx] = next; // Pop: next = *head
// Optional: update count (for stats, can be disabled)
if (g_tls_sll_count[class_idx] > 0) {
g_tls_sll_count[class_idx]--;
}
#if HAKMEM_DEBUG_COUNTERS
// Track TLS freelist hits (compile-time gated, zero runtime cost when disabled)
g_free_via_tls_sll[class_idx]++;
@ -288,12 +266,11 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
break; // SFC full, stop
}
// Pop from SLL (Layer 1)
void* ptr = g_tls_sll_head[class_idx];
if (!ptr) break; // SLL empty
g_tls_sll_head[class_idx] = *(void**)ptr;
g_tls_sll_count[class_idx]--;
// Pop from SLL (Layer 1) using Box TLS-SLL API (C7-safe)
void* ptr = NULL;
if (!tls_sll_pop(class_idx, &ptr)) {
break; // SLL empty
}
// Push to SFC (Layer 0)
*(void**)ptr = g_sfc_head[class_idx];
@ -324,6 +301,13 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
// - Smaller count (8-16): better for diverse workloads, faster warmup
// - Larger count (64-128): better for homogeneous workloads, fewer refills
static inline int tiny_alloc_fast_refill(int class_idx) {
// CRITICAL: C7 (1KB) is headerless - skip refill completely, force slow path
// Reason: Refill pushes blocks to TLS SLL which stores next pointer in user data
// C7's headerless design is incompatible with this mechanism
if (__builtin_expect(class_idx == 7, 0)) {
return 0; // Skip refill, force slow path allocation
}
// Phase 7 Task 3: Profiling overhead removed in release builds
// In release mode, compiler can completely eliminate profiling code
#if !HAKMEM_BUILD_RELEASE
@ -469,28 +453,28 @@ static inline void* tiny_alloc_fast(size_t size) {
ROUTE_BEGIN(class_idx);
// 2. Fast path: TLS freelist pop (3-4 instructions, 95% hit rate)
void* ptr;
#if HAKMEM_TINY_AGGRESSIVE_INLINE
// Task 2: Use inline macro (save 5-10 cycles, no function call)
TINY_ALLOC_FAST_POP_INLINE(class_idx, ptr);
#else
// Standard: Function call (preserves debugging visibility)
ptr = tiny_alloc_fast_pop(class_idx);
#endif
// CRITICAL: Use Box TLS-SLL API (static inline, same performance as macro but SAFE!)
// The old macro had race condition: read head before pop → rbp=0xa0 SEGV
void* ptr = NULL;
tls_sll_pop(class_idx, &ptr);
if (__builtin_expect(ptr != NULL, 1)) {
// C7 (1024B, headerless): clear embedded next pointer before returning to user
if (__builtin_expect(class_idx == 7, 0)) {
*(void**)ptr = NULL;
}
HAK_RET_ALLOC(class_idx, ptr);
}
// 3. Miss: Refill from backend (Box 3: SuperSlab)
int refilled = tiny_alloc_fast_refill(class_idx);
if (__builtin_expect(refilled > 0, 1)) {
// Refill success → retry pop
#if HAKMEM_TINY_AGGRESSIVE_INLINE
TINY_ALLOC_FAST_POP_INLINE(class_idx, ptr);
#else
ptr = tiny_alloc_fast_pop(class_idx);
#endif
// Refill success → retry pop using safe Box TLS-SLL API
ptr = NULL;
tls_sll_pop(class_idx, &ptr);
if (ptr) {
if (__builtin_expect(class_idx == 7, 0)) {
*(void**)ptr = NULL;
}
HAK_RET_ALLOC(class_idx, ptr);
}
}
@ -516,10 +500,16 @@ static inline void tiny_alloc_fast_push(int class_idx, void* ptr) {
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
front_gate_push_tls(class_idx, ptr);
#else
// Box Boundary: Push to TLS freelist
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
// Box Boundary: Push to TLS freelist using Box TLS-SLL API (C7-safe)
uint32_t capacity = UINT32_MAX; // Unlimited for helper function
if (!tls_sll_push(class_idx, ptr, capacity)) {
// C7 rejected or SLL somehow full (should not happen)
// In release builds, this is a no-op (caller expects success)
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[WARN] tls_sll_push failed in tiny_alloc_fast_push cls=%d ptr=%p\n",
class_idx, ptr);
#endif
}
#endif
}