Box TLS-SLL + free-boundary hardening: normalize C0–C6 pointers to base (ptr-1) at the free boundary; route all caches/freelists via base; replace the remaining direct g_tls_sll_head writes with the Box API (tls_sll_push/splice) in refill/magazine/ultra; keep C7 excluded. Fixes the rbp=0xa0 free crash by preventing header overwrite and centralizing TLS-SLL invariants.
This commit is contained in:
@ -18,6 +18,7 @@
|
||||
#include "tiny_alloc_fast_sfc.inc.h" // Box 5-NEW: SFC Layer
|
||||
#include "tiny_region_id.h" // Phase 7: Header-based class_idx lookup
|
||||
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive sizing
|
||||
#include "box/tls_sll_box.h" // Box TLS-SLL: C7-safe push/pop/splice
|
||||
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
|
||||
#include "box/front_gate_box.h"
|
||||
#endif
|
||||
@ -164,6 +165,14 @@ extern int g_sfc_enabled;
|
||||
//
|
||||
// Expected: 3-4 instructions on SFC hit, 6-8 on SLL hit
|
||||
static inline void* tiny_alloc_fast_pop(int class_idx) {
|
||||
// CRITICAL: C7 (1KB) is headerless - delegate to slow path completely
|
||||
// Reason: Fast path uses SLL which stores next pointer in user data area
|
||||
// C7's headerless design is incompatible with fast path assumptions
|
||||
// Solution: Force C7 to use slow path for both alloc and free
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
return NULL; // Force slow path
|
||||
}
|
||||
|
||||
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
|
||||
void* out = NULL;
|
||||
if (front_gate_try_pop(class_idx, &out)) {
|
||||
@ -207,46 +216,15 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
|
||||
// Box Boundary: Layer 1 - pop the head of the TLS SLL freelist (can be disabled via env)
|
||||
extern int g_tls_sll_enable; // set at init via HAKMEM_TINY_TLS_SLL
|
||||
if (__builtin_expect(g_tls_sll_enable, 1)) {
|
||||
void* head = g_tls_sll_head[class_idx];
|
||||
if (__builtin_expect(head != NULL, 1)) {
|
||||
// CORRUPTION DEBUG: Validate TLS SLL head before popping
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[class_idx];
|
||||
// Check alignment (must be multiple of block size)
|
||||
if (((uintptr_t)head % blk) != 0) {
|
||||
fprintf(stderr, "[TLS_SLL_CORRUPT] cls=%d head=%p misaligned (blk=%zu offset=%zu)\n",
|
||||
class_idx, head, blk, (uintptr_t)head % blk);
|
||||
fprintf(stderr, "[TLS_SLL_CORRUPT] TLS freelist head is corrupted!\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Use Box TLS-SLL API (C7-safe pop)
|
||||
// CRITICAL: Pop FIRST, do NOT read g_tls_sll_head directly (race condition!)
|
||||
// Reading head before pop causes stale read → rbp=0xa0 SEGV
|
||||
void* head = NULL;
|
||||
if (tls_sll_pop(class_idx, &head)) {
|
||||
// Front Gate: SLL hit (fast path 3 instructions)
|
||||
extern unsigned long long g_front_sll_hit[];
|
||||
g_front_sll_hit[class_idx]++;
|
||||
|
||||
// CORRUPTION DEBUG: Validate next pointer before updating head
|
||||
void* next = *(void**)head;
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[class_idx];
|
||||
if (next != NULL && ((uintptr_t)next % blk) != 0) {
|
||||
fprintf(stderr, "[ALLOC_POP_CORRUPT] Reading next from head=%p got corrupted next=%p!\n",
|
||||
head, next);
|
||||
fprintf(stderr, "[ALLOC_POP_CORRUPT] cls=%d blk=%zu next_offset=%zu (expected 0)\n",
|
||||
class_idx, blk, (uintptr_t)next % blk);
|
||||
fprintf(stderr, "[ALLOC_POP_CORRUPT] TLS SLL head block was corrupted (use-after-free/double-free)!\n");
|
||||
abort();
|
||||
}
|
||||
fprintf(stderr, "[ALLOC_POP] cls=%d head=%p next=%p\n", class_idx, head, next);
|
||||
}
|
||||
|
||||
g_tls_sll_head[class_idx] = next; // Pop: next = *head
|
||||
|
||||
// Optional: update count (for stats, can be disabled)
|
||||
if (g_tls_sll_count[class_idx] > 0) {
|
||||
g_tls_sll_count[class_idx]--;
|
||||
}
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
// Track TLS freelist hits (compile-time gated, zero runtime cost when disabled)
|
||||
g_free_via_tls_sll[class_idx]++;
|
||||
@ -288,12 +266,11 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
|
||||
break; // SFC full, stop
|
||||
}
|
||||
|
||||
// Pop from SLL (Layer 1)
|
||||
void* ptr = g_tls_sll_head[class_idx];
|
||||
if (!ptr) break; // SLL empty
|
||||
|
||||
g_tls_sll_head[class_idx] = *(void**)ptr;
|
||||
g_tls_sll_count[class_idx]--;
|
||||
// Pop from SLL (Layer 1) using Box TLS-SLL API (C7-safe)
|
||||
void* ptr = NULL;
|
||||
if (!tls_sll_pop(class_idx, &ptr)) {
|
||||
break; // SLL empty
|
||||
}
|
||||
|
||||
// Push to SFC (Layer 0)
|
||||
*(void**)ptr = g_sfc_head[class_idx];
|
||||
@ -324,6 +301,13 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
|
||||
// - Smaller count (8-16): better for diverse workloads, faster warmup
|
||||
// - Larger count (64-128): better for homogeneous workloads, fewer refills
|
||||
static inline int tiny_alloc_fast_refill(int class_idx) {
|
||||
// CRITICAL: C7 (1KB) is headerless - skip refill completely, force slow path
|
||||
// Reason: Refill pushes blocks to TLS SLL which stores next pointer in user data
|
||||
// C7's headerless design is incompatible with this mechanism
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
return 0; // Skip refill, force slow path allocation
|
||||
}
|
||||
|
||||
// Phase 7 Task 3: Profiling overhead removed in release builds
|
||||
// In release mode, compiler can completely eliminate profiling code
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
@ -469,28 +453,28 @@ static inline void* tiny_alloc_fast(size_t size) {
|
||||
ROUTE_BEGIN(class_idx);
|
||||
|
||||
// 2. Fast path: TLS freelist pop (3-4 instructions, 95% hit rate)
|
||||
void* ptr;
|
||||
#if HAKMEM_TINY_AGGRESSIVE_INLINE
|
||||
// Task 2: Use inline macro (save 5-10 cycles, no function call)
|
||||
TINY_ALLOC_FAST_POP_INLINE(class_idx, ptr);
|
||||
#else
|
||||
// Standard: Function call (preserves debugging visibility)
|
||||
ptr = tiny_alloc_fast_pop(class_idx);
|
||||
#endif
|
||||
// CRITICAL: Use Box TLS-SLL API (static inline, same performance as macro but SAFE!)
|
||||
// The old macro had race condition: read head before pop → rbp=0xa0 SEGV
|
||||
void* ptr = NULL;
|
||||
tls_sll_pop(class_idx, &ptr);
|
||||
if (__builtin_expect(ptr != NULL, 1)) {
|
||||
// C7 (1024B, headerless): clear embedded next pointer before returning to user
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
*(void**)ptr = NULL;
|
||||
}
|
||||
HAK_RET_ALLOC(class_idx, ptr);
|
||||
}
|
||||
|
||||
// 3. Miss: Refill from backend (Box 3: SuperSlab)
|
||||
int refilled = tiny_alloc_fast_refill(class_idx);
|
||||
if (__builtin_expect(refilled > 0, 1)) {
|
||||
// Refill success → retry pop
|
||||
#if HAKMEM_TINY_AGGRESSIVE_INLINE
|
||||
TINY_ALLOC_FAST_POP_INLINE(class_idx, ptr);
|
||||
#else
|
||||
ptr = tiny_alloc_fast_pop(class_idx);
|
||||
#endif
|
||||
// Refill success → retry pop using safe Box TLS-SLL API
|
||||
ptr = NULL;
|
||||
tls_sll_pop(class_idx, &ptr);
|
||||
if (ptr) {
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
*(void**)ptr = NULL;
|
||||
}
|
||||
HAK_RET_ALLOC(class_idx, ptr);
|
||||
}
|
||||
}
|
||||
@ -516,10 +500,16 @@ static inline void tiny_alloc_fast_push(int class_idx, void* ptr) {
|
||||
#ifdef HAKMEM_TINY_FRONT_GATE_BOX
|
||||
front_gate_push_tls(class_idx, ptr);
|
||||
#else
|
||||
// Box Boundary: Push to TLS freelist
|
||||
*(void**)ptr = g_tls_sll_head[class_idx];
|
||||
g_tls_sll_head[class_idx] = ptr;
|
||||
g_tls_sll_count[class_idx]++;
|
||||
// Box Boundary: Push to TLS freelist using Box TLS-SLL API (C7-safe)
|
||||
uint32_t capacity = UINT32_MAX; // Unlimited for helper function
|
||||
if (!tls_sll_push(class_idx, ptr, capacity)) {
|
||||
// C7 rejected or SLL somehow full (should not happen)
|
||||
// In release builds, this is a no-op (caller expects success)
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
fprintf(stderr, "[WARN] tls_sll_push failed in tiny_alloc_fast_push cls=%d ptr=%p\n",
|
||||
class_idx, ptr);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user