Superslab free path base normalization: use the block base for C0–C6 in tiny_free_fast_ss, tiny_free_fast_legacy, the same-thread freelist push, the midtc push, and the remote queue push/dup checks. This ensures next-pointer writes never hit the user header. Addresses the residual SEGV beyond the TLS-SLL box.
This commit is contained in:
@ -36,12 +36,18 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
|
||||
// Push pointer to TLS SLL
|
||||
// Returns: true on success, false if C7 or capacity exceeded
|
||||
//
|
||||
// CRITICAL Phase 7 Header Design:
|
||||
// - C0-C6 (header classes): [1B header][user data]
|
||||
//                           ^base      ^ptr (user pointer; callers must pass base, not ptr, to this function)
|
||||
// - SLL stores "base" (ptr-1) to avoid overwriting header
|
||||
// - C7 (headerless): ptr == base (no offset)
|
||||
//
|
||||
// Safety:
|
||||
// - C7 always rejected (headerless, first 8 bytes = user data)
|
||||
// - Capacity check prevents overflow
|
||||
// - Caller must handle fallback (e.g., meta->freelist)
|
||||
// - Header protection: stores base (ptr-1) for C0-C6
|
||||
//
|
||||
// Performance: 2-3 cycles (C0-C6), < 1 cycle (C7 fast rejection)
|
||||
// Performance: 3-4 cycles (C0-C6), < 1 cycle (C7 fast rejection)
|
||||
static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
|
||||
// CRITICAL: C7 (1KB) is headerless - MUST NOT use TLS SLL
|
||||
// Reason: SLL stores next pointer in first 8 bytes (user data for C7)
|
||||
@ -54,7 +60,11 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
|
||||
return false; // SLL full
|
||||
}
|
||||
|
||||
// Push to SLL (standard linked list push)
|
||||
// CRITICAL: Caller must pass "base" pointer (NOT user ptr)
|
||||
// Phase 7 carve operations return base (stride includes header)
|
||||
// SLL stores base to avoid overwriting header with next pointer
|
||||
|
||||
// Push to SLL (standard linked list push using base)
|
||||
*(void**)ptr = g_tls_sll_head[class_idx];
|
||||
g_tls_sll_head[class_idx] = ptr;
|
||||
g_tls_sll_count[class_idx]++;
|
||||
@ -65,21 +75,27 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
|
||||
// ========== Pop ==========
|
||||
|
||||
// Pop pointer from TLS SLL
|
||||
// Returns: true on success (writes to *out), false if empty
|
||||
// Returns: true on success (writes the base pointer to *out; caller converts to user ptr), false if empty
|
||||
//
|
||||
// CRITICAL Phase 7 Header Design:
|
||||
// - SLL stores "base" (ptr-1) for C0-C6
|
||||
// - Must return "ptr" (base+1) to user
|
||||
// - C7: base == ptr (no offset)
|
||||
//
|
||||
// Safety:
|
||||
// - C7 protection: clears first 8 bytes on pop (prevents next pointer leak)
|
||||
// - Header protection: *out holds base; the caller returns ptr (base+1) for C0-C6
|
||||
// - NULL check before deref
|
||||
//
|
||||
// Performance: 3-4 cycles
|
||||
// Performance: 4-5 cycles
|
||||
static inline bool tls_sll_pop(int class_idx, void** out) {
|
||||
void* head = g_tls_sll_head[class_idx];
|
||||
if (!head) {
|
||||
void* base = g_tls_sll_head[class_idx];
|
||||
if (!base) {
|
||||
return false; // SLL empty
|
||||
}
|
||||
|
||||
// Pop from SLL
|
||||
void* next = *(void**)head;
|
||||
// Pop from SLL (reads next from base)
|
||||
void* next = *(void**)base;
|
||||
g_tls_sll_head[class_idx] = next;
|
||||
if (g_tls_sll_count[class_idx] > 0) {
|
||||
g_tls_sll_count[class_idx]--;
|
||||
@ -91,11 +107,12 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
|
||||
// Cost: 1 store instruction (~1 cycle), only for C7 (~1% of allocations)
|
||||
//
|
||||
// Note: C0-C6 have 1-byte header, so first 8 bytes are safe (header hides next)
|
||||
// Caller responsibility: Convert base → ptr (base+1) for C0-C6 before returning to user
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
*(void**)head = NULL;
|
||||
*(void**)base = NULL;
|
||||
}
|
||||
|
||||
*out = head;
|
||||
*out = base; // Return base (caller converts to ptr if needed)
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -104,10 +121,16 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
|
||||
// Splice chain of pointers to TLS SLL (batch push)
|
||||
// Returns: actual count moved (0 for C7 or if capacity exceeded)
|
||||
//
|
||||
// CRITICAL Phase 7 Header Design:
|
||||
// - Caller MUST pass chain of "base" pointers (ptr-1 for C0-C6)
|
||||
// - Chain links are stored at base (*(void**)base = next_base)
|
||||
// - SLL head stores base pointers
|
||||
//
|
||||
// Safety:
|
||||
// - C7 always returns 0 (no splice)
|
||||
// - Capacity check limits splice size
|
||||
// - Chain traversal with safety (breaks on NULL)
|
||||
// - Assumes chain is already linked using base pointers
|
||||
//
|
||||
// Performance: ~5 cycles + O(count) for chain traversal
|
||||
static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t count, uint32_t capacity) {
|
||||
@ -127,6 +150,7 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t
|
||||
uint32_t to_move = (count < available) ? count : available;
|
||||
|
||||
// Find chain tail (traverse to_move - 1 nodes)
|
||||
// NOTE: Chain MUST be linked using base pointers (caller responsibility)
|
||||
void* tail = chain_head;
|
||||
for (uint32_t i = 1; i < to_move; i++) {
|
||||
void* next = *(void**)tail;
|
||||
|
||||
@ -117,6 +117,7 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
|
||||
|
||||
// Fast path: Same-thread free (2-3 instructions)
|
||||
int class_idx = ss->size_class;
|
||||
void* base = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
// Track same-thread frees (compile-time gated)
|
||||
@ -127,14 +128,14 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
|
||||
extern int g_sfc_enabled;
|
||||
if (g_sfc_enabled) {
|
||||
// Box 5-NEW: Try SFC (128 slots)
|
||||
if (!sfc_free_push(class_idx, ptr)) {
|
||||
if (!sfc_free_push(class_idx, base)) {
|
||||
// SFC full → skip caching, use slow path (return 0)
|
||||
// Do NOT fall back to SLL - it has no capacity check and would grow unbounded!
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
// Box 5-OLD: Use SLL (16 slots)
|
||||
tiny_alloc_fast_push(class_idx, ptr);
|
||||
tiny_alloc_fast_push(class_idx, base);
|
||||
}
|
||||
|
||||
// Active accounting (Box 3: SuperSlab)
|
||||
@ -154,19 +155,20 @@ static inline int tiny_free_fast_legacy(TinySlab* slab, void* ptr) {
|
||||
|
||||
// Fast path: Same-thread free
|
||||
int class_idx = slab->class_idx;
|
||||
void* base = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
|
||||
|
||||
// Box 5-NEW/5-OLD integration: Push to TLS freelist (SFC or SLL)
|
||||
extern int g_sfc_enabled;
|
||||
if (g_sfc_enabled) {
|
||||
// Box 5-NEW: Try SFC (128 slots)
|
||||
if (!sfc_free_push(class_idx, ptr)) {
|
||||
if (!sfc_free_push(class_idx, base)) {
|
||||
// SFC full → skip caching, use slow path (return 0)
|
||||
// Do NOT fall back to SLL - it has no capacity check and would grow unbounded!
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
// Box 5-OLD: Use SLL (16 slots)
|
||||
tiny_alloc_fast_push(class_idx, ptr);
|
||||
tiny_alloc_fast_push(class_idx, base);
|
||||
}
|
||||
|
||||
return 1; // Success
|
||||
|
||||
@ -24,6 +24,8 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
return;
|
||||
}
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
// Normalize to block base for header classes (C0-C6)
|
||||
void* base = (ss->size_class == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
|
||||
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
|
||||
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
|
||||
extern __thread TinyTLSSlab g_tls_slabs[];
|
||||
@ -67,7 +69,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
// Duplicate in freelist (best-effort scan up to 64)
|
||||
// NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
|
||||
void* scan = meta->freelist; int scanned = 0; int dup = 0;
|
||||
while (scan && scanned < 64) { if (scan == ptr) { dup = 1; break; } scan = *(void**)scan; scanned++; }
|
||||
while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = *(void**)scan; scanned++; }
|
||||
if (dup) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
@ -119,7 +121,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
|
||||
if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
|
||||
#include "box/free_remote_box.h"
|
||||
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, ptr, my_tid);
|
||||
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, base, my_tid);
|
||||
if (transitioned) {
|
||||
extern unsigned long long g_remote_free_transitions[];
|
||||
g_remote_free_transitions[ss->size_class]++;
|
||||
@ -143,7 +145,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (!g_free_to_ss) {
|
||||
int cls = (int)ss->size_class;
|
||||
if (midtc_enabled() && cls >= 4) {
|
||||
if (midtc_push(cls, ptr)) {
|
||||
if (midtc_push(cls, base)) {
|
||||
// Treat as returned to TLS cache (not SS freelist)
|
||||
meta->used--;
|
||||
ss_active_dec_one(ss);
|
||||
@ -156,7 +158,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
#include "box/free_local_box.h"
|
||||
// Perform freelist push (+first-free publish if applicable)
|
||||
void* prev_before = meta->freelist;
|
||||
tiny_free_local_box(ss, slab_idx, meta, ptr, my_tid);
|
||||
tiny_free_local_box(ss, slab_idx, meta, base, my_tid);
|
||||
if (prev_before == NULL) {
|
||||
ROUTE_MARK(19); // first_free_transition
|
||||
extern unsigned long long g_first_free_transitions[];
|
||||
@ -280,7 +282,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
} while (0);
|
||||
if (g_ss_adopt_en2) {
|
||||
// Use remote queue
|
||||
uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED);
|
||||
uintptr_t head_word = __atomic_load_n((uintptr_t*)base, __ATOMIC_RELAXED);
|
||||
if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
|
||||
ss->size_class,
|
||||
slab_idx,
|
||||
@ -291,9 +293,9 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
|
||||
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
|
||||
head_word);
|
||||
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr);
|
||||
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, base);
|
||||
if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
|
||||
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr);
|
||||
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, base);
|
||||
}
|
||||
if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
|
||||
tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
|
||||
@ -317,7 +319,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
|
||||
tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
|
||||
}
|
||||
int was_empty = ss_remote_push(ss, slab_idx, ptr); // ss_active_dec_one() called inside
|
||||
int was_empty = ss_remote_push(ss, slab_idx, base); // ss_active_dec_one() called inside
|
||||
meta->used--;
|
||||
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
|
||||
if (was_empty) {
|
||||
@ -329,8 +331,8 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
// Fallback: direct freelist push (legacy)
|
||||
if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
|
||||
void* prev = meta->freelist;
|
||||
*(void**)ptr = prev;
|
||||
meta->freelist = ptr;
|
||||
*(void**)base = prev;
|
||||
meta->freelist = base;
|
||||
tiny_failfast_log("free_local_legacy", ss->size_class, ss, meta, ptr, prev);
|
||||
do {
|
||||
static int g_mask_en = -1;
|
||||
|
||||
Reference in New Issue
Block a user