Superslab free path base normalization: use the block base (ptr-1) for C0–C6 in tiny_free_fast_ss, tiny_free_fast_legacy, the same-thread freelist push, midtc push, and remote queue push/dup checks, so that next-pointer writes never land on the 1-byte block header. Addresses residual SEGV beyond the TLS-SLL box.
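
For context, the layout assumed throughout the diff below: C0–C6 blocks carry a 1-byte header before the user data, so the block base is ptr-1, while C7 blocks are headerless (base == ptr). A minimal sketch of the two conversions, mirroring the expression added at every call site (tiny_base_for/tiny_user_for are illustrative names, not the project's API):

    #include <stdint.h>

    /* C0-C6: [1B header][user data...]   base = ptr - 1
     * C7:    [user data...]              base = ptr        */
    static inline void* tiny_base_for(int class_idx, void* user_ptr) {
        /* Normalize to the block base before any freelist link write,
         * so alloc (carve returns base) and free agree on geometry. */
        return (class_idx == 7) ? user_ptr : (void*)((uint8_t*)user_ptr - 1);
    }

    static inline void* tiny_user_for(int class_idx, void* base) {
        /* Inverse conversion when handing a block back to the user. */
        return (class_idx == 7) ? base : (void*)((uint8_t*)base + 1);
    }

Every call site changed below applies exactly this ptr → base normalization before pushing into a freelist, remote queue, or TLS cache.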
@@ -36,12 +36,18 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
 // Push pointer to TLS SLL
 // Returns: true on success, false if C7 or capacity exceeded
 //
+// CRITICAL Phase 7 Header Design:
+// - C0-C6 (header classes): [1B header][user data]
+//                           ^base      ^ptr (caller passes this)
+// - SLL stores "base" (ptr-1) to avoid overwriting header
+// - C7 (headerless): ptr == base (no offset)
+//
 // Safety:
 // - C7 always rejected (headerless, first 8 bytes = user data)
 // - Capacity check prevents overflow
-// - Caller must handle fallback (e.g., meta->freelist)
+// - Header protection: stores base (ptr-1) for C0-C6
 //
-// Performance: 2-3 cycles (C0-C6), < 1 cycle (C7 fast rejection)
+// Performance: 3-4 cycles (C0-C6), < 1 cycle (C7 fast rejection)
 static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
     // CRITICAL: C7 (1KB) is headerless - MUST NOT use TLS SLL
     // Reason: SLL stores next pointer in first 8 bytes (user data for C7)
@@ -54,7 +60,11 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
         return false; // SLL full
     }
 
-    // Push to SLL (standard linked list push)
+    // CRITICAL: Caller must pass "base" pointer (NOT user ptr)
+    // Phase 7 carve operations return base (stride includes header)
+    // SLL stores base to avoid overwriting header with next pointer
+
+    // Push to SLL (standard linked list push using base)
     *(void**)ptr = g_tls_sll_head[class_idx];
     g_tls_sll_head[class_idx] = ptr;
     g_tls_sll_count[class_idx]++;
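
An illustrative call site honoring the new push contract (a sketch, not the project's exact code; the capacity of 16 is taken from the "SLL (16 slots)" comment later in this diff):

    void free_via_tls_sll(int class_idx, void* user_ptr) {
        /* New contract: push the base, never the user pointer. */
        void* base = (class_idx == 7) ? user_ptr
                                      : (void*)((uint8_t*)user_ptr - 1);
        if (!tls_sll_push(class_idx, base, 16)) {
            /* C7 rejected or SLL full: caller must take the slow path
             * (e.g., the slab freelist), as the Safety notes require. */
        }
    }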
@@ -65,21 +75,27 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
 // ========== Pop ==========
 
 // Pop pointer from TLS SLL
-// Returns: true on success (writes to *out), false if empty
+// Returns: true on success (writes base pointer to *out), false if empty
+//
+// CRITICAL Phase 7 Header Design:
+// - SLL stores "base" (ptr-1) for C0-C6
+// - Caller must return "ptr" (base+1) to user
+// - C7: base == ptr (no offset)
 //
 // Safety:
 // - C7 protection: clears first 8 bytes on pop (prevents next pointer leak)
+// - Header protection: caller converts base → ptr (base+1) for C0-C6
 // - NULL check before deref
 //
-// Performance: 3-4 cycles
+// Performance: 4-5 cycles
 static inline bool tls_sll_pop(int class_idx, void** out) {
-    void* head = g_tls_sll_head[class_idx];
-    if (!head) {
+    void* base = g_tls_sll_head[class_idx];
+    if (!base) {
         return false; // SLL empty
     }
 
-    // Pop from SLL
-    void* next = *(void**)head;
+    // Pop from SLL (reads next from base)
+    void* next = *(void**)base;
     g_tls_sll_head[class_idx] = next;
     if (g_tls_sll_count[class_idx] > 0) {
         g_tls_sll_count[class_idx]--;
@@ -91,11 +107,12 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
     // Cost: 1 store instruction (~1 cycle), only for C7 (~1% of allocations)
     //
     // Note: C0-C6 have 1-byte header, so first 8 bytes are safe (header hides next)
+    // Caller responsibility: Convert base → ptr (base+1) for C0-C6 before returning to user
     if (__builtin_expect(class_idx == 7, 0)) {
-        *(void**)head = NULL;
+        *(void**)base = NULL;
     }
 
-    *out = head;
+    *out = base; // Return base (caller converts to ptr if needed)
     return true;
 }
 
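The pop side of the same contract, as the added comments describe: the SLL hands back a base pointer and the caller converts before returning to the user. A sketch (the function name is illustrative):

    void* alloc_via_tls_sll(int class_idx) {
        void* base;
        if (!tls_sll_pop(class_idx, &base)) {
            return NULL; /* SLL empty: refill from the slab instead */
        }
        /* Caller responsibility: base → user ptr for C0-C6;
         * C7 is headerless, so base == ptr. */
        return (class_idx == 7) ? base : (void*)((uint8_t*)base + 1);
    }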
@@ -104,10 +121,16 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
 // Splice chain of pointers to TLS SLL (batch push)
 // Returns: actual count moved (0 for C7 or if capacity exceeded)
 //
+// CRITICAL Phase 7 Header Design:
+// - Caller MUST pass chain of "base" pointers (ptr-1 for C0-C6)
+// - Chain links are stored at base (*(void**)base = next_base)
+// - SLL head stores base pointers
+//
 // Safety:
 // - C7 always returns 0 (no splice)
 // - Capacity check limits splice size
 // - Chain traversal with safety (breaks on NULL)
+// - Assumes chain is already linked using base pointers
 //
 // Performance: ~5 cycles + O(count) for chain traversal
 static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t count, uint32_t capacity) {
@@ -127,6 +150,7 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t
     uint32_t to_move = (count < available) ? count : available;
 
     // Find chain tail (traverse to_move - 1 nodes)
+    // NOTE: Chain MUST be linked using base pointers (caller responsibility)
     void* tail = chain_head;
     for (uint32_t i = 1; i < to_move; i++) {
         void* next = *(void**)tail;
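Since tls_sll_splice assumes the chain is already linked through base pointers, a batch producer has to build the chain accordingly. A hedged sketch (build_base_chain is illustrative, not project code):

    /* Link n freed blocks into a splice-ready chain. bases[] must
     * already hold base pointers (ptr-1 for C0-C6). */
    static void* build_base_chain(void** bases, uint32_t n) {
        void* head = NULL;
        for (uint32_t i = 0; i < n; i++) {
            *(void**)bases[i] = head;  /* link stored at base */
            head = bases[i];
        }
        return head;
    }

    /* Usage: uint32_t moved = tls_sll_splice(cls, head, n, capacity); */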
@@ -117,6 +117,7 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
 
     // Fast path: Same-thread free (2-3 instructions)
     int class_idx = ss->size_class;
+    void* base = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
 
 #if HAKMEM_DEBUG_COUNTERS
     // Track same-thread frees (compile-time gated)
@@ -127,14 +128,14 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
     extern int g_sfc_enabled;
     if (g_sfc_enabled) {
         // Box 5-NEW: Try SFC (128 slots)
-        if (!sfc_free_push(class_idx, ptr)) {
+        if (!sfc_free_push(class_idx, base)) {
             // SFC full → skip caching, use slow path (return 0)
             // Do NOT fall back to SLL - it has no capacity check and would grow unbounded!
             return 0;
         }
     } else {
         // Box 5-OLD: Use SLL (16 slots)
-        tiny_alloc_fast_push(class_idx, ptr);
+        tiny_alloc_fast_push(class_idx, base);
     }
 
     // Active accounting (Box 3: SuperSlab)
@@ -154,19 +155,20 @@ static inline int tiny_free_fast_legacy(TinySlab* slab, void* ptr) {
 
     // Fast path: Same-thread free
     int class_idx = slab->class_idx;
+    void* base = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
 
     // Box 5-NEW/5-OLD integration: Push to TLS freelist (SFC or SLL)
     extern int g_sfc_enabled;
     if (g_sfc_enabled) {
         // Box 5-NEW: Try SFC (128 slots)
-        if (!sfc_free_push(class_idx, ptr)) {
+        if (!sfc_free_push(class_idx, base)) {
             // SFC full → skip caching, use slow path (return 0)
             // Do NOT fall back to SLL - it has no capacity check and would grow unbounded!
             return 0;
         }
     } else {
         // Box 5-OLD: Use SLL (16 slots)
-        tiny_alloc_fast_push(class_idx, ptr);
+        tiny_alloc_fast_push(class_idx, base);
     }
 
     return 1; // Success
@@ -24,6 +24,8 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
         return;
     }
     TinySlabMeta* meta = &ss->slabs[slab_idx];
+    // Normalize to block base for header classes (C0-C6)
+    void* base = (ss->size_class == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
     if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
         tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
     extern __thread TinyTLSSlab g_tls_slabs[];
@@ -67,7 +69,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
         // Duplicate in freelist (best-effort scan up to 64)
         // NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
         void* scan = meta->freelist; int scanned = 0; int dup = 0;
-        while (scan && scanned < 64) { if (scan == ptr) { dup = 1; break; } scan = *(void**)scan; scanned++; }
+        while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = *(void**)scan; scanned++; }
         if (dup) {
             uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
             tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
@@ -119,7 +121,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
     tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
     if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
         #include "box/free_remote_box.h"
-        int transitioned = tiny_free_remote_box(ss, slab_idx, meta, ptr, my_tid);
+        int transitioned = tiny_free_remote_box(ss, slab_idx, meta, base, my_tid);
         if (transitioned) {
             extern unsigned long long g_remote_free_transitions[];
             g_remote_free_transitions[ss->size_class]++;
@@ -143,7 +145,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
     if (!g_free_to_ss) {
         int cls = (int)ss->size_class;
         if (midtc_enabled() && cls >= 4) {
-            if (midtc_push(cls, ptr)) {
+            if (midtc_push(cls, base)) {
                 // Treat as returned to TLS cache (not SS freelist)
                 meta->used--;
                 ss_active_dec_one(ss);
@@ -156,7 +158,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
     #include "box/free_local_box.h"
     // Perform freelist push (+first-free publish if applicable)
     void* prev_before = meta->freelist;
-    tiny_free_local_box(ss, slab_idx, meta, ptr, my_tid);
+    tiny_free_local_box(ss, slab_idx, meta, base, my_tid);
     if (prev_before == NULL) {
         ROUTE_MARK(19); // first_free_transition
         extern unsigned long long g_first_free_transitions[];
@@ -280,7 +282,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
     } while (0);
     if (g_ss_adopt_en2) {
         // Use remote queue
-        uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED);
+        uintptr_t head_word = __atomic_load_n((uintptr_t*)base, __ATOMIC_RELAXED);
         if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
             ss->size_class,
             slab_idx,
@@ -291,9 +293,9 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
             atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
             (void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
             head_word);
-        int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr);
+        int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, base);
         if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
-            dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr);
+            dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, base);
         }
         if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
             tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
@@ -317,7 +319,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
         if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
            tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
         }
-        int was_empty = ss_remote_push(ss, slab_idx, ptr); // ss_active_dec_one() called inside
+        int was_empty = ss_remote_push(ss, slab_idx, base); // ss_active_dec_one() called inside
         meta->used--;
         // ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
         if (was_empty) {
@@ -329,8 +331,8 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
         // Fallback: direct freelist push (legacy)
         if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
         void* prev = meta->freelist;
-        *(void**)ptr = prev;
-        meta->freelist = ptr;
+        *(void**)base = prev;
+        meta->freelist = base;
         tiny_failfast_log("free_local_legacy", ss->size_class, ss, meta, ptr, prev);
         do {
             static int g_mask_en = -1;
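
With every push site normalized, a slab freelist should now contain only base pointers. A debug-only invariant check one could drop into the slow path (slab_start and stride are assumed geometry, not the project's actual fields; per the comments above, stride includes the header, so base pointers sit on stride boundaries):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Every node must sit on a stride boundary measured from the slab
     * start; a user pointer (base+1) on the list would be off by one. */
    static void assert_freelist_bases(void* freelist, uint8_t* slab_start,
                                      size_t stride) {
        for (void* n = freelist; n; n = *(void**)n) {
            assert((size_t)((uint8_t*)n - slab_start) % stride == 0);
        }
    }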