Phase 3d-B: TLS Cache Merge - Unified g_tls_sll[] structure (+12-18% expected)
Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified TinyTLSSLL struct to improve L1D cache locality. Expected performance gain: +12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build: ✅ PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
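For context before the diff below, here is a minimal sketch of what the unified per-class TLS entry described above could look like (16-byte aligned, head + count + padding). The authoritative type lives in core/hakmem_tiny.h; the _pad field name, the attribute placement, and the TINY_NUM_CLASSES fallback of 8 (taken from the g_tls_sll[8] note) are assumptions of this sketch, not the shipped code.

    /* Sketch only: approximates the TinyTLSSLL layout described in the
     * commit message (16B aligned, head + count + pad). The real
     * definition is in core/hakmem_tiny.h. */
    #include <stdint.h>

    #ifndef TINY_NUM_CLASSES
    #define TINY_NUM_CLASSES 8   /* assumption, matching the g_tls_sll[8] note above */
    #endif

    typedef struct __attribute__((aligned(16))) TinyTLSSLL {
        void*    head;   /* free-list head for this size class (was g_tls_sll_head[i]) */
        uint32_t count;  /* nodes currently in the list        (was g_tls_sll_count[i]) */
        uint32_t _pad;   /* pad to 16 bytes; field name assumed */
    } TinyTLSSLL;

    /* One entry per tiny size class; head and count of a class now share a
     * cache line, so a push/pop touches one line instead of two. */
    extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];

With this layout, the access pattern in the diff changes from two array loads (g_tls_sll_head[i], g_tls_sll_count[i]) to two field loads off the same 16-byte entry (g_tls_sll[i].head, g_tls_sll[i].count).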
core/box/tls_sll_box.h
@@ -34,9 +34,8 @@
 #include "../tiny_debug_ring.h"
 #include "tiny_next_ptr_box.h"
 
-// External TLS SLL state (defined in hakmem_tiny.c or equivalent)
-extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
-extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
+// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
+extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
 extern int g_tls_sll_class_mask; // bit i=1 → SLL allowed for class i
 
 // ========== Debug guard ==========
@@ -108,7 +107,7 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
 #endif
 
     // Capacity check BEFORE any writes.
-    uint32_t cur = g_tls_sll_count[class_idx];
+    uint32_t cur = g_tls_sll[class_idx].count;
     if (!unlimited && cur >= capacity) {
         return false;
     }
@@ -154,10 +153,10 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
 #if !HAKMEM_BUILD_RELEASE
     // Optional double-free detection: scan a bounded prefix of the list.
     {
-        void* scan = g_tls_sll_head[class_idx];
+        void* scan = g_tls_sll[class_idx].head;
         uint32_t scanned = 0;
-        const uint32_t limit = (g_tls_sll_count[class_idx] < 64)
-                                   ? g_tls_sll_count[class_idx]
+        const uint32_t limit = (g_tls_sll[class_idx].count < 64)
+                                   ? g_tls_sll[class_idx].count
                                    : 64;
         while (scan && scanned < limit) {
             if (scan == ptr) {
@@ -176,9 +175,9 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
 #endif
 
     // Link new node to current head via Box API (offset is handled inside tiny_nextptr).
-    PTR_NEXT_WRITE("tls_push", class_idx, ptr, 0, g_tls_sll_head[class_idx]);
-    g_tls_sll_head[class_idx] = ptr;
-    g_tls_sll_count[class_idx] = cur + 1;
+    PTR_NEXT_WRITE("tls_push", class_idx, ptr, 0, g_tls_sll[class_idx].head);
+    g_tls_sll[class_idx].head = ptr;
+    g_tls_sll[class_idx].count = cur + 1;
 
     return true;
 }
@@ -197,15 +196,15 @@ static inline bool tls_sll_pop(int class_idx, void** out)
     }
     atomic_fetch_add(&g_integrity_check_class_bounds, 1);
 
-    void* base = g_tls_sll_head[class_idx];
+    void* base = g_tls_sll[class_idx].head;
     if (!base) {
         return false;
     }
 
     // Sentinel guard: remote sentinel must never be in TLS SLL.
     if (__builtin_expect((uintptr_t)base == TINY_REMOTE_SENTINEL, 0)) {
-        g_tls_sll_head[class_idx] = NULL;
-        g_tls_sll_count[class_idx] = 0;
+        g_tls_sll[class_idx].head = NULL;
+        g_tls_sll[class_idx].count = 0;
 #if !HAKMEM_BUILD_RELEASE
         fprintf(stderr,
                 "[TLS_SLL_POP] Remote sentinel detected at head; SLL reset (cls=%d)\n",
@@ -251,8 +250,8 @@ static inline bool tls_sll_pop(int class_idx, void** out)
         abort();
 #else
         // In release, fail-safe: drop list.
-        g_tls_sll_head[class_idx] = NULL;
-        g_tls_sll_count[class_idx] = 0;
+        g_tls_sll[class_idx].head = NULL;
+        g_tls_sll[class_idx].count = 0;
         {
             static int g_sll_ring_en = -1;
             if (__builtin_expect(g_sll_ring_en == -1, 0)) {
@@ -285,9 +284,9 @@ static inline bool tls_sll_pop(int class_idx, void** out)
     }
 #endif
 
-    g_tls_sll_head[class_idx] = next;
-    if (g_tls_sll_count[class_idx] > 0) {
-        g_tls_sll_count[class_idx]--;
+    g_tls_sll[class_idx].head = next;
+    if (g_tls_sll[class_idx].count > 0) {
+        g_tls_sll[class_idx].count--;
     }
 
     // Clear next inside popped node to avoid stale-chain issues.
@@ -314,7 +313,7 @@ static inline uint32_t tls_sll_splice(int class_idx,
         return 0;
     }
 
-    uint32_t cur = g_tls_sll_count[class_idx];
+    uint32_t cur = g_tls_sll[class_idx].count;
     if (cur >= capacity) {
         return 0;
     }
@@ -361,10 +360,10 @@ static inline uint32_t tls_sll_splice(int class_idx,
 
     // Link tail to existing head and install new head.
     tls_sll_debug_guard(class_idx, tail, "splice_tail");
-    PTR_NEXT_WRITE("tls_splice_link", class_idx, tail, 0, g_tls_sll_head[class_idx]);
+    PTR_NEXT_WRITE("tls_splice_link", class_idx, tail, 0, g_tls_sll[class_idx].head);
 
-    g_tls_sll_head[class_idx] = chain_head;
-    g_tls_sll_count[class_idx] = cur + moved;
+    g_tls_sll[class_idx].head = chain_head;
+    g_tls_sll[class_idx].count = cur + moved;
 
     return moved;
 }