Phase 3d-B: TLS Cache Merge - Unified g_tls_sll[] structure (+12-18% expected)
Merge the separate g_tls_sll_head[] and g_tls_sll_count[] arrays into a unified TinyTLSSLL struct to improve L1D cache locality. Expected performance gain: +12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build: ✅ PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -73,8 +73,8 @@ extern unsigned long long g_free_via_tls_sll[];
|
||||
// - Cross-thread allocation is not considered here (the Backend handles it)
|
||||
|
||||
// External TLS variables (defined in hakmem_tiny.c)
|
||||
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
|
||||
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
|
||||
// Phase 3d-B: TLS Cache Merge - Unified TLS SLL structure
|
||||
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
||||
|
||||
// External backend functions
|
||||
// P0 Fix: Use appropriate refill function based on P0 status
|
||||
@ -185,7 +185,7 @@ static inline void* tiny_alloc_fast_pop(int class_idx) {
|
||||
uint64_t pop_call = atomic_fetch_add(&g_fast_pop_count, 1);
|
||||
if (0 && class_idx == 2 && pop_call > 5840 && pop_call < 5900) {
|
||||
fprintf(stderr, "[FAST_POP_C2] call=%lu cls=%d head=%p count=%u\n",
|
||||
pop_call, class_idx, g_tls_sll_head[class_idx], g_tls_sll_count[class_idx]);
|
||||
pop_call, class_idx, g_tls_sll[class_idx].head, g_tls_sll[class_idx].count);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
@ -323,7 +323,7 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
|
||||
int want = (target_count * pct) / 100;
|
||||
if (want <= 0) want = target_count / 2; // safety fallback
|
||||
|
||||
while (transferred < want && g_tls_sll_count[class_idx] > 0) {
|
||||
while (transferred < want && g_tls_sll[class_idx].count > 0) {
|
||||
// Check SFC capacity before transfer
|
||||
if (g_sfc_count[class_idx] >= cap) {
|
||||
break; // SFC full, stop
|
||||
@ -525,8 +525,8 @@ static inline void* tiny_alloc_fast(size_t size) {
|
||||
}
|
||||
|
||||
// Phase 3c L1D Opt: Prefetch TLS cache head early
|
||||
__builtin_prefetch(&g_tls_sll_head[class_idx], 0, 3);
|
||||
__builtin_prefetch(&g_tls_sll_count[class_idx], 0, 3);
|
||||
// Phase 3d-B: Prefetch unified TLS SLL struct (single prefetch for both head+count)
|
||||
__builtin_prefetch(&g_tls_sll[class_idx], 0, 3);
|
||||
|
||||
// Phase 22: Lazy per-class init (on first use)
|
||||
lazy_init_class(class_idx);
|
||||
@ -554,7 +554,7 @@ static inline void* tiny_alloc_fast(size_t size) {
|
||||
if (0 && call_num > 14250 && call_num < 14280) {
|
||||
fprintf(stderr, "[TINY_ALLOC] call=%lu size=%zu class=%d sll_head[%d]=%p count=%u\n",
|
||||
call_num, size, class_idx, class_idx,
|
||||
g_tls_sll_head[class_idx], g_tls_sll_count[class_idx]);
|
||||
g_tls_sll[class_idx].head, g_tls_sll[class_idx].count);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
@ -672,8 +672,8 @@ typedef struct {
|
||||
static inline TinyAllocFastStats tiny_alloc_fast_stats(int class_idx) {
|
||||
TinyAllocFastStats stats = {
|
||||
.class_idx = class_idx,
|
||||
.head = g_tls_sll_head[class_idx],
|
||||
.count = g_tls_sll_count[class_idx]
|
||||
.head = g_tls_sll[class_idx].head,
|
||||
.count = g_tls_sll[class_idx].count
|
||||
};
|
||||
return stats;
|
||||
}
|
||||
@ -681,8 +681,8 @@ static inline TinyAllocFastStats tiny_alloc_fast_stats(int class_idx) {
|
||||
// Reset TLS freelist (for testing/benchmarking)
|
||||
// WARNING: This leaks memory! Only use in controlled test environments.
|
||||
static inline void tiny_alloc_fast_reset(int class_idx) {
|
||||
g_tls_sll_head[class_idx] = NULL;
|
||||
g_tls_sll_count[class_idx] = 0;
|
||||
g_tls_sll[class_idx].head = NULL;
|
||||
g_tls_sll[class_idx].count = 0;
|
||||
}
|
||||
|
||||
// ========== Performance Notes ==========
|
||||
|
||||
Reference in New Issue
Block a user