Phase 7: header-aware TLS front caches and FG gating

- core/hakmem_tiny_fastcache.inc.h: make tiny_fast_pop/push read/write next at base+1 for C0–C6; clear C7 next on pop
- core/hakmem_tiny_hot_pop.inc.h: header-aware next reads for g_fast_head pops (classes 0–3)
- core/tiny_free_magazine.inc.h: header-aware chain linking for BG spill chain (base+1 for C0–C6)
- core/box/front_gate_classifier.c: registry fallback classifies headerless only for class 7; others as headered

Build OK; bench_fixed_size_hakmem still crashes with SIGBUS right after init. FREE_ROUTE trace shows invalid frees (ptr=0xa0, etc.). Next steps: instrument early frees and audit remaining header-aware writes in any front caches not yet patched.
Author: Moe Charm (CI)
Date: 2025-11-10 18:04:08 +09:00
parent d739ea7769
commit dde490f842
13 changed files with 166 additions and 37 deletions
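The hunks below all repeat one rule: the intrusive next pointer must skip the 1-byte class header at base for C0-C6, and sits at base only for headerless C7. As a reference while auditing the remaining front caches, here is a minimal consolidated sketch of that rule; the helper names (tiny_next_slot, front_push, front_pop) are illustrative, not from the tree:

#include <stdint.h>
#include <stddef.h>

/* Sketch of the Phase 7 linking rule. C0-C6 blocks keep a 1-byte class
 * header at base, so the next pointer lives at base+1 (an intentionally
 * unaligned store, as in the patched sites below). C7 (1KB) is headerless
 * and keeps next at base. Helper names are hypothetical. */
static inline void** tiny_next_slot(void* base, int class_idx) {
#if HAKMEM_TINY_HEADER_CLASSIDX
    const size_t off = (class_idx == 7) ? 0 : 1;
#else
    const size_t off = 0;
    (void)class_idx;
#endif
    return (void**)((uint8_t*)base + off);
}

/* Push/pop on a TLS front-cache list without clobbering the header byte. */
static inline void front_push(void** head, void* base, int class_idx) {
    *tiny_next_slot(base, class_idx) = *head;
    *head = base;
}

static inline void* front_pop(void** head, int class_idx) {
    void* base = *head;
    if (base) *head = *tiny_next_slot(base, class_idx);
    return base;
}

Any cache that still links with *(void**)base for a headered class silently overwrites the class byte, which would also be consistent with FREE_ROUTE reporting garbage pointers like ptr=0xa0 once corrupted nodes are popped.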

View File

@@ -368,7 +368,7 @@ test-box-refactor: box-refactor
 	./larson_hakmem 10 8 128 1024 1 12345 4
 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
-TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o
+TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o
 TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
 ifeq ($(POOL_TLS_PHASE1),1)
 TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o

View File

@@ -63,6 +63,10 @@ static void __attribute__((destructor)) front_gate_stats_destructor(void) {
 //
 // Performance: 2-3 cycles (L1 cache hit)
 static inline int safe_header_probe(void* ptr) {
+    // Reject obviously invalid/sentinel-sized pointers (defense-in-depth)
+    if ((uintptr_t)ptr < 4096) {
+        return -1;
+    }
     // Safety check: header must be in same page as ptr
     uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
     if (offset_in_page == 0) {
@@ -140,9 +144,13 @@ static inline ptr_classification_t registry_lookup(void* ptr) {
         return result;
     }
-    // Valid Tiny allocation (headerless)
-    // Note: C7 (1KB) is the only headerless class, but Registry handles all
-    result.kind = PTR_KIND_TINY_HEADERLESS;
+    // Valid Tiny allocation
+    // Only class 7 (1KB) is headerless. Other classes use header-based free path.
+    if (ss->size_class == 7) {
+        result.kind = PTR_KIND_TINY_HEADERLESS;
+    } else {
+        result.kind = PTR_KIND_TINY_HEADER;
+    }
     return result;
 }
@@ -177,6 +185,11 @@ ptr_classification_t classify_ptr(void* ptr) {
     };
     if (!ptr) return result;
+    // Early guard: reject non-canonical tiny integers to avoid ptr-1 probe crashes
+    if ((uintptr_t)ptr < 4096) {
+        result.kind = PTR_KIND_UNKNOWN;
+        return result;
+    }
     // Step 1: Try safe header probe (C0-C6 fast path: 5-10 cycles)
     // Skip header probe on 1KB-aligned pointers to avoid misclassifying C7/headerless
@@ -186,13 +199,22 @@ ptr_classification_t classify_ptr(void* ptr) {
     }
     if (class_idx >= 0) {
         // Header found - C0-C6 with header
-        result.kind = PTR_KIND_TINY_HEADER;
-        result.class_idx = class_idx;
+        // Additional safety: verify pointer belongs to a SuperSlab region.
+        // This avoids rare false positives where random header bytes look like 0xA0.
+        struct SuperSlab* ss_chk = hak_super_lookup(ptr);
+        if (!ss_chk) {
+            // Not in Tiny registry; treat as UNKNOWN and continue
+            // (fall back to later checks)
+        } else {
+            result.kind = PTR_KIND_TINY_HEADER;
+            result.class_idx = class_idx;
+            result.ss = ss_chk;
 #if !HAKMEM_BUILD_RELEASE
             g_classify_header_hit++;
 #endif
-        return result;
+            return result;
+        }
     }
     // Step 2: Check Pool TLS (before Registry to avoid false positives)

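For orientation, a condensed sketch of the classification order the classifier hunks above implement; the types and lookups are stubbed stand-ins so the sketch is self-contained, not the real definitions:

#include <stdint.h>
#include <stddef.h>

/* Stand-ins for the real tree: hak_super_lookup and safe_header_probe are
 * stubbed here so this compiles on its own. */
struct SuperSlab { int size_class; };
static struct SuperSlab* hak_super_lookup(void* ptr) { (void)ptr; return NULL; }
static int safe_header_probe(void* ptr) { (void)ptr; return -1; } /* class 0-6, or -1 */

typedef enum { PTR_KIND_UNKNOWN, PTR_KIND_TINY_HEADER, PTR_KIND_TINY_HEADERLESS } ptr_kind_t;

static ptr_kind_t classify_sketch(void* ptr) {
    /* Early guard: tiny integers (e.g. 0xa0) would crash the ptr-1 probe. */
    if (!ptr || (uintptr_t)ptr < 4096) return PTR_KIND_UNKNOWN;
    /* Step 1: header probe, skipped for 1KB-aligned pointers (could be C7). */
    if (((uintptr_t)ptr & 0x3FF) != 0) {
        int cls = safe_header_probe(ptr);
        /* A header hit is only trusted if the registry confirms the region,
         * killing false positives where random bytes look like a header. */
        if (cls >= 0 && hak_super_lookup(ptr) != NULL) return PTR_KIND_TINY_HEADER;
    }
    /* Registry fallback: only class 7 (1KB) is headerless. */
    struct SuperSlab* ss = hak_super_lookup(ptr);
    if (ss) return (ss->size_class == 7) ? PTR_KIND_TINY_HEADERLESS : PTR_KIND_TINY_HEADER;
    return PTR_KIND_UNKNOWN; /* Pool TLS / other backends follow in the real code */
}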
View File

@@ -64,8 +64,13 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
     // Phase 7 carve operations return base (stride includes header)
     // SLL stores base to avoid overwriting header with next pointer
     // Push to SLL (standard linked list push using base)
-    *(void**)ptr = g_tls_sll_head[class_idx];
+    // Phase 7: Store next pointer at header-safe offset (base+1 for C0-C6)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = 1; // C7 is rejected above; always skip header
+#else
+    const size_t next_offset = 0;
+#endif
+    *(void**)((uint8_t*)ptr + next_offset) = g_tls_sll_head[class_idx];
     g_tls_sll_head[class_idx] = ptr;
     g_tls_sll_count[class_idx]++;
@@ -95,7 +100,13 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
     }
-    // Pop from SLL (reads next from base)
-    void* next = *(void**)base;
+    // Phase 7: Read next pointer at header-safe offset
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
+    void* next = *(void**)((uint8_t*)base + next_offset);
     g_tls_sll_head[class_idx] = next;
     if (g_tls_sll_count[class_idx] > 0) {
         g_tls_sll_count[class_idx]--;
@@ -152,8 +163,13 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t
     // Find chain tail (traverse to_move - 1 nodes)
     // NOTE: Chain MUST be linked using base pointers (caller responsibility)
     void* tail = chain_head;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = 1; // Chain is built from header-safe links (C7 rejected)
+#else
+    const size_t next_offset = 0;
+#endif
     for (uint32_t i = 1; i < to_move; i++) {
-        void* next = *(void**)tail;
+        void* next = *(void**)((uint8_t*)tail + next_offset);
         if (!next) {
             // Chain shorter than expected, adjust to_move
             to_move = i;
@@ -163,7 +179,7 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t
     }
     // Splice chain to SLL head
-    *(void**)tail = g_tls_sll_head[class_idx];
+    *(void**)((uint8_t*)tail + next_offset) = g_tls_sll_head[class_idx];
     g_tls_sll_head[class_idx] = chain_head;
     g_tls_sll_count[class_idx] += to_move;

View File

@@ -79,7 +79,13 @@ static inline __attribute__((always_inline)) void* tiny_fast_pop(int class_idx)
     if (cap == 0) return NULL;
     void* head = g_fast_head[class_idx];
     if (!head) return NULL;
-    void* next = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
+    void* next = *(void**)((uint8_t*)head + next_offset);
     g_fast_head[class_idx] = next;
     uint16_t count = g_fast_count[class_idx];
     if (count > 0) {
@@ -112,7 +118,13 @@ static inline __attribute__((always_inline)) int tiny_fast_push(int class_idx, v
         tiny_fast_debug_log(class_idx, "full", count, cap);
         return 0;
     }
-    *(void**)ptr = g_fast_head[class_idx];
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset2 = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset2 = 0;
+#endif
+    *(void**)((uint8_t*)ptr + next_offset2) = g_fast_head[class_idx];
     g_fast_head[class_idx] = ptr;
     g_fast_count[class_idx] = (uint16_t)(count + 1);
     g_fast_push_hits[class_idx]++;

View File

@@ -39,13 +39,20 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class0(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[0];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[0] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off0 = 1; // class 0 is headered
+#else
+    const size_t next_off0 = 0;
+#endif
+    g_fast_head[0] = *(void**)((uint8_t*)head + next_off0);
     uint16_t count = g_fast_count[0];
     if (count > 0) {
         g_fast_count[0] = (uint16_t)(count - 1);
     } else {
         g_fast_count[0] = 0;
     }
+    // No C7 here (class 0), just return base
     return head;
 }
@@ -61,7 +68,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class1(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[1];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[1] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off1 = 1;
+#else
+    const size_t next_off1 = 0;
+#endif
+    g_fast_head[1] = *(void**)((uint8_t*)head + next_off1);
     uint16_t count = g_fast_count[1];
     if (count > 0) {
         g_fast_count[1] = (uint16_t)(count - 1);
@@ -83,7 +96,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class2(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[2];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[2] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off2 = 1;
+#else
+    const size_t next_off2 = 0;
+#endif
+    g_fast_head[2] = *(void**)((uint8_t*)head + next_off2);
     uint16_t count = g_fast_count[2];
     if (count > 0) {
         g_fast_count[2] = (uint16_t)(count - 1);
@@ -105,7 +124,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class3(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[3];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[3] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off3 = 1;
+#else
+    const size_t next_off3 = 0;
+#endif
+    g_fast_head[3] = *(void**)((uint8_t*)head + next_off3);
     uint16_t count = g_fast_count[3];
     if (count > 0) {
         g_fast_count[3] = (uint16_t)(count - 1);

View File

@@ -310,7 +310,7 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
     }
     TinyRefillChain carve;
-    trc_linear_carve(slab_base, bs, meta, batch, &carve);
+    trc_linear_carve(slab_base, bs, meta, batch, class_idx, &carve);
     trc_splice_to_sll(class_idx, &carve, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
     // FIX: Update SuperSlab active counter (was missing!)
     ss_active_add(tls->ss, batch);

View File

@@ -7,6 +7,15 @@
 __attribute__((weak)) void hak_tiny_prewarm_tls_cache(void) {}
+// Weak stubs for remote tracking (avoid LTO link errors when tiny_remote.c is GC'ed)
+struct SuperSlab; // forward decl to avoid heavy includes
+__attribute__((weak)) void tiny_remote_track_on_local_free(struct SuperSlab* ss, int slab_idx, void* node, const char* stage, unsigned int tid) {
+    (void)ss; (void)slab_idx; (void)node; (void)stage; (void)tid;
+}
+__attribute__((weak)) void tiny_remote_track_expect_alloc(struct SuperSlab* ss, int slab_idx, void* node, const char* stage, unsigned int tid) {
+    (void)ss; (void)slab_idx; (void)node; (void)stage; (void)tid;
+}
 __attribute__((weak)) void* pool_alloc(size_t size) {
     // Fallback to malloc if Pool TLS not linked
     return malloc(size);
@@ -16,4 +25,3 @@ __attribute__((weak)) void pool_free(void* ptr) {
     // Fallback to free if Pool TLS not linked
     free(ptr);
 }
-

core/link_stubs.d (new file, 1 addition)
View File

@@ -0,0 +1 @@
+core/link_stubs.o: core/link_stubs.c

View File

@@ -272,8 +272,13 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
             break; // SLL empty
         }
-        // Push to SFC (Layer 0)
-        *(void**)ptr = g_sfc_head[class_idx];
+        // Push to SFC (Layer 0) — header-aware
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t sfc_next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t sfc_next_off = 0;
+#endif
+        *(void**)((uint8_t*)ptr + sfc_next_off) = g_sfc_head[class_idx];
         g_sfc_head[class_idx] = ptr;
         g_sfc_count[class_idx]++;

View File

@@ -75,18 +75,23 @@ extern sfc_stats_t g_sfc_stats[TINY_NUM_CLASSES];
 // Contract: Caller owns returned pointer
 // Invariants: count ≥ 0, all pointers belong to correct class
 static inline void* sfc_alloc(int cls) {
-    void* head = g_sfc_head[cls];
-    if (__builtin_expect(head != NULL, 1)) {
-        // Pop: 3 instructions (mimalloc/tcache style)
-        g_sfc_head[cls] = *(void**)head; // next = *head
+    void* base = g_sfc_head[cls];
+    if (__builtin_expect(base != NULL, 1)) {
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_offset = (cls == 7) ? 0 : 1;
+#else
+        const size_t next_offset = 0;
+#endif
+        // Pop: header-aware next
+        g_sfc_head[cls] = *(void**)((uint8_t*)base + next_offset);
         g_sfc_count[cls]--; // count--
 #if HAKMEM_DEBUG_COUNTERS
         g_sfc_stats[cls].alloc_hits++;
 #endif
-        return head; // 🚀 SFC HIT!
+        return base; // 🚀 SFC HIT! (returns base)
     }
 #if HAKMEM_DEBUG_COUNTERS
@@ -114,9 +119,14 @@ static inline int sfc_free_push(int cls, void* ptr) {
     }
     if (__builtin_expect(cnt < cap, 1)) {
         // Push: 3 instructions
-        *(void**)ptr = g_sfc_head[cls]; // *ptr = head
-        g_sfc_head[cls] = ptr; // head = ptr
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_offset = (cls == 7) ? 0 : 1;
+#else
+        const size_t next_offset = 0;
+#endif
+        // Push: header-aware next placement
+        *(void**)((uint8_t*)ptr + next_offset) = g_sfc_head[cls];
+        g_sfc_head[cls] = ptr; // head = base
         g_sfc_count[cls] = cnt + 1; // count++
 #if HAKMEM_DEBUG_COUNTERS

View File

@@ -75,12 +75,22 @@
     if (limit > cap/2) limit = cap/2;
     if (limit > 32) limit = 32; // keep free-path bounded
     void* head = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
-    *(void**)head = NULL;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_off = 0;
+#endif
+    *(void**)((uint8_t*)head + next_off) = NULL;
     void* tail = head; // current tail
     int taken = 1;
     while (taken < limit && mag->top > 0) {
         void* p2 = mag->items[--mag->top].ptr;
-        *(void**)p2 = head;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off2 = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off2 = 0;
+#endif
+        *(void**)((uint8_t*)p2 + next_off2) = head;
         head = p2;
         taken++;
     }

View File

@@ -41,11 +41,17 @@ static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n)
 #endif
 }
-static inline void trc_push_front(TinyRefillChain* c, void* node) {
+// Phase 7 header-aware push_front: link using base+1 for C0-C6 (C7 not used here)
+static inline void trc_push_front(TinyRefillChain* c, void* node, int class_idx) {
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
     if (c->head == NULL) {
-        c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1;
+        c->head = node; c->tail = node; *(void**)((uint8_t*)node + next_offset) = NULL; c->count = 1;
     } else {
-        *(void**)node = c->head; c->head = node; c->count++;
+        *(void**)((uint8_t*)node + next_offset) = c->head; c->head = node; c->count++;
     }
 }
@@ -167,7 +173,7 @@ static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
             trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
         }
         meta->freelist = next;
-        trc_push_front(out, p);
+        trc_push_front(out, p, class_idx);
         taken++;
     }
     // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
@@ -175,9 +181,12 @@
 }
 // Carve a contiguous batch of size 'batch' from linear area, return as chain
+// Phase 7 header-aware carve: link chain using header-safe next location
+// class_idx is required to decide headerless (C7) vs headered (C0-C6)
 static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                                         struct TinySlabMeta* meta,
                                         uint32_t batch,
+                                        int class_idx,
                                         TinyRefillChain* out) {
     if (!out || batch == 0) return 0;
     trc_init(out);
@@ -206,9 +215,18 @@ static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                 (void*)base, meta->carved, batch, (void*)cursor);
     }
+    // CRITICAL FIX (Phase 7): header-aware next pointer placement
+    // For header classes (C0-C6), the first byte at base is the 1-byte header.
+    // Store the SLL next pointer at base+1 to avoid clobbering the header.
+    // For C7 (headerless), store at base.
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
     for (uint32_t i = 1; i < batch; i++) {
         uint8_t* next = cursor + stride;
-        *(void**)cursor = (void*)next;
+        *(void**)(cursor + next_offset) = (void*)next;
         cursor = next;
     }
     void* tail = (void*)cursor;

View File

@@ -64,6 +64,7 @@ static inline void* tiny_region_id_write_header(void* base, int class_idx) {
 // Returns: class_idx (0-7), or -1 if invalid
 static inline int tiny_region_id_read_header(void* ptr) {
     if (!ptr) return -1;
+    if ((uintptr_t)ptr < 4096) return -1; // reject invalid tiny values
     uint8_t* header_ptr = (uint8_t*)ptr - 1;
@@ -129,6 +130,7 @@ static inline int tiny_region_id_read_header(void* ptr) {
 static inline int tiny_region_id_has_header(void* ptr) {
 #if !HAKMEM_BUILD_RELEASE
     if (!ptr) return 0;
+    if ((uintptr_t)ptr < 4096) return 0;
     uint8_t* header_ptr = (uint8_t*)ptr - 1;
     uint8_t header = *header_ptr;