Phase 7 follow-up: header-aware next links in BG spill, TLS drain, and the aggressive-inline macros

- bg_spill: link/traverse the next pointer at base+1 for C0–C6, at base for C7
- lifecycle: drain the TLS SLL and fast caches, reading next with header-aware offsets
- tiny_alloc_fast_inline: POP/PUSH macros made header-aware to match tls_sll_box rules
- add optional FREE_WRAP_ENTER trace (HAKMEM_FREE_WRAP_TRACE) for early triage

Result: the 0xa0/…0099 bogus-free logs are gone; the remaining SIGBUS now surfaces early in the free path. Next: instrument the early libc fallback, or guard invalid pointers during init, to pinpoint the source.
Moe Charm (CI)
2025-11-10 18:21:32 +09:00
parent dde490f842
commit d5302e9c87
5 changed files with 62 additions and 11 deletions
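
For context on the diffs below: with HAKMEM_TINY_HEADER_CLASSIDX enabled, Phase 7 appears to keep a one-byte class-index header at the base of C0–C6 blocks, so a free block's next pointer lives at base+1; C7 carries no header, so next stays at base. A minimal sketch of that rule as helpers — tiny_next_off/tiny_next_load/tiny_next_store are illustrative names, not from this codebase:

#include <stdint.h>
#include <stddef.h>

/* Illustrative helpers, assuming a 1-byte class-index header on C0-C6
 * blocks (guarded by HAKMEM_TINY_HEADER_CLASSIDX) and none on C7. */
static inline size_t tiny_next_off(int class_idx) {
#if HAKMEM_TINY_HEADER_CLASSIDX
    return (class_idx == 7) ? 0 : 1;   /* skip the 1-byte header */
#else
    (void)class_idx;
    return 0;
#endif
}

static inline void* tiny_next_load(int class_idx, void* p) {
    /* Note: at base+1 this is a misaligned pointer load; the diffs
     * below perform the same access pattern directly. */
    return *(void**)((uint8_t*)p + tiny_next_off(class_idx));
}

static inline void tiny_next_store(int class_idx, void* p, void* next) {
    *(void**)((uint8_t*)p + tiny_next_off(class_idx)) = next;
}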


@@ -72,6 +72,18 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
     HKM_TIME_START(t0);
 #endif
     (void)site; (void)size;
+    // Optional lightweight trace of early free calls (first few only)
+    static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
+    if (__builtin_expect(free_trace_en == -1, 0)) {
+        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
+        free_trace_en = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (free_trace_en) {
+        int n = atomic_fetch_add(&free_trace_count, 1);
+        if (n < 8) {
+            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
+        }
+    }
     if (!ptr) {
 #if HAKMEM_DEBUG_TIMING
         HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
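
The gate above is a latch-once env check plus a bounded counter. A standalone sketch of the same pattern — the function name and driver loop are illustrative; only HAKMEM_FREE_WRAP_TRACE and the log format come from the diff:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

/* Check the env var once, then print at most the first 8 events. */
static void trace_free_enter(void* ptr) {
    static int en = -1;                       /* -1 = not yet latched */
    static _Atomic int count = 0;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
        en = (e && *e && *e != '0') ? 1 : 0;  /* any value but "0" enables */
    }
    if (en && atomic_fetch_add(&count, 1) < 8)
        fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}

int main(void) {
    for (int i = 0; i < 32; i++)
        trace_free_enter(&i);   /* at most 8 lines print when enabled */
    return 0;
}

Run with HAKMEM_FREE_WRAP_TRACE=1 to enable; unset, or set to 0, it stays silent.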


@@ -45,19 +45,25 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
     void* rest = NULL;
     void* cur = (void*)chain;
     void* prev = NULL;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_off = 0;
+#endif
     while (cur && processed < g_bg_spill_max_batch) {
         prev = cur;
-        cur = *(void**)cur;
+        cur = *(void**)((uint8_t*)cur + next_off);
         processed++;
     }
-    if (cur != NULL) { rest = cur; *(void**)prev = NULL; }
+    if (cur != NULL) { rest = cur; *(void**)((uint8_t*)prev + next_off) = NULL; }
     // Return processed nodes to SS freelists
     pthread_mutex_lock(lock);
     uint32_t self_tid = tiny_self_u32_guard();
     void* node = (void*)chain;
     while (node) {
-        void* next = *(void**)node;
+        void* next = *(void**)((uint8_t*)node + next_off);
         SuperSlab* owner_ss = hak_super_lookup(node);
         if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
             int slab_idx = slab_index_for(owner_ss, node);
@@ -69,6 +75,7 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
                 continue;
             }
             void* prev = meta->freelist;
+            // SuperSlab freelist uses base offset (no header while free)
             *(void**)node = prev;
             meta->freelist = node;
             tiny_failfast_log("bg_spill", owner_ss->size_class, owner_ss, meta, node, prev);
@@ -87,10 +94,10 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
     // Prepend remainder back to head
     uintptr_t old_head;
     void* tail = rest;
-    while (*(void**)tail) tail = *(void**)tail;
+    while (*(void**)((uint8_t*)tail + next_off)) tail = *(void**)((uint8_t*)tail + next_off);
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)tail = (void*)old_head;
+        *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)rest,
                                                     memory_order_release, memory_order_relaxed));
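
The subtle point in this function is that two link encodings meet: spill-chain links are header-aware, but once a block is handed back to its owning SuperSlab freelist it is fully free and, per the comment in the hunk above, its next is stored at base. Condensing the return loop with its own identifiers (a sketch, not the full code):

    void* next = *(void**)((uint8_t*)node + next_off); /* spill link: header-aware */
    *(void**)node = meta->freelist;                    /* freelist link: base offset */
    meta->freelist = node;
    node = next;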


@@ -24,7 +24,13 @@ static inline void bg_spill_push_one(int class_idx, void* p) {
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)p = (void*)old_head;
+        // Phase 7: header-aware next placement (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)p + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)p,
                                                     memory_order_release, memory_order_relaxed));
@@ -36,7 +42,13 @@ static inline void bg_spill_push_chain(int class_idx, void* head, void* tail, in
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)tail = (void*)old_head;
+        // Phase 7: header-aware next placement for tail link
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)head,
                                                     memory_order_release, memory_order_relaxed));
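
Both push helpers are the standard lock-free prepend (Treiber stack): write the new node's link, then CAS the head, retrying on contention. A self-contained reduction — single head, explicit next_off parameter, illustrative names:

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

static _Atomic uintptr_t g_head;  /* stand-in for g_bg_spill_head[class_idx] */

/* Prepend p: store the current head into p's link slot, then try to swing
 * the head to p. The release order on success publishes the link write. */
static void spill_push(void* p, size_t next_off) {
    uintptr_t old_head;
    do {
        old_head = atomic_load_explicit(&g_head, memory_order_acquire);
        *(void**)((uint8_t*)p + next_off) = (void*)old_head;
    } while (!atomic_compare_exchange_weak_explicit(&g_head, &old_head,
                                                    (uintptr_t)p,
                                                    memory_order_release,
                                                    memory_order_relaxed));
}

int main(void) {
    static void* blocks[3][2];        /* each fake block can hold a next pointer */
    for (int i = 0; i < 3; i++)
        spill_push(&blocks[i][0], 0); /* next_off 0, i.e. the C7 case */
    return 0;
}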


@@ -149,7 +149,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_tls_sll_head[class_idx] = NULL;
     g_tls_sll_count[class_idx] = 0;
     while (sll) {
-        void* next = *(void**)sll;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_sll = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_sll = 0;
+#endif
+        void* next = *(void**)((uint8_t*)sll + next_off_sll);
         tiny_tls_list_guard_push(class_idx, tls, sll);
         tls_list_push(tls, sll);
         sll = next;
@@ -160,7 +165,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_fast_head[class_idx] = NULL;
     g_fast_count[class_idx] = 0;
     while (fast) {
-        void* next = *(void**)fast;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_fast = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_fast = 0;
+#endif
+        void* next = *(void**)((uint8_t*)fast + next_off_fast);
         tiny_tls_list_guard_push(class_idx, tls, fast);
         tls_list_push(tls, fast);
         fast = next;
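
One small readability point: both drain loops re-declare the constant offset on every iteration. Hoisting it above the loop keeps the body to the actual list walk; a sketch using the first hunk's own identifiers, same semantics:

#if HAKMEM_TINY_HEADER_CLASSIDX
    const size_t next_off = (class_idx == 7) ? 0 : 1;
#else
    const size_t next_off = 0;
#endif
    while (sll) {
        void* next = *(void**)((uint8_t*)sll + next_off); /* read link before handing off */
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll);
        sll = next;
    }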


@@ -49,7 +49,12 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
         if (g_tls_sll_count[(class_idx)] > 0) g_tls_sll_count[(class_idx)]--; \
         (ptr_out) = NULL; \
     } else { \
-        void* _next = *(void**)_head; \
+        /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
+        size_t _off = 0; \
+#if HAKMEM_TINY_HEADER_CLASSIDX \
+        _off = ((class_idx) == 7) ? 0 : 1; \
+#endif \
+        void* _next = *(void**)((uint8_t*)_head + _off); \
         g_tls_sll_head[(class_idx)] = _next; \
         if (g_tls_sll_count[(class_idx)] > 0) { \
             g_tls_sll_count[(class_idx)]--; \
@@ -81,7 +86,12 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
 // mov %rsi, g_tls_sll_head(%rdi)
 //
 #define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
-    *(void**)(ptr) = g_tls_sll_head[(class_idx)]; \
+    /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
+    size_t _off = 0; \
+#if HAKMEM_TINY_HEADER_CLASSIDX \
+    _off = ((class_idx) == 7) ? 0 : 1; \
+#endif \
+    *(void**)((uint8_t*)(ptr) + _off) = g_tls_sll_head[(class_idx)]; \
    g_tls_sll_head[(class_idx)] = (ptr); \
    g_tls_sll_count[(class_idx)]++; \
 } while(0)
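
As transcribed, these two macro bodies cannot compile: preprocessor directives such as #if are not permitted inside a #define replacement list, so the conditional has to be hoisted out of the macro. A compilable formulation under the same rules moves it into a helper macro — TINY_NEXT_OFF is a hypothetical name, not from the source:

/* Hoist the conditional out of the macro body: directives cannot appear
 * inside a #define, but a helper macro expands to a plain expression. */
#if HAKMEM_TINY_HEADER_CLASSIDX
#define TINY_NEXT_OFF(class_idx) ((size_t)(((class_idx) == 7) ? 0 : 1))
#else
#define TINY_NEXT_OFF(class_idx) ((size_t)0)
#endif

#define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
    /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
    *(void**)((uint8_t*)(ptr) + TINY_NEXT_OFF(class_idx)) = g_tls_sll_head[(class_idx)]; \
    g_tls_sll_head[(class_idx)] = (ptr); \
    g_tls_sll_count[(class_idx)]++; \
} while(0)

The POP side can load _next through the same helper, keeping both macros free of embedded directives.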