From d5302e9c874412cc4ab81c76050f1b0ebdcd9c5d Mon Sep 17 00:00:00 2001
From: "Moe Charm (CI)"
Date: Mon, 10 Nov 2025 18:21:32 +0900
Subject: [PATCH] Phase 7 follow-up: header-aware next links in BG spill, TLS
 drain, and aggressive inline macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- bg_spill: link and traverse the next pointer at base+1 for C0–C6, at base
  for C7 (see the layout sketch below)
- lifecycle: drain the TLS SLL and fast caches, reading next pointers with
  header-aware offsets
- tiny_alloc_fast_inline: make the POP/PUSH macros header-aware to match the
  tls_sll_box rules
- add an optional FREE_WRAP_ENTER trace (HAKMEM_FREE_WRAP_TRACE) for early
  triage
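
For reference, a minimal sketch of the free-node layout these rules assume
(tiny_next_slot is an illustrative helper, not an actual tls_sll_box API):
with HAKMEM_TINY_HEADER_CLASSIDX enabled, C0-C6 keep a 1-byte class-index
header at the block base while the block sits on a free list, so the next
pointer lives at base+1; C7 is headerless, so next stays at base.

    #include <stdint.h>

    /* Illustrative only: where the freelist "next" slot lives. */
    static inline void** tiny_next_slot(void* base, int class_idx) {
    #if HAKMEM_TINY_HEADER_CLASSIDX
        /* C0-C6: skip the 1-byte class header; C7 has none. */
        return (void**)((uint8_t*)base + ((class_idx == 7) ? 0 : 1));
    #else
        return (void**)base; /* headerless build: next at base */
    #endif
    }

Note that the base+1 slot is only byte-aligned, so these next-pointer loads
and stores are unaligned; the bg_spill and TLS-drain changes below perform
exactly such unaligned accesses.
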
Result: the bogus free logs for 0xa0/…0099 pointers are gone; the remaining
SIGBUS now surfaces early in the free path.
Next: instrument the early libc fallback, or guard invalid pointers during
init, to pinpoint the source.
---
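Triage note: with the trace enabled, the free wrapper logs the first 8
free() calls to stderr as "[FREE_WRAP_ENTER] ptr=0x...". A typical
invocation (the binary name here is a placeholder):

    HAKMEM_FREE_WRAP_TRACE=1 ./your_test_binary 2>&1 | grep FREE_WRAP_ENTER
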
 core/box/hak_free_api.inc.h    | 12 ++++++++++++
 core/hakmem_tiny_bg_spill.c    | 17 ++++++++++++-----
 core/hakmem_tiny_bg_spill.h    | 16 ++++++++++++++--
 core/hakmem_tiny_lifecycle.inc | 14 ++++++++++++--
 core/tiny_alloc_fast_inline.h  | 14 ++++++++++++--
 5 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/core/box/hak_free_api.inc.h b/core/box/hak_free_api.inc.h
index 0887c1ca..25c85a5d 100644
--- a/core/box/hak_free_api.inc.h
+++ b/core/box/hak_free_api.inc.h
@@ -72,6 +72,18 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
     HKM_TIME_START(t0);
 #endif
     (void)site; (void)size;
+    // Optional lightweight trace of early free calls (first few only)
+    static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
+    if (__builtin_expect(free_trace_en == -1, 0)) {
+        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
+        free_trace_en = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (free_trace_en) {
+        int n = atomic_fetch_add(&free_trace_count, 1);
+        if (n < 8) {
+            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
+        }
+    }
     if (!ptr) {
 #if HAKMEM_DEBUG_TIMING
         HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
diff --git a/core/hakmem_tiny_bg_spill.c b/core/hakmem_tiny_bg_spill.c
index 46132a45..f983f97d 100644
--- a/core/hakmem_tiny_bg_spill.c
+++ b/core/hakmem_tiny_bg_spill.c
@@ -45,19 +45,25 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
     void* rest = NULL;
     void* cur = (void*)chain;
     void* prev = NULL;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_off = 0;
+#endif
     while (cur && processed < g_bg_spill_max_batch) {
         prev = cur;
-        cur = *(void**)cur;
+        cur = *(void**)((uint8_t*)cur + next_off);
         processed++;
     }
-    if (cur != NULL) { rest = cur; *(void**)prev = NULL; }
+    if (cur != NULL) { rest = cur; *(void**)((uint8_t*)prev + next_off) = NULL; }
 
     // Return processed nodes to SS freelists
     pthread_mutex_lock(lock);
     uint32_t self_tid = tiny_self_u32_guard();
     void* node = (void*)chain;
     while (node) {
-        void* next = *(void**)node;
+        void* next = *(void**)((uint8_t*)node + next_off);
         SuperSlab* owner_ss = hak_super_lookup(node);
         if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
             int slab_idx = slab_index_for(owner_ss, node);
@@ -69,6 +75,7 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
                 continue;
             }
             void* prev = meta->freelist;
+            // SuperSlab freelist uses base offset (no header while free)
             *(void**)node = prev;
             meta->freelist = node;
             tiny_failfast_log("bg_spill", owner_ss->size_class, owner_ss, meta, node, prev);
@@ -87,10 +94,10 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
         // Prepend remainder back to head
         uintptr_t old_head;
         void* tail = rest;
-        while (*(void**)tail) tail = *(void**)tail;
+        while (*(void**)((uint8_t*)tail + next_off)) tail = *(void**)((uint8_t*)tail + next_off);
         do {
             old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-            *(void**)tail = (void*)old_head;
+            *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
         } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx],
                                                         &old_head, (uintptr_t)rest,
                                                         memory_order_release, memory_order_relaxed));
diff --git a/core/hakmem_tiny_bg_spill.h b/core/hakmem_tiny_bg_spill.h
index 4434ab3d..a378c09e 100644
--- a/core/hakmem_tiny_bg_spill.h
+++ b/core/hakmem_tiny_bg_spill.h
@@ -24,7 +24,13 @@ static inline void bg_spill_push_one(int class_idx, void* p) {
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)p = (void*)old_head;
+        // Phase 7: header-aware next placement (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)p + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx],
                                                     &old_head, (uintptr_t)p,
                                                     memory_order_release, memory_order_relaxed));
@@ -36,7 +42,13 @@ static inline void bg_spill_push_chain(int class_idx, void* head, void* tail, in
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)tail = (void*)old_head;
+        // Phase 7: header-aware next placement for tail link
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx],
                                                     &old_head, (uintptr_t)head,
                                                     memory_order_release, memory_order_relaxed));
diff --git a/core/hakmem_tiny_lifecycle.inc b/core/hakmem_tiny_lifecycle.inc
index 094b807b..2015162d 100644
--- a/core/hakmem_tiny_lifecycle.inc
+++ b/core/hakmem_tiny_lifecycle.inc
@@ -149,7 +149,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_tls_sll_head[class_idx] = NULL;
     g_tls_sll_count[class_idx] = 0;
     while (sll) {
-        void* next = *(void**)sll;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_sll = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_sll = 0;
+#endif
+        void* next = *(void**)((uint8_t*)sll + next_off_sll);
         tiny_tls_list_guard_push(class_idx, tls, sll);
         tls_list_push(tls, sll);
         sll = next;
@@ -160,7 +165,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_fast_head[class_idx] = NULL;
     g_fast_count[class_idx] = 0;
     while (fast) {
-        void* next = *(void**)fast;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_fast = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_fast = 0;
+#endif
+        void* next = *(void**)((uint8_t*)fast + next_off_fast);
         tiny_tls_list_guard_push(class_idx, tls, fast);
         tls_list_push(tls, fast);
         fast = next;
diff --git a/core/tiny_alloc_fast_inline.h b/core/tiny_alloc_fast_inline.h
index 0197f5cd..53932a8a 100644
--- a/core/tiny_alloc_fast_inline.h
+++ b/core/tiny_alloc_fast_inline.h
@@ -49,7 +49,12 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
         if (g_tls_sll_count[(class_idx)] > 0) g_tls_sll_count[(class_idx)]--; \
         (ptr_out) = NULL; \
     } else { \
-        void* _next = *(void**)_head; \
+        /* Phase 7: header-aware next (C0-C6: base+1, C7: base). */        \
+        /* A #if cannot appear inside a macro body, so this relies on */   \
+        /* HAKMEM_TINY_HEADER_CLASSIDX being defined to 0 or 1 by the */   \
+        /* configuration headers. */                                       \
+        const size_t _off = (HAKMEM_TINY_HEADER_CLASSIDX && (class_idx) != 7) ? 1 : 0; \
+        void* _next = *(void**)((uint8_t*)_head + _off); \
         g_tls_sll_head[(class_idx)] = _next; \
         if (g_tls_sll_count[(class_idx)] > 0) { \
             g_tls_sll_count[(class_idx)]--; \
@@ -81,7 +86,12 @@
 // mov %rsi, g_tls_sll_head(%rdi)
 //
 #define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
-    *(void**)(ptr) = g_tls_sll_head[(class_idx)]; \
+    /* Phase 7: header-aware next (C0-C6: base+1, C7: base). */        \
+    /* Same constraint as POP above: no #if inside a macro body, */    \
+    /* so HAKMEM_TINY_HEADER_CLASSIDX must be defined to 0 or 1. */    \
+    /* Matches the tls_sll_box linking rules. */                       \
+    const size_t _off = (HAKMEM_TINY_HEADER_CLASSIDX && (class_idx) != 7) ? 1 : 0; \
+    *(void**)((uint8_t*)(ptr) + _off) = g_tls_sll_head[(class_idx)]; \
     g_tls_sll_head[(class_idx)] = (ptr); \
     g_tls_sll_count[(class_idx)]++; \
 } while(0)