Phase 7 follow-up: header-aware next links in BG spill, TLS drain, and the aggressive-inline macros

- bg_spill: link/traverse the next pointer at base+1 for C0–C6, at base for C7
- lifecycle: drain the TLS SLL and fast caches, reading next with header-aware offsets
- tiny_alloc_fast_inline: POP/PUSH macros made header-aware to match tls_sll_box rules
- add optional FREE_WRAP_ENTER trace (HAKMEM_FREE_WRAP_TRACE) for early triage

Result: the 0xa0/…0099 bogus-free logs are gone; the remaining SIGBUS now surfaces early in the free path. Next: instrument the early libc fallback, or guard invalid pointers during init, to pinpoint the source.
Moe Charm (CI)
2025-11-10 18:21:32 +09:00
parent dde490f842
commit d5302e9c87
5 changed files with 62 additions and 11 deletions
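
For context on the diffs below: with HAKMEM_TINY_HEADER_CLASSIDX enabled, Phase 7 appears to keep a one-byte class-index header at the base of C0–C6 blocks, so a free block's next pointer lives at base+1; C7 carries no header, so next stays at base. A minimal sketch of that rule as helpers — tiny_next_off/tiny_next_load/tiny_next_store are illustrative names, not from this codebase:

#include <stdint.h>
#include <stddef.h>

/* Illustrative helpers, assuming a 1-byte class-index header on C0-C6
 * blocks (guarded by HAKMEM_TINY_HEADER_CLASSIDX) and none on C7. */
static inline size_t tiny_next_off(int class_idx) {
#if HAKMEM_TINY_HEADER_CLASSIDX
    return (class_idx == 7) ? 0 : 1;   /* skip the 1-byte header */
#else
    (void)class_idx;
    return 0;
#endif
}

static inline void* tiny_next_load(int class_idx, void* p) {
    /* Note: at base+1 this is a misaligned pointer load; the diffs
     * below perform the same access pattern directly. */
    return *(void**)((uint8_t*)p + tiny_next_off(class_idx));
}

static inline void tiny_next_store(int class_idx, void* p, void* next) {
    *(void**)((uint8_t*)p + tiny_next_off(class_idx)) = next;
}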


@@ -72,6 +72,18 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
     HKM_TIME_START(t0);
 #endif
     (void)site; (void)size;
+    // Optional lightweight trace of early free calls (first few only)
+    static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
+    if (__builtin_expect(free_trace_en == -1, 0)) {
+        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
+        free_trace_en = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (free_trace_en) {
+        int n = atomic_fetch_add(&free_trace_count, 1);
+        if (n < 8) {
+            fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
+        }
+    }
     if (!ptr) {
 #if HAKMEM_DEBUG_TIMING
         HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
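
The gate above is a latch-once env check plus a bounded counter. A standalone sketch of the same pattern — the function name and driver loop are illustrative; only HAKMEM_FREE_WRAP_TRACE and the log format come from the diff:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

/* Check the env var once, then print at most the first 8 events. */
static void trace_free_enter(void* ptr) {
    static int en = -1;                       /* -1 = not yet latched */
    static _Atomic int count = 0;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_FREE_WRAP_TRACE");
        en = (e && *e && *e != '0') ? 1 : 0;  /* any value but "0" enables */
    }
    if (en && atomic_fetch_add(&count, 1) < 8)
        fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
}

int main(void) {
    for (int i = 0; i < 32; i++)
        trace_free_enter(&i);   /* at most 8 lines print when enabled */
    return 0;
}

Run with HAKMEM_FREE_WRAP_TRACE=1 to enable; unset, or set to 0, it stays silent.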


@@ -45,19 +45,25 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
     void* rest = NULL;
     void* cur = (void*)chain;
     void* prev = NULL;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_off = 0;
+#endif
     while (cur && processed < g_bg_spill_max_batch) {
         prev = cur;
-        cur = *(void**)cur;
+        cur = *(void**)((uint8_t*)cur + next_off);
         processed++;
     }
-    if (cur != NULL) { rest = cur; *(void**)prev = NULL; }
+    if (cur != NULL) { rest = cur; *(void**)((uint8_t*)prev + next_off) = NULL; }
     // Return processed nodes to SS freelists
     pthread_mutex_lock(lock);
     uint32_t self_tid = tiny_self_u32_guard();
     void* node = (void*)chain;
     while (node) {
-        void* next = *(void**)node;
+        void* next = *(void**)((uint8_t*)node + next_off);
         SuperSlab* owner_ss = hak_super_lookup(node);
         if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
             int slab_idx = slab_index_for(owner_ss, node);
@@ -69,6 +75,7 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
                 continue;
             }
             void* prev = meta->freelist;
+            // SuperSlab freelist uses base offset (no header while free)
             *(void**)node = prev;
             meta->freelist = node;
             tiny_failfast_log("bg_spill", owner_ss->size_class, owner_ss, meta, node, prev);
@@ -87,10 +94,10 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
     // Prepend remainder back to head
     uintptr_t old_head;
     void* tail = rest;
-    while (*(void**)tail) tail = *(void**)tail;
+    while (*(void**)((uint8_t*)tail + next_off)) tail = *(void**)((uint8_t*)tail + next_off);
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)tail = (void*)old_head;
+        *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)rest,
                                                     memory_order_release, memory_order_relaxed));
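
The subtle point in this function is that two link encodings meet: spill-chain links are header-aware, but once a block is handed back to its owning SuperSlab freelist it is fully free and, per the comment in the hunk above, its next is stored at base. Condensing the return loop with its own identifiers (a sketch, not the full code):

    void* next = *(void**)((uint8_t*)node + next_off); /* spill link: header-aware */
    *(void**)node = meta->freelist;                    /* freelist link: base offset */
    meta->freelist = node;
    node = next;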


@@ -24,7 +24,13 @@ static inline void bg_spill_push_one(int class_idx, void* p) {
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)p = (void*)old_head;
+        // Phase 7: header-aware next placement (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)p + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)p,
                                                     memory_order_release, memory_order_relaxed));
@@ -36,7 +42,13 @@ static inline void bg_spill_push_chain(int class_idx, void* head, void* tail, in
     uintptr_t old_head;
     do {
         old_head = atomic_load_explicit(&g_bg_spill_head[class_idx], memory_order_acquire);
-        *(void**)tail = (void*)old_head;
+        // Phase 7: header-aware next placement for tail link
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off = 0;
+#endif
+        *(void**)((uint8_t*)tail + next_off) = (void*)old_head;
     } while (!atomic_compare_exchange_weak_explicit(&g_bg_spill_head[class_idx], &old_head,
                                                     (uintptr_t)head,
                                                     memory_order_release, memory_order_relaxed));
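
Both push helpers are the standard lock-free prepend (Treiber stack): write the new node's link, then CAS the head, retrying on contention. A self-contained reduction — single head, explicit next_off parameter, illustrative names:

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

static _Atomic uintptr_t g_head;  /* stand-in for g_bg_spill_head[class_idx] */

/* Prepend p: store the current head into p's link slot, then try to swing
 * the head to p. The release order on success publishes the link write. */
static void spill_push(void* p, size_t next_off) {
    uintptr_t old_head;
    do {
        old_head = atomic_load_explicit(&g_head, memory_order_acquire);
        *(void**)((uint8_t*)p + next_off) = (void*)old_head;
    } while (!atomic_compare_exchange_weak_explicit(&g_head, &old_head,
                                                    (uintptr_t)p,
                                                    memory_order_release,
                                                    memory_order_relaxed));
}

int main(void) {
    static void* blocks[3][2];        /* each fake block can hold a next pointer */
    for (int i = 0; i < 3; i++)
        spill_push(&blocks[i][0], 0); /* next_off 0, i.e. the C7 case */
    return 0;
}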


@@ -149,7 +149,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_tls_sll_head[class_idx] = NULL;
     g_tls_sll_count[class_idx] = 0;
     while (sll) {
-        void* next = *(void**)sll;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_sll = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_sll = 0;
+#endif
+        void* next = *(void**)((uint8_t*)sll + next_off_sll);
         tiny_tls_list_guard_push(class_idx, tls, sll);
         tls_list_push(tls, sll);
         sll = next;
@@ -160,7 +165,12 @@ static void tiny_tls_cache_drain(int class_idx) {
     g_fast_head[class_idx] = NULL;
     g_fast_count[class_idx] = 0;
     while (fast) {
-        void* next = *(void**)fast;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off_fast = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off_fast = 0;
+#endif
+        void* next = *(void**)((uint8_t*)fast + next_off_fast);
         tiny_tls_list_guard_push(class_idx, tls, fast);
         tls_list_push(tls, fast);
         fast = next;
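
One small readability point: both drain loops re-declare the constant offset on every iteration. Hoisting it above the loop keeps the body to the actual list walk; a sketch using the first hunk's own identifiers, same semantics:

#if HAKMEM_TINY_HEADER_CLASSIDX
    const size_t next_off = (class_idx == 7) ? 0 : 1;
#else
    const size_t next_off = 0;
#endif
    while (sll) {
        void* next = *(void**)((uint8_t*)sll + next_off); /* read link before handing off */
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll);
        sll = next;
    }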


@@ -49,7 +49,12 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
         if (g_tls_sll_count[(class_idx)] > 0) g_tls_sll_count[(class_idx)]--; \
         (ptr_out) = NULL; \
     } else { \
-        void* _next = *(void**)_head; \
+        /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
+        size_t _off = 0; \
+#if HAKMEM_TINY_HEADER_CLASSIDX \
+        _off = ((class_idx) == 7) ? 0 : 1; \
+#endif \
+        void* _next = *(void**)((uint8_t*)_head + _off); \
         g_tls_sll_head[(class_idx)] = _next; \
         if (g_tls_sll_count[(class_idx)] > 0) { \
             g_tls_sll_count[(class_idx)]--; \
@@ -81,7 +86,12 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
 // mov %rsi, g_tls_sll_head(%rdi)
 //
 #define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
-    *(void**)(ptr) = g_tls_sll_head[(class_idx)]; \
+    /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
+    size_t _off = 0; \
+#if HAKMEM_TINY_HEADER_CLASSIDX \
+    _off = ((class_idx) == 7) ? 0 : 1; \
+#endif \
+    *(void**)((uint8_t*)(ptr) + _off) = g_tls_sll_head[(class_idx)]; \
    g_tls_sll_head[(class_idx)] = (ptr); \
    g_tls_sll_count[(class_idx)]++; \
 } while(0)
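
As transcribed, these two macro bodies cannot compile: preprocessor directives such as #if are not permitted inside a #define replacement list, so the conditional has to be hoisted out of the macro. A compilable formulation under the same rules moves it into a helper macro — TINY_NEXT_OFF is a hypothetical name, not from the source:

/* Hoist the conditional out of the macro body: directives cannot appear
 * inside a #define, but a helper macro expands to a plain expression. */
#if HAKMEM_TINY_HEADER_CLASSIDX
#define TINY_NEXT_OFF(class_idx) ((size_t)(((class_idx) == 7) ? 0 : 1))
#else
#define TINY_NEXT_OFF(class_idx) ((size_t)0)
#endif

#define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
    /* Phase 7: header-aware next (C0-C6: base+1, C7: base) */ \
    *(void**)((uint8_t*)(ptr) + TINY_NEXT_OFF(class_idx)) = g_tls_sll_head[(class_idx)]; \
    g_tls_sll_head[(class_idx)] = (ptr); \
    g_tls_sll_count[(class_idx)]++; \
} while(0)

The POP side can load _next through the same helper, keeping both macros free of embedded directives.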