diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 94591b28..aba4fe33 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -329,6 +329,19 @@ void superslab_free(SuperSlab* ss) {
         return; // Invalid SuperSlab
     }
 
+    // Guard: do not free while pinned by TLS/remote holders
+    uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
+    if (__builtin_expect(ss_refs != 0, 0)) {
+#if !HAKMEM_BUILD_RELEASE
+        static _Atomic uint32_t g_ss_free_pinned = 0;
+        uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
+        if (shot < 8) {
+            fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
+        }
+#endif
+        return;
+    }
+
     // ADD DEBUG LOGGING
     static __thread int dbg = -1;
 #if HAKMEM_BUILD_RELEASE
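The guard above makes superslab_free a no-op while any thread still holds a pin on the SuperSlab; the acquire load pairs with a release decrement in the unpin path, so list edits made under a pin are visible before the memory can be returned. A minimal model of the implied protocol follows. It is an illustration only: the Arena/arena_* names are hypothetical, and real code must additionally keep new pins from racing the final check (e.g. by serializing frees behind the shared-pool lock).

/* Sketch: pin / deferred-free protocol implied by the guard above.
 * Hypothetical names; not part of the allocator. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct {
    _Atomic uint32_t refcount;   /* pins held by TLS lists / remote queues */
    _Atomic uint32_t active;     /* live user blocks */
} Arena;

static bool arena_try_free(Arena* a) {
    if (atomic_load_explicit(&a->active, memory_order_acquire) != 0)
        return false;
    if (atomic_load_explicit(&a->refcount, memory_order_acquire) != 0)
        return false;            /* pinned: defer, like SS_FREE_SKIP_PINNED */
    free(a);
    return true;
}

static void arena_pin(Arena* a) {
    atomic_fetch_add_explicit(&a->refcount, 1, memory_order_relaxed);
}

static void arena_unpin(Arena* a) {
    /* release: publish list edits made while pinned */
    if (atomic_fetch_sub_explicit(&a->refcount, 1, memory_order_release) == 1) {
        atomic_thread_fence(memory_order_acquire);
        arena_try_free(a);       /* last pin gone: retry the deferred free */
    }
}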
stage : "(null)", + class_idx, + raw, + (unsigned)meta_cls, + idx, + ss, + from_base, + (unsigned)from_meta_cls, + from_idx, + (void*)from_ss, + (unsigned long long)from_meta_used, + from_meta_freelist, + next_off_dbg, + (void*)from_next_off0, + (void*)from_next_off1, + (unsigned long long)g_tls_canary_before_sll, + (unsigned long long)g_tls_canary_after_sll, + g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)", + HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx])); + void* bt[16]; + int frames = backtrace(bt, 16); + backtrace_symbols_fd(bt, frames, fileno(stderr)); + fflush(stderr); + } + return 0; +} + +static inline void tls_sll_set_head(int class_idx, hak_base_ptr_t head, const char* stage) +{ + void* raw = HAK_BASE_TO_RAW(head); + if (!tls_sll_check_node(class_idx, raw, NULL, stage)) { + abort(); + } + g_tls_sll[class_idx].head = head; + tls_sll_record_writer(class_idx, stage ? stage : "set_head"); +} + +static inline void tls_sll_set_head_from(int class_idx, hak_base_ptr_t head, void* from_base, const char* stage) +{ + void* raw = HAK_BASE_TO_RAW(head); + if (!tls_sll_check_node(class_idx, raw, from_base, stage)) { + abort(); + } + g_tls_sll[class_idx].head = head; + tls_sll_record_writer(class_idx, stage ? stage : "set_head"); +} + +static inline void tls_sll_set_head_raw(int class_idx, void* raw_head, const char* stage) +{ + tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(raw_head), stage); +} + static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage) { static _Atomic uint32_t g_hdr_mismatch_log = 0; @@ -328,10 +439,12 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t // Detect meta/class mismatch on push (first few only). bool push_valid = true; + SuperSlab* ss_ptr = NULL; do { static _Atomic uint32_t g_tls_sll_push_meta_mis = 0; struct SuperSlab* ss = hak_super_lookup(raw_ptr); if (ss && ss->magic == SUPERSLAB_MAGIC) { + ss_ptr = ss; int sidx = slab_index_for(ss, raw_ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { uint8_t meta_cls = ss->slabs[sidx].class_idx; @@ -435,6 +548,11 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t return false; } + // Pin SuperSlab while node resides in TLS SLL (prevents premature free) + if (ss_ptr && ss_ptr->magic == SUPERSLAB_MAGIC) { + superslab_ref_inc(ss_ptr); + } + // DEBUG: Strict address check on push to catch corruption early uintptr_t ptr_val = (uintptr_t)raw_ptr; if (ptr_val < 4096 || ptr_val > 0x00007fffffffffffULL) { @@ -528,8 +646,7 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t // Link new node to current head via Box API (offset is handled inside tiny_nextptr). // Note: g_tls_sll[...].head is hak_base_ptr_t, but PTR_NEXT_WRITE takes void* val. PTR_NEXT_WRITE("tls_push", class_idx, raw_ptr, 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head)); - g_tls_sll[class_idx].head = ptr; - tls_sll_record_writer(class_idx, "push"); + tls_sll_set_head(class_idx, ptr, "push"); g_tls_sll[class_idx].count = cur + 1; s_tls_sll_last_push[class_idx] = ptr; @@ -587,7 +704,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch // Sentinel guard: remote sentinel must never be in TLS SLL. 
     if (__builtin_expect((uintptr_t)raw_base == TINY_REMOTE_SENTINEL, 0)) {
-        g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
+        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_sentinel");
         g_tls_sll[class_idx].count = 0;
         tls_sll_record_writer(class_idx, "pop_sentinel_reset");
 #if !HAKMEM_BUILD_RELEASE
@@ -634,9 +751,8 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
                     class_idx, HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]));
         }
         tls_sll_dump_tls_window(class_idx, "head_range");
-        g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
+        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_invalid_head");
         g_tls_sll[class_idx].count = 0;
-        tls_sll_record_writer(class_idx, "pop_invalid_head");
         return false;
     }
 #endif
@@ -719,9 +835,8 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
             fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
                     class_idx, raw_base, got, expect, (unsigned long long)cnt);
         }
-        g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
+        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "header_reset");
         g_tls_sll[class_idx].count = 0;
-        tls_sll_record_writer(class_idx, "header_reset");
         {
             static int g_sll_ring_en = -1;
             if (__builtin_expect(g_sll_ring_en == -1, 0)) {
@@ -746,6 +861,34 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
     hak_base_ptr_t next = HAK_BASE_FROM_RAW(raw_next);
     tls_sll_diag_next(class_idx, base, next, "pop_next");
 
+    // Validate next pointer before installing as new head.
+    if (!hak_base_is_null(next)) {
+        SuperSlab* next_ss = hak_super_lookup(raw_next);
+        int next_cap = next_ss ? ss_slabs_capacity(next_ss) : 0;
+        int next_idx = (next_ss && next_ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(next_ss, raw_next) : -1;
+        uint8_t next_meta_cls = (next_idx >= 0 && next_idx < next_cap) ? next_ss->slabs[next_idx].class_idx : 0xff;
+        if (!next_ss || next_ss->magic != SUPERSLAB_MAGIC || next_idx < 0 || next_idx >= next_cap || next_meta_cls != (uint8_t)class_idx) {
+            static _Atomic uint32_t g_next_invalid = 0;
+            uint32_t shot = atomic_fetch_add_explicit(&g_next_invalid, 1, memory_order_relaxed);
+            if (shot < 8) {
+                fprintf(stderr,
+                        "[TLS_SLL_NEXT_INVALID] cls=%d next=%p meta_cls=%u idx=%d ss=%p from_base=%p head=%p last_writer=%s\n",
+                        class_idx,
+                        raw_next,
+                        (unsigned)next_meta_cls,
+                        next_idx,
+                        (void*)next_ss,
+                        raw_base,
+                        HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
+                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
+            }
+            // Drop remainder of list to avoid chasing stale pointers.
+            next = HAK_BASE_FROM_RAW(NULL);
+            tls_sll_set_head(class_idx, next, "pop_next_invalid");
+            g_tls_sll[class_idx].count = 0;
+        }
+    }
+
 #if !HAKMEM_BUILD_RELEASE
     if (!hak_base_is_null(next) && !validate_ptr_range(raw_next, "tls_sll_pop_next")) {
         fprintf(stderr,
@@ -756,15 +899,14 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
     }
 #endif
 
-    g_tls_sll[class_idx].head = next;
-    tls_sll_record_writer(class_idx, "pop");
+    tls_sll_set_head_from(class_idx, next, raw_base, where ? where : "pop");
     if ((class_idx == 4 || class_idx == 6) && !hak_base_is_null(next) && !tls_sll_head_valid(next)) {
         fprintf(stderr, "[TLS_SLL_POP_POST_INVALID] cls=%d next=%p last_writer=%s\n",
                 class_idx, raw_next,
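Every TLS-SLL head write in this header now funnels through tls_sll_set_head / tls_sll_set_head_from / tls_sll_set_head_raw, which validate the candidate head (address range, owning SuperSlab magic, slab index, per-slab class byte, and optionally the in-block header byte) and abort on the first mismatch. The shape of that choke point, reduced to a standalone sketch with stand-in types (Region and lookup play the roles of SuperSlab and hak_super_lookup; all names here are hypothetical):

/* Standalone model of the validate-then-install head setter. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define REGION_MAGIC 0x48414Bu

typedef struct { uint32_t magic; uint8_t cls; } Region;

static Region  g_region = { REGION_MAGIC, 4 };
static Region* lookup(void* p) { (void)p; return &g_region; }  /* stub */

static void* g_head;  /* plays the role of g_tls_sll[cls].head */

static int head_ok(int cls, void* p) {
    if (!p) return 1;                      /* clearing the list is always OK */
    uintptr_t a = (uintptr_t)p;
    if (a < 4096 || a > 0x00007fffffffffffULL) return 0;
    Region* r = lookup(p);
    return r && r->magic == REGION_MAGIC && r->cls == (uint8_t)cls;
}

static void set_head(int cls, void* p, const char* stage) {
    if (!head_ok(cls, p)) {
        fprintf(stderr, "invalid head (%s)\n", stage);
        abort();                           /* fail fast, as the patch does */
    }
    g_head = p;
}

int main(void) {
    static uint8_t block[64];
    set_head(4, block, "push");            /* accepted */
    set_head(4, NULL,  "reset");           /* accepted */
    return 0;
}

Centralizing the write also makes the last-writer bookkeeping uniform: each call site now passes a stage tag instead of hand-rolling tls_sll_record_writer next to a raw assignment.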
g_tls_sll_last_writer[class_idx] : "(null)"); tls_sll_dump_tls_window(class_idx, "pop_post"); - g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL); + tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_post"); g_tls_sll[class_idx].count = 0; return false; } @@ -775,6 +917,14 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch // Clear next inside popped node to avoid stale-chain issues. tiny_next_write(class_idx, raw_base, NULL); + // Release SuperSlab pin now that node left TLS SLL + do { + SuperSlab* ss_pop = hak_super_lookup(raw_base); + if (ss_pop && ss_pop->magic == SUPERSLAB_MAGIC) { + superslab_ref_dec(ss_pop); + } + } while (0); + #if !HAKMEM_BUILD_RELEASE // Trace TLS SLL pop (debug only) extern void ptr_trace_record_impl(int event, void* ptr, int class_idx, uint64_t op_num, @@ -874,8 +1024,7 @@ static inline uint32_t tls_sll_splice(int class_idx, tls_sll_debug_guard(class_idx, tail, "splice_tail"); PTR_NEXT_WRITE("tls_splice_link", class_idx, HAK_BASE_TO_RAW(tail), 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head)); - g_tls_sll[class_idx].head = chain_head; - tls_sll_record_writer(class_idx, "splice"); + tls_sll_set_head(class_idx, chain_head, "splice"); g_tls_sll[class_idx].count = cur + moved; return moved; diff --git a/core/front/tiny_unified_cache.c b/core/front/tiny_unified_cache.c index b461dbc4..1250ec14 100644 --- a/core/front/tiny_unified_cache.c +++ b/core/front/tiny_unified_cache.c @@ -11,6 +11,7 @@ #include "../hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls) #include #include +#include // Phase 23-E: Forward declarations extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; // From hakmem_tiny_superslab.c @@ -337,6 +338,39 @@ void* unified_cache_refill(int class_idx) { if (m->freelist) { // Freelist pop void* p = m->freelist; + + // Validate freelist head before dereferencing + do { + SuperSlab* fl_ss = hak_super_lookup(p); + int fl_cap = fl_ss ? ss_slabs_capacity(fl_ss) : 0; + int fl_idx = (fl_ss && fl_ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(fl_ss, p) : -1; + uint8_t fl_cls = (fl_idx >= 0 && fl_idx < fl_cap) ? fl_ss->slabs[fl_idx].class_idx : 0xff; + if (!fl_ss || fl_ss->magic != SUPERSLAB_MAGIC || + fl_idx != tls->slab_idx || fl_ss != tls->ss || + fl_cls != (uint8_t)class_idx) { + static _Atomic uint32_t g_fl_invalid = 0; + uint32_t shot = atomic_fetch_add_explicit(&g_fl_invalid, 1, memory_order_relaxed); + if (shot < 8) { + fprintf(stderr, + "[UNIFIED_FREELIST_INVALID] cls=%d p=%p ss=%p slab=%d meta_used=%u tls_ss=%p tls_slab=%d cls_meta=%u\n", + class_idx, + p, + (void*)fl_ss, + fl_idx, + m->used, + (void*)tls->ss, + tls->slab_idx, + (unsigned)fl_cls); + } + // Drop invalid freelist to avoid SEGV and force slow refill + m->freelist = NULL; + p = NULL; + } + } while (0); + if (!p) { + break; + } + void* next_node = tiny_next_read(class_idx, p); // ROOT CAUSE FIX: Write header BEFORE exposing block (but AFTER reading next) diff --git a/core/hakmem_shared_pool_release.c b/core/hakmem_shared_pool_release.c index 90f9e7bb..4cb5a6db 100644 --- a/core/hakmem_shared_pool_release.c +++ b/core/hakmem_shared_pool_release.c @@ -3,6 +3,7 @@ #include "box/ss_slab_meta_box.h" #include "box/ss_hot_cold_box.h" #include "hakmem_env_cache.h" // Priority-2: ENV cache +#include "superslab/superslab_inline.h" // superslab_ref_get guard for TLS pins #include #include @@ -186,14 +187,19 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx) // BUGFIX: Double check total_active_blocks. 
diff --git a/core/hakmem_shared_pool_release.c b/core/hakmem_shared_pool_release.c
index 90f9e7bb..4cb5a6db 100644
--- a/core/hakmem_shared_pool_release.c
+++ b/core/hakmem_shared_pool_release.c
@@ -3,6 +3,7 @@
 #include "box/ss_slab_meta_box.h"
 #include "box/ss_hot_cold_box.h"
 #include "hakmem_env_cache.h" // Priority-2: ENV cache
+#include "superslab/superslab_inline.h" // superslab_ref_get guard for TLS pins
 #include 
 #include 
 
@@ -186,14 +187,19 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
     // BUGFIX: Double check total_active_blocks. Legacy Backend might have
     // allocated from ANOTHER slab in this SS just before we removed it.
     // If so, we must NOT free the SS.
-    if (atomic_load(&ss->total_active_blocks) == 0) {
+    uint32_t active_blocks = atomic_load(&ss->total_active_blocks);
+    uint32_t ss_refs = superslab_ref_get(ss);
+    if (active_blocks == 0 && ss_refs == 0) {
         extern void superslab_free(SuperSlab* ss);
         superslab_free(ss);
     } else {
 #if !HAKMEM_BUILD_RELEASE
         if (dbg == 1) {
-            fprintf(stderr, "[SP_SLOT_RELEASE] SKIP free ss=%p: total_active_blocks=%u > 0\n",
-                    (void*)ss, atomic_load(&ss->total_active_blocks));
+            fprintf(stderr,
+                    "[SP_SLOT_RELEASE] SKIP free ss=%p: total_active_blocks=%u refcount=%u\n",
+                    (void*)ss,
+                    (unsigned)active_blocks,
+                    (unsigned)ss_refs);
         }
 #endif
     }
diff --git a/core/superslab_allocate.c b/core/superslab_allocate.c
index 8fbd5dd2..4bd0508d 100644
--- a/core/superslab_allocate.c
+++ b/core/superslab_allocate.c
@@ -256,6 +256,19 @@ void superslab_free(SuperSlab* ss) {
         return; // Invalid SuperSlab
     }
 
+    // Guard: do not free while pinned by TLS/remote holders
+    uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
+    if (__builtin_expect(ss_refs != 0, 0)) {
+#if !HAKMEM_BUILD_RELEASE
+        static _Atomic uint32_t g_ss_free_pinned = 0;
+        uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
+        if (shot < 8) {
+            fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
+        }
+#endif
+        return;
+    }
+
     do {
         static _Atomic uint32_t g_ss_free_log = 0;
         uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_log, 1, memory_order_relaxed);
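Every diagnostic added by this patch uses the same rate-limiting idiom: a file-scope _Atomic uint32_t counter, incremented relaxed, with output only for the first few hits, so a corruption storm cannot flood stderr. Factored out, the idiom looks like this (the LOG_FIRST_N macro is a suggestion for consolidation, not something the patch introduces):

/* Sketch: first-N rate-limited diagnostics, as used throughout the patch. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define LOG_FIRST_N(name, n, ...)                                          \
    do {                                                                    \
        static _Atomic uint32_t name = 0;                                   \
        uint32_t shot_ = atomic_fetch_add_explicit(&name, 1,                \
                                                   memory_order_relaxed);   \
        if (shot_ < (n)) fprintf(stderr, __VA_ARGS__);                      \
    } while (0)

int main(void) {
    for (int i = 0; i < 100; i++)
        LOG_FIRST_N(g_demo, 8, "[DEMO] hit %d\n", i);  /* prints 8 lines */
    return 0;
}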
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index 6826dc19..f33bbd63 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -914,7 +914,7 @@ static inline TinyAllocFastStats tiny_alloc_fast_stats(int class_idx) {
 // Reset TLS freelist (for testing/benchmarking)
 // WARNING: This leaks memory! Only use in controlled test environments.
 static inline void tiny_alloc_fast_reset(int class_idx) {
-    g_tls_sll[class_idx].head = NULL;
+    tls_sll_set_head_raw(class_idx, NULL, "fast_reset");
     g_tls_sll[class_idx].count = 0;
 }
diff --git a/core/tiny_alloc_fast_inline.h b/core/tiny_alloc_fast_inline.h
index 396dc21f..e99ff2d1 100644
--- a/core/tiny_alloc_fast_inline.h
+++ b/core/tiny_alloc_fast_inline.h
@@ -56,7 +56,7 @@ extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
     if (__builtin_expect(_head != NULL, 1)) { \
         if (__builtin_expect((uintptr_t)_head == TINY_REMOTE_SENTINEL, 0)) { \
             /* Break the chain defensively if sentinel leaked into TLS SLL */ \
-            g_tls_sll[(class_idx)].head = NULL; \
+            tls_sll_set_head_raw((class_idx), NULL, "fast_pop_sentinel"); \
             g_tls_sll_last_writer[(class_idx)] = "fast_pop_sentinel"; \
             if (g_tls_sll[(class_idx)].count > 0) g_tls_sll[(class_idx)].count--; \
             (ptr_out) = NULL; \
@@ -66,15 +66,14 @@ extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
             if (__builtin_expect(class_idx == 4 || class_idx == 6, 0)) { \
                 tls_sll_diag_next(class_idx, _head, _next, "fast_pop_next"); \
             } \
-            g_tls_sll[(class_idx)].head = _next; \
-            g_tls_sll_last_writer[(class_idx)] = "fast_pop"; \
+            tls_sll_set_head_raw((class_idx), _next, "fast_pop"); \
             if ((class_idx == 4 || class_idx == 6) && _next && ((uintptr_t)_next < 4096 || (uintptr_t)_next > 0x00007fffffffffffULL)) { \
                 static __thread uint8_t s_fast_pop_invalid_log[8] = {0}; \
                 if (s_fast_pop_invalid_log[(class_idx)] < 4) { \
                     fprintf(stderr, "[TLS_SLL_FAST_POP_INVALID] cls=%d head=%p next=%p\n", (class_idx), _head, _next); \
                     s_fast_pop_invalid_log[(class_idx)]++; \
                 } \
-                g_tls_sll[(class_idx)].head = NULL; \
+                tls_sll_set_head_raw((class_idx), NULL, "fast_pop_post_invalid"); \
                 /* keep count unchanged to flag drop */ \
                 g_tls_sll_last_writer[(class_idx)] = "fast_pop_post_invalid"; \
                 (ptr_out) = NULL; \
@@ -126,15 +125,13 @@ extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
     } \
     /* Link node using BASE as the canonical SLL node address. */ \
     tiny_next_write((class_idx), _base, g_tls_sll[(class_idx)].head); \
-    g_tls_sll[(class_idx)].head = _base; \
-    g_tls_sll_last_writer[(class_idx)] = "fast_push"; \
+    tls_sll_set_head_raw((class_idx), _base, "fast_push"); \
     g_tls_sll[(class_idx)].count++; \
 } while(0)
 #else
 #define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
     tiny_next_write(class_idx, (ptr), g_tls_sll[(class_idx)].head); \
-    g_tls_sll[(class_idx)].head = (ptr); \
-    g_tls_sll_last_writer[(class_idx)] = "fast_push"; \
+    tls_sll_set_head_raw((class_idx), (ptr), "fast_push"); \
     g_tls_sll[(class_idx)].count++; \
 } while(0)
 #endif
diff --git a/core/tiny_free_fast.inc.h b/core/tiny_free_fast.inc.h
index 8d0c57fb..0bbb43f5 100644
--- a/core/tiny_free_fast.inc.h
+++ b/core/tiny_free_fast.inc.h
@@ -144,10 +144,6 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* base, uin
         tiny_alloc_fast_push(class_idx, base);
     }
 
-    // Active accounting (Box 3: SuperSlab)
-    // This is relatively cheap (atomic decrement) and necessary for memory management
-    ss_active_dec_one(ss);
-
     return 1; // Success
 }
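Two cross-cutting observations on the hunks above. First, tiny_free_fast.inc.h drops ss_active_dec_one from the fast free path; unless that decrement moved elsewhere, total_active_blocks no longer falls on fast frees, which together with the new guards biases the system toward retaining SuperSlabs rather than risking a use-after-free, but it is worth confirming the accounting moved rather than vanished. Second, tls_sll_pop_impl now releases a SuperSlab pin on every pop, while only tls_sll_push_impl takes one: the inline fast-path macros route head writes through tls_sll_set_head_raw, which validates but never touches the refcount. If a node can enter the list via the macros and leave via tls_sll_pop_impl (or the reverse), pins and unpins stop balancing. A debug-build balance counter, sketched below (hypothetical names, not part of this patch), surfaces such mismatches immediately:

/* Sketch: per-class pin/unpin balance check for debug builds. */
#include <assert.h>
#include <stdint.h>

#define TINY_NUM_CLASSES_SKETCH 8

static __thread int64_t g_pin_balance[TINY_NUM_CLASSES_SKETCH];

static void note_pin(int cls)   { g_pin_balance[cls]++; }

static void note_unpin(int cls) {
    g_pin_balance[cls]--;
    assert(g_pin_balance[cls] >= 0 && "unpin without matching pin");
}

/* call at thread exit: every pin taken on push must have been released */
static void pin_balance_check(void) {
    for (int c = 0; c < TINY_NUM_CLASSES_SKETCH; c++)
        assert(g_pin_balance[c] == 0 && "leaked SuperSlab pin");
}

int main(void) {
    note_pin(4);      /* models tls_sll_push_impl: superslab_ref_inc */
    note_unpin(4);    /* models tls_sll_pop_impl:  superslab_ref_dec */
    pin_balance_check();
    return 0;
}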