// tiny_refill_opt.h - Inline helpers to batch and splice refill chains
// Box: Refill Boundary optimization helpers (kept header-only)
#pragma once

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>
// NOTE: struct TinySlabMeta is expected to be defined by the including allocator headers.

#ifndef HAKMEM_TINY_REFILL_OPT
#define HAKMEM_TINY_REFILL_OPT 1
#endif

// Local chain structure (head/tail pointers)
typedef struct TinyRefillChain {
    void*    head;
    void*    tail;
    uint32_t count;
} TinyRefillChain;

static inline void trc_init(TinyRefillChain* c) {
    c->head  = NULL;
    c->tail  = NULL;
    c->count = 0;
}

static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) {
#if HAKMEM_TINY_REFILL_OPT
    static int en = -1;
    static _Atomic int printed = 0;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
        en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!en) return;
    int exp = 0;
    if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
        fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n",
                stage ? stage : "(null)", class_idx, (unsigned)n);
        fflush(stderr);
    }
#else
    (void)stage; (void)class_idx; (void)n;
#endif
}

// Phase 7 header-aware push_front: link using base+1 for C0-C6 (C7, headerless, is not used here)
static inline void trc_push_front(TinyRefillChain* c, void* node, int class_idx) {
#if HAKMEM_TINY_HEADER_CLASSIDX
    const size_t next_offset = (class_idx == 7) ? 0 : 1;
#else
    const size_t next_offset = 0;
    (void)class_idx;
#endif
    if (c->head == NULL) {
        c->head = node;
        c->tail = node;
        *(void**)((uint8_t*)node + next_offset) = NULL;
        c->count = 1;
    } else {
        *(void**)((uint8_t*)node + next_offset) = c->head;
        c->head = node;
        c->count++;
    }
}

// Forward declaration of guard function
static inline int trc_refill_guard_enabled(void);

// Box TLS-SLL API (provides tls_sll_splice)
#include "box/tls_sll_box.h"

// Splice local chain into TLS SLL using Box TLS-SLL API (C7-safe)
static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
                                     void** sll_head, uint32_t* sll_count) {
    if (!c || c->head == NULL) return;

    // CORRUPTION DEBUG: Log chain splice (alignment check removed - false positive)
    // NOTE: Blocks are stride-aligned from the slab base, not absolutely aligned.
    // A slab at 0x1000 with 513B blocks is valid: 0x1000, 0x1201, 0x1402, etc.
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n",
                class_idx, c->head, c->tail, c->count);
    }

    // CRITICAL: Use Box TLS-SLL API for splice (C7-safe, no race)
    // Note: tls_sll_splice() requires a capacity parameter (use a large value for refill)
    uint32_t moved = tls_sll_splice(class_idx, c->head, c->count, 4096);

    // The Box API already updated g_tls_sll_count internally, so the sll_head/sll_count
    // parameters (typically &g_tls_sll_head[cls] / &g_tls_sll_count[cls]) are not touched here.
    (void)sll_head;
    (void)sll_count;

    // If the splice was partial, warn (should not happen in the refill path)
    if (__builtin_expect(moved < c->count, 0)) {
        fprintf(stderr, "[SPLICE_WARNING] Only moved %u/%u blocks (SLL capacity limit)\n",
                moved, c->count);
    }
}
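// Illustrative sketch (not compiled): how a caller might batch two freed blocks into a
// TinyRefillChain with trc_push_front() and hand them to the thread-local SLL with
// trc_splice_to_sll(). The class index, the block pointers, and the g_example_* TLS
// arrays are placeholders for illustration; only the trc_* helpers come from this header.
#if 0
// Hypothetical TLS freelist arrays; the real definitions live in the Box TLS-SLL module
// (these names are placeholders, not the project's actual identifiers).
static __thread void*    g_example_sll_head[8];
static __thread uint32_t g_example_sll_count[8];

static inline void example_chain_and_splice(void* blk_a, void* blk_b, int class_idx)
{
    TinyRefillChain chain;
    trc_init(&chain);
    trc_push_front(&chain, blk_a, class_idx);   // chain: a
    trc_push_front(&chain, blk_b, class_idx);   // chain: b -> a (b becomes the head)

    // tls_sll_splice() updates its own TLS count internally, so these pointers are
    // accepted but left untouched by trc_splice_to_sll().
    trc_splice_to_sll(class_idx, &chain,
                      &g_example_sll_head[class_idx],
                      &g_example_sll_count[class_idx]);
}
#endif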
static inline int trc_refill_guard_enabled(void) {
    // FIX: Allow runtime override even in release builds for debugging
    static int g_trc_guard = -1;
    if (__builtin_expect(g_trc_guard == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
#if HAKMEM_BUILD_RELEASE
        // Release: Default OFF, but allow explicit enable
        g_trc_guard = (env && *env && *env != '0') ? 1 : 0;
        const char* mode = "release";
#else
        // Debug: Default ON, but allow explicit disable
        g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1;
        const char* mode = "debug";
#endif
        fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s mode=%s\n",
                g_trc_guard, env ? env : "(null)", mode);
        fflush(stderr);
    }
    return g_trc_guard;
}

static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
    if (!node || limit <= base) return 1;
    uintptr_t addr = (uintptr_t)node;
    if (addr < base || addr >= limit) return 0;
    if (blk == 0) return 1;
    return ((addr - base) % blk) == 0;
}

static inline void trc_failfast_abort(const char* stage, int class_idx,
                                      uintptr_t base, uintptr_t limit, const void* node) {
    fprintf(stderr, "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n",
            stage ? stage : "(null)", class_idx, node, (void*)base, (void*)limit);
    fflush(stderr);
    abort();
}

// Pop up to 'want' nodes from the slab freelist into a local chain
static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta, int class_idx,
                                             uintptr_t ss_base, uintptr_t ss_limit,
                                             size_t block_size, uint32_t want,
                                             TinyRefillChain* out) {
    if (!out || want == 0) return 0;
    trc_init(out);
    uint32_t taken = 0;
    while (taken < want && meta->freelist) {
        void* p = meta->freelist;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                             !trc_ptr_is_valid(ss_base, ss_limit, block_size, p), 0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist head: p=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Head pointer is corrupted (invalid range/alignment)\n");
            trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p);
        }
        void* next = *(void**)p;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                             !trc_ptr_is_valid(ss_base, ss_limit, block_size, next), 0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist node: p=%p next=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, next, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Next pointer is corrupted (cls=%d taken=%u/%u)\n",
                    class_idx, taken, want);
            // Log offset details
            if (next != NULL) {
                uintptr_t offset = (uintptr_t)next - ss_base;
                size_t expected_align = offset % block_size;
                fprintf(stderr, "[FREELIST_CORRUPT] Corrupted offset=%zu (0x%zx) expected_align=%zu\n",
                        offset, offset, expected_align);
            }
            trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
        }
        meta->freelist = next;
        trc_push_front(out, p, class_idx);
        taken++;
    }
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return taken;
}
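// Illustrative sketch (not compiled): what trc_ptr_is_valid() accepts. Blocks are
// stride-aligned from the slab base rather than absolutely aligned, so with a base of
// 0x1000 and a 513-byte stride the second block sits at 0x1201. All numeric values
// below are made up for illustration.
#if 0
#include <assert.h>

static void example_ptr_validation(void)
{
    // Made-up slab geometry: base 0x1000, limit 0x3000, 513-byte stride.
    uintptr_t base  = 0x1000;
    uintptr_t limit = 0x3000;
    size_t    blk   = 513;

    assert(trc_ptr_is_valid(base, limit, blk, (void*)0x1000) == 1); // block 0
    assert(trc_ptr_is_valid(base, limit, blk, (void*)0x1201) == 1); // block 1 (base + 513)
    assert(trc_ptr_is_valid(base, limit, blk, (void*)0x1200) == 0); // inside the slab but off-stride
    assert(trc_ptr_is_valid(base, limit, blk, (void*)0x4000) == 0); // outside [base, limit)
    assert(trc_ptr_is_valid(base, limit, blk, NULL) == 1);          // NULL is treated as valid (end of list)
}
#endif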
// Carve a contiguous batch of 'batch' blocks from the linear area and return it as a chain.
// Phase 7 header-aware carve: link the chain using the header-safe next location.
// class_idx is required to decide headerless (C7) vs headered (C0-C6).
static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs, struct TinySlabMeta* meta,
                                        uint32_t batch, int class_idx, TinyRefillChain* out) {
    if (!out || batch == 0) return 0;
    trc_init(out);

    // FIX: Use carved (monotonic) instead of used (which decrements on free)
    // CORRUPTION DEBUG: Validate capacity before carving
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        if (meta->carved + batch > meta->capacity) {
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] Carving beyond capacity!\n");
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] carved=%u batch=%u capacity=%u (would be %u)\n",
                    meta->carved, batch, meta->capacity, meta->carved + batch);
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] base=%p bs=%zu\n", (void*)base, bs);
            abort();
        }
    }

    // Caller passes bs as the effective stride already (includes header when enabled)
    size_t stride = bs;
    uint8_t* cursor = base + ((size_t)meta->carved * stride);
    void* head = (void*)cursor;

    // CORRUPTION DEBUG: Log carve operation
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        fprintf(stderr, "[LINEAR_CARVE] base=%p carved=%u batch=%u cursor=%p\n",
                (void*)base, meta->carved, batch, (void*)cursor);
    }

    // CRITICAL FIX (Phase 7): header-aware next pointer placement.
    // For header classes (C0-C6), the first byte at base is the 1-byte header;
    // store the SLL next pointer at base+1 to avoid clobbering the header.
    // For C7 (headerless), store at base.
#if HAKMEM_TINY_HEADER_CLASSIDX
    const size_t next_offset = (class_idx == 7) ? 0 : 1;
#else
    const size_t next_offset = 0;
#endif
    for (uint32_t i = 1; i < batch; i++) {
        uint8_t* next = cursor + stride;
        *(void**)(cursor + next_offset) = (void*)next;
        cursor = next;
    }
    void* tail = (void*)cursor;

    // Debug: validate the first link
#if !HAKMEM_BUILD_RELEASE
    if (batch >= 2) {
        void* first_next = *(void**)((uint8_t*)head + next_offset);
        fprintf(stderr, "[LINEAR_LINK] cls=%d head=%p off=%zu next=%p tail=%p\n",
                class_idx, head, (size_t)next_offset, first_next, tail);
    } else {
        fprintf(stderr, "[LINEAR_LINK] cls=%d head=%p off=%zu next=%p tail=%p\n",
                class_idx, head, (size_t)next_offset, (void*)0, tail);
    }
#endif

    // FIX: Update both carved (monotonic) and used (active count)
    meta->carved += batch;
    meta->used   += batch;

    out->head  = head;
    out->tail  = tail;
    out->count = batch;
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return batch;
}
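// Illustrative sketch (not compiled): one plausible refill path gluing these helpers
// together. It pops recycled blocks from the slab freelist first, carves the remainder
// from the linear tail, and splices each chain into the TLS SLL. How the caller obtains
// meta, the slab geometry, and the TLS pointers is an assumption of this sketch; only
// the trc_* helpers and the TinySlabMeta fields used above come from this header.
#if 0
static uint32_t example_refill(struct TinySlabMeta* meta, int class_idx,
                               uint8_t* slab_base, uintptr_t ss_base, uintptr_t ss_limit,
                               size_t stride, uint32_t want,
                               void** tls_head, uint32_t* tls_count)
{
    uint32_t filled = 0;
    TinyRefillChain chain;

    // 1) Reuse freed blocks from the slab freelist (up to 'want').
    filled += trc_pop_from_freelist(meta, class_idx, ss_base, ss_limit, stride, want, &chain);
    trc_splice_to_sll(class_idx, &chain, tls_head, tls_count);

    // 2) Carve the remainder from the never-allocated (linear) tail of the slab.
    if (filled < want) {
        uint32_t room  = meta->capacity - meta->carved;   // blocks still carvable
        uint32_t batch = want - filled;
        if (batch > room) batch = room;
        if (batch > 0) {
            filled += trc_linear_carve(slab_base, stride, meta, batch, class_idx, &chain);
            trc_splice_to_sll(class_idx, &chain, tls_head, tls_count);
        }
    }
    return filled;  // number of blocks delivered to the TLS SLL
}
#endif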