// tiny_refill_opt.h - Inline helpers to batch and splice refill chains // Box: Refill Boundary optimization helpers (kept header-only) #pragma once #include #include #include #include #ifndef HAKMEM_TINY_REFILL_OPT #define HAKMEM_TINY_REFILL_OPT 1 #endif // Local chain structure (head/tail pointers) typedef struct TinyRefillChain { void* head; void* tail; uint32_t count; } TinyRefillChain; static inline void trc_init(TinyRefillChain* c) { c->head = NULL; c->tail = NULL; c->count = 0; } static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) { #if HAKMEM_TINY_REFILL_OPT static int en = -1; static _Atomic int printed = 0; if (__builtin_expect(en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG"); en = (e && *e && *e != '0') ? 1 : 0; } if (!en) return; int exp = 0; if (atomic_compare_exchange_strong(&printed, &exp, 1)) { fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n); fflush(stderr); } #else (void)stage; (void)class_idx; (void)n; #endif } static inline void trc_push_front(TinyRefillChain* c, void* node) { if (c->head == NULL) { c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1; } else { *(void**)node = c->head; c->head = node; c->count++; } } // Forward declaration of guard function static inline int trc_refill_guard_enabled(void); // Splice local chain into TLS SLL (single meta write) static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c, void** sll_head, uint32_t* sll_count) { if (!c || c->head == NULL) return; // CORRUPTION DEBUG: Validate chain before splicing if (__builtin_expect(trc_refill_guard_enabled(), 0)) { extern const size_t g_tiny_class_sizes[]; size_t blk = g_tiny_class_sizes[class_idx]; fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n", class_idx, c->head, c->tail, c->count); // Check alignment of chain head if (((uintptr_t)c->head % blk) != 0) { fprintf(stderr, "[SPLICE_CORRUPT] Chain head %p misaligned (blk=%zu offset=%zu)!\n", c->head, blk, (uintptr_t)c->head % blk); fprintf(stderr, "[SPLICE_CORRUPT] Corruption detected BEFORE writing to TLS!\n"); abort(); } } if (c->tail) { *(void**)c->tail = *sll_head; } *sll_head = c->head; if (sll_count) *sll_count += c->count; } static inline int trc_refill_guard_enabled(void) { #if HAKMEM_BUILD_RELEASE return 0; // Always disabled in release builds #else static int g_trc_guard = -1; if (__builtin_expect(g_trc_guard == -1, 0)) { const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST"); g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1; fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)"); fflush(stderr); } return g_trc_guard; #endif } static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) { if (!node || limit <= base) return 1; uintptr_t addr = (uintptr_t)node; if (addr < base || addr >= limit) return 0; if (blk == 0) return 1; return ((addr - base) % blk) == 0; } static inline void trc_failfast_abort(const char* stage, int class_idx, uintptr_t base, uintptr_t limit, const void* node) { fprintf(stderr, "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n", stage ? stage : "(null)", class_idx, node, (void*)base, (void*)limit); fflush(stderr); abort(); } // Pop up to 'want' nodes from freelist into local chain static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta, int class_idx, uintptr_t ss_base, uintptr_t ss_limit, size_t block_size, uint32_t want, TinyRefillChain* out) { if (!out || want == 0) return 0; trc_init(out); uint32_t taken = 0; while (taken < want && meta->freelist) { void* p = meta->freelist; if (__builtin_expect(trc_refill_guard_enabled() && !trc_ptr_is_valid(ss_base, ss_limit, block_size, p), 0)) { fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist head: p=%p (ss_base=%p ss_limit=%p blk=%zu)\n", p, (void*)ss_base, (void*)ss_limit, block_size); fprintf(stderr, "[FREELIST_CORRUPT] Head pointer is corrupted (invalid range/alignment)\n"); trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p); } void* next = *(void**)p; if (__builtin_expect(trc_refill_guard_enabled() && !trc_ptr_is_valid(ss_base, ss_limit, block_size, next), 0)) { fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist node: p=%p next=%p (ss_base=%p ss_limit=%p blk=%zu)\n", p, next, (void*)ss_base, (void*)ss_limit, block_size); fprintf(stderr, "[FREELIST_CORRUPT] Next pointer is corrupted (cls=%d taken=%u/%u)\n", class_idx, taken, want); // Log offset details if (next != NULL) { uintptr_t offset = (uintptr_t)next - ss_base; size_t expected_align = offset % block_size; fprintf(stderr, "[FREELIST_CORRUPT] Corrupted offset=%zu (0x%zx) expected_align=%zu\n", offset, offset, expected_align); } trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next); } meta->freelist = next; trc_push_front(out, p); taken++; } // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead) return taken; } // Carve a contiguous batch of size 'batch' from linear area, return as chain static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs, struct TinySlabMeta* meta, uint32_t batch, TinyRefillChain* out) { if (!out || batch == 0) return 0; trc_init(out); // FIX: Use carved (monotonic) instead of used (decrements on free) // CORRUPTION DEBUG: Validate capacity before carving if (__builtin_expect(trc_refill_guard_enabled(), 0)) { if (meta->carved + batch > meta->capacity) { fprintf(stderr, "[LINEAR_CARVE_CORRUPT] Carving beyond capacity!\n"); fprintf(stderr, "[LINEAR_CARVE_CORRUPT] carved=%u batch=%u capacity=%u (would be %u)\n", meta->carved, batch, meta->capacity, meta->carved + batch); fprintf(stderr, "[LINEAR_CARVE_CORRUPT] base=%p bs=%zu\n", (void*)base, bs); abort(); } } // FIX: Use carved counter (monotonic) instead of used (which decrements on free) uint8_t* cursor = base + ((size_t)meta->carved * bs); void* head = (void*)cursor; // CORRUPTION DEBUG: Log carve operation if (__builtin_expect(trc_refill_guard_enabled(), 0)) { fprintf(stderr, "[LINEAR_CARVE] base=%p carved=%u batch=%u cursor=%p\n", (void*)base, meta->carved, batch, (void*)cursor); } for (uint32_t i = 1; i < batch; i++) { uint8_t* next = cursor + bs; *(void**)cursor = (void*)next; cursor = next; } void* tail = (void*)cursor; // FIX: Update both carved (monotonic) and used (active count) meta->carved += batch; meta->used += batch; out->head = head; out->tail = tail; out->count = batch; // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead) return batch; }