// tiny_refill_opt.h - Inline helpers to batch and splice refill chains // Box: Refill Boundary optimization helpers (kept header-only) #pragma once #include #include #include #include #ifndef HAKMEM_TINY_REFILL_OPT #define HAKMEM_TINY_REFILL_OPT 1 #endif // Local chain structure (head/tail pointers) typedef struct TinyRefillChain { void* head; void* tail; uint32_t count; } TinyRefillChain; static inline void trc_init(TinyRefillChain* c) { c->head = NULL; c->tail = NULL; c->count = 0; } static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) { #if HAKMEM_TINY_REFILL_OPT static int en = -1; static _Atomic int printed = 0; if (__builtin_expect(en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG"); en = (e && *e && *e != '0') ? 1 : 0; } if (!en) return; int exp = 0; if (atomic_compare_exchange_strong(&printed, &exp, 1)) { fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n); fflush(stderr); } #else (void)stage; (void)class_idx; (void)n; #endif } static inline void trc_push_front(TinyRefillChain* c, void* node) { if (c->head == NULL) { c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1; } else { *(void**)node = c->head; c->head = node; c->count++; } } // Splice local chain into TLS SLL (single meta write) static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c, void** sll_head, uint32_t* sll_count) { if (!c || c->head == NULL) return; if (c->tail) { *(void**)c->tail = *sll_head; } *sll_head = c->head; if (sll_count) *sll_count += c->count; } static inline int trc_refill_guard_enabled(void) { static int g_trc_guard = -1; if (__builtin_expect(g_trc_guard == -1, 0)) { const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST"); g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1; fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)"); fflush(stderr); } return g_trc_guard; } static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) { if (!node || limit <= base) return 1; uintptr_t addr = (uintptr_t)node; if (addr < base || addr >= limit) return 0; if (blk == 0) return 1; return ((addr - base) % blk) == 0; } static inline void trc_failfast_abort(const char* stage, int class_idx, uintptr_t base, uintptr_t limit, const void* node) { fprintf(stderr, "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n", stage ? stage : "(null)", class_idx, node, (void*)base, (void*)limit); fflush(stderr); abort(); } // Pop up to 'want' nodes from freelist into local chain static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta, int class_idx, uintptr_t ss_base, uintptr_t ss_limit, size_t block_size, uint32_t want, TinyRefillChain* out) { if (!out || want == 0) return 0; trc_init(out); uint32_t taken = 0; while (taken < want && meta->freelist) { void* p = meta->freelist; if (__builtin_expect(trc_refill_guard_enabled() && !trc_ptr_is_valid(ss_base, ss_limit, block_size, p), 0)) { trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p); } void* next = *(void**)p; if (__builtin_expect(trc_refill_guard_enabled() && !trc_ptr_is_valid(ss_base, ss_limit, block_size, next), 0)) { trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next); } meta->freelist = next; trc_push_front(out, p); taken++; } // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead) return taken; } // Carve a contiguous batch of size 'batch' from linear area, return as chain static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs, struct TinySlabMeta* meta, uint32_t batch, TinyRefillChain* out) { if (!out || batch == 0) return 0; trc_init(out); uint8_t* cursor = base + ((size_t)meta->used * bs); void* head = (void*)cursor; for (uint32_t i = 1; i < batch; i++) { uint8_t* next = cursor + bs; *(void**)cursor = (void*)next; cursor = next; } void* tail = (void*)cursor; meta->used += batch; out->head = head; out->tail = tail; out->count = batch; // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead) return batch; }