hakmem/core/tiny_refill_opt.h

// tiny_refill_opt.h - Inline helpers to batch and splice refill chains
// Box: Refill Boundary optimization helpers (kept header-only)
#pragma once

#include <stdint.h>
#include <stdio.h>
#include <stdatomic.h>
#include <stdlib.h>

#ifndef HAKMEM_TINY_REFILL_OPT
#define HAKMEM_TINY_REFILL_OPT 1
#endif

// Local chain structure (head/tail pointers)
typedef struct TinyRefillChain {
    void* head;
    void* tail;
    uint32_t count;
} TinyRefillChain;

static inline void trc_init(TinyRefillChain* c) {
    c->head = NULL; c->tail = NULL; c->count = 0;
}

static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) {
#if HAKMEM_TINY_REFILL_OPT
    static int en = -1;
    static _Atomic int printed = 0;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
        en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!en) return;
    int exp = 0;
    if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
        fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n);
        fflush(stderr);
    }
#else
    (void)stage; (void)class_idx; (void)n;
#endif
}

static inline void trc_push_front(TinyRefillChain* c, void* node) {
    if (c->head == NULL) {
        c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1;
    } else {
        *(void**)node = c->head; c->head = node; c->count++;
    }
}

// Forward declaration of guard function
static inline int trc_refill_guard_enabled(void);

// Splice local chain into TLS SLL (single meta write)
static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
                                     void** sll_head, uint32_t* sll_count) {
    if (!c || c->head == NULL) return;

    // CORRUPTION DEBUG: Validate chain before splicing
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        extern const size_t g_tiny_class_sizes[];
        size_t blk = g_tiny_class_sizes[class_idx];

        fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n",
                class_idx, c->head, c->tail, c->count);

        // Check alignment of chain head
        if (((uintptr_t)c->head % blk) != 0) {
            fprintf(stderr, "[SPLICE_CORRUPT] Chain head %p misaligned (blk=%zu offset=%zu)!\n",
                    c->head, blk, (uintptr_t)c->head % blk);
            fprintf(stderr, "[SPLICE_CORRUPT] Corruption detected BEFORE writing to TLS!\n");
            abort();
        }
    }

    if (c->tail) {
        *(void**)c->tail = *sll_head;
    }
    *sll_head = c->head;
    if (sll_count) *sll_count += c->count;
}

static inline int trc_refill_guard_enabled(void) {
#if HAKMEM_BUILD_RELEASE
    return 0;  // Always disabled in release builds
#else
    static int g_trc_guard = -1;
    if (__builtin_expect(g_trc_guard == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
        g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1;
        fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)");
        fflush(stderr);
    }
    return g_trc_guard;
#endif
}

static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
    if (!node || limit <= base) return 1;
    uintptr_t addr = (uintptr_t)node;
    if (addr < base || addr >= limit) return 0;
    if (blk == 0) return 1;
    return ((addr - base) % blk) == 0;
}

static inline void trc_failfast_abort(const char* stage,
                                      int class_idx,
                                      uintptr_t base,
                                      uintptr_t limit,
                                      const void* node) {
    fprintf(stderr,
            "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n",
            stage ? stage : "(null)",
            class_idx,
            node,
            (void*)base,
            (void*)limit);
    fflush(stderr);
    abort();
}

// Pop up to 'want' nodes from freelist into local chain
static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
                                             int class_idx,
                                             uintptr_t ss_base,
                                             uintptr_t ss_limit,
                                             size_t block_size,
                                             uint32_t want,
                                             TinyRefillChain* out) {
    if (!out || want == 0) return 0;
    trc_init(out);
    uint32_t taken = 0;
    while (taken < want && meta->freelist) {
        void* p = meta->freelist;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                                 !trc_ptr_is_valid(ss_base, ss_limit, block_size, p),
                             0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist head: p=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Head pointer is corrupted (invalid range/alignment)\n");
            trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p);
        }
        void* next = *(void**)p;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                                 !trc_ptr_is_valid(ss_base, ss_limit, block_size, next),
                             0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist node: p=%p next=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, next, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Next pointer is corrupted (cls=%d taken=%u/%u)\n",
                    class_idx, taken, want);
            // Log offset details
            if (next != NULL) {
                uintptr_t offset = (uintptr_t)next - ss_base;
                size_t expected_align = offset % block_size;
                fprintf(stderr, "[FREELIST_CORRUPT] Corrupted offset=%zu (0x%zx) expected_align=%zu\n",
                        offset, offset, expected_align);
            }
            trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
        }
        meta->freelist = next;
        trc_push_front(out, p);
        taken++;
    }
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return taken;
}

// Carve a contiguous batch of size 'batch' from linear area, return as chain
static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                                        struct TinySlabMeta* meta,
                                        uint32_t batch,
                                        TinyRefillChain* out) {
    if (!out || batch == 0) return 0;
    trc_init(out);

    // FIX: Use carved (monotonic) instead of used (decrements on free)
    // CORRUPTION DEBUG: Validate capacity before carving
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        if (meta->carved + batch > meta->capacity) {
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] Carving beyond capacity!\n");
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] carved=%u batch=%u capacity=%u (would be %u)\n",
                    meta->carved, batch, meta->capacity, meta->carved + batch);
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] base=%p bs=%zu\n", (void*)base, bs);
            abort();
        }
    }

    // FIX: Use carved counter (monotonic) instead of used (which decrements on free)
    uint8_t* cursor = base + ((size_t)meta->carved * bs);
    void* head = (void*)cursor;

    // CORRUPTION DEBUG: Log carve operation
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        fprintf(stderr, "[LINEAR_CARVE] base=%p carved=%u batch=%u cursor=%p\n",
                (void*)base, meta->carved, batch, (void*)cursor);
    }

    for (uint32_t i = 1; i < batch; i++) {
        uint8_t* next = cursor + bs;
        *(void**)cursor = (void*)next;
        cursor = next;
    }
    void* tail = (void*)cursor;
    // FIX: Update both carved (monotonic) and used (active count)
    meta->carved += batch;
    meta->used += batch;
    out->head = head;
    out->tail = tail;
    out->count = batch;
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return batch;
}