// hakmem/core/box/tls_sll_box.h

// tls_sll_box.h - Box TLS-SLL: Single-Linked List API (Unified Box version)
//
// Goal:
//   - Single authoritative Box for TLS SLL operations.
//   - All next pointer layout is decided by tiny_next_ptr_box.h (Box API).
//   - Callers pass BASE pointers only; no local next_offset arithmetic.
//   - Compatible with existing ptr_trace PTR_NEXT_* macros (off is logging-only).
//
// Invariants:
//   - g_tiny_class_sizes[cls] is TOTAL stride (including 1-byte header when enabled).
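//   - For HEADER_CLASSIDX != 0, tiny_nextptr.h encodes:
//       class 0: next_off = 0
//       class 1-7: next_off = 1
//     Callers MUST NOT duplicate this logic.
//     NOTE(review): the push/pop code in this file skips header restore and
//     header validation for class 7 as well as class 0 ("C0, C7 use offset=0").
//     Confirm which of the two statements matches tiny_nextptr.h.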
//   - TLS SLL stores BASE pointers only.
//   - Box provides: push / pop / splice with capacity & integrity checks.

#ifndef TLS_SLL_BOX_H
#define TLS_SLL_BOX_H

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

#include "../hakmem_tiny_config.h"
#include "../hakmem_build_flags.h"
#include "../tiny_remote.h"
#include "../tiny_region_id.h"
#include "../hakmem_tiny_integrity.h"
#include "../ptr_track.h"
#include "../ptr_trace.h"
#include "../tiny_debug_ring.h"
#include "../hakmem_super_registry.h"
#include "../superslab/superslab_inline.h"
#include "tiny_next_ptr_box.h"

// Per-thread debug shadow: last successful push base per class (release-safe)
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};

// Per-thread callsite tracking: last push caller per class (debug-only)
#if !HAKMEM_BUILD_RELEASE
static __thread const char* s_tls_sll_last_push_from[TINY_NUM_CLASSES] = {NULL};
static __thread const char* s_tls_sll_last_pop_from[TINY_NUM_CLASSES] = {NULL};
#endif

// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
extern __thread uint64_t g_tls_canary_before_sll;
extern __thread uint64_t g_tls_canary_after_sll;
extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
extern int g_tls_sll_class_mask;  // bit i=1 → SLL allowed for class i

// ========== Debug guard ==========

#if !HAKMEM_BUILD_RELEASE
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
{
    (void)class_idx;
    if ((uintptr_t)base < 4096) {
        fprintf(stderr,
                "[TLS_SLL_GUARD] %s: suspicious ptr=%p cls=%d\n",
                where, base, class_idx);
        abort();
    }
}
#else
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
{
    (void)class_idx; (void)base; (void)where;
}
#endif

// Normalize helper: callers are required to pass BASE already.
// Kept as a no-op for documentation / future hardening.
static inline void* tls_sll_normalize_base(int class_idx, void* node)
{
#if HAKMEM_TINY_HEADER_CLASSIDX
    if (node && class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
        extern const size_t g_tiny_class_sizes[];
        size_t stride = g_tiny_class_sizes[class_idx];
        if (__builtin_expect(stride != 0, 1)) {
            uintptr_t delta = (uintptr_t)node % stride;
            if (__builtin_expect(delta == 1, 0)) {
                // USER pointer passed in; normalize to BASE (= user-1) to avoid offset-1 writes.
                void* base = (uint8_t*)node - 1;
                static _Atomic uint32_t g_tls_sll_norm_userptr = 0;
                uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_norm_userptr, 1, memory_order_relaxed);
                if (n < 8) {
                    fprintf(stderr,
                            "[TLS_SLL_NORMALIZE_USERPTR] cls=%d node=%p -> base=%p stride=%zu\n",
                            class_idx, node, base, stride);
                }
                return base;
            }
        }
    }
#else
    (void)class_idx;
#endif
    return node;
}

// Narrow dump around TLS SLL array when corruption is detected (env-gated)
static inline void tls_sll_dump_tls_window(int class_idx, const char* stage)
{
    static _Atomic uint32_t g_tls_sll_diag_shots = 0;
    static int s_diag_enable = -1;
    if (__builtin_expect(s_diag_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
        s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!__builtin_expect(s_diag_enable, 0)) return;

    uint32_t shot = atomic_fetch_add_explicit(&g_tls_sll_diag_shots, 1, memory_order_relaxed);
    if (shot >= 2) return;  // limit noise

    if (shot == 0) {
        // Map TLS layout once to confirm index→address mapping during triage
        fprintf(stderr,
                "[TLS_SLL_ADDRMAP] before=%p sll=%p after=%p entry_size=%zu\n",
                (void*)&g_tls_canary_before_sll,
                (void*)g_tls_sll,
                (void*)&g_tls_canary_after_sll,
                sizeof(TinyTLSSLL));
        for (int c = 0; c < TINY_NUM_CLASSES; c++) {
            fprintf(stderr, "  C%d: head@%p count@%p\n",
                    c,
                    (void*)&g_tls_sll[c].head,
                    (void*)&g_tls_sll[c].count);
        }
    }

    fprintf(stderr,
            "[TLS_SLL_INVALID_POP_DIAG] shot=%u stage=%s cls=%d head=%p count=%u last_push=%p last_writer=%s\n",
            shot + 1,
            stage ? stage : "(null)",
            class_idx,
            g_tls_sll[class_idx].head,
            g_tls_sll[class_idx].count,
            s_tls_sll_last_push[class_idx],
            g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
    fprintf(stderr, "  tls_sll snapshot (head/count):");
    for (int c = 0; c < TINY_NUM_CLASSES; c++) {
        fprintf(stderr, " C%d:%p/%u", c, g_tls_sll[c].head, g_tls_sll[c].count);
    }
    fprintf(stderr, " canary_before=%#llx canary_after=%#llx\n",
            (unsigned long long)g_tls_canary_before_sll,
            (unsigned long long)g_tls_canary_after_sll);
}

static inline void tls_sll_record_writer(int class_idx, const char* who)
{
    if (__builtin_expect(class_idx >= 0 && class_idx < TINY_NUM_CLASSES, 1)) {
        g_tls_sll_last_writer[class_idx] = who;
    }
}

static inline int tls_sll_head_valid(void* head)
{
    uintptr_t a = (uintptr_t)head;
    return (a >= 4096 && a <= 0x00007fffffffffffULL);
}

static inline void tls_sll_log_hdr_mismatch(int class_idx, void* base, uint8_t got, uint8_t expect, const char* stage)
{
    static _Atomic uint32_t g_hdr_mismatch_log = 0;
    uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
    if (n < 16) {
        fprintf(stderr,
                "[TLS_SLL_HDR_MISMATCH] stage=%s cls=%d base=%p got=0x%02x expect=0x%02x\n",
                stage ? stage : "(null)",
                class_idx,
                base,
                got,
                expect);
    }
}

static inline void tls_sll_diag_next(int class_idx, void* base, void* next, const char* stage)
{
    static int s_diag_enable = -1;
    if (__builtin_expect(s_diag_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
        s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!__builtin_expect(s_diag_enable, 0)) return;

    // Narrow to target classes to preserve early shots
    if (class_idx != 4 && class_idx != 6 && class_idx != 7) return;

    int in_range = tls_sll_head_valid(next);
    if (in_range) {
        // Range check (abort on clearly bad pointers to catch first offender)
        validate_ptr_range(next, "tls_sll_pop_next_diag");
    }

    SuperSlab* ss = hak_super_lookup(next);
    int slab_idx = ss ? slab_index_for(ss, next) : -1;
    TinySlabMeta* meta = (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) ? &ss->slabs[slab_idx] : NULL;
    int meta_cls = meta ? (int)meta->class_idx : -1;
#if HAKMEM_TINY_HEADER_CLASSIDX
    int hdr_cls = next ? tiny_region_id_read_header((uint8_t*)next + 1) : -1;
#else
    int hdr_cls = -1;
#endif

    static _Atomic uint32_t g_next_diag_once = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_next_diag_once, 1, memory_order_relaxed);
    if (shot < 12) {
        fprintf(stderr,
                "[TLS_SLL_POP_NEXT_DIAG] shot=%u stage=%s cls=%d base=%p next=%p hdr_cls=%d meta_cls=%d slab=%d ss=%p\n",
                shot + 1,
                stage ? stage : "(null)",
                class_idx,
                base,
                next,
                hdr_cls,
                meta_cls,
                slab_idx,
                (void*)ss);
    }
}

// ========== Push ==========
//
// Push BASE pointer into TLS SLL for given class.
// Returns true on success, false if capacity full or input invalid.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_push() macro instead of calling directly.

static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity, const char* where)
{
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");

    // Class mask gate (narrow triage): if disallowed, reject push
    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
        return false;
    }

    // Capacity semantics:
    //  - capacity == 0 → disabled (reject)
    //  - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
    if (capacity == 0) {
        return false;
    }
    const uint32_t kCapacityHardMax = (1u << 20);
    const int unlimited = (capacity > kCapacityHardMax);

    if (!ptr) {
        return false;
    }

    // Base pointer only (callers must pass BASE; this is a no-op by design).
    ptr = tls_sll_normalize_base(class_idx, ptr);

    // Detect meta/class mismatch on push (first few only).
    do {
        static _Atomic uint32_t g_tls_sll_push_meta_mis = 0;
        struct SuperSlab* ss = hak_super_lookup(ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            int sidx = slab_index_for(ss, ptr);
            if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
                uint8_t meta_cls = ss->slabs[sidx].class_idx;
                if (meta_cls < TINY_NUM_CLASSES && meta_cls != (uint8_t)class_idx) {
                    uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_meta_mis, 1, memory_order_relaxed);
                    if (n < 4) {
                        fprintf(stderr,
                                "[TLS_SLL_PUSH_META_MISMATCH] cls=%d meta_cls=%u base=%p slab_idx=%d ss=%p\n",
                                class_idx, (unsigned)meta_cls, ptr, sidx, (void*)ss);
                        void* bt[8];
                        int frames = backtrace(bt, 8);
                        backtrace_symbols_fd(bt, frames, fileno(stderr));
                    }
                    fflush(stderr);
                }
            }
        }
    } while (0);

#if !HAKMEM_BUILD_RELEASE
    // Minimal range guard before we touch memory.
    if (!validate_ptr_range(ptr, "tls_sll_push_base")) {
        fprintf(stderr,
                "[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
                class_idx, ptr);
        abort();
    }
#else
    // Release: drop malformed ptrs but keep running.
    uintptr_t ptr_addr = (uintptr_t)ptr;
    if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
        extern _Atomic uint64_t g_tls_sll_invalid_push[];
        uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
        static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
        if (s_log_limit_push[class_idx] < 4) {
            fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
                    class_idx, ptr, (unsigned long long)cnt + 1);
            s_log_limit_push[class_idx]++;
        }
        return false;
    }
#endif

    // Capacity check BEFORE any writes.
    uint32_t cur = g_tls_sll[class_idx].count;
    if (!unlimited && cur >= capacity) {
        return false;
    }

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Header handling for header classes (class 1-6 only, NOT 0 or 7).
    // C0, C7 use offset=0, so next pointer is at base[0] and MUST NOT restore header.
    // Safe mode (HAKMEM_TINY_SLL_SAFEHEADER=1): never overwrite header; reject on magic mismatch.
    // Default mode: restore expected header.
    if (class_idx != 0 && class_idx != 7) {
        static int g_sll_safehdr = -1;
        static int g_sll_ring_en = -1; // optional ring trace for TLS-SLL anomalies
        if (__builtin_expect(g_sll_safehdr == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_SLL_SAFEHEADER");
            g_sll_safehdr = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_sll_ring_en == -1, 0)) {
            const char* r = getenv("HAKMEM_TINY_SLL_RING");
            g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
        }
        uint8_t* b = (uint8_t*)ptr;
        uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
        uint8_t got_pre = *b;
        if (__builtin_expect(got_pre != expected, 0)) {
            tls_sll_log_hdr_mismatch(class_idx, ptr, got_pre, expected, "push_preheader");
        }
        if (g_sll_safehdr) {
            uint8_t got = *b;
            if ((got & 0xF0u) != HEADER_MAGIC) {
                // Reject push silently (fall back to slow path at caller)
                if (__builtin_expect(g_sll_ring_en, 0)) {
                    // aux encodes: high 8 bits = got, low 8 bits = expected
                    uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expected;
                    tiny_debug_ring_record(0x7F10 /*TLS_SLL_REJECT*/, (uint16_t)class_idx, ptr, aux);
                }
                return false;
            }
        } else {
            PTR_TRACK_TLS_PUSH(ptr, class_idx);
            PTR_TRACK_HEADER_WRITE(ptr, expected);
            *b = expected;
        }
    }
#endif

    tls_sll_debug_guard(class_idx, ptr, "push");

#if !HAKMEM_BUILD_RELEASE
    // Optional double-free detection: scan a bounded prefix of the list.
    // Increased from 64 to 256 to catch orphaned blocks deeper in the chain.
    {
        void* scan = g_tls_sll[class_idx].head;
        uint32_t scanned = 0;
        const uint32_t limit = (g_tls_sll[class_idx].count < 256)
                                 ? g_tls_sll[class_idx].count
                                 : 256;
        while (scan && scanned < limit) {
            if (scan == ptr) {
                fprintf(stderr,
                        "[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
                        class_idx,
                        ptr,
                        g_tls_sll[class_idx].head,
                        g_tls_sll[class_idx].count,
                        scanned,
                        s_tls_sll_last_push[class_idx],
                        s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
                        s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
                        where ? where : "(null)");
                ptr_trace_dump_now("tls_sll_dup");
                // ABORT to get backtrace showing exact double-free location
                abort();
            }
            void* next;
            PTR_NEXT_READ("tls_sll_scan", class_idx, scan, 0, next);
            scan = next;
            scanned++;
        }
    }
#endif

    // Link new node to current head via Box API (offset is handled inside tiny_nextptr).
    PTR_NEXT_WRITE("tls_push", class_idx, ptr, 0, g_tls_sll[class_idx].head);
    g_tls_sll[class_idx].head = ptr;
    tls_sll_record_writer(class_idx, "push");
    g_tls_sll[class_idx].count = cur + 1;
    s_tls_sll_last_push[class_idx] = ptr;

#if !HAKMEM_BUILD_RELEASE
    // Record callsite for debugging (debug-only)
    s_tls_sll_last_push_from[class_idx] = where;
#else
    (void)where; // Suppress unused warning in release
#endif

    return true;
}

// ========== Pop ==========
//
// Pop BASE pointer from TLS SLL.
// Returns true on success and stores BASE into *out.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_pop() macro instead of calling directly.

static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where)
{
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
    // Class mask gate: if disallowed, behave as empty
    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
        return false;
    }
    atomic_fetch_add(&g_integrity_check_class_bounds, 1);

    void* base = g_tls_sll[class_idx].head;
    if (!base) {
        return false;
    }

    // Sentinel guard: remote sentinel must never be in TLS SLL.
    if (__builtin_expect((uintptr_t)base == TINY_REMOTE_SENTINEL, 0)) {
        g_tls_sll[class_idx].head = NULL;
        g_tls_sll[class_idx].count = 0;
        tls_sll_record_writer(class_idx, "pop_sentinel_reset");
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[TLS_SLL_POP] Remote sentinel detected at head; SLL reset (cls=%d)\n",
                class_idx);
#endif
        {
            static int g_sll_ring_en = -1;
            if (__builtin_expect(g_sll_ring_en == -1, 0)) {
                const char* r = getenv("HAKMEM_TINY_SLL_RING");
                g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_sll_ring_en, 0)) {
                tiny_debug_ring_record(0x7F11 /*TLS_SLL_SENTINEL*/, (uint16_t)class_idx, base, 0);
            }
        }
        return false;
    }

#if !HAKMEM_BUILD_RELEASE
    if (!validate_ptr_range(base, "tls_sll_pop_base")) {
        fprintf(stderr,
                "[TLS_SLL_POP] FATAL invalid BASE ptr cls=%d base=%p\n",
                class_idx, base);
        abort();
    }
#else
    // Fail-fast even in release: drop malformed TLS head to avoid SEGV on bad base.
    uintptr_t base_addr = (uintptr_t)base;
    if (base_addr < 4096 || base_addr > 0x00007fffffffffffULL) {
        extern _Atomic uint64_t g_tls_sll_invalid_head[];
        uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_head[class_idx], 1, memory_order_relaxed);
        static __thread uint8_t s_log_limit[TINY_NUM_CLASSES] = {0};
        if (s_log_limit[class_idx] < 4) {
            fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d head=%p dropped count=%llu\n",
                    class_idx, base, (unsigned long long)cnt + 1);
            s_log_limit[class_idx]++;
        }
        // Help triage: show last successful push base for this thread/class
        if (s_tls_sll_last_push[class_idx] && s_log_limit[class_idx] <= 4) {
            fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d last_push=%p\n",
                    class_idx, s_tls_sll_last_push[class_idx]);
        }
        tls_sll_dump_tls_window(class_idx, "head_range");
        g_tls_sll[class_idx].head = NULL;
        g_tls_sll[class_idx].count = 0;
        tls_sll_record_writer(class_idx, "pop_invalid_head");
        return false;
    }
#endif

    // Optional high-frequency canary check for target classes (e.g., 4/6)
    static int s_canary_fast = -1;
    if (__builtin_expect(s_canary_fast == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_CANARY_FAST");
        s_canary_fast = (e && *e && *e != '0') ? 1 : 0;
    }
    if (__builtin_expect(s_canary_fast && (class_idx == 4 || class_idx == 6), 0)) {
        extern _Atomic uint64_t g_tls_sll_pop_counter[];
        uint64_t pc = atomic_fetch_add_explicit(&g_tls_sll_pop_counter[class_idx], 1, memory_order_relaxed) + 1;
        periodic_canary_check(pc, class_idx == 4 ? "tls_sll_pop_cls4" : "tls_sll_pop_cls6");
    }

    tls_sll_debug_guard(class_idx, base, "pop");

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Header validation for header-classes (class != 0,7).
    if (class_idx != 0 && class_idx != 7) {
        uint8_t got = *(uint8_t*)base;
        uint8_t expect = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
        PTR_TRACK_TLS_POP(base, class_idx);
        PTR_TRACK_HEADER_READ(base, got);
        if (__builtin_expect(got != expect, 0)) {
#if !HAKMEM_BUILD_RELEASE
            fprintf(stderr,
                    "[TLS_SLL_POP] CORRUPTED HEADER cls=%d base=%p got=0x%02x expect=0x%02x\n",
                    class_idx, base, got, expect);
            ptr_trace_dump_now("header_corruption");
            abort();
#else
            // In release, fail-safe: drop list.
            // PERF DEBUG: Count header corruption resets
            static _Atomic uint64_t g_hdr_reset_count = 0;
            uint64_t cnt = atomic_fetch_add_explicit(&g_hdr_reset_count, 1, memory_order_relaxed);
            if (cnt % 10000 == 0) {
                fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
                        class_idx, base, got, expect, (unsigned long long)cnt);
            }
            g_tls_sll[class_idx].head = NULL;
            g_tls_sll[class_idx].count = 0;
            tls_sll_record_writer(class_idx, "header_reset");
            {
                static int g_sll_ring_en = -1;
                if (__builtin_expect(g_sll_ring_en == -1, 0)) {
                    const char* r = getenv("HAKMEM_TINY_SLL_RING");
                    g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
                }
                if (__builtin_expect(g_sll_ring_en, 0)) {
                    // aux encodes: high 8 bits = got, low 8 bits = expect
                    uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expect;
                    tiny_debug_ring_record(0x7F12 /*TLS_SLL_HDR_CORRUPT*/, (uint16_t)class_idx, base, aux);
                }
            }
            return false;
#endif
        }
    }
#endif

    // Read next via Box API.
    void* next;
    PTR_NEXT_READ("tls_pop", class_idx, base, 0, next);
    tls_sll_diag_next(class_idx, base, next, "pop_next");

#if !HAKMEM_BUILD_RELEASE
    if (next && !validate_ptr_range(next, "tls_sll_pop_next")) {
        fprintf(stderr,
                "[TLS_SLL_POP] FATAL invalid next ptr cls=%d base=%p next=%p\n",
                class_idx, base, next);
        ptr_trace_dump_now("next_corruption");
        abort();
    }
#endif

    g_tls_sll[class_idx].head = next;
    tls_sll_record_writer(class_idx, "pop");
    if ((class_idx == 4 || class_idx == 6) && next && !tls_sll_head_valid(next)) {
        fprintf(stderr, "[TLS_SLL_POP_POST_INVALID] cls=%d next=%p last_writer=%s\n",
                class_idx,
                next,
                g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
        tls_sll_dump_tls_window(class_idx, "pop_post");
        g_tls_sll[class_idx].head = NULL;
        g_tls_sll[class_idx].count = 0;
        return false;
    }
    if (g_tls_sll[class_idx].count > 0) {
        g_tls_sll[class_idx].count--;
    }

    // Clear next inside popped node to avoid stale-chain issues.
    tiny_next_write(class_idx, base, NULL);

#if !HAKMEM_BUILD_RELEASE
    // Record callsite for debugging (debug-only)
    s_tls_sll_last_pop_from[class_idx] = where;
#else
    (void)where; // Suppress unused warning in release
#endif

    *out = base;
    return true;
}

// ========== Splice ==========
//
// Splice a pre-linked chain of BASE pointers into TLS SLL head.
// chain_head is BASE; links are via Box API-compatible next layout.
// Returns number of nodes actually moved (<= capacity remaining).

static inline uint32_t tls_sll_splice(int class_idx,
                                      void* chain_head,
                                      uint32_t count,
                                      uint32_t capacity)
{
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_splice");

    if (!chain_head || count == 0 || capacity == 0) {
        return 0;
    }

    uint32_t cur = g_tls_sll[class_idx].count;
    if (cur >= capacity) {
        return 0;
    }

    uint32_t room = capacity - cur;
    uint32_t to_move = (count < room) ? count : room;

    // Traverse chain up to to_move, validate, and find tail.
    void* tail = chain_head;
    uint32_t moved = 1;

    tls_sll_debug_guard(class_idx, chain_head, "splice_head");

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Restore header defensively on each node we touch.
    {
        uint8_t* b = (uint8_t*)chain_head;
        uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
        *b = expected;
    }
#endif

    while (moved < to_move) {
        tls_sll_debug_guard(class_idx, tail, "splice_traverse");

        void* next;
        PTR_NEXT_READ("tls_splice_trav", class_idx, tail, 0, next);
        if (next && !tls_sll_head_valid(next)) {
            static _Atomic uint32_t g_splice_diag = 0;
            uint32_t shot = atomic_fetch_add_explicit(&g_splice_diag, 1, memory_order_relaxed);
            if (shot < 8) {
                fprintf(stderr,
                        "[TLS_SLL_SPLICE_INVALID_NEXT] cls=%d head=%p tail=%p next=%p moved=%u/%u\n",
                        class_idx, chain_head, tail, next, moved, to_move);
            }
        }

        if (!next) {
            break;
        }

#if HAKMEM_TINY_HEADER_CLASSIDX
        {
            uint8_t* b = (uint8_t*)next;
            uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
            *b = expected;
        }
#endif

        tail = next;
        moved++;
    }

    // Link tail to existing head and install new head.
    tls_sll_debug_guard(class_idx, tail, "splice_tail");
    PTR_NEXT_WRITE("tls_splice_link", class_idx, tail, 0, g_tls_sll[class_idx].head);

    g_tls_sll[class_idx].head = chain_head;
    tls_sll_record_writer(class_idx, "splice");
    g_tls_sll[class_idx].count = cur + moved;

    return moved;
}

// ========== Macro Wrappers ==========
//
// Box Theory: Callers use tls_sll_push/pop() macros which auto-insert __func__.
// No changes required to 20+ call sites.

#if !HAKMEM_BUILD_RELEASE
#  define tls_sll_push(cls, ptr, cap) \
       tls_sll_push_impl((cls), (ptr), (cap), __func__)
#  define tls_sll_pop(cls, out) \
       tls_sll_pop_impl((cls), (out), __func__)
#else
#  define tls_sll_push(cls, ptr, cap) \
       tls_sll_push_impl((cls), (ptr), (cap), NULL)
#  define tls_sll_pop(cls, out) \
       tls_sll_pop_impl((cls), (out), NULL)
#endif

#endif // TLS_SLL_BOX_H