hakmem/core/box/tls_sll_box.h

// tls_sll_box.h - Box TLS-SLL: Single-Linked List API (Unified Box version)
//
// Goal:
//   - Single authoritative Box for TLS SLL operations.
//   - All next pointer layout is decided by tiny_next_ptr_box.h (Box API).
//   - Callers pass BASE pointers only; no local next_offset arithmetic.
//   - Compatible with existing ptr_trace PTR_NEXT_* macros (off is logging-only).
//
// Invariants:
//   - g_tiny_class_sizes[cls] is TOTAL stride (including 1-byte header when enabled).
//   - For HEADER_CLASSIDX != 0, tiny_nextptr.h encodes:
//       class 0: next_off = 0
//       class 1-7: next_off = 1
//     Callers MUST NOT duplicate this logic.
//   - TLS SLL stores BASE pointers only.
//   - Box provides: push / pop / splice with capacity & integrity checks.

#ifndef TLS_SLL_BOX_H
#define TLS_SLL_BOX_H

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

#include "../hakmem_internal.h"  // Phase 10: Type Safety (hak_base_ptr_t)
#include "../hakmem_tiny_config.h"
#include "../hakmem_build_flags.h"
#include "../hakmem_debug_master.h"  // For unified debug level control
#include "../tiny_remote.h"
#include "../tiny_region_id.h"
#include "../hakmem_tiny_integrity.h"
#include "../ptr_track.h"
#include "../ptr_trace.h"
#include "../tiny_debug_ring.h"
#include "../hakmem_super_registry.h"
#include "ss_addr_map_box.h"
#include "../superslab/superslab_inline.h"
#include "tiny_ptr_bridge_box.h"  // Box: ptr→(ss,slab,meta,class) bridge
#include "tiny_next_ptr_box.h"
#include "tiny_header_box.h"  // Header Box: Single Source of Truth for header operations

// ============================================================================
// Performance Measurement: TLS SLL Hit Rate (ENV-gated)
// ============================================================================
// Global atomic counters for TLS SLL performance measurement
// ENV: HAKMEM_MEASURE_UNIFIED_CACHE=1 to enable (default: OFF)
extern _Atomic uint64_t g_tls_sll_push_count_global;
extern _Atomic uint64_t g_tls_sll_pop_count_global;
extern _Atomic uint64_t g_tls_sll_pop_empty_count_global;

// Print statistics function
void tls_sll_print_measurements(void);

// Check if measurement is enabled (inline for hot path)
static inline int tls_sll_measure_enabled(void) {
    static int g_measure = -1;
    if (__builtin_expect(g_measure == -1, 0)) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        g_measure = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_measure;
}

// Per-thread debug shadow: last successful push base per class (release-safe)
// Changed to extern to share across TUs (defined in hakmem_tiny.c)
extern __thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES];

// Per-thread callsite tracking: last push caller per class (debug-only)
#if !HAKMEM_BUILD_RELEASE
static __thread const char* s_tls_sll_last_push_from[TINY_NUM_CLASSES] = {NULL};
static __thread const char* s_tls_sll_last_pop_from[TINY_NUM_CLASSES] = {NULL};
#endif

// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
extern __thread uint64_t g_tls_canary_before_sll;
extern __thread uint64_t g_tls_canary_after_sll;
extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
extern int g_tls_sll_class_mask;  // bit i=1 → SLL allowed for class i

#if !HAKMEM_BUILD_RELEASE
// Global callsite record (debug only; zero overhead in release)
static const char* g_tls_sll_push_file[TINY_NUM_CLASSES] = {0};
static int g_tls_sll_push_line[TINY_NUM_CLASSES] = {0};
#endif

// ========== Debug guard ==========

#if !HAKMEM_BUILD_RELEASE
static inline void tls_sll_debug_guard(int class_idx, hak_base_ptr_t base, const char* where)
{
    (void)class_idx;
    void* raw = HAK_BASE_TO_RAW(base);
    if ((uintptr_t)raw < 4096) {
        fprintf(stderr,
                "[TLS_SLL_GUARD] %s: suspicious ptr=%p cls=%d\n",
                where, raw, class_idx);
        abort();
    }
}
#else
static inline void tls_sll_debug_guard(int class_idx, hak_base_ptr_t base, const char* where)
{
    (void)class_idx; (void)base; (void)where;
}
#endif

// Normalize helper: callers are required to pass BASE already.
// Kept as a no-op for documentation / future hardening.
static inline hak_base_ptr_t tls_sll_normalize_base(int class_idx, hak_base_ptr_t node)
{
#if HAKMEM_TINY_HEADER_CLASSIDX
    if (!hak_base_is_null(node) && class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
        extern const size_t g_tiny_class_sizes[];
        size_t stride = g_tiny_class_sizes[class_idx];
        void* raw = HAK_BASE_TO_RAW(node);
        if (__builtin_expect(stride != 0, 1)) {
            uintptr_t delta = (uintptr_t)raw % stride;
            if (__builtin_expect(delta == 1, 0)) {
                // USER pointer passed in; normalize to BASE (= user-1) to avoid offset-1 writes.
                void* base = (uint8_t*)raw - 1;
                static _Atomic uint32_t g_tls_sll_norm_userptr = 0;
                uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_norm_userptr, 1, memory_order_relaxed);
                if (n < 8) {
                    fprintf(stderr,
                            "[TLS_SLL_NORMALIZE_USERPTR] cls=%d node=%p -> base=%p stride=%zu\n",
                            class_idx, raw, base, stride);
                    void* bt[16];
                    int frames = backtrace(bt, 16);
                    backtrace_symbols_fd(bt, frames, fileno(stderr));
                }
                return HAK_BASE_FROM_RAW(base);
            }
        }
    }
#else
    (void)class_idx;
#endif
    return node;
}

// Narrow dump around TLS SLL array when corruption is detected (env-gated)
static inline void tls_sll_dump_tls_window(int class_idx, const char* stage)
{
    static _Atomic uint32_t g_tls_sll_diag_shots = 0;
    static int s_diag_enable = -1;
    if (__builtin_expect(s_diag_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
        s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!__builtin_expect(s_diag_enable, 0)) return;

    uint32_t shot = atomic_fetch_add_explicit(&g_tls_sll_diag_shots, 1, memory_order_relaxed);
    if (shot >= 2) return;  // limit noise

    if (shot == 0) {
        // Map TLS layout once to confirm index→address mapping during triage
        fprintf(stderr,
                "[TLS_SLL_ADDRMAP] before=%p sll=%p after=%p entry_size=%zu\n",
                (void*)&g_tls_canary_before_sll,
                (void*)g_tls_sll,
                (void*)&g_tls_canary_after_sll,
                sizeof(TinyTLSSLL));
        for (int c = 0; c < TINY_NUM_CLASSES; c++) {
            fprintf(stderr, "  C%d: head@%p count@%p\n",
                    c,
                    (void*)&g_tls_sll[c].head,
                    (void*)&g_tls_sll[c].count);
        }
    }

    fprintf(stderr,
            "[TLS_SLL_INVALID_POP_DIAG] shot=%u stage=%s cls=%d head=%p count=%u last_push=%p last_writer=%s\n",
            shot + 1,
            stage ? stage : "(null)",
            class_idx,
            HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
            g_tls_sll[class_idx].count,
            HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
            g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
    fprintf(stderr, "  tls_sll snapshot (head/count):");
    for (int c = 0; c < TINY_NUM_CLASSES; c++) {
        fprintf(stderr, " C%d:%p/%u", c, HAK_BASE_TO_RAW(g_tls_sll[c].head), g_tls_sll[c].count);
    }
    fprintf(stderr, " canary_before=%#llx canary_after=%#llx\n",
            (unsigned long long)g_tls_canary_before_sll,
            (unsigned long long)g_tls_canary_after_sll);
}

static inline void tls_sll_record_writer(int class_idx, const char* who)
{
    if (__builtin_expect(class_idx >= 0 && class_idx < TINY_NUM_CLASSES, 1)) {
        g_tls_sll_last_writer[class_idx] = who;
    }
}

static inline int tls_sll_head_valid(hak_base_ptr_t head)
{
    uintptr_t a = (uintptr_t)HAK_BASE_TO_RAW(head);
    return (a >= 4096 && a <= 0x00007fffffffffffULL);
}

#if !HAKMEM_BUILD_RELEASE
// Defensive: validate current TLS head before using it (Debug/triage).
// Uses full TinyPtrBridgeBox to confirm (ss, slab, class) consistency.
static inline void tls_sll_sanitize_head(int class_idx, const char* stage)
{
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        return;
    }
    hak_base_ptr_t head = g_tls_sll[class_idx].head;
    if (hak_base_is_null(head)) return;

    void* raw = HAK_BASE_TO_RAW(head);
    TinyPtrBridgeInfo info = tiny_ptr_bridge_classify_raw(raw);
    SuperSlab* ss = info.ss;
    int idx = info.slab_idx;
    uint8_t meta_cls = info.meta_cls;

    int reset = 0;
    if (!ss || !info.meta || idx < 0 || meta_cls != (uint8_t)class_idx) {
        reset = 1;
    }
#if HAKMEM_TINY_HEADER_CLASSIDX
    if (!reset) {
        uint8_t hdr = *(uint8_t*)raw;
        uint8_t expect = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
        if (hdr != expect) {
            reset = 1;
        }
    }
#endif
    if (reset) {
        fprintf(stderr,
                "[TLS_SLL_SANITIZE] stage=%s cls=%d head=%p meta_cls=%u idx=%d ss=%p\n",
                stage ? stage : "(null)",
                class_idx,
                raw,
                (unsigned)meta_cls,
                idx,
                (void*)ss);
        g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
        g_tls_sll[class_idx].count = 0;
        tls_sll_record_writer(class_idx, "sanitize");
    }
}

// Debug/triage: full pointer→(ss,slab,meta,class) validation on new head.
static inline int tls_sll_check_node(int class_idx, void* raw, void* from_base, const char* stage)
{
    if (!raw) return 1;
    TinyPtrBridgeInfo info = tiny_ptr_bridge_classify_raw(raw);
    SuperSlab* ss = info.ss;
    int idx = info.slab_idx;
    uint8_t meta_cls = info.meta_cls;
    if (!ss || !info.meta || idx < 0 || meta_cls != (uint8_t)class_idx) {
        goto bad;
    }
#if HAKMEM_TINY_HEADER_CLASSIDX
    {
        uint8_t hdr = *(uint8_t*)raw;
        uint8_t expect = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
        if (hdr != expect) {
            goto bad;
        }
    }
#endif
    return 1;
bad:
    static _Atomic uint32_t g_head_set_diag = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_head_set_diag, 1, memory_order_relaxed);
    if (shot < 8) {
        uint8_t from_meta_cls = 0xff;
        int from_idx = -1;
        SuperSlab* from_ss = NULL;
        TinySlabMeta* from_meta = NULL;
        uint64_t from_meta_used = 0;
        void* from_meta_freelist = NULL;
        if (from_base) {
            TinyPtrBridgeInfo from_info = tiny_ptr_bridge_classify_raw(from_base);
            from_ss = from_info.ss;
            from_idx = from_info.slab_idx;
            from_meta = from_info.meta;
            from_meta_cls = from_info.meta_cls;
            if (from_meta) {
                from_meta_used = from_meta->used;
                from_meta_freelist = from_meta->freelist;
            }
        }
        // Dump raw next pointers stored in from_base for extra forensics
        uintptr_t from_next_off0 = 0;
        uintptr_t from_next_off1 = 0;
        size_t next_off_dbg = tiny_next_off(class_idx);
        if (from_base) {
            memcpy(&from_next_off0, from_base, sizeof(from_next_off0));
            memcpy(&from_next_off1, (uint8_t*)from_base + next_off_dbg, sizeof(from_next_off1));
        }

        fprintf(stderr,
                "[TLS_SLL_SET_INVALID] stage=%s cls=%d head=%p meta_cls=%u idx=%d ss=%p "
                "from_base=%p from_meta_cls=%u from_idx=%d from_ss=%p "
                "from_meta_used=%llu from_meta_freelist=%p next_off=%zu next_raw0=%p next_raw1=%p "
                "canary_before=%#llx canary_after=%#llx last_writer=%s last_push=%p\n",
                stage ? stage : "(null)",
                class_idx,
                raw,
                (unsigned)meta_cls,
                idx,
                ss,
                from_base,
                (unsigned)from_meta_cls,
                from_idx,
                (void*)from_ss,
                (unsigned long long)from_meta_used,
                from_meta_freelist,
                next_off_dbg,
                (void*)from_next_off0,
                (void*)from_next_off1,
                (unsigned long long)g_tls_canary_before_sll,
                (unsigned long long)g_tls_canary_after_sll,
                g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
                HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]));
        void* bt[16];
        int frames = backtrace(bt, 16);
        backtrace_symbols_fd(bt, frames, fileno(stderr));
        fflush(stderr);
    }
    return 0;
}
#else
// Release: keep only a cheap range check to preserve fail-fast,
// but avoid heavy TinyPtrBridgeBox lookups on every head update.
static inline void tls_sll_sanitize_head(int class_idx, const char* stage)
{
    (void)stage;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        return;
    }
    hak_base_ptr_t head = g_tls_sll[class_idx].head;
    if (hak_base_is_null(head)) return;

    uintptr_t a = (uintptr_t)HAK_BASE_TO_RAW(head);
    if (a < 4096 || a > 0x00007fffffffffffULL) {
        g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
        g_tls_sll[class_idx].count = 0;
        tls_sll_record_writer(class_idx, "sanitize_fast");
    }
}

static inline int tls_sll_check_node(int class_idx, void* raw, void* from_base, const char* stage)
{
    (void)class_idx;
    (void)from_base;
    (void)stage;
    if (!raw) return 1;
    uintptr_t a = (uintptr_t)raw;
    if (a < 4096 || a > 0x00007fffffffffffULL) {
        return 0;  // trigger abort() in caller
    }
    return 1;
}
#endif

// Forward decl for head trace (definition below)
static inline void tls_sll_head_trace(int class_idx,
                                      void* old_head,
                                      void* new_head,
                                      void* from_base,
                                      const char* stage);

static inline void tls_sll_set_head(int class_idx, hak_base_ptr_t head, const char* stage)
{
    void* raw = HAK_BASE_TO_RAW(head);
    void* old_raw = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);
    tls_sll_head_trace(class_idx, old_raw, raw, NULL, stage);
    if (!tls_sll_check_node(class_idx, raw, NULL, stage)) {
        abort();
    }
    g_tls_sll[class_idx].head = head;
    tls_sll_record_writer(class_idx, stage ? stage : "set_head");
}

static inline void tls_sll_set_head_from(int class_idx, hak_base_ptr_t head, void* from_base, const char* stage)
{
    void* raw = HAK_BASE_TO_RAW(head);
    void* old_raw = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);
    tls_sll_head_trace(class_idx, old_raw, raw, from_base, stage);
    if (!tls_sll_check_node(class_idx, raw, from_base, stage)) {
        abort();
    }
    g_tls_sll[class_idx].head = head;
    tls_sll_record_writer(class_idx, stage ? stage : "set_head");
}

static inline void tls_sll_set_head_raw(int class_idx, void* raw_head, const char* stage)
{
    tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(raw_head), stage);
}

static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage)
{
    static _Atomic uint32_t g_hdr_mismatch_log = 0;
    uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
    if (n < 16) {
        fprintf(stderr,
                "[TLS_SLL_HDR_MISMATCH] stage=%s cls=%d base=%p got=0x%02x expect=0x%02x\n",
                stage ? stage : "(null)",
                class_idx,
                HAK_BASE_TO_RAW(base),
                got,
                expect);
    }
}

static inline void tls_sll_diag_next(int class_idx, hak_base_ptr_t base, hak_base_ptr_t next, const char* stage)
{
#if !HAKMEM_BUILD_RELEASE
    static int s_diag_enable = -1;
    if (__builtin_expect(s_diag_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
        s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!__builtin_expect(s_diag_enable, 0)) return;

    // Narrow to target classes to preserve early shots
    if (class_idx != 4 && class_idx != 6 && class_idx != 7) return;

    void* raw_next = HAK_BASE_TO_RAW(next);
    int in_range = tls_sll_head_valid(next);
    if (in_range) {
        // Range check (abort on clearly bad pointers to catch first offender)
        validate_ptr_range(raw_next, "tls_sll_pop_next_diag");
    }

    SuperSlab* ss = hak_super_lookup(raw_next);
    int slab_idx = ss ? slab_index_for(ss, raw_next) : -1;
    TinySlabMeta* meta = (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) ? &ss->slabs[slab_idx] : NULL;
    int meta_cls = meta ? (int)meta->class_idx : -1;
#if HAKMEM_TINY_HEADER_CLASSIDX
    int hdr_cls = raw_next ? tiny_region_id_read_header((uint8_t*)raw_next + 1) : -1;
#else
    int hdr_cls = -1;
#endif

    static _Atomic uint32_t g_next_diag_once = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_next_diag_once, 1, memory_order_relaxed);
    if (shot < 12) {
        fprintf(stderr,
                "[TLS_SLL_POP_NEXT_DIAG] shot=%u stage=%s cls=%d base=%p next=%p hdr_cls=%d meta_cls=%d slab=%d ss=%p\n",
                shot + 1,
                stage ? stage : "(null)",
                class_idx,
                HAK_BASE_TO_RAW(base),
                raw_next,
                hdr_cls,
                meta_cls,
                slab_idx,
                (void*)ss);
    }
#else
    (void)class_idx; (void)base; (void)next; (void)stage;
#endif
}

#if !HAKMEM_BUILD_RELEASE
// Optional: trace head writes to locate corruption sources (debug/triage only).
// Enabled via env: HAKMEM_TINY_SLL_HEADLOG=1 (optionally filter by HAKMEM_TINY_SLL_HEADCLS).
static inline void tls_sll_fetch_ptr_info(void* p, SuperSlab** out_ss, int* out_idx, uint8_t* out_cls)
{
    SuperSlab* ss = hak_super_lookup(p);
    int cap = ss ? ss_slabs_capacity(ss) : 0;
    int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, p) : -1;
    uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
    if (out_ss) *out_ss = ss;
    if (out_idx) *out_idx = idx;
    if (out_cls) *out_cls = cls;
}

static inline void tls_sll_head_trace(int class_idx,
                                      void* old_head,
                                      void* new_head,
                                      void* from_base,
                                      const char* stage)
{
    // Lazy init: default OFF unless HAKMEM_TINY_SLL_HEADLOG is set.
    static int g_headlog_en = -1;
    static int g_headlog_cls = -2;  // -1 = no filter; >=0 only that class
    if (__builtin_expect(g_headlog_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        g_headlog_en = (e && *e && *e != '0') ? 1 : 0;
    } else {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        if (e && *e == '0') g_headlog_en = 0;
    }
    if (g_headlog_cls == -2) {
        const char* c = getenv("HAKMEM_TINY_SLL_HEADCLS");
        if (c && *c) {
            g_headlog_cls = atoi(c);
        } else {
            g_headlog_cls = -1;
        }
    }
    if (!__builtin_expect(g_headlog_en, 0)) return;
    if (g_headlog_cls >= 0 && class_idx != g_headlog_cls) return;

    static _Atomic uint32_t g_headlog_shot = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_headlog_shot, 1, memory_order_relaxed);
    if (shot >= 256) return;

    uint32_t count_before = 0;
    if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
        count_before = g_tls_sll[class_idx].count;
    }

    SuperSlab *new_ss = NULL, *old_ss = NULL, *from_ss = NULL;
    int new_idx = -1, old_idx = -1, from_idx = -1;
    uint8_t new_cls = 0xff, old_cls = 0xff, from_cls = 0xff;
    tls_sll_fetch_ptr_info(new_head, &new_ss, &new_idx, &new_cls);
    tls_sll_fetch_ptr_info(old_head, &old_ss, &old_idx, &old_cls);
    tls_sll_fetch_ptr_info(from_base, &from_ss, &from_idx, &from_cls);

    fprintf(stderr,
            "[TLS_SLL_HEAD_SET] shot=%u stage=%s cls=%d count=%u old=%p new=%p from=%p "
            "new_ss=%p new_idx=%d new_cls=%u old_ss=%p old_idx=%d old_cls=%u "
            "from_ss=%p from_idx=%d from_cls=%u last_writer=%s last_push=%p\n",
            shot + 1,
            stage ? stage : "(null)",
            class_idx,
            (unsigned)count_before,
            old_head,
            new_head,
            from_base,
            (void*)new_ss,
            new_idx,
            (unsigned)new_cls,
            (void*)old_ss,
            old_idx,
            (unsigned)old_cls,
            (void*)from_ss,
            from_idx,
            (unsigned)from_cls,
            g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
            HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]));
}
#else
// Release: keep a no-op stub to avoid any overhead on head updates.
static inline void tls_sll_fetch_ptr_info(void* p, SuperSlab** out_ss, int* out_idx, uint8_t* out_cls)
{
    (void)p; (void)out_ss; (void)out_idx; (void)out_cls;
}

static inline void tls_sll_head_trace(int class_idx,
                                      void* old_head,
                                      void* new_head,
                                      void* from_base,
                                      const char* stage)
{
    (void)class_idx;
    (void)old_head;
    (void)new_head;
    (void)from_base;
    (void)stage;
}
#endif

// ========== Push ==========
//
// Push BASE pointer into TLS SLL for given class.
// Returns true on success, false if capacity full or input invalid.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_push() macro instead of calling directly.

static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t capacity, const char* where)
{
    static _Atomic uint32_t g_tls_push_trace = 0;
    if (atomic_fetch_add_explicit(&g_tls_push_trace, 1, memory_order_relaxed) < 4096) {
        HAK_TRACE("[tls_sll_push_impl_enter]\n");
    }
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");

    // Class mask gate (narrow triage): if disallowed, reject push
    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
        return false;
    }

    // Defensive: ensure current head is sane before linking new node.
    tls_sll_sanitize_head(class_idx, "push");

    // Capacity semantics:
    //  - capacity == 0 → disabled (reject)
    //  - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
    if (capacity == 0) {
        return false;
    }
    const uint32_t kCapacityHardMax = (1u << 20);
    const int unlimited = (capacity > kCapacityHardMax);

    if (hak_base_is_null(ptr)) {
        return false;
    }

    // Base pointer only (callers must pass BASE; this is a no-op by design).
    ptr = tls_sll_normalize_base(class_idx, ptr);
    void* raw_ptr = HAK_BASE_TO_RAW(ptr);

    // TWO-SPEED: Full validation with hak_super_lookup is DEBUG-ONLY.
    // Release builds use ss_fast_lookup (O(1) mask arithmetic) for pinning only.
    SuperSlab* ss_ptr = NULL;
#if !HAKMEM_BUILD_RELEASE
    // Debug: Full validation with expensive registry lookup
    bool push_valid = true;
    do {
        static _Atomic uint32_t g_tls_sll_push_meta_mis = 0;
        struct SuperSlab* ss = hak_super_lookup(raw_ptr);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            ss_ptr = ss;
            int sidx = slab_index_for(ss, raw_ptr);
            if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
                uint8_t meta_cls = ss->slabs[sidx].class_idx;
                if (meta_cls < TINY_NUM_CLASSES && meta_cls != (uint8_t)class_idx) {
                    push_valid = false;
                    uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_meta_mis, 1, memory_order_relaxed);
                    if (n < 4) {
                        fprintf(stderr,
                                "[TLS_SLL_PUSH_META_MISMATCH] cls=%d meta_cls=%u base=%p slab_idx=%d ss=%p\n",
                                class_idx, (unsigned)meta_cls, raw_ptr, sidx, (void*)ss);
                        void* bt[8];
                        int frames = backtrace(bt, 8);
                        backtrace_symbols_fd(bt, frames, fileno(stderr));
                    }
                    fflush(stderr);
                }
            }
        } else {
            push_valid = false;
            static _Atomic uint32_t g_tls_sll_push_no_ss = 0;
            uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_no_ss, 1, memory_order_relaxed);
            if (n < 4) {
                extern int g_super_reg_initialized;
                extern SSAddrMap g_ss_addr_map;
                fprintf(stderr,
                        "[TLS_SLL_PUSH_NO_SS] cls=%d base=%p from=%s reg_init=%d map_count=%zu\n",
                        class_idx,
                        raw_ptr,
                        where ? where : "(null)",
                        g_super_reg_initialized,
                        g_ss_addr_map.count);
                fflush(stderr);
            }
        }
    } while (0);
    if (!push_valid) {
        return false;  // Drop malformed pointer instead of corrupting TLS SLL
    }
#else
    // Release: Use fast O(1) lookup for pinning (no validation overhead)
    ss_ptr = ss_fast_lookup(raw_ptr);
#endif  // !HAKMEM_BUILD_RELEASE

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Validate header on push - detect blocks pushed without header write
    // Enabled via HAKMEM_DEBUG_LEVEL >= 3 (INFO level) or in debug builds
    // Legacy: HAKMEM_TINY_SLL_VALIDATE_HDR=1 still works for compatibility
    do {
        static int g_validate_hdr = -1;
        if (__builtin_expect(g_validate_hdr == -1, 0)) {
#if !HAKMEM_BUILD_RELEASE
            g_validate_hdr = 1;  // Always on in debug
#else
            g_validate_hdr = hak_debug_check_level("HAKMEM_TINY_SLL_VALIDATE_HDR", 3);
#endif
        }

        if (__builtin_expect(g_validate_hdr, 0)) {
            static _Atomic uint32_t g_tls_sll_push_bad_hdr = 0;
            uint8_t hdr = *(uint8_t*)raw_ptr;
            uint8_t expected = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
            if (hdr != expected) {
                uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_bad_hdr, 1, memory_order_relaxed);
                if (n < 10) {
                    fprintf(stderr,
                            "[TLS_SLL_PUSH_BAD_HDR] cls=%d base=%p got=0x%02x expect=0x%02x from=%s\n",
                            class_idx, raw_ptr, hdr, expected, where ? where : "(null)");
                    void* bt[8];
                    int frames = backtrace(bt, 8);
                    backtrace_symbols_fd(bt, frames, fileno(stderr));
                    fflush(stderr);
                }
            }
        }
    } while (0);
#endif

#if !HAKMEM_BUILD_RELEASE
    // Minimal range guard before we touch memory.
    if (!validate_ptr_range(raw_ptr, "tls_sll_push_base")) {
        fprintf(stderr,
                "[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
                class_idx, raw_ptr);
        abort();
    }
#else
    // Release: drop malformed ptrs but keep running.
    uintptr_t ptr_addr = (uintptr_t)raw_ptr;
    if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
        extern _Atomic uint64_t g_tls_sll_invalid_push[];
        uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
        static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
        if (s_log_limit_push[class_idx] < 4) {
            fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
                    class_idx, raw_ptr, (unsigned long long)cnt + 1);
            s_log_limit_push[class_idx]++;
        }
        return false;
    }
#endif

    // Capacity check BEFORE any writes.
    uint32_t cur = g_tls_sll[class_idx].count;
    if (!unlimited && cur >= capacity) {
        return false;
    }

    // Pin SuperSlab while node resides in TLS SLL (prevents premature free)
    if (ss_ptr && ss_ptr->magic == SUPERSLAB_MAGIC) {
        superslab_ref_inc(ss_ptr);
    }

    // DEBUG: Strict address check on push to catch corruption early
    uintptr_t ptr_val = (uintptr_t)raw_ptr;
    if (ptr_val < 4096 || ptr_val > 0x00007fffffffffffULL) {
        fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p (val=%llx) from=%s\n",
                class_idx, raw_ptr, (unsigned long long)ptr_val, where ? where : "(null)");
        abort();
    }

    // Header restoration using Header Box (C1-C6 only; C0/C7 skip)
    // Safe mode (HAKMEM_TINY_SLL_SAFEHEADER=1): never overwrite header; reject on magic mismatch.
    // Default mode: restore expected header.
#if !HAKMEM_TINY_HEADERLESS
    if (tiny_class_preserves_header(class_idx)) {
        static int g_sll_safehdr = -1;
        static int g_sll_ring_en = -1; // optional ring trace for TLS-SLL anomalies
        if (__builtin_expect(g_sll_safehdr == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_SLL_SAFEHEADER");
            g_sll_safehdr = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_sll_ring_en == -1, 0)) {
            const char* r = getenv("HAKMEM_TINY_SLL_RING");
            g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
        }
        // ptr is BASE pointer, header is at ptr+0
        uint8_t* b = (uint8_t*)raw_ptr;
        uint8_t got_pre, expected;
        tiny_header_validate(b, class_idx, &got_pre, &expected);
        if (__builtin_expect(got_pre != expected, 0)) {
            tls_sll_log_hdr_mismatch(class_idx, ptr, got_pre, expected, "push_preheader");
        }
        if (g_sll_safehdr) {
            uint8_t got = *b;
            if ((got & 0xF0u) != HEADER_MAGIC) {
                // Reject push silently (fall back to slow path at caller)
                if (__builtin_expect(g_sll_ring_en, 0)) {
                    // aux encodes: high 8 bits = got, low 8 bits = expected
                    uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expected;
                    tiny_debug_ring_record(0x7F10 /*TLS_SLL_REJECT*/, (uint16_t)class_idx, raw_ptr, aux);
                }
                return false;
            }
        } else {
            PTR_TRACK_TLS_PUSH(b, class_idx);
            PTR_TRACK_HEADER_WRITE(b, expected);
            // GEMINI FIX: Always write header before push + memory barrier
            // This prevents compiler/CPU reordering that might delay header write after next-ptr write
            // or expose incomplete state to other threads (though TLS SLL should be private).
            *(uint8_t*)b = (uint8_t)(0xa0 | (class_idx & 0x0f));
            __atomic_thread_fence(__ATOMIC_RELEASE);
        }
    }
#endif

    tls_sll_debug_guard(class_idx, ptr, "push");

#if !HAKMEM_BUILD_RELEASE
    // Optional double-free detection: scan a bounded prefix of the list.
    // Increased from 64 to 256 to catch orphaned blocks deeper in the chain.
    {
        hak_base_ptr_t scan = g_tls_sll[class_idx].head;
        uint32_t scanned = 0;
        const uint32_t limit = (g_tls_sll[class_idx].count < 256)
                                 ? g_tls_sll[class_idx].count
                                 : 256;
        while (!hak_base_is_null(scan) && scanned < limit) {
            if (hak_base_eq(scan, ptr)) {
                fprintf(stderr,
                        "[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
                        class_idx,
                        raw_ptr,
                        HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
                        g_tls_sll[class_idx].count,
                        scanned,
                        HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
                        s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
                        s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
                        where ? where : "(null)");
                ptr_trace_dump_now("tls_sll_dup");
                // ABORT to get backtrace showing exact double-free location
                abort();
            }
            void* next_raw;
            PTR_NEXT_READ("tls_sll_scan", class_idx, HAK_BASE_TO_RAW(scan), 0, next_raw);
            scan = HAK_BASE_FROM_RAW(next_raw);
            scanned++;
        }
    }
#endif

    // Link new node to current head via Box API (offset is handled inside tiny_nextptr).
    // Note: g_tls_sll[...].head is hak_base_ptr_t, but PTR_NEXT_WRITE takes void* val.
    PTR_NEXT_WRITE("tls_push", class_idx, raw_ptr, 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head));
    tls_sll_set_head(class_idx, ptr, "push");
    g_tls_sll[class_idx].count = cur + 1;
    s_tls_sll_last_push[class_idx] = ptr;

    // Performance measurement: count push operations
    if (__builtin_expect(tls_sll_measure_enabled(), 0)) {
        atomic_fetch_add_explicit(&g_tls_sll_push_count_global, 1, memory_order_relaxed);
    }

#if !HAKMEM_BUILD_RELEASE
    // Trace TLS SLL push (debug only)
    extern void ptr_trace_record_impl(int event, void* ptr, int class_idx, uint64_t op_num,
                                      void* aux_ptr, uint32_t aux_u32, int aux_int,
                                      const char* file, int line);
    extern _Atomic uint64_t g_ptr_trace_op_counter;
    uint64_t _trace_op = atomic_fetch_add_explicit(&g_ptr_trace_op_counter, 1, memory_order_relaxed);
    ptr_trace_record_impl(4 /*PTR_EVENT_FREE_TLS_PUSH*/, raw_ptr, class_idx, _trace_op,
                          NULL, g_tls_sll[class_idx].count, 0,
                          where ? where : __FILE__, __LINE__);
#endif

#if !HAKMEM_BUILD_RELEASE
    // Record callsite for debugging (debug-only)
    s_tls_sll_last_push_from[class_idx] = where;
#else
    (void)where; // Suppress unused warning in release
#endif

    return true;
}

// ========== Pop ==========
//
// Pop BASE pointer from TLS SLL.
// Returns true on success and stores BASE into *out.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_pop() macro instead of calling directly.

static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const char* where)
{
    static _Atomic uint32_t g_tls_pop_trace = 0;
    if (atomic_fetch_add_explicit(&g_tls_pop_trace, 1, memory_order_relaxed) < 4096) {
        HAK_TRACE("[tls_sll_pop_impl_enter]\n");
    }
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
    // Class mask gate: if disallowed, behave as empty
    if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
        return false;
    }
    atomic_fetch_add(&g_integrity_check_class_bounds, 1);

    // Defensive: ensure current head is sane before accessing it.
    tls_sll_sanitize_head(class_idx, "pop_enter");

    hak_base_ptr_t base = g_tls_sll[class_idx].head;
    if (hak_base_is_null(base)) {
        // Performance measurement: count empty pops
        if (__builtin_expect(tls_sll_measure_enabled(), 0)) {
            atomic_fetch_add_explicit(&g_tls_sll_pop_empty_count_global, 1, memory_order_relaxed);
        }
        return false;
    }
    void* raw_base = HAK_BASE_TO_RAW(base);

    // Sentinel guard: remote sentinel must never be in TLS SLL.
    if (__builtin_expect((uintptr_t)raw_base == TINY_REMOTE_SENTINEL, 0)) {
        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_sentinel");
        g_tls_sll[class_idx].count = 0;
        tls_sll_record_writer(class_idx, "pop_sentinel_reset");
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[TLS_SLL_POP] Remote sentinel detected at head; SLL reset (cls=%d)\n",
                class_idx);
#endif
        {
            static int g_sll_ring_en = -1;
            if (__builtin_expect(g_sll_ring_en == -1, 0)) {
                const char* r = getenv("HAKMEM_TINY_SLL_RING");
                g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_sll_ring_en, 0)) {
                tiny_debug_ring_record(0x7F11 /*TLS_SLL_SENTINEL*/, (uint16_t)class_idx, raw_base, 0);
            }
        }
        return false;
    }

#if !HAKMEM_BUILD_RELEASE
    if (!validate_ptr_range(raw_base, "tls_sll_pop_base")) {
        fprintf(stderr,
                "[TLS_SLL_POP] FATAL invalid BASE ptr cls=%d base=%p\n",
                class_idx, raw_base);
        abort();
    }
#else
    // Fail-fast even in release: drop malformed TLS head to avoid SEGV on bad base.
    uintptr_t base_addr = (uintptr_t)raw_base;
    if (base_addr < 4096 || base_addr > 0x00007fffffffffffULL) {
        extern _Atomic uint64_t g_tls_sll_invalid_head[];
        uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_head[class_idx], 1, memory_order_relaxed);
        static __thread uint8_t s_log_limit[TINY_NUM_CLASSES] = {0};
        if (s_log_limit[class_idx] < 4) {
            fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d head=%p (val=%llx) dropped count=%llu\n",
                    class_idx, raw_base, (unsigned long long)base_addr, (unsigned long long)cnt + 1);
            s_log_limit[class_idx]++;
            tls_sll_dump_tls_window(class_idx, "invalid_head"); // Added dump
        }
        // Help triage: show last successful push base for this thread/class
        if (!hak_base_is_null(s_tls_sll_last_push[class_idx]) && s_log_limit[class_idx] <= 4) {
            fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d last_push=%p\n",
                    class_idx, HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]));
        }
        tls_sll_dump_tls_window(class_idx, "head_range");
        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_invalid_head");
        g_tls_sll[class_idx].count = 0;
        return false;
    }
#endif

    // Optional high-frequency canary check for target classes (e.g., 4/6)
    static int s_canary_fast = -1;
    if (__builtin_expect(s_canary_fast == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_CANARY_FAST");
        s_canary_fast = (e && *e && *e != '0') ? 1 : 0;
    }
    if (__builtin_expect(s_canary_fast && (class_idx == 4 || class_idx == 6), 0)) {
        extern _Atomic uint64_t g_tls_sll_pop_counter[];
        uint64_t pc = atomic_fetch_add_explicit(&g_tls_sll_pop_counter[class_idx], 1, memory_order_relaxed) + 1;
        periodic_canary_check(pc, class_idx == 4 ? "tls_sll_pop_cls4" : "tls_sll_pop_cls6");
    }

    tls_sll_debug_guard(class_idx, base, "pop");

    // Header validation using Header Box (C1-C6 only; C0/C7 skip)
#if !HAKMEM_TINY_HEADERLESS
    if (tiny_class_preserves_header(class_idx)) {
        uint8_t got, expect;
        PTR_TRACK_TLS_POP(raw_base, class_idx);
        bool valid = tiny_header_validate(raw_base, class_idx, &got, &expect);
        PTR_TRACK_HEADER_READ(raw_base, got);
        if (__builtin_expect(!valid, 0)) {
#if !HAKMEM_BUILD_RELEASE
            fprintf(stderr,
                    "[TLS_SLL_POP] CORRUPTED HEADER cls=%d base=%p got=0x%02x expect=0x%02x\n",
                    class_idx, raw_base, got, expect);
            ptr_trace_dump_now("header_corruption");
            abort();
#else
            // In release, fail-safe: drop list.
            // PERF DEBUG: Count header corruption resets
            static _Atomic uint64_t g_hdr_reset_count = 0;
            uint64_t cnt = atomic_fetch_add_explicit(&g_hdr_reset_count, 1, memory_order_relaxed);
            // Narrow diagnostics for early shots to root-cause corruption.
            static _Atomic uint32_t g_hdr_reset_diag = 0;
            uint32_t shot = atomic_fetch_add_explicit(&g_hdr_reset_diag, 1, memory_order_relaxed);
            if (shot < 8) {
                // Extra diagnostics: dump raw next pointers at offsets 0 and tiny_next_off()
                uintptr_t next_raw_off0 = 0;
                uintptr_t next_raw_off1 = 0;
                size_t next_off = tiny_next_off(class_idx);
                memcpy(&next_raw_off0, raw_base, sizeof(next_raw_off0));
                memcpy(&next_raw_off1, (uint8_t*)raw_base + next_off, sizeof(next_raw_off1));
                uint8_t dump8[8] = {0};
                memcpy(dump8, raw_base, sizeof(dump8));

                SuperSlab* ss_diag = hak_super_lookup(raw_base);
                int slab_idx = ss_diag ? slab_index_for(ss_diag, raw_base) : -1;
                uint8_t meta_cls = 0xff;
                if (ss_diag && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss_diag)) {
                    meta_cls = ss_diag->slabs[slab_idx].class_idx;
                }
                void* raw_next_diag = NULL;
                PTR_NEXT_READ("tls_hdr_reset_diag", class_idx, raw_base, 0, raw_next_diag);
                fprintf(stderr,
                        "[TLS_SLL_HDR_RESET] shot=%u cls=%d base=%p got=0x%02x expect=0x%02x "
                        "next=%p meta_cls=%u slab_idx=%d last_writer=%s last_push=%p count=%llu "
                        "next_off=%zu next_raw0=%p next_raw1=%p bytes=%02x%02x%02x%02x%02x%02x%02x%02x\n",
                        shot + 1,
                        class_idx,
                        raw_base,
                        got,
                        expect,
                        raw_next_diag,
                        (unsigned)meta_cls,
                        slab_idx,
                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
                        HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
                        (unsigned long long)cnt,
                        next_off,
                        (void*)next_raw_off0,
                        (void*)next_raw_off1,
                        dump8[0], dump8[1], dump8[2], dump8[3],
                        dump8[4], dump8[5], dump8[6], dump8[7]);
            } else if (cnt % 10000 == 0) {
                fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
                        class_idx, raw_base, got, expect, (unsigned long long)cnt);
            }
            tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "header_reset");
            g_tls_sll[class_idx].count = 0;
            {
                static int g_sll_ring_en = -1;
                if (__builtin_expect(g_sll_ring_en == -1, 0)) {
                    const char* r = getenv("HAKMEM_TINY_SLL_RING");
                    g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
                }
                if (__builtin_expect(g_sll_ring_en, 0)) {
                    // aux encodes: high 8 bits = got, low 8 bits = expect
                    uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expect;
                    tiny_debug_ring_record(0x7F12 /*TLS_SLL_HDR_CORRUPT*/, (uint16_t)class_idx, raw_base, aux);
                }
            }
            return false;
#endif
        }
    }
#endif

    // Read next via Box API.
    void* raw_next;
    PTR_NEXT_READ("tls_pop", class_idx, raw_base, 0, raw_next);
    hak_base_ptr_t next = HAK_BASE_FROM_RAW(raw_next);
    tls_sll_diag_next(class_idx, base, next, "pop_next");

    // Optional: misalignment guard to catch BASE/USER混入由来のnextズレ (triage用)
    do {
        static int g_misalign_guard = -1;
        if (__builtin_expect(g_misalign_guard == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_SLL_MISALIGN_GUARD");
            g_misalign_guard = (e && *e && *e != '0') ? 1 : 0;
        }
        if (!__builtin_expect(g_misalign_guard, 0)) break;
        if (hak_base_is_null(next)) break;
        extern const size_t g_tiny_class_sizes[];
        size_t stride = (class_idx >= 0 && class_idx < TINY_NUM_CLASSES)
                          ? g_tiny_class_sizes[class_idx]
                          : 0;
        if (stride == 0) break;
        uintptr_t next_addr = (uintptr_t)raw_next;
        if ((next_addr % stride) != 0) {
            static _Atomic uint32_t g_misalign_shot = 0;
            uint32_t shot = atomic_fetch_add_explicit(&g_misalign_shot, 1, memory_order_relaxed);
            if (shot < 8) {
                fprintf(stderr,
                        "[TLS_SLL_POP_MISALIGNED_NEXT] shot=%u cls=%d base=%p next=%p stride=%zu where=%s last_writer=%s\n",
                        shot + 1,
                        class_idx,
                        raw_base,
                        raw_next,
                        stride,
                        where ? where : "(null)",
                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
            }
            // Drop list defensively; nextが壊れているのでheadごと破棄
            tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_next_misaligned");
            g_tls_sll[class_idx].count = 0;
            return false;
        }
    } while (0);

    // TWO-SPEED: Next pointer validation is DEBUG-ONLY to keep HOT PATH fast.
    // In Release builds, we trust the linked list structure.
#if !HAKMEM_BUILD_RELEASE
    // Validate next pointer before installing as new head.
    if (!hak_base_is_null(next)) {
        SuperSlab* next_ss = hak_super_lookup(raw_next);
        int next_cap = next_ss ? ss_slabs_capacity(next_ss) : 0;
        int next_idx = (next_ss && next_ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(next_ss, raw_next) : -1;
        uint8_t next_meta_cls = (next_idx >= 0 && next_idx < next_cap) ? next_ss->slabs[next_idx].class_idx : 0xff;
        if (!next_ss || next_ss->magic != SUPERSLAB_MAGIC || next_idx < 0 || next_idx >= next_cap || next_meta_cls != (uint8_t)class_idx) {
            static _Atomic uint32_t g_next_invalid = 0;
            uint32_t shot = atomic_fetch_add_explicit(&g_next_invalid, 1, memory_order_relaxed);
            if (shot < 8) {
                fprintf(stderr,
                        "[TLS_SLL_NEXT_INVALID] cls=%d next=%p meta_cls=%u idx=%d ss=%p from_base=%p head=%p last_writer=%s\n",
                        class_idx,
                        raw_next,
                        (unsigned)next_meta_cls,
                        next_idx,
                        (void*)next_ss,
                        raw_base,
                        HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
                        g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
            }
            // Drop remainder of list to avoid chasing stale pointers.
            next = HAK_BASE_FROM_RAW(NULL);
            tls_sll_set_head(class_idx, next, "pop_next_invalid");
            g_tls_sll[class_idx].count = 0;
        }
    }
#endif  // !HAKMEM_BUILD_RELEASE

#if !HAKMEM_BUILD_RELEASE
    if (!hak_base_is_null(next) && !validate_ptr_range(raw_next, "tls_sll_pop_next")) {
        fprintf(stderr,
                "[TLS_SLL_POP] FATAL invalid next ptr cls=%d base=%p next=%p\n",
                class_idx, raw_base, raw_next);
        ptr_trace_dump_now("next_corruption");
        abort();
    }
#endif

    tls_sll_set_head_from(class_idx, next, raw_base, where ? where : "pop");
    if ((class_idx == 4 || class_idx == 6) && !hak_base_is_null(next) && !tls_sll_head_valid(next)) {
        fprintf(stderr, "[TLS_SLL_POP_POST_INVALID] cls=%d next=%p last_writer=%s\n",
                class_idx,
                raw_next,
                g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
        tls_sll_dump_tls_window(class_idx, "pop_post");
        tls_sll_set_head(class_idx, HAK_BASE_FROM_RAW(NULL), "pop_post");
        g_tls_sll[class_idx].count = 0;
        return false;
    }
    if (g_tls_sll[class_idx].count > 0) {
        g_tls_sll[class_idx].count--;
    }

    // Clear next inside popped node to avoid stale-chain issues.
    PTR_NEXT_WRITE("tls_pop_clear", class_idx, raw_base, 0, NULL);

    // Release SuperSlab pin now that node left TLS SLL
    do {
        SuperSlab* ss_pop = hak_super_lookup(raw_base);
        if (ss_pop && ss_pop->magic == SUPERSLAB_MAGIC) {
            superslab_ref_dec(ss_pop);
        }
    } while (0);

#if !HAKMEM_BUILD_RELEASE
    // Trace TLS SLL pop (debug only)
    extern void ptr_trace_record_impl(int event, void* ptr, int class_idx, uint64_t op_num,
                                      void* aux_ptr, uint32_t aux_u32, int aux_int,
                                      const char* file, int line);
    extern _Atomic uint64_t g_ptr_trace_op_counter;
    uint64_t _trace_op = atomic_fetch_add_explicit(&g_ptr_trace_op_counter, 1, memory_order_relaxed);
    ptr_trace_record_impl(3 /*PTR_EVENT_ALLOC_TLS_POP*/, raw_base, class_idx, _trace_op,
                          NULL, g_tls_sll[class_idx].count + 1, 0,
                          where ? where : __FILE__, __LINE__);

    // Record callsite for debugging (debug-only)
    s_tls_sll_last_pop_from[class_idx] = where;

    // Debug: Log pop operations (first 50, class 1 only)
    {
        extern _Atomic uint64_t g_debug_op_count;
        uint64_t op = atomic_load(&g_debug_op_count);
        if (op < 50 && class_idx == 1) {
            fprintf(stderr, "[OP#%04lu POP] cls=%d base=%p tls_count_after=%u\n",
                    (unsigned long)op, class_idx, raw_base,
                    g_tls_sll[class_idx].count);
            fflush(stderr);
        }
    }
#else
    (void)where; // Suppress unused warning in release
#endif

    *out = base;

    // Performance measurement: count successful pops
    if (__builtin_expect(tls_sll_measure_enabled(), 0)) {
        atomic_fetch_add_explicit(&g_tls_sll_pop_count_global, 1, memory_order_relaxed);
    }

    return true;
}

// ========== Splice ==========
//
// Splice a pre-linked chain of BASE pointers into TLS SLL head.
// chain_head is BASE; links are via Box API-compatible next layout.
// Returns number of nodes actually moved (<= capacity remaining).

static inline uint32_t tls_sll_splice(int class_idx,
                                      hak_base_ptr_t chain_head,
                                      uint32_t count,
                                      uint32_t capacity)
{
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_splice");

    if (hak_base_is_null(chain_head) || count == 0 || capacity == 0) {
        return 0;
    }

    uint32_t cur = g_tls_sll[class_idx].count;
    if (cur >= capacity) {
        return 0;
    }

    uint32_t room = capacity - cur;
    uint32_t to_move = (count < room) ? count : room;

    // Traverse chain up to to_move, validate, and find tail.
    hak_base_ptr_t tail = chain_head;
    uint32_t moved = 1;

    tls_sll_debug_guard(class_idx, chain_head, "splice_head");

    // Restore header defensively on each node we touch (C1-C6 only; C0/C7 skip)
    tiny_header_write_if_preserved(HAK_BASE_TO_RAW(chain_head), class_idx);

    while (moved < to_move) {
        tls_sll_debug_guard(class_idx, tail, "splice_traverse");

        void* raw_next;
        PTR_NEXT_READ("tls_splice_trav", class_idx, HAK_BASE_TO_RAW(tail), 0, raw_next);
        hak_base_ptr_t next = HAK_BASE_FROM_RAW(raw_next);

        if (!hak_base_is_null(next) && !tls_sll_head_valid(next)) {
            static _Atomic uint32_t g_splice_diag = 0;
            uint32_t shot = atomic_fetch_add_explicit(&g_splice_diag, 1, memory_order_relaxed);
            if (shot < 8) {
                fprintf(stderr,
                        "[TLS_SLL_SPLICE_INVALID_NEXT] cls=%d head=%p tail=%p next=%p moved=%u/%u\n",
                        class_idx, HAK_BASE_TO_RAW(chain_head), HAK_BASE_TO_RAW(tail), raw_next, moved, to_move);
            }
        }

        if (hak_base_is_null(next)) {
            break;
        }

        // Restore header on each traversed node (C1-C6 only; C0/C7 skip)
        tiny_header_write_if_preserved(raw_next, class_idx);

        tail = next;
        moved++;
    }

    // Link tail to existing head and install new head.
    tls_sll_debug_guard(class_idx, tail, "splice_tail");
    PTR_NEXT_WRITE("tls_splice_link", class_idx, HAK_BASE_TO_RAW(tail), 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head));

    tls_sll_set_head(class_idx, chain_head, "splice");
    g_tls_sll[class_idx].count = cur + moved;

    return moved;
}

// ========== Macro Wrappers ==========
//
// Box Theory: Callers use tls_sll_push/pop() macros which auto-insert callsite info (debug only).
// No changes required to call sites.

#if !HAKMEM_BUILD_RELEASE
static inline bool tls_sll_push_guarded(int class_idx, hak_base_ptr_t ptr, uint32_t capacity,
                                        const char* where, const char* file, int line) {
    // Enhanced duplicate guard (scan up to 256 nodes for deep duplicates)
    uint32_t scanned = 0;
    hak_base_ptr_t cur = g_tls_sll[class_idx].head;
    const uint32_t limit = (g_tls_sll[class_idx].count < 256) ? g_tls_sll[class_idx].count : 256;

    while (!hak_base_is_null(cur) && scanned < limit) {
        if (hak_base_eq(cur, ptr)) {
            // Enhanced error message with both old and new callsite info
            const char* last_file = g_tls_sll_push_file[class_idx] ? g_tls_sll_push_file[class_idx] : "(null)";
            fprintf(stderr,
                    "[TLS_SLL_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u\n"
                    "  Current push: where=%s at %s:%d\n"
                    "  Previous push: %s:%d\n",
                    class_idx, HAK_BASE_TO_RAW(ptr), HAK_BASE_TO_RAW(g_tls_sll[class_idx].head), g_tls_sll[class_idx].count, scanned,
                    where, file, line,
                    last_file, g_tls_sll_push_line[class_idx]);

            // Dump pointer trace for detailed analysis
            ptr_trace_dump_now("tls_sll_dup");
            abort();
        }
        void* raw_next = NULL;
        PTR_NEXT_READ("tls_sll_dupcheck", class_idx, HAK_BASE_TO_RAW(cur), 0, raw_next);
        cur = HAK_BASE_FROM_RAW(raw_next);
        scanned++;
    }

    // Call impl (duplicate check in impl will be skipped since we already checked above and would abort)
    // Note: impl has its own duplicate check, but we'll never reach it because we abort above
    bool ok = tls_sll_push_impl(class_idx, ptr, capacity, where);
    if (ok) {
        g_tls_sll_push_file[class_idx] = file;
        g_tls_sll_push_line[class_idx] = line;
    }
    return ok;
}

#  define tls_sll_push(cls, ptr, cap) \
       tls_sll_push_guarded((cls), (ptr), (cap), __func__, __FILE__, __LINE__)
#  define tls_sll_pop(cls, out) \
       tls_sll_pop_impl((cls), (out), __func__)
#else
#  define tls_sll_push(cls, ptr, cap) \
       tls_sll_push_impl((cls), (ptr), (cap), __func__)
#  define tls_sll_pop(cls, out) \
       tls_sll_pop_impl((cls), (out), __func__)
#endif

#endif // TLS_SLL_BOX_H