// hakmem/core/box/tls_sll_box.h

// tls_sll_box.h - Box TLS-SLL: Single-Linked List API (C7-safe)
//
// Purpose: Centralized TLS SLL management (single authority for list links and header bytes)
// Design: Zero-overhead static inline API; since Phase E1-CORRECT every class (C7 included) is accepted
//
// Key Rules:
//   1. C7 is no longer rejected: since Phase E1-CORRECT all classes may use the TLS SLL
//   2. All SLL direct writes MUST go through this API
//   3. Pop clears the block's next-pointer slot before returning it (all classes)
//   4. Capacity checks prevent overflow
//
// Architecture:
//   - Box TLS-SLL (this): Push/Pop/Splice authority
//   - Caller: Provides capacity limits, handles fallback on failure
//
// Performance:
//   - Static inline → zero function call overhead
//   - No per-call C7 rejection branch remains; all classes take the same path
//   - Intended to match direct SLL access; debug-only validation is compiled out when HAKMEM_BUILD_RELEASE is set

#ifndef TLS_SLL_BOX_H
#define TLS_SLL_BOX_H

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>     // For fprintf in debug
#include <stdlib.h>    // For abort in debug
#include "../ptr_trace.h"              // Debug-only: pointer next read/write tracing
#include "../hakmem_tiny_config.h"     // For TINY_NUM_CLASSES
#include "../hakmem_build_flags.h"
#include "../tiny_remote.h"            // For TINY_REMOTE_SENTINEL detection
#include "../tiny_region_id.h"        // HEADER_MAGIC / HEADER_CLASS_MASK
#include "../hakmem_tiny_integrity.h"  // PRIORITY 2: Freelist integrity checks
#include "../ptr_track.h"              // Pointer tracking for debugging header corruption
#include "tiny_next_ptr_box.h"        // Box API: Next pointer read/write

// Debug guard: sanity-check a base pointer before an SLL operation touches it.
//
//   class_idx - size class, used only in the diagnostic message
//   base      - pointer about to be linked/unlinked
//   where     - short tag naming the call site, printed in the diagnostic
//
// Release builds compile this to an empty function. Debug builds abort when the
// pointer value is below 4096, which can only be a corrupted/tiny integer.
#if HAKMEM_BUILD_RELEASE
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where) {
    (void)class_idx;
    (void)base;
    (void)where;
}
#else
extern const size_t g_tiny_class_sizes[];
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where) {
    (void)g_tiny_class_sizes;

    // Deliberately NO alignment-vs-class-size check: blocks are stride-aligned
    // (size+header) from the slab base, so (addr % class size) is not 0.
    const uintptr_t addr = (uintptr_t)base;
    if (addr >= 4096) {
        return;
    }

    fprintf(stderr, "[TLS_SLL_GUARD] %s: small ptr=%p cls=%d (likely corruption)\n", where, base, class_idx);
    abort();
}
#endif

// Identity pass-through kept for API compatibility.
// Callers are required to hand in base pointers already, so no heuristic
// user-pointer (base+1) -> base adjustment is attempted; `node` is returned as-is
// and `class_idx` is ignored.
static inline void* tls_sll_normalize_base(int class_idx, void* node) {
    void* const base = node;
    (void)class_idx;
    return base;
}

// External TLS SLL state (defined elsewhere)
// Per-thread list heads, one slot per size class; a NULL head is treated as "list empty" by tls_sll_pop().
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
// Per-thread node counts, one slot per size class; push/splice compare these against the caller-supplied capacity.
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];

// ========== Push ==========

// Push one block onto the calling thread's TLS SLL for `class_idx`.
//
// Parameters:
//   class_idx - size class of the block (checked by HAK_CHECK_CLASS_IDX before any array access)
//   ptr       - BASE pointer of the block (start of the stride, NOT the user pointer base+1)
//   capacity  - maximum number of nodes the list may hold; supplied by the caller
// Returns: true if the block was linked in; false if the list already holds `capacity`
//          nodes (the caller keeps the block and takes its fallback path)
//
// Layout (HAKMEM_TINY_HEADER_CLASSIDX builds):
//   - C1-C6: [1B header][payload]; the link is stored at base+1 so the header survives
//   - C0/C7: the link is stored at base+0 and overwrites the header byte, which is where
//            tls_sll_pop() reads it back (an 8B C0 block cannot hold a pointer at base+1)
//   - Phase E1-CORRECT: no class is rejected here any more, C7 included
//
// Debug builds additionally abort on: tiny pointers (< 4096), a class-2 USER pointer
// passed instead of BASE, and a pointer already present in the first 100 list nodes.
static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
    // PRIORITY 1: Bounds check BEFORE any array access
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");

    // Capacity check
    if (g_tls_sll_count[class_idx] >= capacity) {
        return false;  // SLL full
    }

    // Guard runs before the first dereference of ptr so that a bogus pointer is
    // reported by the guard instead of faulting inside the checks below.
    tls_sll_debug_guard(class_idx, ptr, "push");

    // FIX #15: catch USER-pointer contamination at the injection point.
    // Class 2 only (can be extended to all header classes): if ptr is ODD and the byte
    // at ptr-1 is a valid class-2 header, the caller passed USER (BASE+1), not BASE.
#if !HAKMEM_BUILD_RELEASE && HAKMEM_TINY_HEADER_CLASSIDX
    if (class_idx == 2 && (((uintptr_t)ptr) & 1)) {
        uint8_t* possible_base = (uint8_t*)ptr - 1;
        uint8_t byte_at_possible_base = *possible_base;
        uint8_t expected_header = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);

        if (byte_at_possible_base == expected_header) {
            extern _Atomic uint64_t malloc_count;
            uint64_t call = atomic_load(&malloc_count);
            fprintf(stderr, "\n========================================\n");
            fprintf(stderr, "=== USER POINTER BUG DETECTED ===\n");
            fprintf(stderr, "========================================\n");
            fprintf(stderr, "Call:        %lu\n", (unsigned long)call);
            fprintf(stderr, "Class:       %d\n", class_idx);
            fprintf(stderr, "Passed ptr:  %p (ODD address - USER pointer!)\n", ptr);
            fprintf(stderr, "Expected:    %p (EVEN address - BASE pointer)\n", (void*)possible_base);
            fprintf(stderr, "Header at ptr-1: 0x%02x (valid header!)\n", byte_at_possible_base);
            fprintf(stderr, "========================================\n");
            fprintf(stderr, "BUG: Caller passed USER pointer to tls_sll_push!\n");
            fprintf(stderr, "FIX: Convert USER → BASE before push\n");
            fprintf(stderr, "========================================\n");
            fflush(stderr);
            abort();
        }
    }
#endif

    // FIX #11C: ALWAYS restore the header before pushing (defense in depth).
    // The header byte may have been overwritten by the user, by a freelist link stored
    // at offset 0, or never written by a simple refill carve. Restoring it here gives a
    // single point of truth instead of relying on every call site.
    // For C0/C7 the link written below lands on offset 0 and replaces this byte again.
#if HAKMEM_TINY_HEADER_CLASSIDX
    PTR_TRACK_TLS_PUSH(ptr, class_idx);  // Track BEFORE header write
    *(uint8_t*)ptr = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
    PTR_TRACK_HEADER_WRITE(ptr, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));

    // Must match tls_sll_pop(): C0/C7 keep the link at base+0, C1-C6 at base+1.
    const size_t next_offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
#else
    const size_t next_offset = 0;
#endif

#if !HAKMEM_BUILD_RELEASE
    // PRIORITY 2+: Double-free detection - scan (at most 100 nodes of) the existing list
    // for `ptr`. Expensive, debug only.
    {
        void* scan = g_tls_sll_head[class_idx];
        uint32_t scan_count = 0;
        const uint32_t scan_limit = (g_tls_sll_count[class_idx] < 100) ? g_tls_sll_count[class_idx] : 100;

        while (scan && scan_count < scan_limit) {
            if (scan == ptr) {
                fprintf(stderr, "[TLS_SLL_PUSH] FATAL: Double-free detected!\n");
                fprintf(stderr, "  class_idx=%d ptr=%p appears multiple times in SLL\n", class_idx, ptr);
                fprintf(stderr, "  g_tls_sll_count[%d]=%u scan_pos=%u\n",
                        class_idx, g_tls_sll_count[class_idx], scan_count);
                fprintf(stderr, "  This indicates the same pointer was freed twice\n");
                ptr_trace_dump_now("double_free");
                fflush(stderr);
                abort();
            }

            void* next_scan;
            PTR_NEXT_READ("sll_scan", class_idx, scan, next_offset, next_scan);
            scan = next_scan;
            scan_count++;
        }
    }
#endif

    // Link: new node points at the old head, then becomes the head.
    PTR_NEXT_WRITE("tls_push", class_idx, ptr, next_offset, g_tls_sll_head[class_idx]);
    g_tls_sll_head[class_idx] = ptr;
    g_tls_sll_count[class_idx]++;

    return true;
}

// ========== Pop ==========

// Pop one block from the calling thread's TLS SLL for `class_idx`.
//
// Parameters:
//   class_idx - size class to pop from (checked by HAK_CHECK_CLASS_IDX)
//   out       - receives the popped block; written only when true is returned
// Returns: true with *out set on success; false if the list is empty, or if its head was
//          the remote-free sentinel (the list is then reset to empty so the caller refills)
//
// NOTE: *out receives the BASE pointer, not the user pointer; the caller converts it
//       to a user pointer if needed.
//
// Layout (HAKMEM_TINY_HEADER_CLASSIDX builds):
//   - C1-C6: header at base+0 is validated, link is read from base+1
//   - C0/C7: link lives at base+0 (it overwrites the header), so no header validation
//
// Safety:
//   - The block's next-pointer slot is cleared before the block is handed out (all classes)
//   - Debug builds validate base, the read address, and the loaded next pointer, and abort
//     with a diagnostic on corruption
static inline bool tls_sll_pop(int class_idx, void** out) {
    // PRIORITY 1: Bounds check
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
    // NOTE(review): this shared counter is bumped in release builds too - confirm intended.
    atomic_fetch_add(&g_integrity_check_class_bounds, 1);

    void* base = g_tls_sll_head[class_idx];
    if (!base) {
        return false;  // SLL empty
    }

    // CRITICAL FIX: Detect remote sentinel leaked into TLS SLL.
    // TINY_REMOTE_SENTINEL is used by remote free operations; dereferencing it would fault,
    // so the list is dropped and the caller is sent down the refill path.
    if (__builtin_expect((uintptr_t)base == TINY_REMOTE_SENTINEL, 0)) {
        // Reset corrupted TLS SLL state
        g_tls_sll_head[class_idx] = NULL;
        g_tls_sll_count[class_idx] = 0;

        // Log the first 10 detections per thread (helps identify root cause)
        static __thread int sentinel_logged = 0;
        if (sentinel_logged < 10) {
            fprintf(stderr, "[SENTINEL_DETECT] class=%d head=0x%lx (BADASS) - TLS SLL reset\n",
                    class_idx, (unsigned long)TINY_REMOTE_SENTINEL);
            sentinel_logged++;
        }

        return false;  // Trigger refill path
    }

    // PRIORITY 2: Validate base pointer BEFORE dereferencing
#if !HAKMEM_BUILD_RELEASE
    if (!validate_ptr_range(base, "tls_sll_pop_base")) {
        fprintf(stderr, "[TLS_SLL_POP] FATAL: Invalid BASE pointer!\n");
        fprintf(stderr, "  class_idx=%d base=%p\n", class_idx, base);
        fprintf(stderr, "  g_tls_sll_count[%d]=%u\n", class_idx, g_tls_sll_count[class_idx]);
        fflush(stderr);
        abort();
    }
#endif

    // Phase E1-CORRECT FIX: Class 0 must use offset 0 (8B block can't fit 8B pointer at offset 1)
#if HAKMEM_TINY_HEADER_CLASSIDX
    // CRITICAL: Use class_idx argument (NOT header byte) because Class 0/7 overwrite header with next pointer!
    const size_t next_offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
#else
    const size_t next_offset = 0;
#endif

    // PRIORITY 2: Validate that (base + next_offset) is safe to dereference BEFORE reading
#if !HAKMEM_BUILD_RELEASE
    void* read_addr = (uint8_t*)base + next_offset;
    if (!validate_ptr_range(read_addr, "tls_sll_pop_read_addr")) {
        fprintf(stderr, "[TLS_SLL_POP] FATAL: Cannot safely read next pointer!\n");
        fprintf(stderr, "  class_idx=%d base=%p read_addr=%p (base+%zu)\n",
                class_idx, base, read_addr, next_offset);
        fprintf(stderr, "  g_tls_sll_count[%d]=%u\n", class_idx, g_tls_sll_count[class_idx]);
        fflush(stderr);
        abort();
    }
    atomic_fetch_add(&g_integrity_check_freelist, 1);
#endif

    tls_sll_debug_guard(class_idx, base, "pop");

    // FIX #12: Validate the header BEFORE reading the next pointer.
    // A mismatch here means the corruption happened before this pop (push/splice/carve).
    // Phase E1-CORRECT: Class 1-6 have headers, Class 0/7 overwrite header with next pointer
#if HAKMEM_TINY_HEADER_CLASSIDX
    if (class_idx != 0 && class_idx != 7) {
        // Read byte 0 (should be header = HEADER_MAGIC | class_idx)
        uint8_t byte0 = *(uint8_t*)base;
        PTR_TRACK_TLS_POP(base, class_idx);  // Track POP operation
        PTR_TRACK_HEADER_READ(base, byte0);   // Track header read
        uint8_t expected = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);

        if (byte0 != expected) {
            // Get call number from malloc wrapper
            extern _Atomic uint64_t malloc_count;  // Defined in hak_wrappers.inc.h
            uint64_t call_num = atomic_load(&malloc_count);

            fprintf(stderr, "\n========================================\n");
            fprintf(stderr, "=== CORRUPTION DETECTED (Fix #12) ===\n");
            fprintf(stderr, "========================================\n");
            fprintf(stderr, "Malloc call: %lu\n", (unsigned long)call_num);
            fprintf(stderr, "Class:       %d\n", class_idx);
            fprintf(stderr, "Base ptr:    %p\n", base);
            fprintf(stderr, "Expected:    0x%02x (HEADER_MAGIC | class_idx)\n", expected);
            fprintf(stderr, "Actual:      0x%02x\n", byte0);
            fprintf(stderr, "========================================\n");
            fprintf(stderr, "\nThis means corruption was injected BEFORE this pop.\n");
            fprintf(stderr, "Likely culprits:\n");
            fprintf(stderr, "  1. tls_sll_push() - failed to restore header\n");
            fprintf(stderr, "  2. tls_sll_splice() - chain had corrupted headers\n");
            fprintf(stderr, "  3. trc_linear_carve() - didn't write header\n");
            fprintf(stderr, "  4. trc_pop_from_freelist() - didn't restore header\n");
            fprintf(stderr, "  5. Remote free path - overwrote header\n");
            fprintf(stderr, "========================================\n");
            fflush(stderr);
            abort();  // Immediate crash with backtrace
        }
    }  // end if (class_idx != 0 && class_idx != 7)
#endif

    // Load the successor. (The former per-pop global atomic counter and its permanently
    // disabled class-0 logs were removed: they only added a shared-cache-line write to
    // every pop.)
    void* next; PTR_NEXT_READ("tls_pop", class_idx, base, next_offset, next);

    // PRIORITY 2: Validate next pointer after reading it
#if !HAKMEM_BUILD_RELEASE
    if (!validate_ptr_range(next, "tls_sll_pop_next")) {
        fprintf(stderr, "[TLS_SLL_POP] FATAL: Invalid next pointer after read!\n");
        fprintf(stderr, "  class_idx=%d base=%p next=%p next_offset=%zu\n",
                class_idx, base, next, next_offset);
        fprintf(stderr, "  g_tls_sll_count[%d]=%u\n", class_idx, g_tls_sll_count[class_idx]);
        fflush(stderr);
        abort();
    }

    // PRIORITY 2+: Additional check for obviously corrupted pointers (non-canonical addresses)
    // Detects patterns that pass validate_ptr_range but are still invalid
    if (next != NULL) {
        uintptr_t addr = (uintptr_t)next;
        // x86-64 canonical addresses: bits 48-63 must be copies of bit 47, so
        // (addr >> 47) is either all zeros (user space) or all ones (kernel space).
        uint64_t top_bits = addr >> 47;
        if (top_bits != 0 && top_bits != 0x1FFFF) {
            fprintf(stderr, "[TLS_SLL_POP] FATAL: Corrupted SLL chain - non-canonical address!\n");
            fprintf(stderr, "  class_idx=%d base=%p next=%p (top_bits=0x%lx)\n",
                    class_idx, base, next, (unsigned long)top_bits);
            fprintf(stderr, "  g_tls_sll_count[%d]=%u\n", class_idx, g_tls_sll_count[class_idx]);
            fprintf(stderr, "  Likely causes: double-free, use-after-free, buffer overflow\n");
            ptr_trace_dump_now("sll_chain_corruption");
            fflush(stderr);
            abort();
        }
    }
#endif

    // Unlink. The count is only decremented when non-zero so a desynchronised counter
    // cannot wrap around.
    g_tls_sll_head[class_idx] = next;
    if (g_tls_sll_count[class_idx] > 0) {
        g_tls_sll_count[class_idx]--;
    }

    // CRITICAL FIX: Clear the next-pointer slot so a stale link cannot survive in a block
    // handed to the user and later create loops or invalid pointers when it is freed again.
    // Cost: 1 store for all classes.
#if HAKMEM_TINY_HEADER_CLASSIDX
    // DEBUG (class 2 only): report if the header is already wrong BEFORE clearing next
    if (class_idx == 2) {
        uint8_t header_before_clear = *(uint8_t*)base;
        if (header_before_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) {
            extern _Atomic uint64_t malloc_count;
            uint64_t call_num = atomic_load(&malloc_count);
            fprintf(stderr, "[POP_HEADER_CHECK] call=%lu cls=%d base=%p header=0x%02x BEFORE clear_next!\n",
                    (unsigned long)call_num, class_idx, base, header_before_clear);
            fflush(stderr);
        }
    }

    tiny_next_write(class_idx, base, NULL);  // All classes: clear next pointer

    // DEBUG (class 2 only): abort if clearing next damaged the header
    if (class_idx == 2) {
        uint8_t header_after_clear = *(uint8_t*)base;
        if (header_after_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) {
            extern _Atomic uint64_t malloc_count;
            uint64_t call_num = atomic_load(&malloc_count);
            fprintf(stderr, "[POP_HEADER_CORRUPTED] call=%lu cls=%d base=%p header=0x%02x AFTER clear_next!\n",
                    (unsigned long)call_num, class_idx, base, header_after_clear);
            fprintf(stderr, "[POP_HEADER_CORRUPTED] This means clear_next OVERWROTE the header!\n");
            fprintf(stderr, "[POP_HEADER_CORRUPTED] Bug: next_offset calculation is WRONG!\n");
            fflush(stderr);
            abort();
        }
    }
#else
    *(void**)base = NULL;  // No header: clear at base
#endif

    *out = base;  // Return base (caller converts to ptr if needed)
    return true;
}

// ========== Splice ==========

// Splice a pre-linked chain of blocks onto the calling thread's TLS SLL (batch push).
//
// Parameters:
//   class_idx  - size class of every block in the chain (checked by HAK_CHECK_CLASS_IDX)
//   chain_head - BASE pointer of the first block; blocks must already be linked with the
//                same link placement this file uses (C0/C7 at base+0, others at base+1
//                when HAKMEM_TINY_HEADER_CLASSIDX is set; base+0 otherwise)
//   count      - number of blocks the caller believes the chain holds
//   capacity   - maximum number of nodes the list may hold
// Returns: number of blocks actually moved; 0 when there is no room, count is 0, or
//          chain_head is NULL. May be less than `count` if capacity is short or the chain
//          ends early (NULL link).
//
// NOTE: when fewer than `count` blocks are moved, the link of the last moved block is
//       overwritten with the old list head, so blocks behind it are no longer reachable
//       from chain_head. Callers should size `count` to the available capacity.
//
// Safety:
//   - Phase E1-CORRECT: all classes including C7 can use splice
//   - FIX #14 (defense in depth): header bytes are rewritten for every moved C1-C6 block.
//     C0/C7 are skipped because their link occupies byte 0 and a header write would
//     clobber it before it is read.
//
// Performance: O(moved) single pass over the chain
static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t count, uint32_t capacity) {
    // PRIORITY 1: Bounds check BEFORE any array access (same contract as push/pop)
    HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_splice");

    // DEBUG: one-time log proving the function is reached
#if !HAKMEM_BUILD_RELEASE
    {
        static _Atomic int g_once = 0;
        if (atomic_fetch_add(&g_once, 1) == 0) {
            fprintf(stderr, "[SPLICE_ENTRY] First call to tls_sll_splice()! cls=%d count=%u capacity=%u\n",
                    class_idx, count, capacity);
            fflush(stderr);
        }
    }
#endif

    // Calculate available capacity
    uint32_t available = (capacity > g_tls_sll_count[class_idx])
                         ? (capacity - g_tls_sll_count[class_idx]) : 0;

    // DEBUG: log the inputs of the first 10 splices to diagnose truncation
#if !HAKMEM_BUILD_RELEASE
    {
        static _Atomic uint64_t g_splice_log_count = 0;
        uint64_t splice_num = atomic_fetch_add(&g_splice_log_count, 1);
        if (splice_num < 10) {
            fprintf(stderr, "[SPLICE_DEBUG #%lu] cls=%d count=%u capacity=%u sll_count=%u available=%u\n",
                    (unsigned long)splice_num, class_idx, count, capacity, g_tls_sll_count[class_idx], available);
            fflush(stderr);
        }
    }
#endif

    if (available == 0 || count == 0 || !chain_head) {
        return 0;  // No space or empty chain
    }

    // Limit splice size to available capacity
    uint32_t to_move = (count < available) ? count : available;

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Must match tls_sll_pop(): C0/C7 keep the link at base+0 and carry no live header.
    const bool has_header = (class_idx != 0 && class_idx != 7);
    const size_t next_offset = has_header ? 1 : 0;
    const uint8_t header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
#else
    const size_t next_offset = 0;  // No header: next at base
#endif

    // Single pass: guard each node, restore its header, and stop on the to_move-th node
    // (the tail) or on an early NULL link.
    void* tail = chain_head;
    uint32_t walked = 1;
    for (;;) {
        tls_sll_debug_guard(class_idx, tail, "splice_trav");
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (has_header) {
            *(uint8_t*)tail = header;  // Restore header unconditionally
        }
#endif
        if (walked >= to_move) {
            break;
        }

        void* next; PTR_NEXT_READ("tls_sp_trav", class_idx, tail, next_offset, next);
        if (!next) {
            // Chain shorter than expected, adjust to_move
            to_move = walked;
            break;
        }
        tail = next;
        walked++;
    }

    // Splice chain to SLL head: tail -> old head, then chain_head becomes the head
    tls_sll_debug_guard(class_idx, tail, "splice_link");
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SPLICE_LINK] cls=%d tail=%p off=%zu old_head=%p\n",
            class_idx, tail, (size_t)next_offset, g_tls_sll_head[class_idx]);
#endif
    PTR_NEXT_WRITE("tls_sp_link", class_idx, tail, next_offset, g_tls_sll_head[class_idx]);

    // FIX #11: chain_head is already the correct BASE pointer from the caller
    tls_sll_debug_guard(class_idx, chain_head, "splice_head");
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[SPLICE_SET_HEAD] cls=%d head=%p moved=%u\n",
            class_idx, chain_head, (unsigned)to_move);
#endif
    g_tls_sll_head[class_idx] = chain_head;
    g_tls_sll_count[class_idx] += to_move;

    return to_move;
}

// ========== Debug/Stats (optional) ==========

#if !HAKMEM_BUILD_RELEASE
// Debug-only invariant check: abort if the class-7 TLS SLL head is non-NULL.
// Intended to be called at safe points.
// NOTE(review): tls_sll_push()/tls_sll_splice() in this file state that C7 may now use
// the TLS SLL (Phase E1-CORRECT), so this check looks stale - confirm before calling.
static inline void tls_sll_verify_no_c7(void) {
    void* const c7_head = g_tls_sll_head[7];
    if (c7_head == NULL) {
        return;  // invariant holds
    }

    fprintf(stderr, "[TLS_SLL_BUG] C7 found in TLS SLL! head=%p count=%u\n",
            c7_head, g_tls_sll_count[7]);
    fprintf(stderr, "[TLS_SLL_BUG] This should NEVER happen - C7 is headerless!\n");
    abort();
}
#endif

#endif // TLS_SLL_BOX_H