#include "hakmem_tiny.h"
|
||
#include "hakmem_tiny_config.h" // Centralized configuration
|
||
#include "hakmem_phase7_config.h" // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
|
||
#include "hakmem_tiny_superslab.h"
|
||
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary // Phase 6.22: SuperSlab allocator
|
||
#include "hakmem_super_registry.h" // Phase 8.2: SuperSlab registry for memory profiling
|
||
#include "hakmem_internal.h"
|
||
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
|
||
#include "hakmem_tiny_magazine.h"
|
||
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
|
||
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
|
||
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
|
||
#include "hakmem_env_cache.h" // Priority-2: ENV cache
|
||
#include "box/tiny_cold_iface_v1.h" // Cold boundary wrapper for TinyHotHeap v2
|
||
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
|
||
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
|
||
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
|
||
// Phase 2B modules
|
||
#include "tiny_api.h" // Consolidated: stats_api, query_api, rss_api, registry_api
|
||
#include "tiny_tls.h"
|
||
#include "tiny_debug.h"
|
||
#include "hakmem_debug_master.h" // For unified debug level control
|
||
#include "tiny_mmap_gate.h"
|
||
#include "tiny_debug_ring.h"
|
||
#include "tiny_route.h"
|
||
#include "front/tiny_heap_v2.h"
|
||
#include "box/tiny_front_stats_box.h"
|
||
#include "box/tiny_front_v3_env_box.h"
|
||
#include "box/ss_os_acquire_box.h"
|
||
#include "tiny_tls_guard.h"
|
||
#include "tiny_ready.h"
|
||
#include "box/c7_meta_used_counter_box.h"
|
||
#include "box/tiny_c7_hotbox.h"
|
||
#include "box/tiny_heap_box.h"
|
||
#include "box/tiny_hotheap_v2_box.h"
|
||
#include "box/tiny_route_env_box.h"
|
||
#include "box/super_reg_box.h"
|
||
#include "tiny_region_id.h"
|
||
#include "tiny_debug_api.h"
|
||
#include "hakmem_tiny_tls_list.h"
|
||
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
|
||
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
|
||
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive TLS cache sizing
|
||
// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
|
||
#include "tiny_system.h" // Consolidated: stdio, stdlib, string, etc.
|
||
#include "hakmem_prof.h"
|
||
#include "hakmem_trace.h" // Optional USDT (perf) tracepoints
|
||
|
||
extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c

// Tiny allocator configuration, debug counters, and return helpers
#include "hakmem_tiny_config_box.inc"

// ============================================================================
// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
// ============================================================================
__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
__thread tiny_heap_ctx_t g_tiny_heap_ctx;
__thread int g_tiny_heap_ctx_init = 0;
__thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2 = NULL;
TinyHeapClassStats g_tiny_heap_stats[TINY_NUM_CLASSES] = {0};
TinyC7PageStats g_c7_page_stats = {0};
tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
int g_tiny_route_snapshot_done = 0;
_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};
TinyFrontV3Snapshot g_tiny_front_v3_snapshot = {0};
int g_tiny_front_v3_snapshot_ready = 0;
static TinyFrontV3SizeClassEntry g_tiny_front_v3_lut[TINY_MAX_SIZE + 1] = {0};
static int g_tiny_front_v3_lut_ready = 0;

// Forward decls (to keep deps light in this TU)
int unified_cache_enabled(void);

static int tiny_heap_stats_dump_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* eh = getenv("HAKMEM_TINY_HEAP_STATS_DUMP");
        const char* e = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");
        g = ((eh && *eh && *eh != '0') || (e && *e && *e != '0')) ? 1 : 0;
    }
    return g;
}

void tiny_front_v3_snapshot_init(void) {
    if (g_tiny_front_v3_snapshot_ready) {
        return;
    }
    TinyFrontV3Snapshot snap = {
        .unified_cache_on = unified_cache_enabled(),
        .tiny_guard_on = tiny_guard_is_enabled(),
        .header_mode = (uint8_t)tiny_header_mode(),
        .header_v3_enabled = tiny_header_v3_enabled(),
        .header_v3_skip_c7 = tiny_header_v3_skip_c7(),
    };
    g_tiny_front_v3_snapshot = snap;
    g_tiny_front_v3_snapshot_ready = 1;
}

void tiny_front_v3_size_class_lut_init(void) {
    if (g_tiny_front_v3_lut_ready) {
        return;
    }
    tiny_route_snapshot_init();
    size_t max_size = tiny_get_max_size();
    if (max_size > TINY_MAX_SIZE) {
        max_size = TINY_MAX_SIZE;
    }
    for (size_t sz = 0; sz <= TINY_MAX_SIZE; sz++) {
        TinyFrontV3SizeClassEntry e = {
            .class_idx = TINY_FRONT_V3_INVALID_CLASS,
            .route_kind = (uint8_t)TINY_ROUTE_LEGACY,
        };
        if (sz == 0 || sz > max_size) {
            g_tiny_front_v3_lut[sz] = e;
            continue;
        }
        int cls = hak_tiny_size_to_class((int)sz);
        if (cls >= 0 && cls < TINY_NUM_CLASSES) {
            e.class_idx = (uint8_t)cls;
            e.route_kind = (uint8_t)tiny_route_for_class((uint8_t)cls);
        }
        g_tiny_front_v3_lut[sz] = e;
    }
    g_tiny_front_v3_lut_ready = 1;
}

const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size) {
    if (__builtin_expect(!g_tiny_front_v3_lut_ready, 0)) {
        tiny_front_v3_size_class_lut_init();
    }
    if (size == 0 || size > TINY_MAX_SIZE) {
        return NULL;
    }
    return &g_tiny_front_v3_lut[size];
}
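
// Example (hypothetical caller, for illustration only): routing a 24-byte
// request through the LUT instead of recomputing size->class on every call.
//   const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(24);
//   if (e && e->class_idx != TINY_FRONT_V3_INVALID_CLASS) {
//       dispatch_by_route((tiny_route_kind_t)e->route_kind, e->class_idx);
//   }
// dispatch_by_route() is a made-up name; real callers live in the front-end.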

__attribute__((destructor))
static void tiny_heap_stats_dump(void) {
    if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapClassStats snap = {
            .alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed),
            .alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed),
            .free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed),
            .free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed),
            .alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed),
            .alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed),
        };
        if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 &&
            snap.free_fast_local == 0 && snap.free_slow_fallback == 0 &&
            snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) {
            continue;
        }
        fprintf(stderr,
                "[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
                cls,
                (unsigned long long)snap.alloc_fast_current,
                (unsigned long long)snap.alloc_slow_prepare,
                (unsigned long long)snap.free_fast_local,
                (unsigned long long)snap.free_slow_fallback,
                (unsigned long long)snap.alloc_prepare_fail,
                (unsigned long long)snap.alloc_fail);
    }
    TinyC7PageStats ps = {
        .prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed),
        .prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed),
        .prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed),
        .current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed),
        .current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed),
    };
    if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
        ps.current_set_from_free || ps.current_dropped_to_partial) {
        fprintf(stderr,
                "[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n",
                (unsigned long long)ps.prepare_calls,
                (unsigned long long)ps.prepare_with_current_null,
                (unsigned long long)ps.prepare_from_partial,
                (unsigned long long)ps.current_set_from_free,
                (unsigned long long)ps.current_dropped_to_partial);
        fflush(stderr);
    }
}

__attribute__((destructor))
static void tiny_front_class_stats_dump(void) {
    if (!tiny_front_class_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed);
        uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed);
        if (a == 0 && f == 0) {
            continue;
        }
        fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n",
                cls, (unsigned long long)a, (unsigned long long)f);
    }
}

__attribute__((destructor))
static void tiny_c7_delta_debug_destructor(void) {
    if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) {
        tiny_c7_heap_debug_dump_deltas();
    }
    if (tiny_heap_meta_light_enabled_for_class(6) && tiny_c6_delta_debug_enabled()) {
        tiny_c6_heap_debug_dump_deltas();
    }
}

// =============================================================================
// TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper.
// NOTE: As of Phase34/35, v2 is slower than v1 even in C7-only runs, and shows
// a large regression on mixed workloads. It is intended only for runs where the
// experimental flag is explicitly turned ON; v1 remains the recommended default.
// =============================================================================
static inline int tiny_hotheap_v2_stats_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_HOTHEAP_V2_STATS");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}
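
// Usage sketch (shell invocation is illustrative): run with
//   HAKMEM_TINY_HOTHEAP_V2_STATS=1 ./your_benchmark
// Any non-empty value other than "0" enables the v2 counters below; the
// destructor tiny_hotheap_v2_stats_dump() then prints them at process exit.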

static _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES] = {0};

typedef struct {
    _Atomic uint64_t prepare_calls;
    _Atomic uint64_t prepare_with_current_null;
    _Atomic uint64_t prepare_from_partial;
    _Atomic uint64_t free_made_current;
    _Atomic uint64_t page_retired;
} TinyHotHeapV2PageStats;

static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES] = {0};

static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
                                             uint8_t class_idx,
                                             tiny_hotheap_page_v2* page);

static inline uint8_t tiny_hotheap_v2_idx(uint8_t class_idx) {
    return (class_idx < TINY_HOTHEAP_MAX_CLASSES) ? class_idx : 0;
}

void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[tiny_hotheap_v2_idx(class_idx)],
                              1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[tiny_hotheap_v2_idx(class_idx)],
                              1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out) {
    if (!out) return;
    memset(out, 0, sizeof(*out));
    uint8_t ci = 7; // snapshot covers C7 only (v2 is a C7-centric wrapper)
    out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
    out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
    out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
    out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
    out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed);
    out->refill_with_current = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
    out->refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
    out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
    out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed);
    out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
    out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
    out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
    out->cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
    out->cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
    out->retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
    out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed);
    out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed);
    out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed);
    out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed);
    out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed);
    out->partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
    out->partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
    out->partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_acquire_page_node(tiny_hotheap_class_v2* hc) {
    if (!hc) return NULL;
    if (hc->storage_page.meta == NULL && hc->storage_page.freelist == NULL &&
        hc->storage_page.capacity == 0) {
        tiny_hotheap_v2_page_reset(&hc->storage_page);
        return &hc->storage_page;
    }
    tiny_hotheap_page_v2* node = (tiny_hotheap_page_v2*)calloc(1, sizeof(tiny_hotheap_page_v2));
    if (!node) {
        return NULL;
    }
    tiny_hotheap_v2_page_reset(node);
    return node;
}
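
// Note: the first page node per class reuses the embedded hc->storage_page
// (detected above by its all-zero state), so the common single-page case needs
// no calloc(); additional nodes are heap-allocated and freed on retire.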

static tiny_hotheap_page_v2* tiny_hotheap_v2_find_page(tiny_hotheap_class_v2* hc,
                                                       uint8_t class_idx,
                                                       void* p,
                                                       TinySlabMeta* meta) {
    if (!hc || !p) return NULL;
    const size_t stride = hc->stride ? hc->stride : tiny_stride_for_class(class_idx);
    tiny_hotheap_page_v2* candidates[3] = {hc->current_page, hc->partial_pages, hc->full_pages};
    for (int i = 0; i < 3; i++) {
        for (tiny_hotheap_page_v2* page = candidates[i]; page; page = page->next) {
            if (meta && page->meta && page->meta != meta) continue;
            if (!page->base || page->capacity == 0) continue;
            uint8_t* base = (uint8_t*)page->base;
            size_t span = stride * (size_t)page->capacity;
            if ((uint8_t*)p >= base && (uint8_t*)p < base + span) {
                return page;
            }
        }
    }
    return NULL;
}

static inline void tiny_hotheap_v2_partial_push(tiny_hotheap_class_v2* hc,
                                                tiny_hotheap_page_v2* page,
                                                uint8_t class_idx,
                                                int stats_on) {
    if (!hc || !page) return;
    page->next = hc->partial_pages;
    hc->partial_pages = page;
    if (hc->partial_count < UINT16_MAX) {
        hc->partial_count++;
    }
    if (stats_on) {
        uint8_t idx = tiny_hotheap_v2_idx(class_idx);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pushes[idx], 1, memory_order_relaxed);
        uint64_t cur = hc->partial_count;
        uint64_t old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        while (cur > old &&
               !atomic_compare_exchange_weak_explicit(&g_tiny_hotheap_v2_partial_peak[idx],
                                                      &old,
                                                      cur,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed)) {
            old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        }
    }
}
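
// The CAS loop above is the standard lock-free "atomic max" idiom: retry until
// either the stored peak is already >= our observed count or we succeed in
// publishing the new maximum. memory_order_relaxed suffices because the peak
// is a debug statistic, not a synchronization point.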

static inline void tiny_hotheap_v2_maybe_trim_partial(tiny_hotheap_ctx_v2* ctx,
                                                      tiny_hotheap_class_v2* hc,
                                                      uint8_t class_idx,
                                                      int stats_on) {
    if (!ctx || !hc) return;
    uint16_t limit = hc->max_partial_pages;
    if (limit == 0) {
        return;
    }
    while (hc->partial_count > limit && hc->partial_pages) {
        tiny_hotheap_page_v2* victim = hc->partial_pages;
        hc->partial_pages = victim->next;
        if (hc->partial_count > 0) {
            hc->partial_count--;
        }
        victim->next = NULL;
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[tiny_hotheap_v2_idx(class_idx)],
                                      1,
                                      memory_order_relaxed);
        }
        tiny_hotheap_v2_page_retire_slow(ctx, class_idx, victim);
    }
}

static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
                                                  uint8_t class_idx,
                                                  uint16_t stride) {
    if (!page || stride == 0) {
        return;
    }
    if (page->used >= page->capacity) {
        page->freelist = NULL;
        return;
    }
    void* head = NULL;
    size_t start = page->capacity;
    while (start > page->used) {
        start--;
        uint8_t* block = (uint8_t*)page->base + (start * (size_t)stride);
        tiny_next_write(class_idx, block, head);
        head = block;
    }
    page->freelist = head;
}
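
// The loop above walks backwards from capacity-1 down to `used`, so the
// resulting singly linked freelist hands out blocks in ascending address
// order, which keeps the first allocations cache-friendly.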

static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* target) {
    if (!hc || !target) return;
    if (hc->current_page == target) {
        hc->current_page = NULL;
    }
    tiny_hotheap_page_v2** lists[2] = {&hc->partial_pages, &hc->full_pages};
    for (int i = 0; i < 2; i++) {
        tiny_hotheap_page_v2** head = lists[i];
        tiny_hotheap_page_v2* prev = NULL;
        tiny_hotheap_page_v2* cur = *head;
        while (cur) {
            if (cur == target) {
                if (prev) {
                    prev->next = cur->next;
                } else {
                    *head = cur->next;
                }
                cur->next = NULL;
                if (i == 0 && hc->partial_count > 0) {
                    hc->partial_count--;
                }
                break;
            }
            prev = cur;
            cur = cur->next;
        }
    }
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx) {
    if (!ctx || class_idx >= TINY_HOTHEAP_MAX_CLASSES) {
        return NULL;
    }
    int stats_on = tiny_hotheap_v2_stats_enabled();
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_refill[class_idx], 1, memory_order_relaxed);
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
    }
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    if (hc->current_page) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_current[class_idx],
                                  1,
                                  memory_order_relaxed);
    }
    if (hc->partial_pages) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_partial[class_idx],
                                  1,
                                  memory_order_relaxed);
    }

    // Lease one page from the cold interface (v1 TinyHeap).
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* ipage = cold.refill_page ? cold.refill_page(cold_ctx, class_idx) : NULL;
    if (!ipage || !ipage->base || ipage->capacity == 0 || ipage->meta == NULL) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_refill_fail[class_idx], 1, memory_order_relaxed);
        return NULL;
    }

    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }

    tiny_hotheap_page_v2* page = tiny_hotheap_v2_acquire_page_node(hc);
    if (!page) {
        return NULL;
    }

    page->lease_page = ipage;
    page->meta = ipage->meta;
    page->ss = ipage->ss;
    page->base = ipage->base;
    page->capacity = ipage->capacity;
    page->slab_idx = ipage->slab_idx;
    page->freelist = NULL;
    page->used = 0;

    const uint16_t stride = hc->stride ? hc->stride : (uint16_t)tiny_stride_for_class(class_idx);
    tiny_hotheap_v2_build_freelist(page, class_idx, stride);

    tiny_hotheap_page_v2* old_cur = hc->current_page;
    hc->current_page = page;
    page->next = NULL;
    if (old_cur && old_cur != page) {
        tiny_hotheap_v2_partial_push(hc, old_cur, class_idx, stats_on);
    }
    tiny_hotheap_v2_maybe_trim_partial(ctx, hc, class_idx, stats_on);
    if (!hc->current_page || !hc->current_page->freelist || hc->current_page->capacity == 0 ||
        hc->current_page->used > hc->current_page->capacity) {
        fprintf(stderr, "[HOTHEAP_V2_REFILL_ASSERT] current_page missing freelist (page=%p freelist=%p cap=%u used=%u)\n",
                (void*)hc->current_page,
                hc->current_page ? hc->current_page->freelist : NULL,
                hc->current_page ? (unsigned)hc->current_page->capacity : 0u,
                hc->current_page ? (unsigned)hc->current_page->used : 0u);
        return NULL;
    }
    return hc->current_page;
}
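
// Lease lifecycle sketch: refill borrows a tiny_heap_page_t from the v1 cold
// interface, wraps it in a tiny_hotheap_page_v2, and serves allocations from
// the wrapper; retire (below) writes used/freelist back into the leased page
// and its TinySlabMeta before handing it back via cold.retire_page().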

static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
                                             uint8_t class_idx,
                                             tiny_hotheap_page_v2* page) {
    if (!ctx || !page) return;
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    tiny_hotheap_v2_unlink_page(hc, page);
    if (page->lease_page) {
        page->lease_page->used = page->used;
        page->lease_page->free_list = page->freelist;
        if (page->lease_page->meta) {
            atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
            atomic_store_explicit(&page->lease_page->meta->used, page->used, memory_order_relaxed);
        }
    }
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    if (cold.retire_page) {
        cold.retire_page(cold_ctx, class_idx, page->lease_page);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_retire_calls[idx], 1, memory_order_relaxed);
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_retire_calls_v2[idx], 1, memory_order_relaxed);
    }
    if (page != &hc->storage_page) {
        free(page);
    } else {
        tiny_hotheap_v2_page_reset(page);
    }
    if (!hc->current_page && hc->partial_pages) {
        hc->current_page = hc->partial_pages;
        hc->partial_pages = hc->partial_pages->next;
        if (hc->partial_count > 0) {
            hc->partial_count--; // keep the soft count in sync when promoting from partial
        }
        if (hc->current_page) {
            hc->current_page->next = NULL;
        }
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].page_retired, 1, memory_order_relaxed);
    }
}

static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_class_v2* hc,
                                            tiny_hotheap_page_v2* page,
                                            uint8_t class_idx,
                                            TinyHeapClassStats* stats,
                                            int stats_on) {
    if (!hc || !page || !page->base || page->capacity == 0) {
        return NULL;
    }
    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }
    const uint16_t stride = hc->stride;
    void* block = NULL;
    if (page->freelist) {
        block = page->freelist;
        void* next = tiny_next_read(class_idx, block);
        page->freelist = next;
    } else if (page->used < page->capacity) {
        block = (void*)((uint8_t*)page->base + ((size_t)page->used * stride));
    } else {
        return NULL;
    }
    page->used++;
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
    }
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fast[tiny_hotheap_v2_idx(class_idx)],
                                  1,
                                  memory_order_relaxed);
    }
    return tiny_region_id_write_header(block, class_idx);
}
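
// Pop order above: freelist first (recycled blocks), then a bump allocation
// from the never-used tail while used < capacity. Returning through
// tiny_region_id_write_header() stamps the class header, so callers receive
// the user pointer, not the raw block base.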

__attribute__((destructor))
static void tiny_hotheap_v2_stats_dump(void) {
    if (!tiny_hotheap_v2_stats_enabled()) {
        return;
    }
    for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) {
        uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
        uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
        uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
        uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
        uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
        uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
        uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
        uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
        uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
        uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
        uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
        uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
        uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
        uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
        uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
        uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);

        TinyHotHeapV2PageStats ps = {
            .prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed),
            .prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed),
            .prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed),
            .free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed),
            .page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed),
        };

        if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
              ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
              ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) {
            continue;
        }

        tiny_route_kind_t route_kind = tiny_route_for_class(ci);
        fprintf(stderr,
                "[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n",
                (unsigned)ci,
                (int)route_kind,
                (unsigned long long)route_hits,
                (unsigned long long)alloc_calls,
                (unsigned long long)alloc_fast,
                (unsigned long long)alloc_lease,
                (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed),
                (unsigned long long)refill_with_cur,
                (unsigned long long)refill_with_partial,
                (unsigned long long)alloc_fb,
                (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed),
                (unsigned long long)cold_refill_fail,
                (unsigned long long)cold_retire_calls,
                (unsigned long long)retire_calls_v2,
                (unsigned long long)free_calls,
                (unsigned long long)free_fast,
                (unsigned long long)free_fb,
                (unsigned long long)ps.prepare_calls,
                (unsigned long long)ps.prepare_with_current_null,
                (unsigned long long)ps.prepare_from_partial,
                (unsigned long long)ps.free_made_current,
                (unsigned long long)ps.page_retired,
                (unsigned long long)partial_pushes,
                (unsigned long long)partial_pops,
                (unsigned long long)partial_peak);
    }
}

tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
    tiny_hotheap_ctx_v2* ctx = g_tiny_hotheap_ctx_v2;
    if (__builtin_expect(ctx == NULL, 0)) {
        ctx = (tiny_hotheap_ctx_v2*)calloc(1, sizeof(tiny_hotheap_ctx_v2));
        if (__builtin_expect(ctx == NULL, 0)) {
            fprintf(stderr, "[TinyHotHeapV2] TLS alloc failed (OOM)\n");
            abort();
        }
        g_tiny_hotheap_ctx_v2 = ctx;
        for (int i = 0; i < TINY_HOTHEAP_MAX_CLASSES; i++) {
            tiny_hotheap_v2_page_reset(&ctx->cls[i].storage_page);
            ctx->cls[i].stride = (uint16_t)tiny_stride_for_class(i);
            ctx->cls[i].max_partial_pages = (i == 7 || i == 6) ? 2 : 0; // C6/C7 keep one or two partial pages
            ctx->cls[i].partial_count = 0;
        }
    }
    return ctx;
}
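
// The context is calloc'd once per thread on first use; no thread-exit
// destructor is registered here, so it lives for the thread's lifetime.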

void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
    int stats_on = tiny_hotheap_v2_stats_enabled();
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_route_hits[idx], 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_calls[idx], 1, memory_order_relaxed);
    }
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return NULL; // only C6/C7 are wired up for now
    }

    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);

    // If current_page looks corrupted, drop it and fall through to the slow path.
    if (v2page && (!v2page->base || v2page->capacity == 0)) {
        vhcls->current_page = NULL;
        v2page = NULL;
    }

    // Hot path: current_page -> partial -> refill
    void* user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }

    // Move an exhausted current_page to the full list if needed.
    if (vhcls && v2page && v2page->used >= v2page->capacity && vhcls->current_page == v2page) {
        vhcls->current_page = NULL;
        v2page->next = vhcls->full_pages;
        vhcls->full_pages = v2page;
    }

    while (vhcls && vhcls->partial_pages) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_calls, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_from_partial, 1, memory_order_relaxed);
            if (vhcls->current_page == NULL) {
                atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_with_current_null, 1, memory_order_relaxed);
            }
        }
        v2page = vhcls->partial_pages;
        vhcls->partial_pages = vhcls->partial_pages->next;
        if (vhcls->partial_count > 0) {
            vhcls->partial_count--;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[idx], 1, memory_order_relaxed);
        }
        v2page->next = NULL;
        vhcls->current_page = v2page;
        user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
        if (user) {
            return user;
        }
        if (v2page->used >= v2page->capacity) {
            v2page->next = vhcls->full_pages;
            vhcls->full_pages = v2page;
            vhcls->current_page = NULL;
        }
    }

    // Lease a page from v1 (C7 SAFE) and wrap it
    tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, class_idx);
    if (!leased) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[idx], 1, memory_order_relaxed);
        }
        size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
        if (class_idx == 7) {
            return tiny_c7_alloc_fast(size); // safety fallback to v1
        }
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
    }
    vhcls->current_page = leased;
    v2page = leased;
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_lease[idx], 1, memory_order_relaxed);
    }

    user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }

    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
    }
    size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
    if (class_idx == 7) {
        return tiny_c7_alloc_fast(size);
    }
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
}

void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return;
    }
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    int stats_on = tiny_hotheap_v2_stats_enabled();
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_calls[idx], 1, memory_order_relaxed);
    }
    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;

    tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, class_idx, p, meta_ptr);
    if (page && page->base && page->capacity > 0) {
        tiny_next_write(class_idx, p, page->freelist);
        page->freelist = p;
        if (page->used > 0) {
            page->used--;
        }
        if (vhcls && vhcls->current_page != page) {
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = vhcls->current_page;
            vhcls->current_page = page;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].free_made_current, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
        }
        if (page->used == 0) {
            // Keep the now-empty page on the partial list for reuse; retire it
            // once the partial list exceeds its limit.
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = NULL;
            if (vhcls && vhcls->current_page == NULL) {
                vhcls->current_page = page;
            } else if (vhcls) {
                tiny_hotheap_v2_partial_push(vhcls, page, class_idx, stats_on);
                tiny_hotheap_v2_maybe_trim_partial(v2ctx, vhcls, class_idx, stats_on);
            }
        }
        return;
    }

    // Fallback: mimic the v1 free path
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[idx], 1, memory_order_relaxed);
    }
    SuperSlab* ss = hak_super_lookup(p);
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int slab_idx = slab_index_for(ss, p);
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
            if (class_idx == 7) {
                tiny_c7_free_fast_with_meta(ss, slab_idx, p);
            } else {
                tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
                tiny_heap_free_class_fast_with_meta(cold_ctx, class_idx, ss, slab_idx, p);
            }
            return;
        }
    }
    if (class_idx == 7) {
        tiny_c7_free_fast(p);
    } else {
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        tiny_heap_free_class_fast(cold_ctx, class_idx, p);
    }
}

#if !HAKMEM_BUILD_RELEASE
// Helper to dump the last TLS SLL push from core/hakmem.c (SEGV handler).
// Must be visible to other TUs (declare extern in hakmem_tiny.h or similar if
// needed; the SEGV handler lives in core/hakmem.c, which can dlsym or weak-link it).
__attribute__((noinline))
void tiny_debug_dump_last_push(int cls) {
    hak_base_ptr_t p = s_tls_sll_last_push[cls];
    void* raw = HAK_BASE_TO_RAW(p);
    fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
    if (raw && (uintptr_t)raw > 4096) {
        unsigned long* vals = (unsigned long*)raw;
        fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
    }
}
#endif

// Forward declarations for static helpers used before definition
struct TinySlab; // forward
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
static void release_slab(struct TinySlab* slab);
static TinySlab* allocate_new_slab(int class_idx);
static void tiny_tls_cache_drain(int class_idx);
static void tiny_apply_mem_diet(void);

// Phase 6.23: SuperSlab allocation forward declarations
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
// Forward decl: used by tiny_spec_pop_path before its definition
#if HAKMEM_TINY_P0_BATCH_REFILL
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
#else
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
// Only a single implementation exists there; declare here for callers.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
int sll_refill_small_from_ss(int class_idx, int max_take);
#else
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
#endif
#endif
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
static void tiny_remote_drain_locked(struct TinySlab* slab);
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
/* moved below TinyTLSSlab definition */

// Step 3d: Forced inlining for readability + performance (306M target)
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx);

// Helpers for SuperSlab active block accounting (atomic, saturating dec)
// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
#include "hakmem_tiny_ss_active_box.inc"

// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)

// Front refill count global config (declare before init.inc uses them)
extern int g_refill_count_global;
extern int g_refill_count_hot;
extern int g_refill_count_mid;
extern int g_refill_count_class[TINY_NUM_CLASSES];

// Step 3d: Forced inlining for slow path (maintain monolithic performance)
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#endif

// ---------------------------------------------------------------------------
// Box: adopt_gate_try (implementation moved from header for robust linkage)
// ---------------------------------------------------------------------------
#include "box/adopt_gate_box.h"
#include "box/super_reg_box.h"
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
extern unsigned long long g_adopt_gate_calls[];
extern unsigned long long g_adopt_gate_success[];
extern unsigned long long g_reg_scan_attempts[];
extern unsigned long long g_reg_scan_hits[];
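
// adopt_gate_try() strategy: first try the fast refill path; if that misses,
// scan up to tiny_reg_scan_max() registered SuperSlabs of this class, using
// nonempty_mask/freelist_mask as a bitmap of slabs worth adopting, and bind
// the first slab that can be safely acquired by this thread.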
SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
    g_adopt_gate_calls[class_idx]++;
    ROUTE_MARK(13);
    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
    if (ss) { g_adopt_gate_success[class_idx]++; return ss; }
    g_reg_scan_attempts[class_idx]++;
    int reg_size = g_super_reg_class_size[class_idx];
    int reg_cap = super_reg_effective_per_class();
    if (reg_cap > 0 && reg_size > reg_cap) {
        reg_size = reg_cap;
    }
    int scan_limit = tiny_reg_scan_max();
    if (scan_limit > reg_size) scan_limit = reg_size;
    uint32_t self_tid = tiny_self_u32();
    // Local helper (mirrors adopt_bind_if_safe) to avoid including the alloc
    // inline header here. Uses the GCC nested-function extension.
    auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (!slab_is_valid(&h)) return 0;
        slab_drain_remote_full(&h);
        if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
            tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
            slab_release(&h);
            return 1;
        }
        slab_release(&h);
        return 0;
    }

    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* cand = super_reg_by_class_at(class_idx, i);
        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
        uint32_t mask = cand->nonempty_mask;
        // Fallback to atomic freelist_mask for cross-thread visibility
        if (mask == 0) {
            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
        }
        if (mask == 0) continue; // No visible freelists in this SS
        int cap = ss_slabs_capacity(cand);
        while (mask) {
            int sidx = __builtin_ctz(mask);
            mask &= (mask - 1); // clear the lowest set bit
            if (sidx >= cap) continue;
            if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
                g_adopt_gate_success[class_idx]++;
                g_reg_scan_hits[class_idx]++;
                ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
                return cand;
            }
        }
    }
    return NULL;
}

// ============================================================================
// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
// ============================================================================
#include "hakmem_tiny_globals_box.inc"

#include "hakmem_tiny_publish_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
static inline hak_base_ptr_t fastcache_pop(int class_idx);
static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);

// ============================================================================
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
// 88 lines (lines 407-494)

// ============================================================================
// Legacy Slow Allocation Path - ARCHIVED
// ============================================================================
// Note: tiny_slow_alloc_fast() and the related legacy slow path implementation
// have been moved to archive/hakmem_tiny_legacy_slow_box.inc and are no
// longer compiled. The current slow path uses the Box-refactored
// hak_tiny_alloc_slow().

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)

// ============================================================================
// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
// ============================================================================
#include "hakmem_tiny_tls_state_box.inc"

#include "hakmem_tiny_intel.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
// ============================================================================
// EXTRACTED: static int get_rss_kb_self(void) {
// EXTRACTED:     FILE* f = fopen("/proc/self/status", "r");
// EXTRACTED:     if (!f) return 0;
// EXTRACTED:     char buf[256];
// EXTRACTED:     int kb = 0;
// EXTRACTED:     while (fgets(buf, sizeof(buf), f)) {
// EXTRACTED:         if (strncmp(buf, "VmRSS:", 6) == 0) {
// EXTRACTED:             char* p = buf;
// EXTRACTED:             while (*p && (*p < '0' || *p > '9')) {
// EXTRACTED:                 p++;
// EXTRACTED:             }
// EXTRACTED:             kb = atoi(p);
// EXTRACTED:             break;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     fclose(f);
// EXTRACTED:     return kb;
// EXTRACTED: }

// Option: on a miss, allocate just one block and return it immediately instead
// of bulk-refilling the magazine.
// Env: HAKMEM_TINY_REFILL_ONE_ON_MISS=1 enables it (default: 0)
int g_refill_one_on_miss = 0;

// Frontend fill target per class (adaptive)
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];

// Adaptive CAS: Active thread counter (for single-threaded optimization)
// Incremented on thread init, decremented on thread shutdown
_Atomic uint32_t g_hakmem_active_threads = 0;

// Per-thread registration flag (TLS variable)
static __thread int g_thread_registered = 0;

// Adaptive CAS: Register current thread (called on first allocation)
// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
__attribute__((always_inline))
inline void hakmem_thread_register(void) {
    if (__builtin_expect(g_thread_registered == 0, 0)) {
        g_thread_registered = 1;
        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}
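
// Example (hypothetical fast-path check, illustrative only): a caller can
// treat the process as effectively single-threaded while the counter is <= 1
// and skip CAS-based paths.
//   if (atomic_load_explicit(&g_hakmem_active_threads, memory_order_relaxed) <= 1) {
//       /* single-threaded: plain loads/stores are safe here */
//   }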

// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};

// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_batch_for_class(int class_idx) {
    int ov = g_ultra_batch_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 64;   // 8B
        case 1: return 96;   // 16B
        case 2: return 96;   // 32B
        case 3: return 224;  // 64B
        case 4: return 96;   // 128B
        case 5: return 64;   // 256B
        case 6: return 64;   // 512B
        default: return 32;  // 1024B and others
    }
}

// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_sll_cap_for_class(int class_idx) {
    int ov = g_ultra_sll_cap_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 256;  // 8B
        case 1: return 384;  // 16B
        case 2: return 384;  // 32B
        case 3: return 768;  // 64B
        case 4: return 256;  // 128B
        default: return 128; // others
    }
}
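
// Worked example: class 3 (64B) defaults to batch=224 and SLL cap=768, so one
// refill moves up to 224 blocks (14 KiB) and the TLS SLL can hold 768 blocks
// (48 KiB) before spilling. Both values can be overridden per class via the
// g_ultra_*_override arrays above.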

enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };

// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
#include "hakmem_tiny_eventq_box.inc"

// Background refill workers and intelligence engine
#include "hakmem_tiny_background.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)

// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
    if (!g_use_superslab) return NULL;
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    if (!meta) return NULL;
    // Try linear (bump) allocation first when the freelist is empty
    if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        // Use the per-slab class_idx to get the stride
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tls->slab_base + ((size_t)meta->used * block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        // Track active blocks in the SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        return block;
    }
    // Do not pop the freelist here (keep magazine/SLL handling consistent)
    return NULL;
}

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)

// Phase 2C-3: TLS operations module (included after helper function definitions)
#include "hakmem_tiny_tls_ops.h"

// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_refill_from_tls_slab() - 101 lines
// Hot path refill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_list_spill_excess() - 97 lines
// Hot path spill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)

// SLL capacity policy: for hot tiny classes (0..3), allow a larger SLL up to
// multiplier * mag_cap; for classes >= 4, keep the current conservative half
// (to limit footprint).
// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
#include "hakmem_tiny_sll_cap_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)

// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)

#include "hakmem_tiny_remote.inc"

// ============================================================================
// Internal Helpers
// ============================================================================

// Step 2: Slab Registry Operations

// Hash function for slab_base (64KB aligned)
// ============================================================================
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
// ============================================================================
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
// EXTRACTED:     return (slab_base >> 16) & SLAB_REGISTRY_MASK;
// EXTRACTED: }

// Register slab in hash table (returns 1 on success, 0 on failure)
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:     // Linear probing (max 8 attempts)
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot found
// EXTRACTED:             entry->slab_base = slab_base;
// EXTRACTED:             atomic_store_explicit(&entry->owner, owner, memory_order_release);
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return 1;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:
// EXTRACTED:     // Registry full (collision limit exceeded)
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:     return 0;
// EXTRACTED: }

// Unregister slab from hash table
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:     // Linear probing search
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == slab_base) {
// EXTRACTED:             // Found - clear entry (atomic store prevents TOCTOU race)
// EXTRACTED:             atomic_store_explicit(&entry->owner, NULL, memory_order_release);
// EXTRACTED:             entry->slab_base = 0;
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot - not found
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: }
|
||
|
||
// Lookup slab by base address (O(1) average)
|
||
|
||
// ============================================================================
|
||
// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_slab_lookup_box.inc"


// Function: move_to_full_list() - 20 lines (lines 1104-1123)
// Move slab to full list

// Function: move_to_free_list() - 20 lines (lines 1126-1145)
// Move slab to free list

// ============================================================================
// Public API
// ============================================================================

// ============================================================================
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
// ============================================================================
#include "hakmem_tiny_init.inc"

// ============================================================================
// 3-Layer Architecture (2025-11-01 Simplification)
// ============================================================================
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
#include "hakmem_tiny_bump.inc.h"

// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
#include "hakmem_tiny_smallmag.inc.h"

// ============================================================================
// Phase 6 Fast Path Option (Metadata Header)
// ============================================================================
// Phase 6-1.6: Metadata Header (recommended)
// - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
// - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
// - Memory: ~6-12% overhead (8 bytes/allocation)
// - Method: Store pool_type + size_class in 8-byte header
// - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
// - Eliminates: Registry lookups, mid_lookup, owner checks
// ============================================================================
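
// A minimal sketch of the 8-byte header scheme described above (hypothetical
// names; the shipped layout is gated behind -DHAKMEM_TINY_PHASE6_METADATA=1):
#if 0
typedef struct {
    uint32_t pool_type;   // Tiny / Mid / L25 / Whale
    uint32_t size_class;  // class index within that pool
} hak_meta_hdr_t;         // exactly 8 bytes, prepended to every block

static inline void* meta_attach_sketch(void* raw, uint32_t pool, uint32_t cls) {
    hak_meta_hdr_t* h = (hak_meta_hdr_t*)raw;
    h->pool_type  = pool;
    h->size_class = cls;
    return (void*)(h + 1);            // user pointer starts after the header
}

static inline hak_meta_hdr_t* meta_of_sketch(void* user) {
    // Free path: one pointer subtraction replaces registry/owner lookups
    return (hak_meta_hdr_t*)user - 1;
}
#endif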

// Forward declarations for Phase 6 alloc/free functions


// ============================================================================
// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
// ============================================================================
#include "hakmem_tiny_phase6_wrappers_box.inc"


// Layer 1-3: Main allocation function (simplified)
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#define HAKMEM_TINY_USE_NEW_3LAYER 0 // default OFF (legacy path)
#endif
#if HAKMEM_TINY_USE_NEW_3LAYER
#include "hakmem_tiny_alloc_new.inc"
#else
// Old 6-7 layer architecture (backup)
#include "hakmem_tiny_alloc.inc"
#endif
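// Build example (assuming the usual CFLAGS plumbing): compile with
//   -DHAKMEM_TINY_USE_NEW_3LAYER=1
// to select the simplified 3-layer allocator; omit it (or set it to 0) to
// keep the legacy multi-layer path above.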

#include "hakmem_tiny_slow.inc"

// Free path implementations
#include "hakmem_tiny_free.inc"

// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
// Allows the runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
int sll_refill_small_from_ss(int class_idx, int max_take);
__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    return sll_refill_small_from_ss(class_idx, max_take);
}
#endif
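
// Illustration of the weak-symbol fallback above: a build that ships a real
// batch refill just defines the strong symbol, and the linker discards the
// weak default (sketch only; my_batch_refill_impl is a hypothetical helper):
#if 0
int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    // Strong definition: overrides the weak fallback at link time
    return my_batch_refill_impl(class_idx, max_take);
}
#endif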

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
// Public trim and cleanup operation for lifecycle management

// Forward decl for internal registry lookup used by ultra safety validation
static TinySlab* registry_lookup(uintptr_t slab_base);

// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)

// Validate a candidate SLL head pointer before trusting it: the pointer must
// lie inside a registered slab that serves the expected size class.
static inline int ultra_validate_sll_head(int class_idx, void* head) {
    // Round down to the slab boundary to find the candidate slab base
    uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
    TinySlab* owner = registry_lookup(base);
    if (!owner) return 0; // not a registered slab
    // Reject pointers outside the slab's address range
    uintptr_t start = (uintptr_t)owner->base;
    if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
    // Finally, the slab must belong to the expected size class
    return (owner->class_idx == class_idx);
}
// Optional: wrapper TLS guard (detect wrapper re-entry via a TLS counter)
#ifndef HAKMEM_WRAPPER_TLS_GUARD
#define HAKMEM_WRAPPER_TLS_GUARD 0
#endif
#if HAKMEM_WRAPPER_TLS_GUARD
extern __thread int g_tls_in_wrapper;
#endif

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
// Static function for draining TLS caches
//
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
// Static function for memory diet mode application
//
// Phase 2D-3: Lifecycle management functions (226 lines total)
#include "hakmem_tiny_lifecycle.inc"

// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
#include "hakmem_tiny_slab_mgmt.inc"

// Tiny Heap v2 stats dump (opt-in)
void tiny_heap_v2_print_stats(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;

    fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
        TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
        fprintf(stderr,
                "C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
                cls,
                mag->top,
                (unsigned long long)st->alloc_calls,
                (unsigned long long)st->mag_hits,
                (unsigned long long)st->refill_calls,
                (unsigned long long)st->refill_blocks,
                (unsigned long long)st->backend_oom);
    }
}

static void tiny_heap_v2_stats_atexit(void) __attribute__((destructor));
static void tiny_heap_v2_stats_atexit(void) {
    tiny_heap_v2_print_stats();
}
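
// Usage sketch (the env var name is inferred from the HAK_ENV_TINY_HEAP_V2_STATS
// cache macro above, so treat it as an assumption): run the process with that
// variable set to a truthy value, and the destructor prints the per-class
// magazine dump to stderr on exit.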

// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};
static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
static void tiny_alloc_1024_diag_atexit(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
    fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
        if (v) {
            fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
        }
    }
    fprintf(stderr, "\n");
}

// TLS SLL pointer diagnostics (optional)
extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
static void tiny_tls_sll_diag_atexit(void) {
#if !HAKMEM_BUILD_RELEASE
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_SLL_DIAG()) return;
    fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
        uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
        if (ih || ip) {
            fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
                    cls, (unsigned long long)ih, (unsigned long long)ip);
        }
    }
#endif
}


// ============================================================================
// Performance Measurement: TLS SLL Statistics Print Function
// ============================================================================
void tls_sll_print_measurements(void) {
    // Check once whether measurement is enabled (getenv result is cached)
    static int g_measure = -1;
    if (g_measure == -1) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        g_measure = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!g_measure) {
        return; // Measurement disabled
    }

    uint64_t pushes = atomic_load_explicit(&g_tls_sll_push_count_global, memory_order_relaxed);
    uint64_t pops = atomic_load_explicit(&g_tls_sll_pop_count_global, memory_order_relaxed);
    uint64_t pop_empty = atomic_load_explicit(&g_tls_sll_pop_empty_count_global, memory_order_relaxed);

    uint64_t total_pop_attempts = pops + pop_empty;
    if (total_pop_attempts == 0 && pushes == 0) {
        fprintf(stderr, "\n========================================\n");
        fprintf(stderr, "TLS SLL Statistics\n");
        fprintf(stderr, "========================================\n");
        fprintf(stderr, "No operations recorded\n");
        fprintf(stderr, "========================================\n\n");
        return;
    }

    double hit_rate = total_pop_attempts > 0 ? (100.0 * pops) / total_pop_attempts : 0.0;
    double empty_rate = total_pop_attempts > 0 ? (100.0 * pop_empty) / total_pop_attempts : 0.0;

    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "TLS SLL Statistics\n");
    fprintf(stderr, "========================================\n");
    fprintf(stderr, "Total Pushes: %llu\n", (unsigned long long)pushes);
    fprintf(stderr, "Total Pops: %llu\n", (unsigned long long)pops);
    fprintf(stderr, "Pop Empty Count: %llu (%.1f%% of pop attempts)\n",
            (unsigned long long)pop_empty, empty_rate);
    fprintf(stderr, "Hit Rate: %.1f%%\n", hit_rate);
    fprintf(stderr, "========================================\n\n");
}
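
// Usage: run with HAKMEM_MEASURE_UNIFIED_CACHE=1 (any non-empty value not
// starting with '0') and call tls_sll_print_measurements() at a point of
// interest; the first call caches the getenv() result, so toggling the
// variable mid-run has no effect. Hit Rate = pops / (pops + pop_empty).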

// ============================================================================
// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
// ============================================================================
#include "hakmem_tiny_ace_guard_box.inc"