// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
// - superslab_refill(): Refill TLS slab via shared pool
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point

#include "box/superslab_expansion_box.h"  // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h"        // Box API: Next pointer read/write
#include "box/tiny_tls_carve_one_block_box.h"  // Box: Shared TLS carve helper
#include "box/c7_meta_used_counter_box.h" // Box: C7 meta->used telemetry
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h"            // Box 3: Geometry & Capacity Calculator"
#include "tiny_debug_api.h"               // Guard/failfast declarations
#include "hakmem_env_cache.h"             // Priority-2: ENV cache (eliminate syscalls)
#if HAKMEM_TINY_SS_TLS_HINT
#include "box/tls_ss_hint_box.h"          // Phase 1: TLS SuperSlab Hint Cache for Headerless mode
#endif

// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================

static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Small hot classes (C0–C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                c7_meta_used_note(cls, C7_META_USED_SRC_FRONT);
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);

    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote",
                                           ss,
                                           slab_idx,
                                           (void*)head,
                                           0xA243u,
                                           self_tid,
                                           0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail",
                                       ss,
                                       slab_idx,
                                       meta,
                                       0xA244u,
                                       self_tid,
                                       0);
            }
            return NULL;
        }
    }

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending",
                                   ss,
                                   slab_idx,
                                   (void*)head_pending,
                                   0xA247u,
                                   tiny_self_u32(),
                                   1);
            return NULL;
        }
    }

    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        // Phase 1: Update TLS hint cache with this SuperSlab (fast free path optimization)
#if HAKMEM_TINY_SS_TLS_HINT
        {
            void* ss_base = (void*)ss;
            size_t ss_size = (size_t)1ULL << ss->lg_size;
            tls_ss_hint_update(ss, ss_base, ss_size);
        }
#endif
        return user;
    }

    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;

            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);

            // Misaligned freelist entry → drop this slab's freelist to force new slab.
            if ((offset % blk) != 0) {
                fprintf(stderr, "[ALLOC_POP_MISALIGN] cls=%u slab=%d offset_mod=%zu blk=%zu base=%p ss=%p\n",
                        meta->class_idx, slab_idx, (size_t)(offset % blk), blk, block, (void*)ss);
                meta->freelist = NULL;
                return NULL;
            }

            if (offset % blk != 0 ||
                offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }

        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }

        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }

        // CRITICAL FIX (Larson double-free): Write header for freelist allocations
        // Problem: Freelist path was returning BASE without writing header
        // Result: Stale headers from previous allocations → double-free on next free
        // Solution: Always write header before returning (same as linear carve path)
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block, meta->class_idx);
#else
            block;
#endif
        // Phase 1: Update TLS hint cache with this SuperSlab (fast free path optimization)
#if HAKMEM_TINY_SS_TLS_HINT
        {
            void* ss_base = (void*)ss;
            size_t ss_size = (size_t)1ULL << ss->lg_size;
            tls_ss_hint_update(ss, ss_base, ss_size);
        }
#endif
        return user;
    }

    return NULL;
}

// ============================================================================
/*
 * Phase 12: Shared SuperSlab Pool based superslab_refill
 *
 * ポリシー:
 *  - superslab_refill(int class_idx) は shared pool を経由して
 *    「class_idx 用の slab を1枚 TLS にバインドする」単一のエントリポイントとする。
 *  - 呼び出し側は、この関数が:
 *      * 成功時: TinyTLSSlab (g_tls_slabs[class_idx]) が有効な ss/meta/slab_base を指す
 *      * 失敗時: NULL を返し、TLS は変更しない or クリーンに巻き戻される
 *    ことだけを前提にすればよい。
 *  - shared_pool_acquire_slab() の戻り値は 0=成功 / 非0=失敗 とみなし、
 *    成功時に (*ss_out, *slab_idx_out) が設定される想定とする。
 *  - superslab_init_slab() / tiny_tls_bind_slab() は再帰的に superslab_refill() を
 *    呼ばない設計前提（自己呼び出し禁止）。ここで安全側に防御チェックを行う。
 */

// TLS Bind Box Logic moved to core/box/ss_tls_bind_box.h
#include "box/ss_tls_bind_box.h"

SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif

    // Bounds check (defensive, should be enforced by callers too)
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return NULL;
    }

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Shared pool API:
    //   0 == success, (*ss_out, *slab_idx_out) に有効値が入る。
    //  !=0 == failure, 出力は未定義とみなす。
    extern int shared_pool_acquire_slab(int class_idx,
                                        SuperSlab** ss_out,
                                        int* slab_idx_out);

    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss || slab_idx < 0) {
        return NULL;
    }

    if (!ss_tls_bind_one(class_idx, tls, ss, slab_idx, tiny_self_u32())) {
        return NULL;
    }

    return ss;
}

// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================

static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;

    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }

        // Drain remote if needed (ownership-checked elsewhere)
        // Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
        if (meta) {
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                HAK_ENV_TINY_ALLOC_REMOTE_RELAX() ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }

    // Fast path: linear carve from current TLS slab
    if (meta && tls->slab_base) {
        TinyTLSCarveOneResult carve = tiny_tls_carve_one_block(tls, class_idx);
        if (carve.block) {
#if !HAKMEM_BUILD_RELEASE
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                const char* tag = (carve.path == TINY_TLS_CARVE_PATH_FREELIST)
                                      ? "freelist_alloc"
                                      : "linear_alloc";
                tiny_remote_track_on_alloc(tls->ss, slab_idx, carve.block, tag, 0);
                tiny_remote_assert_not_remote(tls->ss, slab_idx, carve.block, tag, 0);
            }
#endif

#if HAKMEM_TINY_SS_TLS_HINT
            {
                void* ss_base = (void*)tls->ss;
                size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
                tls_ss_hint_update(tls->ss, ss_base, ss_size);
            }
#endif
            if (carve.path == TINY_TLS_CARVE_PATH_LINEAR) {
                ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
            } else if (carve.path == TINY_TLS_CARVE_PATH_FREELIST) {
                ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
            }
            HAK_RET_ALLOC(class_idx, carve.block);
        }
    }

    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }

    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }

    return NULL;
}