hakmem/core/hakmem_tiny_refill_p0.inc.h

#ifndef HAKMEM_TINY_REFILL_P0_INC_H
#define HAKMEM_TINY_REFILL_P0_INC_H

#include <stdio.h>
#include <stdatomic.h>

// hakmem_tiny_refill_p0.inc.h
// P0: Batch refill implementation (sll_refill_batch_from_ss only).
// Phase 12: DO NOT alias or redefine sll_refill_small_from_ss here.
// NOTE: This file is active only when HAKMEM_TINY_P0_BATCH_REFILL=1.

#if HAKMEM_TINY_P0_BATCH_REFILL

#include "hakmem_tiny_integrity.h"
#include "tiny_box_geometry.h"      // Box 3: Geometry & Capacity Calculator
#include "tiny_refill_opt.h"
#include "tiny_fc_api.h"
#include "superslab/superslab_inline.h"  // For _ss_remote_drain_to_freelist_unsafe()
#include "box/integrity_box.h"      // Box I: Integrity verification (Priority ALPHA)
#include "box/tiny_next_ptr_box.h"  // Box API: Next pointer read/write

// Debug counters (compile-time gated)
#if HAKMEM_DEBUG_COUNTERS
extern unsigned long long g_rf_hit_slab[];
extern unsigned long long g_rf_early_no_ss[];
extern unsigned long long g_rf_early_no_meta[];
extern unsigned long long g_rf_early_no_room[];
extern unsigned long long g_rf_early_want_zero[];
#endif

// Optional P0 diagnostic logging helper
static inline int p0_should_log(void) {
    static int en = -1;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_P0_LOG");
        en = (e && *e && *e != '0') ? 1 : 0;
    }
    return en;
}

// P0 batch refill entry point
static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
    // Phase E1-CORRECT: C7 now has headers, can use P0 batch refill

    // Runtime A/B kill switch (defensive). Set HAKMEM_TINY_P0_DISABLE=1 to bypass P0 path.
    do {
        static int g_p0_disable = -1;
        if (__builtin_expect(g_p0_disable == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_P0_DISABLE");
            g_p0_disable = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_p0_disable, 0)) {
            return 0;
        }
    } while (0);

    HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_batch_from_ss");
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        static _Atomic int g_p0_class_oob_log = 0;
        if (atomic_fetch_add_explicit(&g_p0_class_oob_log, 1, memory_order_relaxed) == 0) {
            fprintf(stderr, "[P0_CLASS_OOB] class_idx=%d max_take=%d\n", class_idx, max_take);
        }
        return 0;
    }

    if (!g_use_superslab || max_take <= 0) {
#if HAKMEM_DEBUG_COUNTERS
        if (!g_use_superslab) g_rf_early_no_ss[class_idx]++;
#endif
        return 0;
    }

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Phase 3c L1D Opt: Prefetch SuperSlab hot fields early
    if (tls->ss) {
        __builtin_prefetch(&tls->ss->slab_bitmap, 0, 3);
        __builtin_prefetch(&tls->ss->total_active_blocks, 0, 3);
    }

    uint32_t active_before = 0;
    if (tls->ss) {
        active_before = atomic_load_explicit(&tls->ss->total_active_blocks, memory_order_relaxed);
    }

    if (!tls->ss) {
        if (!superslab_refill(class_idx)) {
            return 0;
        }
    }

    TinySlabMeta* meta = tls->meta;
    if (!meta) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_no_meta[class_idx]++;
#endif
        return 0;
    }

    // Phase 3c L1D Opt: Prefetch SlabMeta hot fields (freelist, used, capacity)
    __builtin_prefetch(&meta->freelist, 0, 3);

#if HAKMEM_INTEGRITY_LEVEL >= 4
    uint8_t* initial_slab_base =
        tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
    SlabMetadataState meta_initial =
        integrity_capture_slab_metadata(meta, initial_slab_base, class_idx);
    INTEGRITY_CHECK_SLAB_METADATA(meta_initial, "P0 refill entry");
#endif

    // Optional: Direct-FC fast path（全クラス対応 A/B）。
    // Env:
    //  - HAKMEM_TINY_P0_DIRECT_FC=1    → C5優先（互換）
    //  - HAKMEM_TINY_P0_DIRECT_FC_C7=1 → C7のみ（互換）
    //  - HAKMEM_TINY_P0_DIRECT_FC_ALL=1 → 全クラス（推奨、Phase 1 目標）
    do {
        static int g_direct_fc = -1;
        static int g_direct_fc_c7 = -1;
        static int g_direct_fc_all = -1;
        if (__builtin_expect(g_direct_fc == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_P0_DIRECT_FC");
            g_direct_fc = (e && *e && *e == '0') ? 0 : 1;
        }
        if (__builtin_expect(g_direct_fc_c7 == -1, 0)) {
            const char* e7 = getenv("HAKMEM_TINY_P0_DIRECT_FC_C7");
            g_direct_fc_c7 = (e7 && *e7) ? ((*e7 == '0') ? 0 : 1) : 0;
        }
        if (__builtin_expect(g_direct_fc_all == -1, 0)) {
            const char* ea = getenv("HAKMEM_TINY_P0_DIRECT_FC_ALL");
            g_direct_fc_all = (ea && *ea && *ea != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_direct_fc_all ||
                             (g_direct_fc && class_idx == 5) ||
                             (g_direct_fc_c7 && class_idx == 7), 0)) {
            int room = tiny_fc_room(class_idx);
            if (room <= 0) return 0;

            uint32_t rmt = atomic_load_explicit(
                &tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
            static int g_drain_th = -1;
            if (__builtin_expect(g_drain_th == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_P0_DRAIN_THRESH");
                int v = (e && *e) ? atoi(e) : 64;
                g_drain_th = (v < 0) ? 0 : v;
            }
            if (rmt >= (uint32_t)g_drain_th) {
                static int no_drain = -1;
                if (__builtin_expect(no_drain == -1, 0)) {
                    const char* e = getenv("HAKMEM_TINY_P0_NO_DRAIN");
                    no_drain = (e && *e && *e != '0') ? 1 : 0;
                }
                if (!no_drain) {
                    _ss_remote_drain_to_freelist_unsafe(
                        tls->ss, tls->slab_idx, tls->meta);
                }
            }

            void* out[128];
            int produced = 0;
            TinySlabMeta* m = tls->meta;
            size_t bs = tiny_stride_for_class(class_idx);
            uint8_t* base = tls->slab_base
                                ? tls->slab_base
                                : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            while (produced < room) {
                if (m->freelist) {
                    void* p = m->freelist;
                    m->freelist = tiny_next_read(class_idx, p);
                    m->used++;
                    out[produced++] = p;
                } else if (m->carved < m->capacity) {
                    void* p = (void*)(base + ((size_t)m->carved * bs));
                    m->carved++;
                    m->used++;
                    out[produced++] = p;
                } else {
                    if (!superslab_refill(class_idx)) break;
                    tls = &g_tls_slabs[class_idx];
                    m = tls->meta;
                    base = tls->slab_base
                               ? tls->slab_base
                               : tiny_slab_base_for(tls->ss, tls->slab_idx);
                }
            }
            if (produced > 0) {
                ss_active_add(tls->ss, (uint32_t)produced);
                (void)tiny_fc_push_bulk(class_idx, out, produced);
                return produced;
            }
            // fallthrough to regular path
        }
    } while (0);

    uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
    int room = (int)sll_cap - (int)g_tls_sll[class_idx].count;
    if (room <= 0) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_no_room[class_idx]++;
#endif
        return 0;
    }

    uint32_t want = (uint32_t)max_take;
    if (want > (uint32_t)room) want = (uint32_t)room;
    if (want == 0) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_want_zero[class_idx]++;
#endif
        return 0;
    }

    size_t bs = tiny_stride_for_class(class_idx);
    int total_taken = 0;

    while (want > 0) {
        uintptr_t ss_base = 0;
        uintptr_t ss_limit = 0;
        if (tls->ss && tls->slab_idx >= 0) {
            uint8_t* slab_base =
                tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            ss_base = (uintptr_t)slab_base;
            ss_limit = ss_base + tiny_usable_bytes_for_slab(tls->slab_idx);
        }

        if (tls->ss && tls->slab_idx >= 0) {
            uint32_t remote_count = atomic_load_explicit(
                &tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
            if (remote_count > 0) {
                static int no_drain = -1;
                if (__builtin_expect(no_drain == -1, 0)) {
                    const char* e = getenv("HAKMEM_TINY_P0_NO_DRAIN");
                    no_drain = (e && *e && *e != '0') ? 1 : 0;
                }
                if (!no_drain) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, tls->slab_idx, meta);
                }
            }
        }

        TinyRefillChain chain;
        uint32_t from_freelist = trc_pop_from_freelist(
            meta, class_idx, ss_base, ss_limit, bs, want, &chain);
        if (from_freelist > 0) {
            trc_splice_to_sll(
                class_idx, &chain,
                &g_tls_sll[class_idx].head,
                &g_tls_sll[class_idx].count);
            ss_active_add(tls->ss, from_freelist);
            meta->used = (uint16_t)((uint32_t)meta->used + from_freelist);

            // Phase 3c L1D Opt: Prefetch next freelist entry after refill
            if (meta->freelist) {
                __builtin_prefetch(meta->freelist, 0, 3);
            }

#if HAKMEM_DEBUG_COUNTERS
            extern unsigned long long g_rf_freelist_items[];
            g_rf_freelist_items[class_idx] += from_freelist;
#endif
            total_taken += from_freelist;
            want -= from_freelist;
            if (want == 0) break;
        }

        if (meta->carved >= meta->capacity) {
            if (!superslab_refill(class_idx)) break;
            tls = &g_tls_slabs[class_idx];
            meta = tls->meta;
            if (!meta) break;
            continue;
        }

        // CRITICAL FIX: Validate geometry before carving to prevent stride mismatch
        // (e.g., C7 upgrade from 1024B to 2048B stride)
        // This ensures ALL blocks entering TLS SLL have correct alignment.
        {
            size_t expected_stride = tiny_block_stride_for_class(class_idx);
            size_t usable = (tls->slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE
                                                  : SUPERSLAB_SLAB_USABLE_SIZE;
            uint16_t expected_cap = (uint16_t)(usable / expected_stride);

            if (meta->capacity != expected_cap) {
                // Stale geometry detected - FULL RESET to prevent misaligned carve
                extern __thread int g_hakmem_lock_depth;
                g_hakmem_lock_depth++;
                fprintf(stderr,
                        "[CARVE_GEOMETRY_FIX] cls=%d ss=%p slab=%d: capacity %u→%u (stride=%zu) RESET carved=%u\n",
                        class_idx, (void*)tls->ss, tls->slab_idx,
                        meta->capacity, expected_cap, expected_stride, meta->carved);
                g_hakmem_lock_depth--;

                // Reinitialize with correct stride (resets carved=0, freelist=NULL)
                superslab_init_slab(tls->ss, tls->slab_idx, expected_stride, 0);
                meta->class_idx = (uint8_t)class_idx;
                meta = tls->meta = &tls->ss->slabs[tls->slab_idx];  // Reload after reinit
            }
        }

        uint32_t available = meta->capacity - meta->carved;
        uint32_t batch = want;
        if (batch > available) batch = available;
        if (batch == 0) break;

        uint8_t* slab_base = tls->slab_base
                                 ? tls->slab_base
                                 : tiny_slab_base_for(tls->ss, tls->slab_idx);

        TinyRefillChain carve;
        trc_linear_carve(slab_base, bs, meta, batch, class_idx, &carve);
        trc_splice_to_sll(
            class_idx, &carve,
            &g_tls_sll[class_idx].head,
            &g_tls_sll[class_idx].count);
        ss_active_add(tls->ss, batch);
#if HAKMEM_DEBUG_COUNTERS
        extern unsigned long long g_rf_carve_items[];
        g_rf_carve_items[class_idx] += batch;
#endif
        total_taken += batch;
        want -= batch;
    }

#if HAKMEM_DEBUG_COUNTERS
    g_rf_hit_slab[class_idx]++;
#endif

    if (tls->ss && p0_should_log()) {
        uint32_t active_after = atomic_load_explicit(
            &tls->ss->total_active_blocks, memory_order_relaxed);
        int32_t delta =
            (int32_t)active_after - (int32_t)active_before;
        fprintf(stderr,
                "[P0_COUNTER] cls=%d slab=%d taken=%d active_delta=%d\n",
                class_idx, tls->slab_idx, total_taken, delta);
    }

    return total_taken;
}

#endif // HAKMEM_TINY_P0_BATCH_REFILL
#endif // HAKMEM_TINY_REFILL_P0_INC_H