// hakmem/core/hakmem_tiny_refill.inc.h

// hakmem_tiny_refill.inc.h
// Phase 12: Minimal refill helpers needed by Box fast path.
//
// This header provides:
// - superslab_tls_bump_fast: TLS bump window carved from TinyTLSSlab + SuperSlab metadata
// - tiny_fast_refill_and_take: minimal refill from FastCache / TLS SLL, plus taking one block
// - bulk_mag_to_sll_if_room: bulk transfer Magazine -> SLL (with capacity check)
// - sll_refill_small_from_ss: minimal implementation for the Phase 12 shared SuperSlab pool
//
// None of the old g_sll_cap_override / getenv-based multi-path logic is included.

#ifndef HAKMEM_TINY_REFILL_INC_H
#define HAKMEM_TINY_REFILL_INC_H

#include "hakmem_tiny.h"
#include "hakmem_tiny_superslab.h"
#include "hakmem_tiny_tls_list.h"
#include "tiny_box_geometry.h"
#include "superslab/superslab_inline.h"
#include "box/tls_sll_box.h"
#include "hakmem_tiny_integrity.h"
#include "box/tiny_next_ptr_box.h"
#include "tiny_region_id.h"   // For HEADER_MAGIC/HEADER_CLASS_MASK (prepare header before SLL push)
#include <stdint.h>
#include <stdatomic.h>

// ========= Externs from hakmem_tiny.c and friends =========

// Master switch: superslab_tls_bump_fast() and sll_refill_small_from_ss()
// return early when this is zero.
extern int g_use_superslab;
// Per-thread, per-class slab state; the code in this header reads its
// ss / meta / slab_base / slab_idx fields.
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];

// Gates the fastcache_pop() attempt in tiny_fast_refill_and_take().
extern int g_fastcache_enable;
// Per-class fast-cache capacity; tiny_fast_refill_and_take() gives up when it is 0.
extern uint16_t g_fast_cap[TINY_NUM_CLASSES];
// Per-thread, per-class fast cache (items[] / top are used in this header).
extern __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];

// Gates every TLS SLL pop/push loop in this header.
extern int g_tls_sll_enable;
// Per-thread, per-class SLL state; only .count is read directly here.
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];

// Not referenced by the functions in this header.
extern _Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];

// Bump-window controls used by superslab_tls_bump_fast():
//  - g_ultra_bump_shadow: feature switch
//  - g_bump_chunk: number of blocks to reserve per window (values <= 0 are treated as 1)
//  - g_tls_bcur / g_tls_bend: per-thread [current, end) pointers of the window
extern int g_ultra_bump_shadow;
extern int g_bump_chunk;
extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES];
extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES];

#if HAKMEM_DEBUG_COUNTERS
// Debug-only counters. g_bump_hits / g_bump_arms are bumped in
// superslab_tls_bump_fast(), g_path_refill_calls in bulk_mag_to_sll_if_room().
// g_ultra_refill_calls and g_path_debug_enabled are not referenced in this header.
extern uint64_t g_bump_hits[TINY_NUM_CLASSES];
extern uint64_t g_bump_arms[TINY_NUM_CLASSES];
extern uint64_t g_path_refill_calls[TINY_NUM_CLASSES];
extern uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES];
extern int g_path_debug_enabled;
#endif

// ========= From other units =========

// Used by sll_refill_small_from_ss(), which treats a NULL (falsy) return as
// failure and re-reads g_tls_slabs[class_idx] after a successful call.
SuperSlab* superslab_refill(int class_idx);

// Active-block accounting on a SuperSlab: one block / n blocks at a time.
void ss_active_inc(SuperSlab* ss);
void ss_active_add(SuperSlab* ss, uint32_t n);

// Geometry helpers: the per-class stride is used here as the distance between
// consecutive blocks, and the slab base as the address of block index 0.
size_t tiny_stride_for_class(int class_idx);
uint8_t* tiny_slab_base_for_geometry(SuperSlab* ss, int slab_idx);

// SLL capacity for a class; the second argument is a magazine capacity.
extern uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);

/* The ultra_* family is defined in hakmem_tiny.c, so it is not declared here. */
/* tls_sll_push is already provided by box/tls_sll_box.h as static inline bool tls_sll_push(...). */
/* tiny_small_mags_init_once / tiny_mag_init_if_needed are already declared in hakmem_tiny_magazine.h, so they are not redeclared here. */
/* tiny_fast_pop / tiny_fast_push / fastcache_* are static inline in hakmem_tiny_fastcache.inc.h, so leaving them undeclared here is OK. */

// Debug-only sanity check applied to a node before it is stored in a TLS cache.
// In non-release builds a node address below 4096 is reported on stderr
// (tagged with `where` and the class index) and the process is aborted;
// in release builds the call does nothing.
static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where)
{
#if !HAKMEM_BUILD_RELEASE
    // Minimal defence: only implausibly small addresses are rejected.
    if ((uintptr_t)node >= 4096) {
        return;
    }
    fprintf(stderr,
            "[TINY_REFILL_GUARD] %s: suspicious node=%p cls=%d\n",
            where, node, class_idx);
    abort();
#else
    (void)class_idx;
    (void)node;
    (void)where;
#endif
}

// ========= superslab_tls_bump_fast =========
//
// Ultra bump shadow: when the current slab has an empty freelist and
// carved < capacity, reserve a run of consecutive blocks in one go and keep it
// as a per-thread [g_tls_bcur, g_tls_bend) window for this class.
// Called from tiny_hot_pop_class{0..3}.
//
// Returns a raw block pointer, or NULL when the feature is disabled or no
// window can be served or armed. No header is written here: that is assumed to
// be done by the caller or to be already accounted for in the stride.

static inline void* superslab_tls_bump_fast(int class_idx) {
    if (!(g_ultra_bump_shadow && g_use_superslab)) {
        return NULL;
    }

    // Serve from the already-armed window while it still holds a whole block.
    uint8_t* win_cur = g_tls_bcur[class_idx];
    if (win_cur != NULL) {
        uint8_t* win_end = g_tls_bend[class_idx];
        size_t step = tiny_stride_for_class(class_idx);
        uint8_t* win_next = win_cur + step;
        if (win_next <= win_end) {
            g_tls_bcur[class_idx] = win_next;
#if HAKMEM_DEBUG_COUNTERS
            g_bump_hits[class_idx] += 1;
#endif
            return win_cur;
        }
        // Window exhausted: drop it and try to arm a fresh one below.
        g_tls_bcur[class_idx] = NULL;
        g_tls_bend[class_idx] = NULL;
    }

    // Arming requires a bound slab whose freelist is empty.
    TinyTLSSlab* slab = &g_tls_slabs[class_idx];
    TinySlabMeta* slab_meta = slab->meta;
    if (slab->ss == NULL || slab_meta == NULL || slab_meta->freelist != NULL) {
        return NULL;
    }

    uint16_t first_idx = slab_meta->carved;
    uint16_t limit = slab_meta->capacity;
    if (first_idx >= limit) {
        return NULL;
    }

    // Reserve min(g_bump_chunk, remaining) blocks; a non-positive g_bump_chunk means 1.
    uint32_t remaining = (uint32_t)limit - (uint32_t)first_idx;
    uint32_t count = 1u;
    if (g_bump_chunk > 0) {
        count = (uint32_t)g_bump_chunk;
    }
    if (count > remaining) {
        count = remaining;
    }

    size_t step = tiny_stride_for_class(class_idx);
    uint8_t* base = slab->slab_base;
    if (base == NULL) {
        base = tiny_slab_base_for_geometry(slab->ss, slab->slab_idx);
    }
    uint8_t* first = base + (size_t)first_idx * step;

    // The whole run is accounted as carved and used up front.
    slab_meta->carved = (uint16_t)(first_idx + (uint16_t)count);
    slab_meta->used = (uint16_t)(slab_meta->used + (uint16_t)count);
    ss_active_add(slab->ss, count);
#if HAKMEM_DEBUG_COUNTERS
    g_bump_arms[class_idx] += 1;
#endif

    // Hand out the first block now; the remainder becomes the TLS window.
    g_tls_bcur[class_idx] = first + step;
    g_tls_bend[class_idx] = first + (size_t)count * step;
    return first;
}

// ========= tiny_fast_refill_and_take =========
//
// Used when the fast cache is empty: tries the cheap pops first, then moves
// nodes from the TLS SLL into g_fast_cache[class_idx] and returns one of them.
// When nothing could be moved it falls back to superslab_tls_bump_fast().
// The older, more complex paths were removed; only FC / SLL logic remains.
//
// `tls` is not used by this implementation.
// Returns a block pointer, or NULL when every source is empty or the fast
// cache has no capacity / no free slot.

static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) {
    // 1) Straight from the front FastCache (classes 0..3 only).
    if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) {
        extern unsigned long long g_front_fc_hit[TINY_NUM_CLASSES];
        void* hit = fastcache_pop(class_idx);
        if (hit != NULL) {
            g_front_fc_hit[class_idx] += 1;
            return hit;
        }
    }

    // 2) Local fast list.
    void* quick = tiny_fast_pop(class_idx);
    if (quick != NULL) {
        return quick;
    }

    // A refill needs a non-zero capacity and at least one free slot.
    uint16_t limit = g_fast_cap[class_idx];
    if (limit == 0) {
        return NULL;
    }
    TinyFastCache* cache = &g_fast_cache[class_idx];
    int space = (int)limit - cache->top;
    if (space <= 0) {
        return NULL;
    }

    // 3) Move nodes from the TLS SLL into the fast cache.
    int moved = 0;
    for (; space > 0 && g_tls_sll_enable; space--, moved++) {
        void* node = NULL;
        if (!tls_sll_pop(class_idx, &node)) {
            break;
        }
        tiny_debug_validate_node_base(class_idx, node, "tiny_fast_refill_and_take");
        cache->items[cache->top] = node;
        cache->top++;
    }

    // 4) Nothing moved: fall back to the (optional) superslab bump window.
    if (moved == 0) {
        return superslab_tls_bump_fast(class_idx);
    }

    // 5) Return the most recently cached node.
    cache->top--;
    return cache->items[cache->top];
}

// ========= bulk_mag_to_sll_if_room =========
//
// Moves up to `n` entries from the top of magazine `mag` onto the TLS SLL of
// `class_idx`, never exceeding the SLL capacity reported by sll_cap_for_class()
// nor the number of entries the magazine holds.
// Referenced from tiny_free_magazine.inc.h.
//
// Returns the number of entries actually moved (0 when the SLL is disabled,
// `n` is not positive, or there is no room).

static inline int bulk_mag_to_sll_if_room(int class_idx, TinyTLSMag* mag, int n) {
    if (n <= 0 || !g_tls_sll_enable) {
        return 0;
    }

    uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
    uint32_t sll_len = g_tls_sll[class_idx].count;
    if (sll_len >= sll_cap) {
        return 0;
    }

    // Quota = min(n, free SLL slots), then clamped to what the magazine holds.
    int room = (int)(sll_cap - sll_len);
    int quota = room;
    if (n < room) {
        quota = n;
    }
    if (quota <= 0) {
        return 0;
    }
    if (quota > mag->top) {
        quota = mag->top;
    }
    if (quota <= 0) {
        return 0;
    }

    int moved = 0;
    while (moved < quota) {
        mag->top--;
        void* node = mag->items[mag->top].ptr;
        if (!tls_sll_push(class_idx, node, sll_cap)) {
            mag->top++; // push refused: give the slot back to the magazine
            break;
        }
        moved++;
    }
#if HAKMEM_DEBUG_COUNTERS
    if (moved > 0) {
        g_path_refill_calls[class_idx]++;
    }
#endif
    return moved;
}

/*
 * ========= Minimal Phase 12 sll_refill_small_from_ss =========
 *
 * Box design:
 *  - Front end (tiny_fast_refill etc.):
 *      - TLS SLL: uses only the tls_sll_box.h API
 *      - Superslab: uses this function as the sole "small-size SLL refill Box"
 *  - Back end:
 *      - At the current stage (Stage A/B) the existing TLS Superslab /
 *        TinySlabMeta are accessed directly
 *      - Superslab internals are confined to this function so that it can be
 *        swapped for shared_pool_acquire_slab() later (Stage C)
 *
 * Contract:
 *  - Tiny classes only (0 <= class_idx < TINY_NUM_CLASSES)
 *  - max_take is the maximum number of nodes to push onto the SLL in this call
 *  - The return value is the number of nodes actually pushed onto the SLL (>= 0)
 *  - Callers must not touch head/count/meta etc.; they use only the Box API (tls_sll_box)
 *
 * Node sources, tried in this order on every loop iteration:
 *  1. the slab freelist, 2. linear carving (carved < capacity),
 *  3. a fresh slab obtained through superslab_refill().
 */

__attribute__((noinline))
int sll_refill_small_from_ss(int class_idx, int max_take)
{
    // Hard defensive gate: Tiny classes only, never trust caller.
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return 0;
    }

    HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
    atomic_fetch_add(&g_integrity_check_class_bounds, 1);

    // Phase 12: never run while the shared pool / superslab is not enabled yet
    // (e.g. right after startup), or when the caller asked for nothing.
    if (!g_use_superslab || max_take <= 0) {
        return 0;
    }

    // When the TLS slab is completely unconfigured (ss/meta/slab_base all NULL), leave it alone.
    // superslab_refill is only called once it is really needed.
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    // NOTE(review): tls is the address of an array element, so this branch cannot be taken.
    if (!tls) {
        return 0;
    }

    bool tls_uninitialized =
        (tls->ss == NULL) &&
        (tls->meta == NULL) &&
        (tls->slab_base == NULL);

    if (tls_uninitialized) {
        // First use: the higher-level caller is expected to call superslab_refill; do nothing here.
        return 0;
    }

    // Ensure we have a valid TLS slab for this class via shared pool.
    // superslab_refill() contract:
    //  - success: ss/meta/slab_base/slab_idx in g_tls_slabs[class_idx] are set consistently
    //  - failure: TLS is left unchanged or rolled back, and NULL is returned
    if (!tls->ss || !tls->meta ||
        tls->meta->class_idx != (uint8_t)class_idx ||
        !tls->slab_base) {
        if (!superslab_refill(class_idx)) {
            return 0;
        }
        // Re-validate after the refill instead of trusting the contract blindly.
        tls = &g_tls_slabs[class_idx];
        if (!tls->ss || !tls->meta ||
            tls->meta->class_idx != (uint8_t)class_idx ||
            !tls->slab_base) {
            return 0;
        }
    }

    TinySlabMeta* meta = tls->meta;
    // Meta invariants: class and capacity must be sane.
    if (!meta ||
        meta->class_idx != (uint8_t)class_idx ||
        meta->capacity == 0) {
        return 0;
    }

    // Room left on the SLL bounds how many nodes may be pushed.
    const uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
    const uint32_t cur = g_tls_sll[class_idx].count;
    if (cur >= cap) {
        return 0;
    }

    // target = min(max_take, free SLL slots)
    int room = (int)(cap - cur);
    int target = (max_take < room) ? max_take : room;
    if (target <= 0) {
        return 0;
    }

    int taken = 0;
    const size_t stride = tiny_stride_for_class(class_idx);

    while (taken < target) {
        void* p = NULL;

        // Freelist first.
        if (meta->freelist) {
            p = meta->freelist;
            meta->freelist = tiny_next_read(class_idx, p);
            meta->used++;
            if (__builtin_expect(meta->used > meta->capacity, 0)) {
                // Anomaly: clamp used back to capacity and stop quietly (deliberately not fail-fast).
                // NOTE(review): the node popped from the freelist above is not put back.
                meta->used = meta->capacity;
                break;
            }
            ss_active_inc(tls->ss);
        }
        // Freelist is empty and carved < capacity: carve the next block linearly.
        else if (meta->carved < meta->capacity) {
            uint8_t* base = tls->slab_base
                ? tls->slab_base
                : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            if (!base) {
                break;
            }
            uint16_t idx = meta->carved;
            // Repeats the enclosing else-if condition as a defensive re-check.
            if (idx >= meta->capacity) {
                break;
            }
            uint8_t* addr = base + ((size_t)idx * stride);
            meta->carved++;
            meta->used++;
            if (__builtin_expect(meta->used > meta->capacity, 0)) {
                // Same anomaly handling as the freelist path.
                // NOTE(review): carved has already been advanced and is not restored.
                meta->used = meta->capacity;
                break;
            }
            ss_active_inc(tls->ss);
            p = addr;
        }
        // Both freelist and carve space are exhausted: get a new slab from the shared pool.
        else {
            if (!superslab_refill(class_idx)) {
                break;
            }
            // Rebind to the refreshed TLS state and validate it before retrying.
            tls = &g_tls_slabs[class_idx];
            meta = tls->meta;
            if (!tls->ss || !meta ||
                meta->class_idx != (uint8_t)class_idx ||
                !tls->slab_base ||
                meta->capacity == 0) {
                break;
            }
            continue;
        }

        if (!p) {
            break;
        }

        tiny_debug_validate_node_base(class_idx, p, "sll_refill_small_from_ss");

        // Prepare header for header-classes so that safeheader mode accepts the push
#if HAKMEM_TINY_HEADER_CLASSIDX
        // Classes 0 and 7 are skipped; every other class gets its first byte stamped.
        if (class_idx != 0 && class_idx != 7) {
            *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
        }
#endif
        // Stop pushing as soon as the SLL refuses a node (the original note says
        // p may simply be dropped because it is still under TLS slab management).
        // NOTE(review): p has already been counted in meta->used and via
        // ss_active_inc(), and nothing here undoes that — confirm dropping is safe.
        if (!tls_sll_push(class_idx, p, cap)) {
            break;
        }

        taken++;
    }

    return taken;
}

#endif // HAKMEM_TINY_REFILL_INC_H