hakmem/core/tiny_ready.h

// tiny_ready.h - Ready List box (per-class, slab-entry hints)
// Purpose: O(1)-ish adopt candidate discovery to bypass deep scans in refill.
// Design: Lock-free ring of encoded slab entries (ss+slab_idx). Best-effort hints.
// Boundary:
//   - Producer: push at the publish boundary (ss_partial_publish), on the first remote
//               arrival, or on first-free (prev==NULL)
//   - Consumer: at the refill boundary (start of tiny_refill_try_fast): pop -> acquire owner -> bind
// A/B: set ENV HAKMEM_TINY_READY=0 to disable

#pragma once
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include "hakmem_tiny.h"

#ifndef TINY_READY_RING
#define TINY_READY_RING 128
#endif

// Per-class ring buffer of encoded slab entries.
// A slot value of 0 means "empty": tiny_ready_push CASes a slot from 0 to the
// encoded entry, and tiny_ready_pop exchanges the slot back to 0.
// NOTE(review): these are `static` definitions in a header, so every translation
// unit that includes this file gets its own private copy — confirm the header is
// only meant to be included from a single TU.
static _Atomic(uintptr_t) g_ready_ring[TINY_NUM_CLASSES][TINY_READY_RING];
// Per-class rotating counter; tiny_ready_push increments it to choose the first
// slot it tries.
static _Atomic(uint32_t) g_ready_rr[TINY_NUM_CLASSES];

// Reports whether the ready list is active.
// The answer is read once from ENV HAKMEM_TINY_READY and cached in a
// function-local static: a value whose first character is '0' disables the
// feature; anything else (including an unset variable) leaves it enabled.
// Returns 1 when enabled, 0 when disabled.
static inline int tiny_ready_enabled(void) {
    static int cached_state = -1;  // -1 = environment not consulted yet
    if (__builtin_expect(cached_state != -1, 1)) {
        return cached_state;
    }
    const char* env_val = getenv("HAKMEM_TINY_READY");
    int disabled = (env_val != NULL) && (env_val[0] == '0');
    cached_state = disabled ? 0 : 1;
    return cached_state;
}

// Optional: limit scan width (ENV: HAKMEM_TINY_READY_WIDTH, default TINY_READY_RING).
// Returns the number of ring slots to use, always in [1, TINY_READY_RING].
// The environment is parsed once and the result cached in a function-local static.
// An unset, empty, non-numeric, zero or negative value, or one larger than the
// ring, all resolve to TINY_READY_RING.
static inline int tiny_ready_width(void) {
    static int w = -1;
    if (__builtin_expect(w == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_READY_WIDTH");
        long v = 0;
        if (e && *e) {
            // strtol instead of atoi: atoi is undefined on out-of-range input,
            // whereas strtol saturates at LONG_MIN/LONG_MAX, which the clamp
            // below maps back into the valid range.
            v = strtol(e, NULL, 10);
        }
        if (v <= 0 || v > (long)TINY_READY_RING) {
            v = (long)TINY_READY_RING;
        }
        w = (int)v;
    }
    return w;
}

// Encode helpers are defined in the main TU; they are forward-declared here so
// that tiny_ready_push below can call slab_entry_make.
// NOTE(review): presumably slab_entry_make packs (ss, slab_idx) into a single
// word and slab_entry_ss / slab_entry_idx unpack it — confirm against the
// definitions. A packed value of 0 would be indistinguishable from an empty slot.
static inline uintptr_t slab_entry_make(SuperSlab* ss, int slab_idx);
static inline SuperSlab* slab_entry_ss(uintptr_t ent);
static inline int slab_entry_idx(uintptr_t ent);

// Push: best-effort, tries a few slots, drops the hint if all of them are occupied.
//   class_idx - size class; out-of-range values are ignored
//   ss        - slab owner to publish; NULL is ignored
//   slab_idx  - slab index within ss; must be in [0, ss_slabs_capacity(ss))
// Does nothing when the ready list is disabled (see tiny_ready_enabled).
static inline void tiny_ready_push(int class_idx, SuperSlab* ss, int slab_idx) {
    if (!tiny_ready_enabled()) return;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) return;
    if (__builtin_expect(ss == NULL || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss), 0)) return;

    uintptr_t ent = slab_entry_make(ss, slab_idx);
    // Wrap within the scan width rather than the whole ring: tiny_ready_pop only
    // looks at the first tiny_ready_width() slots, so an entry stored beyond
    // that range would never be consumed. The width is always in
    // [1, TINY_READY_RING], so the modulo is safe.
    uint32_t width = (uint32_t)tiny_ready_width();
    uint32_t start = atomic_fetch_add_explicit(&g_ready_rr[class_idx], 1u, memory_order_relaxed);
    // Try up to 4 slots to reduce collisions
    for (uint32_t k = 0; k < 4u; k++) {
        uint32_t idx = (start + k) % width;
        uintptr_t expected = 0;  // only claim slots that are currently empty
        // Strong CAS: each slot is attempted once with no retry, so a spurious
        // failure of the weak variant would skip an empty slot.
        if (atomic_compare_exchange_strong_explicit(&g_ready_ring[class_idx][idx], &expected, ent,
                                                    memory_order_release, memory_order_relaxed)) {
            return;
        }
    }
    // Drop if all tried slots were busy (hint ring, loss is acceptable)
}

// Pop any entry; scans the first tiny_ready_width() slots of the class ring once,
// starting from slot 0.
//   class_idx - size class; out-of-range values yield 0
// Returns the encoded entry taken from the first non-empty slot found (that slot
// is reset to 0), or 0 when the ready list is disabled or nothing was found.
static inline uintptr_t tiny_ready_pop(int class_idx) {
    if (!tiny_ready_enabled()) return (uintptr_t)0;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) return (uintptr_t)0;
    int scan = tiny_ready_width();
    for (int i = 0; i < scan; i++) {
        _Atomic(uintptr_t)* slot = &g_ready_ring[class_idx][i];
        // Cheap read first: skip empty slots without an atomic read-modify-write,
        // so scanning a mostly empty ring does not write to every slot.
        if (atomic_load_explicit(slot, memory_order_relaxed) == (uintptr_t)0) continue;
        // Another consumer may have emptied the slot since the load, so the
        // exchanged value is re-checked.
        uintptr_t ent = atomic_exchange_explicit(slot, (uintptr_t)0, memory_order_acq_rel);
        if (ent) return ent;
    }
    return (uintptr_t)0;
}