// Background Refill Bin (per-class lock-free SLL) — filled in the background so
// the front path only does a single CAS pop when both slots/bump are empty.
static int g_bg_bin_enable = 0;   // HAKMEM_TINY_BG_BIN=1
static int g_bg_bin_target = 128; // HAKMEM_TINY_BG_TARGET (per class)
static _Atomic uintptr_t g_bg_bin_head[TINY_NUM_CLASSES];
static pthread_t g_bg_bin_thread;
static volatile int g_bg_bin_stop = 0;
static int g_bg_bin_started = 0;

// Inline helpers
#include "hakmem_tiny_bg_bin.inc.h"

// ============================================================================
// EXTRACTED TO hakmem_tiny_remote_target.c (Phase 2C-1)
// ============================================================================
// Targeted remote-drain queue moved to a separate module.
// Functions: remote_target_enqueue(), remote_target_pop()
// Variables: g_bg_remote_enable, g_remote_target_head, g_remote_target_len,
//            g_bg_remote_batch

// ============================================================================
// EXTRACTED TO hakmem_tiny_bg_spill.c/.h (Phase 2C-2)
// ============================================================================
// Background spill/drain queue for SuperSlab freelist returns.
// Functions: bg_spill_push_one(), bg_spill_push_chain(), bg_spill_drain_class(),
//            bg_spill_init()
// Variables: g_bg_spill_enable, g_bg_spill_target, g_bg_spill_max_batch,
//            g_bg_spill_head[], g_bg_spill_len[]

static void* tiny_bg_refill_main(void* arg) {
    (void)arg;
    const int sleep_us = 1000; // 1ms
    while (!g_bg_bin_stop) {
        if (!g_bg_bin_enable) { usleep(sleep_us); continue; }
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            // Target only the small (hot) classes for now; keeps this simple.
            if (!is_hot_class(k)) continue;
            int have = bgbin_length_approx(k, g_bg_bin_target);
            if (have >= g_bg_bin_target) continue;
            int need = g_bg_bin_target - have;
            // Build a chain of blocks (from free lists / bitmaps); heavy work
            // is acceptable here because we are off the hot path.
            void* chain_head = NULL;
            void* chain_tail = NULL;
            int built = 0;
            pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
            pthread_mutex_lock(lock);
            TinySlab* slab = g_tiny_pool.free_slabs[k];
            // Adopt the first slab with free blocks; if none, allocate one.
            if (!slab) slab = allocate_new_slab(k);
            while (need > 0 && slab) {
                if (slab->free_count == 0) { slab = slab->next; continue; }
                int idx = hak_tiny_find_free_block(slab);
                if (idx < 0) { slab = slab->next; continue; }
                hak_tiny_set_used(slab, idx);
                slab->free_count--;
                size_t bs = g_tiny_class_sizes[k];
                void* p = (char*)slab->base + (idx * bs);
                // Prepend to the local chain.
                tiny_next_write(k, p, chain_head); // Box API: next-pointer write
                chain_head = p;
                if (!chain_tail) chain_tail = p;
                built++;
                need--;
            }
            pthread_mutex_unlock(lock);
            if (built > 0) {
                bgbin_push_chain(k, chain_head, chain_tail);
            }
        }
        // Drain background spill queues (SuperSlab freelist return).
        // EXTRACTED: drain logic moved to hakmem_tiny_bg_spill.c (Phase 2C-2).
        if (g_bg_spill_enable) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
                bg_spill_drain_class(k, lock);
            }
        }
        // Drain remote frees, targeted by per-class queue (avoids scanning all slabs).
        if (g_bg_remote_enable) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int processed = 0;
                while (processed < g_bg_remote_batch) {
                    TinySlab* s = remote_target_pop(k);
                    if (!s) break;
                    pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
                    pthread_mutex_lock(lock);
                    tiny_remote_drain_locked(s);
                    pthread_mutex_unlock(lock);
                    processed++;
                    // If more remain (due to concurrent pushes), the slab may be
                    // re-enqueued by producers when the threshold is hit again.
                }
            }
        }
        usleep(sleep_us);
    }
    return NULL;
}
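
// The front-path consumer described in the header comment above ("a single CAS
// pop") lives in hakmem_tiny_bg_bin.inc.h. The disabled block below is a rough
// sketch of that protocol, for illustration only: tiny_next_read() is a
// hypothetical read counterpart of tiny_next_write(), and the real helper
// names and ABA handling may differ.
#if 0
static inline void* bgbin_pop_sketch(int k) {
    uintptr_t head = atomic_load_explicit(&g_bg_bin_head[k], memory_order_acquire);
    while (head) {
        void* p = (void*)head;
        uintptr_t next = (uintptr_t)tiny_next_read(k, p); // hypothetical helper
        if (atomic_compare_exchange_weak_explicit(&g_bg_bin_head[k], &head, next,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire)) {
            return p; // one successful CAS hands one block to the caller
        }
        // CAS failure reloaded `head`; retry against the new head.
    }
    return NULL; // bin empty; caller falls back to the slow path
}
#endif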
static inline void eventq_push(int class_idx, uint32_t size) {
    eventq_push_ex(class_idx, size, HAK_TIER_FRONT, 0, 0, 0);
}
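
// eventq_push_ex() (defined elsewhere) is the producer side of the ring that
// intelligence_engine_main() below drains. The disabled block is a minimal
// single-producer sketch of that shape, assuming AllocEvent carries at least
// class_idx/size fields and that a full ring simply drops the event; the
// production policy may differ.
#if 0
static inline void eventq_push_sketch(int class_idx, uint32_t size) {
    uint32_t t = atomic_load_explicit(&g_ev_tail, memory_order_relaxed);
    uint32_t h = atomic_load_explicit(&g_ev_head, memory_order_acquire);
    if ((uint32_t)(t - h) > EVENTQ_MASK) return; // ring full: drop the event
    g_ev_ring[t & EVENTQ_MASK].class_idx = class_idx;
    g_ev_ring[t & EVENTQ_MASK].size = size;
    // Release-publish pairs with the consumer's acquire load of g_ev_tail.
    atomic_store_explicit(&g_ev_tail, t + 1, memory_order_release);
}
#endif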

static void* intelligence_engine_main(void* arg) {
    (void)arg;
    const int sleep_us = 100000; // 100ms
    int hist[TINY_NUM_CLASSES] = {0};
    int cnt[TINY_NUM_CLASSES] = {0};
    // Tiny-side learning is OFF by default (real apps learn at a later stage):
    // it turns ON only when HAKMEM_INT_ADAPT_REFILL=1 / HAKMEM_INT_ADAPT_CAPS=1
    // are set explicitly.
    int adapt_refill = 0; // default OFF for Tiny
    int adapt_caps = 0;   // default OFF for Tiny (env can enable)
    char* arf = getenv("HAKMEM_INT_ADAPT_REFILL");
    if (arf) adapt_refill = (atoi(arf) != 0);
    char* acp = getenv("HAKMEM_INT_ADAPT_CAPS");
    if (acp) adapt_caps = (atoi(acp) != 0);
    const int REFILL_MIN = 32, REFILL_MAX = 256;
    const int REFILL_HOT_MIN = 96, REFILL_HOT_MAX = 320;
    // Tiny diet (memory-tight) knobs
    {
        char* rb = getenv("HAKMEM_TINY_RSS_BUDGET_KB");
        if (rb) { int v = atoi(rb); if (v > 0) g_tiny_rss_budget_kb = v; }
        char* st = getenv("HAKMEM_TINY_DIET_STEP");
        if (st) { int v = atoi(st); if (v > 0 && v < 256) g_tiny_diet_step = v; }
        char* tt = getenv("HAKMEM_TINY_INT_TIGHT");
        if (tt) g_tiny_int_tight = (atoi(tt) != 0);
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            char var[64];
            snprintf(var, sizeof(var), "HAKMEM_TINY_CAP_FLOOR_C%d", k);
            char* vf = getenv(var);
            if (vf) {
                int v = atoi(vf);
                if (v > 0 && v < TINY_TLS_MAG_CAP) g_tiny_cap_floor[k] = v;
            }
        }
    }
    // Idle trim knobs
    int idle_trim_ms = 0;
    int idle_flush = 0; // flush magazines on idle tick (optional)
    {
        char* it = getenv("HAKMEM_TINY_IDLE_TRIM_MS");
        if (it) { int v = atoi(it); if (v > 0) idle_trim_ms = v; }
        char* iff = getenv("HAKMEM_TINY_IDLE_FLUSH");
        if (iff) idle_flush = (atoi(iff) != 0);
    }
    int idle_trim_ticks = (idle_trim_ms > 0) ? (idle_trim_ms * 1000 / sleep_us) : 0;
    int idle_tick = 0;
    while (!g_int_stop) {
        // Drain events
        uint32_t h = atomic_load_explicit(&g_ev_head, memory_order_relaxed);
        uint32_t t = atomic_load_explicit(&g_ev_tail, memory_order_acquire);
        while (h != t) {
            AllocEvent ev = g_ev_ring[h & EVENTQ_MASK];
            if (ev.class_idx < TINY_NUM_CLASSES) {
                hist[ev.class_idx]++;
                // TODO: use ev.tier_hit/flags/site_id for richer adaptations
            }
            h++;
        }
        atomic_store_explicit(&g_ev_head, h, memory_order_release);
        // Snapshot counts for this window
        for (int k = 0; k < TINY_NUM_CLASSES; k++) { cnt[k] = hist[k]; }
        // Simple adaptive rule: if a class is seen a lot, increase its fill
        // target; otherwise reduce it.
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            int count = cnt[k];
            hist[k] = 0; // reset for next window
            int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
            if (count > 1000) {
                int nv = cur + 32;
                if (nv > 256) nv = 256; // cap
                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
            } else if (count < 200) {
                int nv = cur - 16;
                if (nv < 0) nv = 0;
                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
            }
        }
        // Stage 1: adjust refill batch bounds by class grouping (hot tiny vs others)
        if (adapt_refill) {
            int hot_sum = 0, other_sum = 0;
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
                if (k <= 3) hot_sum += cur; else other_sum += cur;
            }
            if (hot_sum > 512) {
                int nv = g_tiny_refill_max_hot + 16;
                if (nv > REFILL_HOT_MAX) nv = REFILL_HOT_MAX;
                g_tiny_refill_max_hot = nv;
            } else if (hot_sum < 64) {
                int nv = g_tiny_refill_max_hot - 16;
                if (nv < REFILL_HOT_MIN) nv = REFILL_HOT_MIN;
                g_tiny_refill_max_hot = nv;
            }
            if (other_sum > 256) {
                int nv = g_tiny_refill_max + 16;
                if (nv > REFILL_MAX) nv = REFILL_MAX;
                g_tiny_refill_max = nv;
            } else if (other_sum < 32) {
                int nv = g_tiny_refill_max - 16;
                if (nv < REFILL_MIN) nv = REFILL_MIN;
                g_tiny_refill_max = nv;
            }
        }
        // Adapt per-class MAG/SLL caps (light-touch; protects hot classes)
        if (adapt_caps) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int hot = (k <= 3);
                // Heuristic thresholds per window:
                // hot classes raise caps more aggressively.
                int up_th = hot ? 800 : 1000;
                int dn_th = hot ? 120 : 200;
                if (g_tiny_int_tight) { dn_th = hot ? 200 : 300; }
                // MAG cap override: move toward [min..max] within guard rails
                int mag = g_mag_cap_override[k];
                int mag_min;
                switch (k) {
                    case 0: case 1: case 2: mag_min = 128; break; // 8/16/32B
                    case 3: mag_min = 256; break; // 64B (allow larger later)
                    case 4: mag_min = 128; break; // 128B
                    default: mag_min = 64; break;
                }
                int mag_max = 512; // soft ceiling; global hard ceiling is TINY_TLS_MAG_CAP
                if (k == 3) mag_max = 1024;
                if (mag <= 0) mag = mag_min; // start from baseline
                if (cnt[k] > up_th) {
                    mag += 16; if (mag > mag_max) mag = mag_max;
                } else if (cnt[k] < dn_th) {
                    mag -= 16; if (mag < mag_min) mag = mag_min;
                }
                g_mag_cap_override[k] = mag;
                // SLL cap override (hot classes only); keep the absolute cap modest.
                if (hot) {
                    int sll = g_sll_cap_override[k];
                    if (sll <= 0) sll = 256; // starting point for hot classes
                    int sll_min = 128;
                    if (g_tiny_int_tight && g_tiny_cap_floor[k] > 0) sll_min = g_tiny_cap_floor[k];
                    int sll_max = 1024;
                    if (cnt[k] > up_th) {
                        sll += 32; if (sll > sll_max) sll = sll_max;
                    } else if (cnt[k] < dn_th) {
                        sll -= 32; if (sll < sll_min) sll = sll_min;
                    }
                    g_sll_cap_override[k] = sll;
                }
            }
        }
        // Enforce Tiny RSS budget (if enabled): when over budget, shrink
        // per-class caps by the diet step.
        if (g_tiny_rss_budget_kb > 0) {
            int rss = get_rss_kb_self();
            if (rss > g_tiny_rss_budget_kb) {
                for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                    int floor = g_tiny_cap_floor[k];
                    if (floor <= 0) floor = 64;
                    int mag = g_mag_cap_override[k];
                    if (mag <= 0) mag = tiny_effective_cap(k);
                    mag -= g_tiny_diet_step;
                    if (mag < floor) mag = floor;
                    g_mag_cap_override[k] = mag;
                    // Phase12: the SLL cap is now adjusted on the policy side,
                    // not via g_sll_cap_override, so it is left untouched here.
                }
            }
        }
        // Optional periodic idle trim (try to keep overhead small)
        if (idle_trim_ticks > 0) {
            idle_tick++;
            if (idle_tick >= idle_trim_ticks) {
                idle_tick = 0;
                // Optional bounded flush of magazines to enable SS empty detection
                if (idle_flush) hak_tiny_magazine_flush_all();
                // Bounded trim: takes per-class locks briefly; acceptable in background
                hak_tiny_trim();
            }
        }
        usleep(sleep_us);
    }
    return NULL;
}
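
// get_rss_kb_self() above gates the RSS-budget shrink. On Linux the usual way
// to obtain this number is to parse VmRSS from /proc/self/status; the disabled
// block below sketches that approach for illustration. Assumption: the real
// implementation lives elsewhere in the project and may instead read
// /proc/self/statm or differ in detail.
#if 0
#include <stdio.h>
#include <string.h>
static int get_rss_kb_self_sketch(void) {
    FILE* f = fopen("/proc/self/status", "r");
    if (!f) return -1;
    char line[256];
    int kb = -1;
    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, "VmRSS:", 6) == 0) { // e.g. "VmRSS:   12345 kB"
            sscanf(line + 6, "%d", &kb);
            break;
        }
    }
    fclose(f);
    return kb; // resident set size in kB, or -1 if unavailable
}
#endif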