// hakmem_tiny_lifecycle.inc
// Phase 2D-3: Lifecycle management functions extraction
//
// This file contains lifecycle management functions extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by ~226 lines (16%).
//
// Functions:
//   - hak_tiny_trim(): Trim and cleanup operations
//   - tiny_tls_cache_drain(): TLS cache draining
//   - tiny_apply_mem_diet(): Memory diet mode application
//
// Cold/maintenance path - not performance critical.

#include "tiny_tls_guard.h"
#include "box/ss_slab_meta_box.h"   // Phase 3d-A: SlabMeta Box boundary
#include "hakmem_super_registry.h"  // Phase 12

// Helper to derive a representative class index for a SuperSlab
// from per-slab metadata (all slabs are empty when used in trim).
static inline int superslab_any_class_idx(SuperSlab* ss) {
    if (!ss) return -1;
    int cap = ss_slabs_capacity(ss);
    for (int s = 0; s < cap; s++) {
        uint8_t cls = ss_slab_meta_class_idx_get(ss, s);
        if (cls < TINY_NUM_CLASSES) return (int)cls;
    }
    return -1;
}
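
// Environment knobs read lazily by hak_tiny_trim() (summary of the parsing below):
//   HAKMEM_TINY_SS_RESERVE           empty SuperSlabs kept per class
//                                    (clamped to 0..4, default EMPTY_SUPERSLAB_RESERVE)
//   HAKMEM_TINY_TRIM_SS              0 disables SuperSlab reclamation (default: enabled)
//   HAKMEM_TINY_SS_PARTIAL           nonzero enables partial-release epochs
//   HAKMEM_TINY_SS_PARTIAL_INTERVAL  partial-release interval (minimum 1)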
void hak_tiny_trim(void) {
    static _Atomic int g_trim_call_count = 0;
    int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
    if (call_count < 5) {  // First 5 calls only
        fprintf(stderr, "[DEBUG hak_tiny_trim] Call #%d\n", call_count + 1);
    }
    if (!g_tiny_initialized) return;

    // Lazy init for SS reserve env
    if (__builtin_expect(g_empty_reserve == -1, 0)) {
        char* er = getenv("HAKMEM_TINY_SS_RESERVE");
        int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
        if (v < 0) {
            v = 0;
        } else if (v > 4) {
            v = 4;  // guardrails
        }
        g_empty_reserve = v;
    }

    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        tiny_tls_cache_drain(class_idx);
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        pthread_mutex_lock(lock);
        TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
        TinySlab* prev = NULL;
        TinySlab* slab = *head;
        while (slab) {
            TinySlab* next = slab->next;
            if (slab->free_count == slab->total_count) {
                if (prev) prev->next = next;
                else *head = next;
                release_slab(slab);
                slab = next;
                continue;
            }
            prev = slab;
            slab = next;
        }
        pthread_mutex_unlock(lock);
    }

    // Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
    static int g_trim_ss_enabled = -1;
    static int g_ss_partial_env = -1;
    if (g_trim_ss_enabled == -1) {
        char* env = getenv("HAKMEM_TINY_TRIM_SS");
        if (env) {
            g_trim_ss_enabled = (atoi(env) != 0) ? 1 : 0;
        } else {
            g_trim_ss_enabled = 1;  // default ON for better memory efficiency
        }
    }
    if (g_ss_partial_env == -1) {
        char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
        if (env) {
            g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
        }
        char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
        if (interval) {
            int v = atoi(interval);
            if (v < 1) v = 1;
            g_ss_partial_interval = (uint32_t)v;
        }
        g_ss_partial_env = 1;
    }
    if (!g_trim_ss_enabled) return;

    uint32_t partial_epoch = 0;
    if (g_ss_partial_enable) {
        partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
    }

    // Walk the registry and collect empty SuperSlabs by class
    SuperRegEntry* reg = super_reg_entries();
    int reg_cap = super_reg_effective_size();
    if (!reg || reg_cap <= 0) return;
    for (int i = 0; i < reg_cap; i++) {
        SuperRegEntry* e = &reg[i];
        uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
        if (base == 0) continue;
        SuperSlab* ss = e->ss;
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;

        // Only consider completely empty SuperSlabs
        uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
        static _Atomic int g_debug_ss_scan = 0;
        int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
        if (scan_count < 20) {  // First 20 SS scans
            int log_cls = superslab_any_class_idx(ss);
            fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
                    (void*)ss, log_cls, active);
        }
        if (active != 0) continue;

        int k = superslab_any_class_idx(ss);
        if (k < 0 || k >= TINY_NUM_CLASSES) continue;

        // Do not free if current thread still caches this SS in TLS
        if (g_tls_slabs[k].ss == ss) continue;

        // Keep up to EMPTY_SUPERSLAB_RESERVE per class as reserve; free extras
        pthread_mutex_lock(&g_empty_lock);
        if (g_empty_reserve == 0) {
            pthread_mutex_unlock(&g_empty_lock);
            if (superslab_ref_get(ss) == 0) {
                superslab_free(ss);
            }
            continue;
        }
        if (g_empty_superslabs[k] == NULL) {
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        // If same as reserved, nothing to do
        if (g_empty_superslabs[k] == ss) {
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        int can_free = (g_empty_counts[k] >= g_empty_reserve);
        if (!can_free) {
            // Replace reserve with this newer SS
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        pthread_mutex_unlock(&g_empty_lock);
        // Free outside of the empty lock (conservative: only when refcount == 0)
        if (superslab_ref_get(ss) == 0) {
            superslab_free(ss);
        }
    }
}

#include "box/tiny_next_ptr_box.h"  // for tiny_next_read() used below
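
// Drain every thread-local cache for one size class back to its owners:
//   1) flush the TLS SLL cache and the fast tier into the TLS list,
//   2) bulk-take the TLS list and return each block to its owning SuperSlab or TinySlab,
//   3) release the TLS-bound SuperSlab reference once nothing in it is still in use.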
static void tiny_tls_cache_drain(int class_idx) {
    TinyTLSList* tls = &g_tls_lists[class_idx];

    // Phase E1-CORRECT: Drain TLS SLL cache for ALL classes
    void* sll = g_tls_sll[class_idx].head;
    g_tls_sll[class_idx].head = NULL;
    g_tls_sll[class_idx].count = 0;
    while (sll) {
        void* next = tiny_next_read(class_idx, sll);
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll, class_idx);
        sll = next;
    }

    // Phase E1-CORRECT: Drain fast tier cache for ALL classes
    void* fast = g_fast_head[class_idx];
    g_fast_head[class_idx] = NULL;
    g_fast_count[class_idx] = 0;
    while (fast) {
        void* next = tiny_next_read(class_idx, fast);
        tiny_tls_list_guard_push(class_idx, tls, fast);
        tls_list_push(tls, fast, class_idx);
        fast = next;
    }

    // Spill TLS list back to owners
    void* head = NULL;
    void* tail = NULL;
    while (1) {
        uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail, class_idx);
        if (taken == 0u || head == NULL) break;
        void* cur = head;
        while (cur) {
            void* next = tiny_next_read(class_idx, cur);
            SuperSlab* ss = hak_super_lookup(cur);
            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                hak_tiny_free_superslab(cur, ss);
            } else {
                TinySlab* slab = hak_tiny_owner_slab(cur);
                if (slab) {
                    int cls = slab->class_idx;
                    size_t block_size = g_tiny_class_sizes[cls];
                    int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
                    pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
                    pthread_mutex_lock(lock);
                    if (hak_tiny_is_used(slab, block_idx)) {
                        hak_tiny_set_free(slab, block_idx);
                        int was_full = (slab->free_count == 0);
                        slab->free_count++;
                        g_tiny_pool.free_count[cls]++;
                        if (was_full) {
                            move_to_free_list(cls, slab);
                        }
                        if (slab->free_count == slab->total_count) {
                            TinySlab** headp = &g_tiny_pool.free_slabs[cls];
                            TinySlab* prev = NULL;
                            for (TinySlab* s = *headp; s; prev = s, s = s->next) {
                                if (s == slab) {
                                    if (prev) prev->next = s->next;
                                    else *headp = s->next;
                                    break;
                                }
                            }
                            release_slab(slab);
                        }
                    }
                    pthread_mutex_unlock(lock);
                }
            }
            cur = next;
        }
    }

    // Release TLS-bound SuperSlab reference when caches are empty
    TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
    SuperSlab* held_ss = tls_slab->ss;
    if (held_ss) {
        int keep_binding = 0;
        if (tls_slab->meta && tls_slab->meta->used > 0) {
            keep_binding = 1;
        }
        if (!keep_binding) {
            tls_slab->ss = NULL;
            tls_slab->meta = NULL;
            tls_slab->slab_base = NULL;
            tls_slab->slab_idx = 0;
            superslab_ref_dec(held_ss);
        }
    }
    g_tls_active_slab_a[class_idx] = NULL;
    g_tls_active_slab_b[class_idx] = NULL;
}

static void tiny_apply_mem_diet(void) {
    g_mag_cap_limit = 64;
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        if (g_fast_cap[class_idx] > 0) {
            uint16_t limit = (class_idx <= 3) ? 48 : 32;
            if (limit < 16) limit = 16;
            if (g_fast_cap[class_idx] > limit) {
                g_fast_cap[class_idx] = limit;
            }
        }
        TinyTLSList* tls = &g_tls_lists[class_idx];
        uint32_t new_cap = tls->cap;
        if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
        if (new_cap < 16u) new_cap = 16u;
        tls->cap = new_cap;
        tls->refill_low = tiny_tls_default_refill(new_cap);
        tls->spill_high = tiny_tls_default_spill(new_cap);
        tiny_tls_publish_targets(class_idx, new_cap);
    }
}
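
// Illustrative usage sketch (hypothetical; only hak_tiny_trim() and the environment
// variables above come from this file - the maintenance loop below is an assumed
// host-side driver, not part of hakmem_tiny.c):
//
//   // Example environment (values within the clamps parsed above):
//   //   HAKMEM_TINY_SS_RESERVE=2            keep up to 2 empty SuperSlabs per class
//   //   HAKMEM_TINY_TRIM_SS=1               enable SuperSlab reclamation (default)
//   //   HAKMEM_TINY_SS_PARTIAL=1            enable partial-release epochs
//   //   HAKMEM_TINY_SS_PARTIAL_INTERVAL=4   partial-release interval
//
//   static void* hakmem_maintenance_thread(void* arg) {  // hypothetical helper
//       (void)arg;
//       for (;;) {
//           sleep(1);          // cold path: trimming about once per second is plenty
//           hak_tiny_trim();   // drains TLS caches, releases empty slabs/SuperSlabs
//       }
//       return NULL;
//   }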