// superslab_ace.c - ACE (Adaptive Cache Engine) for SuperSlab allocator
// Purpose: Dynamic SuperSlab size adaptation based on usage patterns
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "hakmem_super_registry.h"

// ============================================================================
// ACE (Adaptive Cache Engine) State
// ============================================================================

// Per-class ACE state, indexed by tiny size-class index
// (0 .. TINY_NUM_CLASSES_SS-1). Every field starts out zeroed, so current_lg
// and target_lg are 0 until some other code assigns them.
SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}};

// Not referenced in this file. NOTE(review): presumably a forced SuperSlab
// size exponent where -1 means "no override" - confirm against its users.
int g_ss_force_lg = -1;
// Not referenced in this file. NOTE(review): looks like a one-shot flag
// consumed elsewhere - confirm semantics against its users.
_Atomic int g_ss_populate_once = 0;

// ============================================================================
// ACE Helper Functions
// ============================================================================

// Pick the size exponent (log2 bytes) to use for the next SuperSlab of a class.
//
// class_idx: tiny size-class index.
// Returns the class's ACE target_lg when class_idx is valid and the target lies
// within [SUPERSLAB_LG_MIN, SUPERSLAB_LG_MAX]; otherwise SUPERSLAB_LG_DEFAULT.
uint8_t hak_tiny_superslab_next_lg(int class_idx)
{
    uint8_t chosen_lg = SUPERSLAB_LG_DEFAULT;

    if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES_SS) {
        // Relaxed load: only the value itself is needed here.
        uint8_t wanted_lg = atomic_load_explicit(
            (_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg,
            memory_order_relaxed);

        // Accept the ACE target only when it sits inside the permitted range.
        if (wanted_lg >= SUPERSLAB_LG_MIN && wanted_lg <= SUPERSLAB_LG_MAX) {
            chosen_lg = wanted_lg;
        }
    }

    return chosen_lg;
}

// ============================================================================
// ACE Statistics / Debugging
// ============================================================================

// Convert a SuperSlab size exponent (log2 of bytes) to whole megabytes for
// display. Exponents of 32 or more would be an undefined shift on a 32-bit
// unsigned, so they are reported as 0 instead.
static unsigned ace_stats_lg_to_mb(unsigned lg) {
    return (lg < 32u) ? ((1u << lg) / (1024u * 1024u)) : 0u;
}

// Print one table row per tiny size class to stdout: current and target
// SuperSlab size in MB, hot score, and the alloc/refill/spill/live counters
// taken from g_ss_ace.
void superslab_ace_print_stats(void) {
    // Labels for the classes this file knows about. The table length is derived
    // from the initializer so that a larger TINY_NUM_CLASSES_SS cannot index
    // past its end; unknown classes are labelled "?".
    static const char* const class_names[] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"};
    const int name_count = (int)(sizeof class_names / sizeof class_names[0]);

    printf("=== ACE (Adaptive Cache Engine) Stats ===\n");

    printf("Class   Curr  Targ  Hot  Allocs  Refills  Spills  LiveBlks\n");
    printf("--------------------------------------------------------------\n");

    for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) {
        SuperSlabACEState* c = &g_ss_ace[i];
        const char* label = (i < name_count) ? class_names[i] : "?";
        printf("%-6s  %2uMB  %2uMB  %4u  %7u  %8u  %7u  %9u\n",
               label,
               ace_stats_lg_to_mb(c->current_lg),
               ace_stats_lg_to_mb(c->target_lg),
               (unsigned)c->hot_score,  // promoted to int otherwise; %u expects unsigned
               c->alloc_count,
               c->refill_count,
               c->spill_count,
               c->live_blocks);
    }
    printf("\n");
}

// ============================================================================
// ACE Tick Function (Promotion/Demotion Logic)
// ============================================================================

// Timing constants, in nanoseconds, compared against differences of the `now`
// value passed to hak_tiny_superslab_ace_tick().
#define ACE_TICK_NS        (150ULL * 1000 * 1000)  // 150ms minimum interval between ticks of one class
#define ACE_COOLDOWN_NS    (800ULL * 1000 * 1000)  // 0.8s minimum gap between size changes (anti-oscillation)

// Simplified thresholds for refill activity.
// Both macros read g_ss_ace[k].refill_count directly; k must be a valid class
// index. The tick function divides refill_count by 4 after each evaluation, so
// the thresholds apply to a decayed running count rather than a lifetime total.
#define HI_REFILL(k)       (g_ss_ace[k].refill_count > 64)   // High refill rate: promotion requires this
#define MID_REFILL(k)      (g_ss_ace[k].refill_count > 16)   // Medium refill rate: demotion requires this to be false

// Object sizes per class in bytes (for capacity calculation).
// Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes.
// Only eight initializers are given: if TINY_NUM_CLASSES_SS were larger, the
// remaining entries would be 0 and the capacity divisions in this file would
// divide by zero.
static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64};

void hak_tiny_superslab_ace_tick(int k, uint64_t now) {
    if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;

    SuperSlabACEState* c = &g_ss_ace[k];

    // Rate limiting: only tick every ACE_TICK_NS (~150ms)
    if (now - c->last_tick_ns < ACE_TICK_NS) return;

    // Calculate capacity for 1MB and 2MB SuperSlabs
    int obj_size = g_tiny_obj_sizes[k];
    double cap1MB = (double)((1U << 20) / obj_size);  // 1MB capacity
    double cap2MB = (double)((1U << 21) / obj_size);  // 2MB capacity

    // Calculate hotness score (weighted: 60% live blocks, 40% refill rate)
    double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count;
    if (hot < 0) hot = 0;
    if (hot > 1000) hot = 1000;
    c->hot_score = (uint16_t)hot;

    // Cooldown mechanism: prevent size changes within 0.8s of last change
    static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0};

    if (now - last_switch_ns[k] >= ACE_COOLDOWN_NS) {
        if (c->current_lg <= 20) {
            // Promotion condition: 1MB → 2MB
            // High demand (live > 75% capacity) AND high refill rate
            if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) {
                c->target_lg = 21;  // Promote to 2MB
                last_switch_ns[k] = now;
            }
        } else {
            // Demotion condition: 2MB → 1MB
            // Low demand (live < 35% capacity) AND low refill rate
            if (c->live_blocks < 0.35 * cap2MB && !MID_REFILL(k)) {
                c->target_lg = 20;  // Demote to 1MB
                last_switch_ns[k] = now;
            }
        }
    }

    // EMA-style decay for counters (reduce by 75% each tick)
    c->alloc_count  = c->alloc_count  / 4;
    c->refill_count = c->refill_count / 4;
    c->spill_count  = c->spill_count  / 4;
    // live_blocks is updated incrementally by alloc/free, not decayed here

    c->last_tick_ns = now;
}

// ============================================================================
// ACE Observer (Registry-based, zero hot-path overhead)
// ============================================================================

// Debug-output switch for the ACE observer (0 = quiet, 1 = log to stderr).
// Assigned lazily on the first call to hak_tiny_superslab_ace_observe_all(),
// from the HAKMEM_ACE_DEBUG environment variable; read by
// ace_observe_and_decide().
static int g_ace_debug = 0;

// Registry-based observation for one size class: scan the SuperSlab registry,
// derive a utilization estimate, publish it as hot_score (0..1000), and request
// a 1MB <-> 2MB size change through target_lg when utilization crosses the
// promotion (> 75%) or demotion (< 35%) threshold.
//
// k: tiny size-class index; out-of-range values are ignored.
// Returns early without touching any state when the registry is unavailable.
void ace_observe_and_decide(int k) {
    if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;

    SuperSlabACEState* c = &g_ss_ace[k];

    // Scan Registry to count SuperSlabs and total live blocks
    int ss_count = 0;
    uint32_t total_live = 0;

    SuperRegEntry* reg = super_reg_entries();
    int reg_cap = super_reg_effective_size();
    if (!reg || reg_cap <= 0) return;

    for (int i = 0; i < reg_cap; i++) {
        SuperRegEntry* e = &reg[i];

        // Atomic read (thread-safe)
        uintptr_t base = atomic_load_explicit(
            (_Atomic uintptr_t*)&e->base,
            memory_order_acquire);

        if (base == 0) continue;  // Empty slot

        // Phase 8.4: Safety check - skip if ss pointer is invalid
        if (!e->ss) continue;
        // Phase 12: per-SS size_class removed; registry entries are per-class by construction.
        // NOTE(review): nothing in this loop filters by k, so every populated
        // entry is counted for every class - confirm this is intended.

        ss_count++;
        // Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead)
        uint32_t ss_live = 0;
        int cap_scan = ss_slabs_capacity(e->ss);
        for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) {
            TinySlabMeta* meta = &e->ss->slabs[slab_idx];
            // Relaxed read is OK (stats only, no hot-path impact)
            ss_live += meta->used;
        }
        total_live += ss_live;
    }

    // Calculate utilization
    int obj_size = g_tiny_obj_sizes[k];
    uint8_t current_lg = atomic_load_explicit(
        (_Atomic uint8_t*)&c->current_lg,
        memory_order_relaxed);

    // Objects per SuperSlab at the current size. Guard the shift count (>= 32
    // is undefined for a 32-bit unsigned) and the divisor.
    uint32_t per_ss = (current_lg < 32 && obj_size > 0)
                          ? (1U << current_lg) / (uint32_t)obj_size
                          : 0;
    uint32_t capacity = (uint32_t)ss_count * per_ss;
    // Never divide by zero: covers an empty registry as well as a current_lg
    // too small to hold one object (e.g. the zero-initialized state).
    if (capacity == 0) capacity = 1;
    double util = (double)total_live / capacity;

    // Update hot_score (for debugging/visualization). Clamp in floating point
    // first: converting an out-of-range double to uint16_t is undefined.
    double hot = util * 1000;
    if (hot > 1000) hot = 1000;
    c->hot_score = (uint16_t)hot;

    // Promotion/Demotion decision
    uint8_t new_target = current_lg;

    if (current_lg <= 20) {
        // Promotion: 1MB -> 2MB
        if (util > 0.75) {
            new_target = 21;
        }
    } else {
        // Demotion: 2MB -> 1MB
        if (util < 0.35) {
            new_target = 20;
        }
    }

    // Debug output (if enabled)
    if (g_ace_debug && ss_count > 0) {
        fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n",
                k, obj_size, ss_count, total_live, capacity, util * 100.0,
                current_lg, new_target, c->hot_score);
    }

    // Atomic write (thread-safe)
    if (new_target != current_lg) {
        atomic_store_explicit(
            (_Atomic uint8_t*)&c->target_lg,
            new_target,
            memory_order_release);
        if (g_ace_debug) {
            // new_target is 20 or 21 here, but current_lg may be anything;
            // report 0MB rather than shifting by a negative or oversized count.
            int old_mb = (current_lg >= 20 && current_lg <= 50) ? (1 << (current_lg - 20)) : 0;
            int new_mb = 1 << (new_target - 20);
            fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n",
                    k, old_mb, new_mb, util * 100.0);
        }
    }
}

// Called from Learner thread (background observation)
void hak_tiny_superslab_ace_observe_all(void) {
    // Initialize debug flag once
    static int initialized = 0;
    if (!initialized) {
        const char* ace_debug = getenv("HAKMEM_ACE_DEBUG");
        g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0;
        initialized = 1;
    }

    for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) {
        ace_observe_and_decide(k);
    }
}