/**
 * hakmem_smallmid_superslab.c - Small-Mid SuperSlab Backend Implementation
 *
 * Phase 17-2: Dedicated SuperSlab pool for Small-Mid allocator
 * Goal: 2-3x performance improvement via batch refills and dedicated backend
 *
 * Created: 2025-11-16
 */

#include "hakmem_smallmid_superslab.h"
#include "hakmem_smallmid.h"

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdatomic.h>
#include <pthread.h>
#include <sys/mman.h>

#ifndef SMALLMID_DEBUG
#define SMALLMID_DEBUG 0
#endif

// ============================================================================
// Global State
// ============================================================================

SmallMidSSHead g_smallmid_ss_pools[SMALLMID_NUM_CLASSES];
static pthread_once_t g_smallmid_ss_init_once = PTHREAD_ONCE_INIT;
static int g_smallmid_ss_initialized = 0;

#ifdef HAKMEM_SMALLMID_SS_STATS
SmallMidSSStats g_smallmid_ss_stats = {0};
#endif

// ============================================================================
// Initialization
// ============================================================================

static void smallmid_superslab_init_once(void) {
    for (int i = 0; i < SMALLMID_NUM_CLASSES; i++) {
        SmallMidSSHead* pool = &g_smallmid_ss_pools[i];
        pool->class_idx = i;
        pool->total_ss = 0;
        pool->first_ss = NULL;
        pool->current_ss = NULL;
        pool->lru_head = NULL;
        pool->lru_tail = NULL;
        pthread_mutex_init(&pool->lock, NULL);
        pool->alloc_count = 0;
        pool->refill_count = 0;
        pool->ss_alloc_count = 0;
        pool->ss_free_count = 0;
    }

    g_smallmid_ss_initialized = 1;

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SuperSlab] Initialized (%d classes)\n",
            SMALLMID_NUM_CLASSES);
#endif
}

void smallmid_superslab_init(void) {
    pthread_once(&g_smallmid_ss_init_once, smallmid_superslab_init_once);
}

// ============================================================================
// SuperSlab Allocation/Deallocation
// ============================================================================

/**
 * smallmid_superslab_alloc - Allocate a new 1MB SuperSlab
 *
 * Strategy:
 *   - mmap a 1MB region (PROT_READ|WRITE, MAP_PRIVATE|ANONYMOUS)
 *   - Verify SuperSlab alignment (mmap only guarantees page alignment)
 *   - Initialize header, metadata, counters
 *   - Update per-class pool counters (the caller links the SuperSlab into the pool chain)
 *   - Return SuperSlab pointer
 */
SmallMidSuperSlab* smallmid_superslab_alloc(int class_idx) {
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES) {
        return NULL;
    }

    // Allocate 1MB region
    void* mem = mmap(NULL, SMALLMID_SUPERSLAB_SIZE,
                     PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED) {
        fprintf(stderr, "[SmallMid SS] mmap failed: %s\n", strerror(errno));
        return NULL;
    }

    // Check alignment (mmap only guarantees page alignment, so this can fail)
    uintptr_t addr = (uintptr_t)mem;
    if ((addr & (SMALLMID_SS_ALIGNMENT - 1)) != 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: mmap returned unaligned address %p\n", mem);
        munmap(mem, SMALLMID_SUPERSLAB_SIZE);
        return NULL;
    }

    SmallMidSuperSlab* ss = (SmallMidSuperSlab*)mem;

    // Initialize header
    ss->magic = SMALLMID_SS_MAGIC;
    ss->num_slabs = SMALLMID_SLABS_PER_SS;
    ss->active_slabs = 0;
    ss->refcount = 1;
    ss->total_active = 0;
    ss->slab_bitmap = 0;
    ss->nonempty_mask = 0;
    ss->last_used_ns = 0;
    ss->generation = 0;
    ss->next = NULL;
    ss->lru_next = NULL;
    ss->lru_prev = NULL;

    // Initialize slab metadata (all inactive initially)
    for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
        SmallMidSlabMeta* meta = &ss->slabs[i];
        meta->freelist = NULL;
        meta->used = 0;
        meta->capacity = 0;
        meta->carved = 0;
        meta->class_idx = class_idx;
        meta->flags = SMALLMID_SLAB_INACTIVE;
    }

    // Update pool stats
    SmallMidSSHead* pool = &g_smallmid_ss_pools[class_idx];
    atomic_fetch_add(&pool->total_ss, 1);
    atomic_fetch_add(&pool->ss_alloc_count, 1);

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_ss_alloc, 1);
#endif

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Allocated SuperSlab %p (class=%d, size=1MB)\n",
            ss, class_idx);
#endif

    return ss;
}
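/*
 * Editor's note (illustrative sketch, not part of the original backend):
 * mmap() only guarantees page alignment, so the alignment check in
 * smallmid_superslab_alloc() above can fail spuriously. One common way to
 * guarantee SMALLMID_SS_ALIGNMENT is to over-map by the alignment amount and
 * trim the excess with munmap(). The helper below is a minimal sketch of that
 * technique, assuming the alignment is a power of two; the name
 * smallmid_map_aligned is hypothetical and nothing in this file calls it.
 */
static void* __attribute__((unused))
smallmid_map_aligned(size_t size, size_t alignment) {
    // Over-map so that some aligned boundary is guaranteed to fall inside the region
    size_t span = size + alignment;
    uint8_t* raw = mmap(NULL, span, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (raw == MAP_FAILED) {
        return NULL;
    }

    // Round up to the next aligned boundary inside the over-mapped span
    uintptr_t base = (uintptr_t)raw;
    uintptr_t aligned = (base + alignment - 1) & ~(uintptr_t)(alignment - 1);

    // Trim the unused head and tail so exactly [aligned, aligned + size) remains mapped
    size_t head = aligned - base;
    size_t tail = span - head - size;
    if (head > 0) {
        munmap(raw, head);
    }
    if (tail > 0) {
        munmap((void*)(aligned + size), tail);
    }
    return (void*)aligned;
}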
/**
 * smallmid_superslab_free - Free a SuperSlab
 *
 * Strategy:
 *   - Warn if refcount or active block count is nonzero (all blocks should be freed)
 *   - munmap the 1MB region
 *   - Update global stats
 */
void smallmid_superslab_free(SmallMidSuperSlab* ss) {
    if (!ss || ss->magic != SMALLMID_SS_MAGIC) {
        fprintf(stderr, "[SmallMid SS] ERROR: Invalid SuperSlab %p\n", ss);
        return;
    }

    uint32_t refcount = atomic_load(&ss->refcount);
    if (refcount > 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: Freeing SuperSlab with refcount=%u\n", refcount);
    }

    uint32_t active = atomic_load(&ss->total_active);
    if (active > 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: Freeing SuperSlab with active blocks=%u\n", active);
    }

    // Invalidate magic
    ss->magic = 0xDEADBEEF;

    // munmap
    if (munmap(ss, SMALLMID_SUPERSLAB_SIZE) != 0) {
        fprintf(stderr, "[SmallMid SS] munmap failed: %s\n", strerror(errno));
    }

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_ss_free, 1);
#endif

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Freed SuperSlab %p\n", ss);
#endif
}

// ============================================================================
// Slab Initialization
// ============================================================================

/**
 * smallmid_slab_init - Initialize a slab within SuperSlab
 *
 * Strategy:
 *   - Calculate slab base address (ss_base + slab_idx * 64KB)
 *   - Set capacity based on size class (256/128/64 blocks)
 *   - Mark slab as active
 *   - Update SuperSlab bitmaps
 */
void smallmid_slab_init(SmallMidSuperSlab* ss, int slab_idx, int class_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= SMALLMID_SLABS_PER_SS) {
        return;
    }

    SmallMidSlabMeta* meta = &ss->slabs[slab_idx];

    // Set capacity based on class
    const uint16_t capacities[SMALLMID_NUM_CLASSES] = {
        SMALLMID_BLOCKS_256B,
        SMALLMID_BLOCKS_512B,
        SMALLMID_BLOCKS_1KB
    };

    meta->freelist = NULL;
    meta->used = 0;
    meta->capacity = capacities[class_idx];
    meta->carved = 0;
    meta->class_idx = class_idx;
    meta->flags = SMALLMID_SLAB_ACTIVE;

    // Update SuperSlab bitmaps
    ss->slab_bitmap |= (1u << slab_idx);
    ss->nonempty_mask |= (1u << slab_idx);
    ss->active_slabs++;

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Initialized slab %d in SS %p (class=%d, capacity=%u)\n",
            slab_idx, ss, class_idx, meta->capacity);
#endif
}
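/*
 * Editor's note (illustrative sketch, not called by the original code):
 * slab_bitmap / nonempty_mask are maintained as bitmasks, so the linear slab
 * scans in smallmid_refill_batch() below could in principle be replaced by a
 * single count-trailing-zeros step. The helper below sketches that idea for
 * finding the first *inactive* slab, assuming SMALLMID_SLABS_PER_SS <= 32 and
 * a GCC/Clang-style __builtin_ctz; the helper name is hypothetical.
 */
static int __attribute__((unused))
smallmid_find_inactive_slab(SmallMidSuperSlab* ss) {
    // Bits set in slab_bitmap mark active slabs; invert and mask to find inactive ones
    uint32_t inactive = ~(uint32_t)ss->slab_bitmap;
    if (SMALLMID_SLABS_PER_SS < 32) {
        inactive &= (1u << SMALLMID_SLABS_PER_SS) - 1u;
    }
    if (inactive == 0) {
        return -1;  // All slabs are already active
    }
    return __builtin_ctz(inactive);  // Index of the lowest clear bit in slab_bitmap
}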
// ============================================================================
// Batch Refill (Performance-Critical Path)
// ============================================================================

/**
 * smallmid_refill_batch - Batch refill TLS freelist from SuperSlab
 *
 * Performance target: 5-8 instructions per call (amortized)
 *
 * Strategy:
 *   1. Try current slab's freelist (fast path: pop up to batch_max blocks)
 *   2. Fall back to bump allocation if freelist empty
 *   3. Allocate new slab if current is full
 *   4. Allocate new SuperSlab if no slabs available
 *
 * Returns: Number of blocks refilled (0 on failure)
 */
int smallmid_refill_batch(int class_idx, void** batch_out, int batch_max) {
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES ||
        !batch_out || batch_max <= 0) {
        return 0;
    }

    SmallMidSSHead* pool = &g_smallmid_ss_pools[class_idx];

    // Ensure SuperSlab pool is initialized
    if (!g_smallmid_ss_initialized) {
        smallmid_superslab_init();
    }

    // Allocate first SuperSlab if needed (pool->lock guards SuperSlab/slab
    // management, not the carve path below)
    pthread_mutex_lock(&pool->lock);
    if (!pool->current_ss) {
        pool->current_ss = smallmid_superslab_alloc(class_idx);
        if (!pool->current_ss) {
            pthread_mutex_unlock(&pool->lock);
            return 0;
        }
        // Add to chain
        if (!pool->first_ss) {
            pool->first_ss = pool->current_ss;
        }
        // Initialize first slab
        smallmid_slab_init(pool->current_ss, 0, class_idx);
    }
    SmallMidSuperSlab* ss = pool->current_ss;
    pthread_mutex_unlock(&pool->lock);

    // Find active slab with available blocks
    int slab_idx = -1;
    SmallMidSlabMeta* meta = NULL;

    for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
        if (!(ss->slab_bitmap & (1u << i))) {
            continue;  // Slab not active
        }
        meta = &ss->slabs[i];
        if (meta->used < meta->capacity) {
            slab_idx = i;
            break;  // Found slab with space
        }
    }

    // No slab with space - try to allocate new slab
    if (slab_idx == -1) {
        pthread_mutex_lock(&pool->lock);
        // Find first inactive slab
        for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
            if (!(ss->slab_bitmap & (1u << i))) {
                smallmid_slab_init(ss, i, class_idx);
                slab_idx = i;
                meta = &ss->slabs[i];
                break;
            }
        }
        pthread_mutex_unlock(&pool->lock);

        // All slabs exhausted - need new SuperSlab
        if (slab_idx == -1) {
            pthread_mutex_lock(&pool->lock);
            SmallMidSuperSlab* new_ss = smallmid_superslab_alloc(class_idx);
            if (!new_ss) {
                pthread_mutex_unlock(&pool->lock);
                return 0;
            }
            // Link to chain
            new_ss->next = pool->first_ss;
            pool->first_ss = new_ss;
            pool->current_ss = new_ss;
            // Initialize first slab
            smallmid_slab_init(new_ss, 0, class_idx);
            pthread_mutex_unlock(&pool->lock);

            ss = new_ss;
            slab_idx = 0;
            meta = &ss->slabs[0];
        }
    }

    // Now we have a slab with available capacity
    // Strategy: Try freelist first, then bump allocation
    const size_t block_sizes[SMALLMID_NUM_CLASSES] = {256, 512, 1024};
    size_t block_size = block_sizes[class_idx];
    int refilled = 0;

    // Calculate slab data base address
    uintptr_t ss_base = (uintptr_t)ss;
    uintptr_t slab_base = ss_base + (slab_idx * SMALLMID_SLAB_SIZE);

    // Fast path: Pop from freelist (if available)
    void* freelist_head = meta->freelist;
    while (freelist_head && refilled < batch_max) {
        // Add 1-byte header space (Phase 7 technology)
        void* user_ptr = (uint8_t*)freelist_head + 1;
        batch_out[refilled++] = user_ptr;
        // Next block (freelist stored at offset 0 in user data)
        freelist_head = *(void**)user_ptr;
    }
    meta->freelist = freelist_head;

    // Slow path: Bump allocation
    while (refilled < batch_max && meta->carved < meta->capacity) {
        // Calculate block base address (with 1-byte header)
        uintptr_t block_base = slab_base + (meta->carved * (block_size + 1));
        void* base_ptr = (void*)block_base;
        void* user_ptr = (uint8_t*)base_ptr + 1;

        // Write header (0xb0 | class_idx)
        *(uint8_t*)base_ptr = 0xb0 | class_idx;

        batch_out[refilled++] = user_ptr;
        meta->carved++;
        meta->used++;

        // Update SuperSlab active counter
        atomic_fetch_add(&ss->total_active, 1);
    }

    // Update stats
    atomic_fetch_add(&pool->alloc_count, refilled);
    atomic_fetch_add(&pool->refill_count, 1);

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_refills, 1);
    atomic_fetch_add(&g_smallmid_ss_stats.total_blocks_carved, refilled);
#endif

#if SMALLMID_DEBUG
    if (refilled > 0) {
        fprintf(stderr, "[SmallMid SS] Refilled %d blocks (class=%d, slab=%d, carved=%u/%u)\n",
                refilled, class_idx, slab_idx, meta->carved, meta->capacity);
    }
#endif

    return refilled;
}
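/*
 * Editor's note (illustrative usage sketch, not part of the original backend):
 * a front-end would typically keep a small per-thread cache per class and call
 * smallmid_refill_batch() only when that cache runs dry, so the mutex and slab
 * scan above are amortized over a whole batch. The cache layout and the names
 * below (SmallMidTLSCacheExample, smallmid_tls_alloc_example, the batch size
 * of 16) are hypothetical; the real Small-Mid front-end lives elsewhere.
 */
typedef struct {
    void* blocks[16];   // Cached user pointers returned by the refill
    int   count;        // Number of valid entries in blocks[]
} SmallMidTLSCacheExample;

static __thread SmallMidTLSCacheExample g_smallmid_tls_example[SMALLMID_NUM_CLASSES];

static void* __attribute__((unused))
smallmid_tls_alloc_example(int class_idx) {
    SmallMidTLSCacheExample* cache = &g_smallmid_tls_example[class_idx];

    // Refill only when the per-thread cache is empty (the amortized slow path)
    if (cache->count == 0) {
        cache->count = smallmid_refill_batch(class_idx, cache->blocks, 16);
        if (cache->count == 0) {
            return NULL;  // Backend could not produce any blocks
        }
    }

    // Pop one cached block (the fast path: no locks, no slab scan)
    return cache->blocks[--cache->count];
}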
// ============================================================================
// Statistics
// ============================================================================

#ifdef HAKMEM_SMALLMID_SS_STATS
void smallmid_ss_print_stats(void) {
    fprintf(stderr, "\n=== Small-Mid SuperSlab Statistics ===\n");
    fprintf(stderr, "Total SuperSlab allocs: %lu\n", g_smallmid_ss_stats.total_ss_alloc);
    fprintf(stderr, "Total SuperSlab frees:  %lu\n", g_smallmid_ss_stats.total_ss_free);
    fprintf(stderr, "Total refills:          %lu\n", g_smallmid_ss_stats.total_refills);
    fprintf(stderr, "Total blocks carved:    %lu\n", g_smallmid_ss_stats.total_blocks_carved);
    fprintf(stderr, "Total blocks freed:     %lu\n", g_smallmid_ss_stats.total_blocks_freed);

    fprintf(stderr, "\nPer-class statistics:\n");
    for (int i = 0; i < SMALLMID_NUM_CLASSES; i++) {
        SmallMidSSHead* pool = &g_smallmid_ss_pools[i];
        fprintf(stderr, "  Class %d (%zuB):\n", i, g_smallmid_class_sizes[i]);
        fprintf(stderr, "    Total SS: %zu\n", pool->total_ss);
        fprintf(stderr, "    Allocs:   %lu\n", pool->alloc_count);
        fprintf(stderr, "    Refills:  %lu\n", pool->refill_count);
    }
    fprintf(stderr, "=======================================\n\n");
}
#endif
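/*
 * Editor's note (illustrative sketch): the refill path above tags each block
 * with a 1-byte header (0xb0 | class_idx) immediately before the user pointer,
 * so a free path can recover the size class from ptr[-1] without any lookup.
 * The helper below only demonstrates that decode; how the real free path
 * pushes blocks back onto meta->freelist lives outside this file and is not
 * reproduced here. The name smallmid_header_class_example is hypothetical.
 */
static int __attribute__((unused))
smallmid_header_class_example(void* user_ptr) {
    uint8_t header = *((uint8_t*)user_ptr - 1);

    // Upper nibble must match the 0xb0 tag written by smallmid_refill_batch()
    if ((header & 0xf0) != 0xb0) {
        return -1;  // Not a Small-Mid SuperSlab block
    }
    return header & 0x0f;  // class_idx is encoded in the low nibble
}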