// tls_sll_drain_box.h - Box: TLS SLL Periodic Drain
// Purpose: Restore slab accounting consistency by periodically draining TLS SLL to slab freelists
//
// Problem:
//   - Fast free path (hak_tiny_free_fast_v2) pushes to TLS SLL without decrementing meta->used
//   - Slabs never appear empty → SuperSlabs never freed → LRU cache never populated
//   - Result: 6,455 mmap/munmap syscalls per 200K iterations (74.8% time)
//
// Solution:
//   - Every N frees (default: 1024), drain TLS SLL → slab freelist
//   - This path decrements meta->used properly via tiny_free_local_box()
//   - Enables empty detection → SuperSlabs freed → LRU cache functional
//
// Expected Impact:
//   - mmap/munmap: 6,455 → ~100 calls (-96-97%)
//   - Throughput: 563K → 8-10M ops/s (+1,300-1,700%)
//
// References:
//   - Root cause: PHASE9_LRU_ARCHITECTURE_ISSUE.md
//   - Design: Option B (Periodic TLS SLL Drain)

#pragma once

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include "tls_sll_box.h"           // TLS SLL operations (tls_sll_pop)
#include "../hakmem_tiny_config.h"  // TINY_NUM_CLASSES
#include "../hakmem_super_registry.h"  // SuperSlab lookup
#include "free_local_box.h"         // tiny_free_local_box (decrements meta->used)

// ========== ENV Configuration ==========

// Report whether periodic TLS SLL draining is switched on.
// ENV: HAKMEM_TINY_SLL_DRAIN_ENABLE - a value whose first character is '0'
//      disables draining; unset or any other value enables it (default: 1).
// The environment is read on the first call only; the answer is cached in a
// function-local static and a one-line notice is written to stderr at that time.
// Returns: 1 if draining is enabled, 0 if it is disabled.
static inline int tls_sll_drain_is_enabled(void) {
    static int cached_state = -1;  // -1 = environment not consulted yet

    // Common case: already resolved, just hand back the cached answer.
    if (__builtin_expect(cached_state != -1, 1)) {
        return cached_state;
    }

    const char* setting = getenv("HAKMEM_TINY_SLL_DRAIN_ENABLE");
    int turned_off = (setting != NULL) && (setting[0] == '0');

    if (turned_off) {
        cached_state = 0;
        fprintf(stderr, "[TLS_SLL_DRAIN] Drain DISABLED via ENV\n");
    } else {
        cached_state = 1;
        fprintf(stderr, "[TLS_SLL_DRAIN] Drain ENABLED (default)\n");
    }

    return cached_state;
}

// Get drain interval (number of frees before triggering drain).
// ENV: HAKMEM_TINY_SLL_DRAIN_INTERVAL=N, accepted range 1..65536 (default: 1024)
//
// The environment is read on the first call only; the result is cached in a
// function-local static (0 is the "not resolved yet" sentinel, which is safe
// because a resolved interval is always >= 1). A one-line notice describing the
// chosen value is written to stderr when the value is resolved.
//
// Returns: the drain interval, always in 1..65536.
static inline uint32_t tls_sll_drain_get_interval(void) {
    enum {
        TLS_SLL_DRAIN_INTERVAL_DEFAULT = 1024,
        TLS_SLL_DRAIN_INTERVAL_MAX = 65536
    };

    static uint32_t g_drain_interval = 0;
    if (__builtin_expect(g_drain_interval == 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_INTERVAL");
        if (env && *env) {
            // strtol() instead of atoi(): atoi() has undefined behavior when the
            // value does not fit in an int (in practice it can wrap a huge number
            // into a small "valid" interval). strtol() saturates at
            // LONG_MIN/LONG_MAX, so out-of-range input fails the range check
            // below and falls back to the default.
            long val = strtol(env, NULL, 10);
            if (val > 0 && val <= TLS_SLL_DRAIN_INTERVAL_MAX) {
                g_drain_interval = (uint32_t)val;
                fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (from ENV)\n", g_drain_interval);
            } else {
                g_drain_interval = TLS_SLL_DRAIN_INTERVAL_DEFAULT;
                fprintf(stderr, "[TLS_SLL_DRAIN] Invalid ENV value, using default=1024\n");
            }
        } else {
            // Variable unset or empty: use the built-in default.
            g_drain_interval = TLS_SLL_DRAIN_INTERVAL_DEFAULT;
            fprintf(stderr, "[TLS_SLL_DRAIN] Interval=%u (default)\n", g_drain_interval);
        }
    }
    return g_drain_interval;
}

// ========== Drain Counter (TLS) ==========

// Per-class drain counter (thread-local, one slot per size class).
// tiny_tls_sll_try_drain() increments the slot for the class being freed and
// resets it to 0 after it triggers a drain.
// NOTE(review): this is a `static` definition in a header, so each translation
// unit that includes this file gets its own private counters - confirm that
// the header is included from a single TU or that per-TU counters are intended.
static __thread uint32_t g_tls_sll_drain_counter[TINY_NUM_CLASSES] = {0};

// Debug statistics for the calling thread, accumulated by tiny_tls_sll_drain():
//   total_calls  - drain passes that got past the early-return checks
//   total_blocks - blocks handed to tiny_free_local_box() across all classes
static __thread uint64_t g_tls_sll_drain_total_calls = 0;
static __thread uint64_t g_tls_sll_drain_total_blocks = 0;

// ========== Drain Implementation (Skeleton) ==========

// Box: TLS SLL Drain
// Purpose: Pop blocks from the calling thread's TLS SLL for one size class and
//          hand each one to tiny_free_local_box(); afterwards release any slab
//          whose meta->used has reached 0 to the shared pool.
//
// Flow:
//   1. Read g_tls_sll_count[class_idx] and compute how many blocks to drain
//   2. For each block popped via tls_sll_pop():
//      a. Resolve the SuperSlab via hak_super_lookup() and check its magic
//      b. Resolve and range-check the slab index
//      c. Call tiny_free_local_box() (per the file header, this is the path
//         that decrements meta->used)
//      d. If meta->used == 0 afterwards, call shared_pool_release_slab()
//   3. Update the per-thread drain statistics
//
// Args:
//   class_idx: Size class to drain; out-of-range values return 0
//   batch_size: Max blocks to drain (0 = drain all blocks currently counted)
//
// Returns: Number of blocks passed to tiny_free_local_box(). Blocks that were
//          popped but skipped (invalid SuperSlab or slab index) are not counted.
//
// Debug: set HAKMEM_TINY_SLL_DRAIN_DEBUG to a non-empty value not starting with
//        '0' to trace START/SKIP/EMPTY/END events on stderr.
//
// NOTE(review): skipped blocks have already been popped from the TLS SLL and
//               are not pushed back anywhere in this function - confirm that
//               dropping them is intended.
static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
    // Reject class indices that would index outside the per-class arrays
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return 0;
    }

    // Sanity check: TLS SLL count
    extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
    uint32_t avail = g_tls_sll_count[class_idx];
    if (avail == 0) {
        return 0;  // Nothing to drain
    }

    // Drain up to batch_size blocks (0 = drain all)
    // i.e. to_drain = min(avail, batch_size), with batch_size == 0 meaning "no cap"
    uint32_t to_drain = (batch_size == 0) ? avail : (avail < batch_size ? avail : batch_size);
    uint32_t drained = 0;

    // Debug logging: flag is resolved from the environment on first use and
    // cached in a function-local static (-1 = not resolved yet)
    static int g_debug = -1;
    if (__builtin_expect(g_debug == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_SLL_DRAIN_DEBUG");
        g_debug = (env && *env && *env != '0') ? 1 : 0;
    }

    if (g_debug) {
        fprintf(stderr, "[TLS_SLL_DRAIN] START: class=%d avail=%u to_drain=%u\n",
                class_idx, avail, to_drain);
    }

    // External functions needed for drain
    extern SuperSlab* hak_super_lookup(void* ptr);  // SuperSlab registry lookup
    extern const size_t g_tiny_class_sizes[TINY_NUM_CLASSES];  // Block sizes (const); not referenced below

    // Get thread ID once (used for all blocks)
    // Note: Use pthread_self() directly since tiny_self_u32() is static inline
    // NOTE(review): pthread_t is truncated to 32 bits here; presumably this must
    // match the thread id representation tiny_free_local_box() expects - confirm.
    uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self();

    // Drain loop: Pop blocks from TLS SLL and push to slab freelist
    for (uint32_t i = 0; i < to_drain; i++) {
        void* base = NULL;
        if (!tls_sll_pop(class_idx, &base)) {
            // Pop failed before to_drain blocks were obtained
            // (list shorter than g_tls_sll_count indicated) - stop early
            break;
        }

        // Resolve SuperSlab/Slab (like slow path does)
        SuperSlab* ss = hak_super_lookup(base);
        if (!ss || ss->magic != SUPERSLAB_MAGIC) {
            // Invalid SuperSlab - skip this block (it is not counted in `drained`)
            if (g_debug) {
                fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid SuperSlab)\n",
                        class_idx, base);
            }
            continue;
        }

        // Get slab index
        int slab_idx = slab_index_for(ss, base);
        if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
            // Invalid slab index - skip this block (it is not counted in `drained`)
            if (g_debug) {
                fprintf(stderr, "[TLS_SLL_DRAIN] SKIP: class=%d base=%p (invalid slab_idx=%d)\n",
                        class_idx, base, slab_idx);
            }
            continue;
        }

        // Get slab metadata
        TinySlabMeta* meta = &ss->slabs[slab_idx];

        // Convert BASE → USER pointer (add 1 byte header offset)
        // Phase E1: ALL classes (C0-C7) have 1-byte header
        void* user_ptr = (char*)base + 1;

        // Call tiny_free_local_box() to:
        // 1. Push block to slab freelist
        // 2. Decrement meta->used (THIS IS THE KEY!)
        tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);

        drained++;

        // CRITICAL: Check if slab became empty and release to shared pool
        // (This logic is in tiny_superslab_free.inc.h:223-236)
        // Must run after tiny_free_local_box() so meta->used reflects this free.
        if (meta->used == 0) {
            // Debug: Log when used reaches 0 (slab becomes empty)
            if (g_debug) {
                fprintf(stderr, "[TLS_SLL_DRAIN] EMPTY: class=%d ss=%p slab=%d (meta->used=0) -> releasing to pool\n",
                        class_idx, (void*)ss, slab_idx);
            }

            // Release empty slab to shared pool
            // This will eventually free the SuperSlab and add to LRU cache
            extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
            shared_pool_release_slab(ss, slab_idx);
        }
    }

    // `remaining` is re-read from g_tls_sll_count after the loop
    if (g_debug && drained > 0) {
        fprintf(stderr, "[TLS_SLL_DRAIN] END: class=%d drained=%u remaining=%u\n",
                class_idx, drained, g_tls_sll_count[class_idx]);
    }

    // Update stats (per-thread; counted even when every popped block was skipped)
    g_tls_sll_drain_total_calls++;
    g_tls_sll_drain_total_blocks += drained;

    return drained;
}

// ========== Drain Trigger (Called from Fast Free Path) ==========

// Box: Try Drain (with counter trigger)
// Purpose: Check drain counter and trigger drain if interval reached
//
// Flow:
//   1. Increment drain counter for this class
//   2. If counter >= interval, trigger drain and reset counter
//   3. Otherwise, do nothing (fast path continues)
//
// Args:
//   class_idx: Size class that was just freed
//
// Returns: Number of blocks drained (0 if no drain)
static inline uint32_t tiny_tls_sll_try_drain(int class_idx) {
    // Check if drain is enabled
    if (__builtin_expect(!tls_sll_drain_is_enabled(), 0)) {
        return 0;
    }

    // Increment counter
    g_tls_sll_drain_counter[class_idx]++;

    // Check if interval reached
    uint32_t interval = tls_sll_drain_get_interval();
    if (__builtin_expect(g_tls_sll_drain_counter[class_idx] >= interval, 0)) {
        // Trigger drain (drain ALL blocks to enable empty detection)
        // batch_size=0 means drain all available blocks
        uint32_t drained = tiny_tls_sll_drain(class_idx, 0);

        // Reset counter
        g_tls_sll_drain_counter[class_idx] = 0;

        return drained;
    }

    return 0;  // No drain triggered
}

// ========== Debug Stats (Destructor) ==========

#if !HAKMEM_BUILD_RELEASE
// Print drain statistics to stderr when the program/library is torn down.
// Registered as a destructor, so it is invoked automatically; nothing calls it
// directly. Output is suppressed when no drain was recorded.
//
// The counters are __thread variables, so only the values belonging to the
// thread that runs the destructor are reported.
static void tls_sll_drain_print_stats(void) __attribute__((destructor));
static void tls_sll_drain_print_stats(void) {
    if (g_tls_sll_drain_total_calls > 0) {
        // uint64_t is not `unsigned long` on every target, so "%lu" would be a
        // format/argument mismatch there. Cast to unsigned long long and use
        // "%llu", which is correct on all targets.
        fprintf(stderr, "[TLS_SLL_DRAIN_STATS] Total drains: %llu, Total blocks: %llu, Avg: %.2f\n",
                (unsigned long long)g_tls_sll_drain_total_calls,
                (unsigned long long)g_tls_sll_drain_total_blocks,
                (double)g_tls_sll_drain_total_blocks / (double)g_tls_sll_drain_total_calls);
    }
}
#endif