#pragma once

#include <stdio.h>
#include <stdlib.h>

// Phase 1: SuperSlab Registry - Thread-safe O(1) lookup for SuperSlab ownership
//
// Purpose: Replace mincore() syscall (50-100ns) with userspace hash table lookup
// Performance: ~5-10ns per lookup, 10-20x faster than mincore()
//
// Thread Safety:
//   - Readers: Lock-free with acquire semantics
//   - Writers: Mutex-protected with release semantics
//   - Publish order: ss initialization → release fence → base write
//   - Unpublish order: base = 0 (release) → munmap (prevents reader deref)

#include <stdatomic.h>
#include <pthread.h>
#include <stdint.h>
#include "hakmem_tiny_superslab.h"  // For SuperSlab and SUPERSLAB_MAGIC

// Registry configuration
// The table holds 262144 (2^18) slots. Earlier revisions grew it from 4096 to
// 32768 to avoid registry exhaustion under high-churn microbenchmarks (e.g.,
// larson with many active SuperSlabs); the current value is 8x larger again.
// The size must stay a power of two so that SUPER_REG_MASK can replace a
// modulo with a single AND.
#define SUPER_REG_SIZE      262144   // Slot count; power of 2 (2^18) for fast modulo
#define SUPER_REG_MASK      (SUPER_REG_SIZE - 1)   // index = hash & SUPER_REG_MASK
#define SUPER_MAX_PROBE     8      // Linear probing limit (slots examined per hash position)

// Registry entry: base address → SuperSlab pointer mapping.
//
// One slot of the open-addressed table g_super_reg. A slot whose `base` is 0
// is treated as empty by hak_super_lookup().
//
// NOTE(review): `base` is declared as a plain uintptr_t, yet hak_super_lookup()
// reads it through a cast to `_Atomic uintptr_t*`. This relies on plain and
// atomic uintptr_t sharing size/representation on the target toolchain —
// confirm, or declare the field _Atomic.
// NOTE(review): `lg_size` is read without atomics by hak_super_lookup();
// presumably writers store it before publishing `base` — verify against the
// register/unregister implementation.
typedef struct {
    uintptr_t base;           // Aligned base address (1MB or 2MB, 0 = empty slot)
    _Atomic(SuperSlab*) ss;   // Atomic SuperSlab pointer (MT-safe, prevents TOCTOU race)
    uint8_t   lg_size;        // Phase 8.3: ACE - log2 of SuperSlab size (20=1MB, 21=2MB)
    uint8_t   _pad[7];        // Padding to 24 bytes total (assumes 8-byte pointers)
} SuperRegEntry;

// Global registry state (lock-free reads, mutex-protected writes).
// These are declarations only; the definitions live in another translation unit.
//   g_super_reg             - the hash table itself, SUPER_REG_SIZE slots.
//   g_super_reg_lock        - mutex intended to serialize writers
//                             (register/unregister); readers do not take it.
//   g_super_reg_initialized - non-zero once the registry is usable;
//                             hak_super_lookup() returns NULL while it is 0.
//                             NOTE(review): read with a plain (non-atomic)
//                             load — confirm it is set before other threads
//                             can call lookup.
extern SuperRegEntry g_super_reg[SUPER_REG_SIZE];
extern pthread_mutex_t g_super_reg_lock;
extern int g_super_reg_initialized;

// Initialize registry (call once at startup).
// Until this has run, hak_super_lookup() sees g_super_reg_initialized == 0
// and reports every pointer as "not found".
// NOTE(review): presumably this sets g_super_reg_initialized; the
// implementation is not visible from this header.
void hak_super_registry_init(void);

// Map an aligned SuperSlab base address to its home slot in the registry.
//   base    - SuperSlab base address, aligned to (1 << lg_size) bytes
//   lg_size - log2 of the SuperSlab size (Phase 8.3 ACE: 20 = 1MB, 21 = 2MB)
// Returns a table index in [0, SUPER_REG_SIZE).
static inline int hak_super_hash(uintptr_t base, int lg_size) {
    // Drop the always-zero alignment bits so that consecutive SuperSlabs
    // land in consecutive slots, then wrap into the table.
    uintptr_t slab_number = base >> lg_size;
    uintptr_t slot = slab_number & SUPER_REG_MASK;
    return (int)slot;
}

// Lookup SuperSlab by pointer (lock-free, thread-safe)
//
//   ptr - any address; it does not have to point into a SuperSlab.
//
// Returns: SuperSlab* if ptr lies inside a registered SuperSlab whose magic
//          is still valid, NULL otherwise (registry not initialized, no
//          matching entry, entry being unregistered, or magic mismatch).
//
// Phase 8.3: ACE - Supports both 1MB and 2MB SuperSlabs. The pointer is
// rounded down to each candidate alignment in turn (1MB first, the Step 1
// default) and each candidate base is probed separately.
static inline SuperSlab* hak_super_lookup(void* ptr) {
    if (!g_super_reg_initialized) return NULL;

    for (int lg = 20; lg <= 21; lg++) {
        // Build the mask in uintptr_t so the shift width never depends on
        // the size of unsigned long.
        uintptr_t mask = ((uintptr_t)1 << lg) - 1;
        uintptr_t base = (uintptr_t)ptr & ~mask;

        // base == 0 is the "empty slot" sentinel, so no SuperSlab can be
        // registered there. Skipping it keeps NULL / very low addresses from
        // matching a vacant slot that still carries stale lg_size/ss data.
        if (base == 0) continue;

        int h = hak_super_hash(base, lg);

        // Linear probing with acquire semantics
        for (int i = 0; i < SUPER_MAX_PROBE; i++) {
            SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
            uintptr_t b = atomic_load_explicit((_Atomic uintptr_t*)&e->base,
                                               memory_order_acquire);

            // An empty slot must NOT terminate the probe: unregister clears
            // base to 0 in place, which leaves holes in the middle of a
            // probe chain. Entries inserted past such a hole are still live,
            // so the whole (bounded) probe window is always examined.
            //
            // Match both base address AND lg_size
            if (b != base || e->lg_size != lg) continue;

            // Atomic load to prevent TOCTOU race with unregister
            SuperSlab* ss = atomic_load_explicit(&e->ss, memory_order_acquire);
            if (!ss) return NULL;  // Entry cleared by unregister

            // CRITICAL: Check magic BEFORE returning pointer to prevent TOCTOU
            // Race scenario: lookup → free (clear magic, munmap) → caller checks magic
            // Fix: Check magic HERE while we're certain ss is still registered
            if (ss->magic != SUPERSLAB_MAGIC) return NULL;  // Being freed

            return ss;
        }
    }
    return NULL;  // Not found
}

// Register SuperSlab (mutex-protected, called after SuperSlab initialization).
//   base - aligned base address of the SuperSlab; must be non-zero because
//          0 marks an empty slot in the registry.
//   ss   - the fully initialized SuperSlab to publish for lock-free readers.
// Returns: 1 on success, 0 if registry is full
// NOTE(review): "full" presumably means no free slot within SUPER_MAX_PROBE
// of the hash position — confirm against the implementation.
int hak_super_register(uintptr_t base, SuperSlab* ss);

// Unregister SuperSlab (mutex-protected, MUST call before munmap).
//   base - aligned base address previously passed to hak_super_register().
// Critical: base = 0 happens BEFORE munmap to prevent reader segfault.
// Because the slot is cleared in place, lookups must tolerate empty slots
// in the middle of a probe sequence.
void hak_super_unregister(uintptr_t base);

// Debug: Get registry statistics
// Snapshot structure filled in by hak_super_registry_stats().
// NOTE(review): field meanings below are inferred from the names; the code
// that populates them is not visible from this header — confirm.
typedef struct {
    int total_slots;      // presumably the table capacity (SUPER_REG_SIZE)
    int used_slots;       // presumably the number of slots with base != 0
    int max_probe_depth;  // presumably the longest probe distance observed
} SuperRegStats;

// Fill *stats with current registry statistics (debug aid).
//   stats - output structure supplied by the caller.
// NOTE(review): NULL handling and locking behavior are defined by the
// implementation, which is not visible from this header.
void hak_super_registry_stats(SuperRegStats* stats);