Files
hakmem/core/hakmem_smallmid_superslab.h
Moe Charm (CI) 8786d58fc8 Phase 17-2: Small-Mid Dedicated SuperSlab Backend (experiment result: 70% page faults, no performance improvement)
Summary:
========
Phase 17-2 implements dedicated SuperSlab backend for Small-Mid allocator (256B-1KB).
Result: No performance improvement (-0.9%), worse than Phase 17-1 (+0.3%).
Root cause: 70% of CPU time spent in page faults (diagnosed via ChatGPT + perf profiling).
Conclusion: The dedicated Small-Mid layer strategy failed; optimizing the Tiny SuperSlab is the way forward.

Implementation:
===============
1. Dedicated Small-Mid SuperSlab pool (1MB, 16 slabs/SS)
   - Separate from Tiny SuperSlab (no competition)
   - Batch refill (8-16 blocks per TLS refill)
   - Direct 0xb0 header writes (no Tiny delegation)

2. Backend architecture
   - SmallMidSuperSlab: 1MB aligned region, fast ptr→SS lookup
   - SmallMidSlabMeta: per-slab metadata (capacity/used/carved/freelist)
   - SmallMidSSHead: per-class pool with LRU tracking

3. Batch refill implementation
   - smallmid_refill_batch(): 8-16 blocks/call (vs 1 in Phase 17-1)
   - Freelist priority → bump allocation fallback
   - Auto SuperSlab expansion when exhausted
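The refill order described above (freelist first, bump-carving as fallback) can be sketched as follows. `SlabMeta` and `refill_batch` are illustrative names for this sketch, not the real hakmem API:

```c
#include <stddef.h>
#include <stdint.h>

/* Minimal sketch of the batch-refill strategy: pop freed blocks from the
 * slab freelist first, then carve untouched blocks by bump allocation. */
typedef struct {
    void*    freelist;   /* singly linked list of freed blocks */
    uint16_t carved;     /* blocks already handed out by bump allocation */
    uint16_t capacity;   /* total blocks in the 64KB slab */
} SlabMeta;

static int refill_batch(SlabMeta* m, uint8_t* slab_base, size_t blk_size,
                        void** out, int want) {
    int n = 0;
    /* 1) Freelist priority: recycle freed blocks first */
    while (n < want && m->freelist) {
        out[n++] = m->freelist;
        m->freelist = *(void**)m->freelist;  /* next pointer stored in block */
    }
    /* 2) Bump-allocation fallback: carve fresh blocks */
    while (n < want && m->carved < m->capacity) {
        out[n++] = slab_base + (size_t)m->carved++ * blk_size;
    }
    return n;  /* 0 signals "slab exhausted": expand the SuperSlab */
}
```

Returning 0 is what triggers the auto-expansion path, which in Phase 17-2 meant a fresh mmap — the bottleneck discussed below.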

Files Added:
============
- core/hakmem_smallmid_superslab.h: SuperSlab metadata structures
- core/hakmem_smallmid_superslab.c: Backend implementation (~450 lines)

Files Modified:
===============
- core/hakmem_smallmid.c: Removed Tiny delegation, added batch refill
- Makefile: Added hakmem_smallmid_superslab.o to build
- CURRENT_TASK.md: Phase 17 completion record + Phase 18 plan

A/B Benchmark Results:
======================
| Size   | Phase 17-1 (ON) | Phase 17-2 (ON) | Delta    | vs Baseline |
|--------|-----------------|-----------------|----------|-------------|
| 256B   | 6.06M ops/s     | 5.84M ops/s     | -3.6%    | -4.1%       |
| 512B   | 5.91M ops/s     | 5.86M ops/s     | -0.8%    | +1.2%       |
| 1024B  | 5.54M ops/s     | 5.44M ops/s     | -1.8%    | +0.4%       |
| Avg    | 5.84M ops/s     | 5.71M ops/s     | -2.2%    | -0.9%       |

Performance Analysis (ChatGPT + perf):
======================================
Frontend (TLS/batch refill): OK
   - Only 30% of CPU time
   - Batch refill logic is efficient
   - Direct 0xb0 header writes work correctly

Backend (SuperSlab allocation): BOTTLENECK
   - 70% of CPU time in asm_exc_page_fault
   - mmap(1MB) → kernel page allocation → very slow
   - A new SuperSlab is allocated on every benchmark run
   - No warm SuperSlab reuse (used counter never decrements)

Root Cause:
===========
Small-Mid allocates new SuperSlabs frequently:
  alloc → TLS miss → refill → new SuperSlab → mmap(1MB) → page fault (70%)

Tiny reuses warm SuperSlabs:
  alloc → TLS miss → refill → existing warm SuperSlab → no page fault

Key Finding: the 70% page-fault share shows that the SuperSlab layer needs optimization,
NOT the frontend layer (the TLS/batch refill design is correct).

Lessons Learned:
================
1. The dedicated Small-Mid layer strategy failed (Phase 17-1: +0.3%, Phase 17-2: -0.9%)
2. The frontend implementation succeeded (30% of CPU time; batch refill works)
3. 🔥 70% page-fault time = SuperSlab allocation bottleneck
4. Tiny (6.08M ops/s) is already well optimized and hard to beat
5. Layer separation alone doesn't improve performance; backend optimization is needed

Next Steps (Phase 18):
======================
ChatGPT recommendation: Optimize Tiny SuperSlab (NOT Small-Mid specific layer)

Box SS-Reuse (Priority 1):
- Implement meta->freelist reuse (currently bump-only)
- Detect slab empty → return to shared_pool
- Reuse same SuperSlab for longer (reduce page faults)
- Target: 70% page fault → 5-10%, 2-4x improvement
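A hypothetical sketch of the Box SS-Reuse free path: freed blocks go back onto the slab freelist (so refills stop carving fresh pages), and a slab whose used count reaches zero becomes a candidate for return to the shared pool. All names here are illustrative — this is the Phase 18 plan, not existing code:

```c
#include <stdint.h>

typedef struct {
    void*    freelist;  /* intrusive freelist of freed blocks */
    uint16_t used;      /* blocks currently allocated from this slab */
} ReuseSlabMeta;

/* Push a freed block onto the slab freelist.
 * Returns 1 when the slab became completely empty, i.e. it is now a
 * candidate for return to the shared pool (reducing new-mmap page faults). */
static int free_block(ReuseSlabMeta* m, void* blk) {
    *(void**)blk = m->freelist;  /* store next pointer inside the block */
    m->freelist = blk;
    return --m->used == 0;
}
```

The key contrast with Phase 17-2 is that `used` actually decrements, so warm SuperSlabs stay reusable instead of forcing expansion.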

Box SS-Prewarm (Priority 2):
- Pre-allocate SuperSlabs per class (Phase 11: +6.4%)
- Concentrate page faults at benchmark start
- Benchmark-only optimization

Small-Mid Implementation Status:
=================================
- ENV=0 by default (zero overhead; the branch predictor learns the always-disabled check)
- Complete separation from Tiny (no interference)
- Valuable as experimental record ("why dedicated layer failed")
- Can be removed later if needed (not blocking Tiny optimization)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 03:21:13 +09:00

/**
 * hakmem_smallmid_superslab.h - Small-Mid SuperSlab Backend (Phase 17-2)
 *
 * Purpose: Dedicated SuperSlab pool for Small-Mid allocator (256B-1KB).
 * Separate from Tiny SuperSlab to avoid competition and optimize for
 * mid-range sizes.
 *
 * Design:
 * - SuperSlab size: 1MB (aligned for fast pointer→slab lookup)
 * - Slab size: 64KB (same as Tiny for consistency)
 * - Size classes: 3 (256B/512B/1KB)
 * - Blocks per slab: 256/128/64
 * - Refill strategy: Batch 8-16 blocks per TLS refill
 *
 * Created: 2025-11-16 (Phase 17-2)
 */
#ifndef HAKMEM_SMALLMID_SUPERSLAB_H
#define HAKMEM_SMALLMID_SUPERSLAB_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdatomic.h>
#include <pthread.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Configuration
// ============================================================================
#define SMALLMID_SUPERSLAB_SIZE (1024 * 1024) // 1MB
#define SMALLMID_SLAB_SIZE (64 * 1024) // 64KB
#define SMALLMID_SLABS_PER_SS (SMALLMID_SUPERSLAB_SIZE / SMALLMID_SLAB_SIZE) // 16
#define SMALLMID_SS_ALIGNMENT SMALLMID_SUPERSLAB_SIZE // 1MB alignment
#define SMALLMID_SS_MAGIC 0x534D5353u // 'SMSS'
// Blocks per slab (per size class)
#define SMALLMID_BLOCKS_256B 256 // 64KB / 256B
#define SMALLMID_BLOCKS_512B 128 // 64KB / 512B
#define SMALLMID_BLOCKS_1KB 64 // 64KB / 1KB
// Batch refill sizes (per size class)
#define SMALLMID_REFILL_BATCH_256B 16
#define SMALLMID_REFILL_BATCH_512B 12
#define SMALLMID_REFILL_BATCH_1KB 8
// ============================================================================
// Data Structures
// ============================================================================
/**
 * SmallMidSlabMeta - Metadata for a single 64KB slab
 *
 * Each slab is dedicated to one size class and contains:
 * - Freelist: linked list of freed blocks
 * - Used counter: number of allocated blocks
 * - Capacity: total blocks available
 * - Class index: which size class (0=256B, 1=512B, 2=1KB)
 */
typedef struct SmallMidSlabMeta {
    void*    freelist;    // Freelist head (NULL if empty)
    uint16_t used;        // Blocks currently allocated
    uint16_t capacity;    // Total blocks in slab
    uint16_t carved;      // Blocks carved (bump allocation)
    uint8_t  class_idx;   // Size class (0/1/2)
    uint8_t  flags;       // Status flags (active/inactive)
} SmallMidSlabMeta;
// Slab status flags
#define SMALLMID_SLAB_INACTIVE 0x00
#define SMALLMID_SLAB_ACTIVE 0x01
#define SMALLMID_SLAB_FULL 0x02
/**
 * SmallMidSuperSlab - 1MB region containing 16 slabs of 64KB each
 *
 * Structure:
 * - Header: metadata, counters, LRU tracking
 * - Slabs array: 16 × SmallMidSlabMeta
 * - Data region: 16 × 64KB = 1MB of block storage
 *
 * Alignment: 1MB boundary for fast pointer→SuperSlab lookup
 * Lookup formula: ss = (void*)((uintptr_t)ptr & ~(SMALLMID_SUPERSLAB_SIZE - 1))
 */
typedef struct SmallMidSuperSlab {
    uint32_t magic;         // Validation magic (SMALLMID_SS_MAGIC)
    uint8_t  num_slabs;     // Number of slabs (16)
    uint8_t  active_slabs;  // Count of active slabs
    uint16_t _pad0;

    // Reference counting
    _Atomic uint32_t refcount;      // SuperSlab refcount (for safe deallocation)
    _Atomic uint32_t total_active;  // Total active blocks across all slabs

    // Slab tracking bitmaps
    uint16_t slab_bitmap;    // Active slabs (bit i = slab i active)
    uint16_t nonempty_mask;  // Slabs with available blocks

    // LRU tracking (for lazy deallocation)
    uint64_t last_used_ns;   // Last allocation/free timestamp
    uint32_t generation;     // LRU generation counter

    // Linked lists
    struct SmallMidSuperSlab* next;  // Per-class chain
    struct SmallMidSuperSlab* lru_next;
    struct SmallMidSuperSlab* lru_prev;

    // Per-slab metadata (16 slabs × ~20 bytes = 320 bytes)
    SmallMidSlabMeta slabs[SMALLMID_SLABS_PER_SS];

    // Data region follows header (aligned to slab boundary)
    // Total: header (~400 bytes) + data (1MB) = 1MB aligned region
} SmallMidSuperSlab;
/**
 * SmallMidSSHead - Per-class SuperSlab pool head
 *
 * Each size class (256B/512B/1KB) has its own pool of SuperSlabs.
 * This allows:
 * - Fast allocation from class-specific pool
 * - LRU-based lazy deallocation
 * - Lock-free TLS refill (per-thread current_ss)
 */
typedef struct SmallMidSSHead {
    uint8_t class_idx;  // Size class index (0/1/2)
    uint8_t _pad0[3];

    // SuperSlab pool
    _Atomic size_t total_ss;        // Total SuperSlabs allocated
    SmallMidSuperSlab* first_ss;    // First SuperSlab in chain
    SmallMidSuperSlab* current_ss;  // Current allocation target

    // LRU list (for lazy deallocation)
    SmallMidSuperSlab* lru_head;
    SmallMidSuperSlab* lru_tail;

    // Lock for expansion/deallocation
    pthread_mutex_t lock;

    // Statistics
    _Atomic uint64_t alloc_count;
    _Atomic uint64_t refill_count;
    _Atomic uint64_t ss_alloc_count;  // SuperSlab allocations
    _Atomic uint64_t ss_free_count;   // SuperSlab deallocations
} SmallMidSSHead;
// ============================================================================
// Global State
// ============================================================================
/**
 * g_smallmid_ss_pools - Per-class SuperSlab pools
 *
 * Array of 3 pools (one per size class: 256B/512B/1KB).
 * Each pool manages its own SuperSlabs independently.
 */
extern SmallMidSSHead g_smallmid_ss_pools[3];
// ============================================================================
// API Functions
// ============================================================================
/**
 * smallmid_superslab_init - Initialize Small-Mid SuperSlab system
 *
 * Call once at startup (thread-safe, idempotent).
 * Initializes per-class pools and locks.
 */
void smallmid_superslab_init(void);

/**
 * smallmid_superslab_alloc - Allocate a new 1MB SuperSlab
 *
 * @param class_idx Size class index (0/1/2)
 * @return Pointer to new SuperSlab, or NULL on OOM
 *
 * Allocates a 1MB aligned region via mmap, initializes header and metadata.
 * Thread-safety: Callable from any thread (uses per-class lock)
 */
SmallMidSuperSlab* smallmid_superslab_alloc(int class_idx);

/**
 * smallmid_superslab_free - Free a SuperSlab
 *
 * @param ss SuperSlab to free
 *
 * Returns the SuperSlab to the OS via munmap.
 * Thread-safety: Caller must ensure no concurrent access to ss
 */
void smallmid_superslab_free(SmallMidSuperSlab* ss);

/**
 * smallmid_slab_init - Initialize a slab within a SuperSlab
 *
 * @param ss SuperSlab containing the slab
 * @param slab_idx Slab index (0-15)
 * @param class_idx Size class (0=256B, 1=512B, 2=1KB)
 *
 * Sets up slab metadata and marks it as active.
 */
void smallmid_slab_init(SmallMidSuperSlab* ss, int slab_idx, int class_idx);

/**
 * smallmid_refill_batch - Batch refill TLS freelist from SuperSlab
 *
 * @param class_idx Size class index (0/1/2)
 * @param batch_out Output array for blocks (caller-allocated)
 * @param batch_max Max blocks to refill (8-16 typically)
 * @return Number of blocks refilled (0 on failure)
 *
 * Performance-critical path:
 * - Tries to pop batch_max blocks from the current slab's freelist
 * - Falls back to bump allocation if the freelist is empty
 * - Allocates a new SuperSlab if the current one is full
 * - Expected cost: 5-8 instructions per call (amortized)
 *
 * Thread-safety: Lock-free for single-threaded TLS refill
 */
int smallmid_refill_batch(int class_idx, void** batch_out, int batch_max);
/**
 * smallmid_superslab_lookup - Fast pointer→SuperSlab lookup
 *
 * @param ptr Block pointer (user or base)
 * @return SuperSlab containing ptr, or NULL if invalid
 *
 * Uses 1MB alignment for O(1) mask-based lookup:
 * ss = (SmallMidSuperSlab*)((uintptr_t)ptr & ~(SMALLMID_SUPERSLAB_SIZE - 1))
 */
static inline SmallMidSuperSlab* smallmid_superslab_lookup(void* ptr) {
    uintptr_t addr = (uintptr_t)ptr;
    uintptr_t ss_addr = addr & ~(uintptr_t)(SMALLMID_SUPERSLAB_SIZE - 1);
    SmallMidSuperSlab* ss = (SmallMidSuperSlab*)ss_addr;
    // Validate magic
    if (ss->magic != SMALLMID_SS_MAGIC) {
        return NULL;
    }
    return ss;
}
/**
 * smallmid_slab_index - Get slab index from pointer
 *
 * @param ss SuperSlab
 * @param ptr Block pointer
 * @return Slab index (0-15), or -1 if out of bounds
 */
static inline int smallmid_slab_index(SmallMidSuperSlab* ss, void* ptr) {
    uintptr_t ss_base = (uintptr_t)ss;
    uintptr_t ptr_addr = (uintptr_t)ptr;
    uintptr_t offset = ptr_addr - ss_base;
    if (offset >= SMALLMID_SUPERSLAB_SIZE) {
        return -1;
    }
    int slab_idx = (int)(offset / SMALLMID_SLAB_SIZE);
    return (slab_idx < SMALLMID_SLABS_PER_SS) ? slab_idx : -1;
}
// ============================================================================
// Statistics (Debug)
// ============================================================================
#ifdef HAKMEM_SMALLMID_SS_STATS
typedef struct SmallMidSSStats {
    uint64_t total_ss_alloc;       // Total SuperSlab allocations
    uint64_t total_ss_free;        // Total SuperSlab frees
    uint64_t total_refills;        // Total batch refills
    uint64_t total_blocks_carved;  // Total blocks carved (bump alloc)
    uint64_t total_blocks_freed;   // Total blocks freed to freelist
} SmallMidSSStats;
extern SmallMidSSStats g_smallmid_ss_stats;
void smallmid_ss_print_stats(void);
#endif
#ifdef __cplusplus
}
#endif
#endif // HAKMEM_SMALLMID_SUPERSLAB_H