hakmem/core/hakmem_shared_pool.c
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead reduction (+15.34% improvement)
## Summary
- Fixed the bench_profile.h setenv segfault (by ChatGPT): switched to going through RTLD_NEXT
- Added core/box/pool_zero_mode_box.h: ZERO_MODE is now managed in one place via an env-variable cache (see the sketch after this list)
- core/hakmem_pool.c: memset is now controlled by the zero mode (full / header / off)
- A/B test result: +15.34% improvement with ZERO_MODE=header (1M iterations, C6-heavy)
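
The box itself is not shown in this view, so the following is only a rough sketch of the env-cached zero-mode idea; the enum values, getter name, and `HAKMEM_POOL_ZERO_MODE` spelling are assumptions, not the real header.

```c
// Hypothetical sketch only -- not the actual contents of core/box/pool_zero_mode_box.h.
#include <stdlib.h>
#include <string.h>

typedef enum {
    POOL_ZERO_FULL = 0,  // memset the whole block (legacy behavior)
    POOL_ZERO_HEADER,    // clear only the header region
    POOL_ZERO_OFF        // skip clearing entirely
} PoolZeroMode;

// Cache the env lookup once so the allocation hot path never calls getenv() again.
static inline PoolZeroMode pool_zero_mode_get(void) {
    static int cached = -1;  // -1 = not read yet
    if (__builtin_expect(cached == -1, 0)) {
        const char* v = getenv("HAKMEM_POOL_ZERO_MODE");  // assumed env name
        if (v && strcmp(v, "header") == 0)   cached = POOL_ZERO_HEADER;
        else if (v && strcmp(v, "off") == 0) cached = POOL_ZERO_OFF;
        else                                 cached = POOL_ZERO_FULL;  // safest default
    }
    return (PoolZeroMode)cached;
}
```

core/hakmem_pool.c then branches on the returned mode before its memset; skipping the full clear is what the ZERO_MODE=header rows below measure.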

## Files Modified
- core/box/pool_api.inc.h: include pool_zero_mode_box.h
- core/bench_profile.h: glibc setenv → malloc+putenv (avoids the segfault)
- core/hakmem_pool.c: zero-mode lookup and control logic
- core/box/pool_zero_mode_box.h (new): enum/getter
- CURRENT_TASK.md: record Phase ML1 results

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

#include "hakmem_shared_pool_internal.h"
#include "hakmem_debug_master.h" // Phase 4b: Master debug control
#include "hakmem_stats_master.h" // Phase 4d: Master stats control
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
#include "box/ss_hot_cold_box.h" // Phase 12-1.1: EMPTY slab marking
#include "box/pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_SS_META)
#include "box/tls_sll_drain_box.h" // Box TLS SLL Drain (tiny_tls_sll_drain)
#include "box/tls_slab_reuse_guard_box.h" // Box TLS Slab Reuse Guard (P0.3)
#include "hakmem_policy.h" // FrozenPolicy (learning layer)
#include "box/shared_pool_box.h" // Logical cap for bench profile
#include <stdlib.h>
#include <string.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/mman.h> // For mmap/munmap (used in shared_pool_ensure_capacity_unlocked)
// ============================================================================
// P0 Lock Contention Instrumentation (Debug build only; counters defined always)
// ============================================================================
_Atomic uint64_t g_lock_acquire_count = 0; // Total lock acquisitions
_Atomic uint64_t g_lock_release_count = 0; // Total lock releases
_Atomic uint64_t g_lock_acquire_slab_count = 0; // Locks from acquire_slab path
_Atomic uint64_t g_lock_release_slab_count = 0; // Locks from release_slab path
#if !HAKMEM_BUILD_RELEASE
int g_lock_stats_enabled = -1; // -1=uninitialized, 0=off, 1=on

// Initialize lock stats from environment variable
// Phase 4b: Now uses hak_debug_check() for master debug control support
void lock_stats_init(void) {
    if (__builtin_expect(g_lock_stats_enabled == -1, 0)) {
        g_lock_stats_enabled = hak_debug_check("HAKMEM_SHARED_POOL_LOCK_STATS");
    }
}
// Report lock statistics at shutdown
static void __attribute__((destructor)) lock_stats_report(void) {
    if (g_lock_stats_enabled != 1) {
        return;
    }
    uint64_t acquires = atomic_load(&g_lock_acquire_count);
    uint64_t releases = atomic_load(&g_lock_release_count);
    uint64_t acquire_path = atomic_load(&g_lock_acquire_slab_count);
    uint64_t release_path = atomic_load(&g_lock_release_slab_count);
    fprintf(stderr, "\n=== SHARED POOL LOCK STATISTICS ===\n");
    // Portability: print uint64_t via unsigned long long casts, matching the
    // convention used by sp_stage_stats_report() below.
    fprintf(stderr, "Total lock ops: %llu (acquire) + %llu (release) = %llu\n",
            (unsigned long long)acquires, (unsigned long long)releases,
            (unsigned long long)(acquires + releases));
    fprintf(stderr, "Balance: %lld (should be 0)\n",
            (long long)((int64_t)acquires - (int64_t)releases));
    fprintf(stderr, "\n--- Breakdown by Code Path ---\n");
    fprintf(stderr, "acquire_slab(): %llu (%.1f%%)\n",
            (unsigned long long)acquire_path,
            100.0 * acquire_path / (acquires ? acquires : 1));
    fprintf(stderr, "release_slab(): %llu (%.1f%%)\n",
            (unsigned long long)release_path,
            100.0 * release_path / (acquires ? acquires : 1));
    fprintf(stderr, "===================================\n");
    fflush(stderr);
}
#else
// Release build: No-op stubs
int g_lock_stats_enabled = 0;
#endif
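
// ---------------------------------------------------------------------------
// Illustration (hypothetical): one way a lock/unlock site could feed the
// counters above. sp_lock_acquire()/sp_lock_release() are illustrative names
// only; they are not defined or called anywhere else in this codebase.
// ---------------------------------------------------------------------------
static inline void sp_lock_acquire(int from_acquire_slab) {
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    atomic_fetch_add_explicit(&g_lock_acquire_count, 1, memory_order_relaxed);
    if (from_acquire_slab) {
        atomic_fetch_add_explicit(&g_lock_acquire_slab_count, 1, memory_order_relaxed);
    } else {
        atomic_fetch_add_explicit(&g_lock_release_slab_count, 1, memory_order_relaxed);
    }
}
static inline void sp_lock_release(void) {
    atomic_fetch_add_explicit(&g_lock_release_count, 1, memory_order_relaxed);
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}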
// ============================================================================
// SP Acquire Stage Statistics (Stage1/2/3 breakdown)
// ============================================================================
_Atomic uint64_t g_sp_stage1_hits[TINY_NUM_CLASSES_SS];
_Atomic uint64_t g_sp_stage2_hits[TINY_NUM_CLASSES_SS];
_Atomic uint64_t g_sp_stage3_hits[TINY_NUM_CLASSES_SS];
// Data collection gate (0=off, 1=on). Also enabled from the learning layer.
int g_sp_stage_stats_enabled = 0;
#if !HAKMEM_BUILD_RELEASE
// Logging gate for the destructor report. ENV: HAKMEM_SHARED_POOL_STAGE_STATS
static int g_sp_stage_stats_log_enabled = -1; // -1=uninitialized, 0=off, 1=on
void sp_stage_stats_init(void) {
    // Phase 4d: Now uses hak_stats_check() for unified stats control
    if (__builtin_expect(g_sp_stage_stats_log_enabled == -1, 0)) {
        g_sp_stage_stats_log_enabled = hak_stats_check("HAKMEM_SHARED_POOL_STAGE_STATS", "pool");
        if (g_sp_stage_stats_log_enabled == 1) {
            // If logging is enabled, always enable data collection as well.
            g_sp_stage_stats_enabled = 1;
        }
    }
}
static void __attribute__((destructor)) sp_stage_stats_report(void) {
    if (g_sp_stage_stats_log_enabled != 1) {
        return;
    }
    fprintf(stderr, "\n=== SHARED POOL STAGE STATISTICS ===\n");
    fprintf(stderr, "Per-class acquire_slab() stage hits (Stage1=EMPTY, Stage2=UNUSED, Stage3=new SS)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        uint64_t s1 = atomic_load(&g_sp_stage1_hits[cls]);
        uint64_t s2 = atomic_load(&g_sp_stage2_hits[cls]);
        uint64_t s3 = atomic_load(&g_sp_stage3_hits[cls]);
        uint64_t total = s1 + s2 + s3;
        if (total == 0) continue; // Skip unused classes
        double p1 = 100.0 * (double)s1 / (double)total;
        double p2 = 100.0 * (double)s2 / (double)total;
        double p3 = 100.0 * (double)s3 / (double)total;
        fprintf(stderr,
                "Class %d: total=%llu S1=%llu (%.1f%%) S2=%llu (%.1f%%) S3=%llu (%.1f%%)\n",
                cls,
                (unsigned long long)total,
                (unsigned long long)s1, p1,
                (unsigned long long)s2, p2,
                (unsigned long long)s3, p3);
    }
    fprintf(stderr, "====================================\n");
    fflush(stderr);
}
#else
// Release build: No-op stubs
void sp_stage_stats_init(void) {}
#endif
// Snapshot Tiny-related backend metrics for learner / observability.
void
shared_pool_tiny_metrics_snapshot(uint64_t stage1[TINY_NUM_CLASSES_SS],
                                  uint64_t stage2[TINY_NUM_CLASSES_SS],
                                  uint64_t stage3[TINY_NUM_CLASSES_SS],
                                  uint32_t active_slots[TINY_NUM_CLASSES_SS])
{
    // Only make sure the env-based logging configuration has been initialized.
    sp_stage_stats_init();
    // When called from the learning layer, always enable data collection itself
    // (logging stays controlled by the env variable).
    g_sp_stage_stats_enabled = 1;
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        if (stage1) {
            stage1[cls] = atomic_load_explicit(&g_sp_stage1_hits[cls],
                                               memory_order_relaxed);
        }
        if (stage2) {
            stage2[cls] = atomic_load_explicit(&g_sp_stage2_hits[cls],
                                               memory_order_relaxed);
        }
        if (stage3) {
            stage3[cls] = atomic_load_explicit(&g_sp_stage3_hits[cls],
                                               memory_order_relaxed);
        }
        if (active_slots) {
            active_slots[cls] = g_shared_pool.class_active_slots[cls];
        }
    }
}
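
// ---------------------------------------------------------------------------
// Illustration (hypothetical): a learner / observability thread could poll the
// snapshot like this. Any output array may be NULL when the caller does not
// need that metric. dump_tiny_backend_metrics() is an illustrative name only.
// ---------------------------------------------------------------------------
static void dump_tiny_backend_metrics(void) __attribute__((unused));
static void dump_tiny_backend_metrics(void) {
    uint64_t s1[TINY_NUM_CLASSES_SS], s2[TINY_NUM_CLASSES_SS], s3[TINY_NUM_CLASSES_SS];
    uint32_t active[TINY_NUM_CLASSES_SS];
    shared_pool_tiny_metrics_snapshot(s1, s2, s3, active);
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        uint64_t total = s1[cls] + s2[cls] + s3[cls];
        if (total == 0) continue; // nothing observed for this class yet
        fprintf(stderr, "class %d: S1=%llu S2=%llu S3=%llu active_slots=%u\n",
                cls,
                (unsigned long long)s1[cls],
                (unsigned long long)s2[cls],
                (unsigned long long)s3[cls],
                active[cls]);
    }
}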
// Helper: return per-class active slot limit from FrozenPolicy.tiny_cap[]
// Semantics:
// - tiny_cap[class] == 0 → no limit (unbounded)
// - otherwise: soft cap on ACTIVE slots managed by shared pool for this class.
uint32_t sp_class_active_limit(int class_idx) {
    const FrozenPolicy* pol = hkm_policy_get();
    if (!pol) {
        return 0; // no limit
    }
    if (class_idx < 0 || class_idx >= 8) {
        return 0;
    }
    return (uint32_t)pol->tiny_cap[class_idx];
}
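
// ---------------------------------------------------------------------------
// Illustration (hypothetical): callers must treat a returned cap of 0 as
// "unbounded", not as "zero slots allowed". sp_can_activate_slot() is an
// illustrative helper, not part of this file's API.
// ---------------------------------------------------------------------------
static inline int sp_can_activate_slot(int class_idx) {
    uint32_t cap = sp_class_active_limit(class_idx);
    if (cap == 0) {
        return 1; // 0 means "no limit": activation is always allowed
    }
    // The soft cap applies to ACTIVE slots tracked by the shared pool for this class.
    return g_shared_pool.class_active_slots[class_idx] < cap;
}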
// ============================================================================
// P0-4: Lock-Free Free Slot List - Node Pool
// ============================================================================
// Pre-allocated node pools (one per class, to avoid malloc/free)
FreeSlotNode g_free_node_pool[TINY_NUM_CLASSES_SS][MAX_FREE_NODES_PER_CLASS];
_Atomic uint32_t g_node_alloc_index[TINY_NUM_CLASSES_SS] = {0};
// Recycle list for FreeSlotNode (per class, lock-free LIFO).
// node_alloc() first tries to reuse a node from this list and only carves a new
// node out of the pool when the list is exhausted.
static _Atomic(FreeSlotNode*) g_node_free_head[TINY_NUM_CLASSES_SS] = {
    [0 ... TINY_NUM_CLASSES_SS-1] = ATOMIC_VAR_INIT(NULL)
};
// Allocate a node from pool (lock-free fast path, may fall back to legacy path)
static inline FreeSlotNode* node_alloc(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return NULL;
    }
    // First, try to pop from recycle list (nodes returned by pop_lockfree).
    FreeSlotNode* free_head = atomic_load_explicit(
        &g_node_free_head[class_idx],
        memory_order_acquire);
    while (free_head != NULL) {
        FreeSlotNode* next = free_head->next;
        if (atomic_compare_exchange_weak_explicit(
                &g_node_free_head[class_idx],
                &free_head,
                next,
                memory_order_acq_rel,
                memory_order_acquire)) {
            return free_head; // Recycled node
        }
        // CAS failed: free_head is updated; retry with new head.
    }
    uint32_t idx = atomic_fetch_add(&g_node_alloc_index[class_idx], 1);
    if (idx >= MAX_FREE_NODES_PER_CLASS) {
        // Pool exhausted - should be rare.
        return NULL;
    }
    return &g_free_node_pool[class_idx][idx];
}
// ============================================================================
// Phase 12-2: SharedSuperSlabPool skeleton implementation
// Goal:
// - Centralize SuperSlab allocation/registration
// - Provide acquire_slab/release_slab APIs for later refill/free integration
// - Keep logic simple & conservative; correctness and observability first.
//
// Notes:
// - Concurrency: protected by g_shared_pool.alloc_lock for now.
// - class_hints is best-effort: read lock-free, written under lock.
// - LRU hooks left as no-op placeholders.
SharedSuperSlabPool g_shared_pool = {
    .slabs = NULL,
    .capacity = 0,
    .total_count = 0,
    .active_count = 0,
    .alloc_lock = PTHREAD_MUTEX_INITIALIZER,
    .class_hints = { NULL },
    .lru_head = NULL,
    .lru_tail = NULL,
    .lru_count = 0,
    // P0-4: Lock-free free slot lists (zero-initialized atomic pointers)
    .free_slots_lockfree = {{.head = ATOMIC_VAR_INIT(NULL)}},
    // Legacy: mutex-protected free lists
    .free_slots = {{.entries = {{0}}, .count = 0}},
    // Phase 12: SP-SLOT fields (ss_metadata is fixed-size array, auto-zeroed)
    .ss_meta_count = 0
};
void
shared_pool_ensure_capacity_unlocked(uint32_t min_capacity)
{
    if (g_shared_pool.capacity >= min_capacity) {
        return;
    }
    uint32_t new_cap = g_shared_pool.capacity ? g_shared_pool.capacity : 16;
    while (new_cap < min_capacity) {
        new_cap *= 2;
    }
    // CRITICAL FIX: Use system mmap() directly to avoid recursion!
    size_t new_size = new_cap * sizeof(SuperSlab*);
    SuperSlab** new_slabs = (SuperSlab**)mmap(NULL, new_size,
                                              PROT_READ | PROT_WRITE,
                                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (new_slabs == MAP_FAILED) {
        // Allocation failure: keep old state; caller must handle NULL later.
        return;
    }
    // Copy old data if exists
    if (g_shared_pool.slabs != NULL) {
        memcpy(new_slabs, g_shared_pool.slabs,
               g_shared_pool.capacity * sizeof(SuperSlab*));
        // Free old mapping (also use system munmap, not free!)
        size_t old_size = g_shared_pool.capacity * sizeof(SuperSlab*);
        munmap(g_shared_pool.slabs, old_size);
    }
    // Zero new entries to keep scanning logic simple.
    memset(new_slabs + g_shared_pool.capacity, 0,
           (new_cap - g_shared_pool.capacity) * sizeof(SuperSlab*));
    g_shared_pool.slabs = new_slabs;
    g_shared_pool.capacity = new_cap;
}
void
shared_pool_init(void)
{
    // Idempotent init; safe to call from multiple early paths.
    // pthread_mutex_t with static initializer is already valid.
    shared_pool_box_init(NULL, NULL);
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    if (g_shared_pool.capacity == 0 && g_shared_pool.slabs == NULL) {
        shared_pool_ensure_capacity_unlocked(16);
    }
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}
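
// ---------------------------------------------------------------------------
// Illustration (hypothetical): the "_unlocked" suffix means the caller owns
// alloc_lock, exactly as shared_pool_init() does above. A registration path
// respecting that contract could look like the sketch below;
// sp_register_superslab() is an illustrative name, not part of this file's API.
// ---------------------------------------------------------------------------
static int sp_register_superslab(SuperSlab* ss) __attribute__((unused));
static int sp_register_superslab(SuperSlab* ss) {
    if (!ss) return -1;
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    // Grow the pointer array first; the helper assumes the lock is already held.
    shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
        // Growth failed (mmap error): leave state untouched and report failure.
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1;
    }
    g_shared_pool.slabs[g_shared_pool.total_count++] = ss;
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    return 0;
}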
// ============================================================================
// Phase 12: SP-SLOT Box - Modular Helper Functions
// ============================================================================
// ---------- Layer 1: Slot Operations (Low-level) ----------
// Find first unused slot in SharedSSMeta
// P0-5: Uses atomic load for state check
// Returns: slot_idx on success, -1 if no unused slots
static int sp_slot_find_unused(SharedSSMeta* meta) __attribute__((unused));
static int sp_slot_find_unused(SharedSSMeta* meta) {
    if (!meta) return -1;
    for (int i = 0; i < meta->total_slots; i++) {
        SlotState state = atomic_load_explicit(&meta->slots[i].state, memory_order_acquire);
        if (state == SLOT_UNUSED) {
            return i;
        }
    }
    return -1;
}
// Mark slot as ACTIVE (UNUSED→ACTIVE or EMPTY→ACTIVE)
// P0-5: Uses atomic store for state transition (caller must hold mutex!)
// Returns: 0 on success, -1 on error
int sp_slot_mark_active(SharedSSMeta* meta, int slot_idx, int class_idx) {
    if (!meta || slot_idx < 0 || slot_idx >= meta->total_slots) return -1;
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return -1;
    SharedSlot* slot = &meta->slots[slot_idx];
    // Load state atomically
    SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
    // Transition: UNUSED→ACTIVE or EMPTY→ACTIVE
    if (state == SLOT_UNUSED || state == SLOT_EMPTY) {
        atomic_store_explicit(&slot->state, SLOT_ACTIVE, memory_order_release);
        slot->class_idx = (uint8_t)class_idx;
        slot->slab_idx = (uint8_t)slot_idx;
        meta->active_slots++;
        return 0;
    }
    return -1; // Already ACTIVE or invalid state
}
// Mark slot as EMPTY (ACTIVE→EMPTY)
// P0-5: Uses atomic store for state transition (caller must hold mutex!)
// Returns: 0 on success, -1 on error
int sp_slot_mark_empty(SharedSSMeta* meta, int slot_idx) {
    if (!meta || slot_idx < 0 || slot_idx >= meta->total_slots) return -1;
    SharedSlot* slot = &meta->slots[slot_idx];
    // Load state atomically
    SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
    if (state == SLOT_ACTIVE) {
        atomic_store_explicit(&slot->state, SLOT_EMPTY, memory_order_release);
        if (meta->active_slots > 0) {
            meta->active_slots--;
        }
        return 0;
    }
    return -1; // Not ACTIVE
}
// Sync SP-SLOT view from an existing SuperSlab.
// This is needed when a legacy-allocated SuperSlab reaches the shared-pool
// release path for the first time (slot states are still SLOT_UNUSED).
void sp_meta_sync_slots_from_ss(SharedSSMeta* meta, SuperSlab* ss) {
    if (!meta || !ss) return;
    int cap = ss_slabs_capacity(ss);
    if (cap > MAX_SLOTS_PER_SS) {
        cap = MAX_SLOTS_PER_SS;
    }
    meta->total_slots = (uint8_t)cap;
    meta->active_slots = 0;
    for (int i = 0; i < cap; i++) {
        SlotState state = SLOT_UNUSED;
        uint32_t bit = (1u << i);
        if (ss->slab_bitmap & bit) {
            state = SLOT_ACTIVE;
            meta->active_slots++;
        } else {
            TinySlabMeta* smeta = &ss->slabs[i];
            uint16_t used = atomic_load_explicit(&smeta->used, memory_order_relaxed);
            if (smeta->capacity > 0 && used == 0) {
                state = SLOT_EMPTY;
            }
        }
        uint8_t cls = ss->class_map[i];
        if (cls == 255) {
            cls = ss->slabs[i].class_idx;
        }
        meta->slots[i].class_idx = cls;
        meta->slots[i].slab_idx = (uint8_t)i;
        atomic_store_explicit(&meta->slots[i].state, state, memory_order_release);
    }
}
// ---------- Layer 2: Metadata Management (Mid-level) ----------
// Ensure ss_metadata array has capacity for at least min_count entries
// Caller must hold alloc_lock
// Returns: 0 on success, -1 if capacity exceeded
// RACE FIX: No realloc! Fixed-size array prevents race with lock-free Stage 2
static int sp_meta_ensure_capacity(uint32_t min_count) {
    if (min_count > MAX_SS_METADATA_ENTRIES) {
#if !HAKMEM_BUILD_RELEASE
        static int warn_once = 0;
        if (warn_once == 0) {
            fprintf(stderr, "[SP_META_CAPACITY_ERROR] Exceeded MAX_SS_METADATA_ENTRIES=%d\n",
                    MAX_SS_METADATA_ENTRIES);
            warn_once = 1;
        }
#endif
        return -1;
    }
    return 0;
}
// Find SharedSSMeta for given SuperSlab, or create if not exists
// Caller must hold alloc_lock
// Returns: SharedSSMeta* on success, NULL on error
SharedSSMeta* sp_meta_find_or_create(SuperSlab* ss) {
    if (!ss) return NULL;
    // P0 Optimization: O(1) lookup via direct pointer (eliminates 7.8% CPU bottleneck)
    // Check if this SuperSlab already has metadata cached
    if (ss->shared_meta) {
        return ss->shared_meta;
    }
    // RACE FIX: Load count atomically for consistency (even under mutex)
    uint32_t count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    // Search existing metadata (fallback for legacy SuperSlabs without cached pointer)
    for (uint32_t i = 0; i < count; i++) {
        // RACE FIX: Load pointer atomically for consistency
        SuperSlab* meta_ss = atomic_load_explicit(&g_shared_pool.ss_metadata[i].ss, memory_order_relaxed);
        if (meta_ss == ss) {
            // Cache the pointer for future O(1) lookups
            ss->shared_meta = &g_shared_pool.ss_metadata[i];
            return &g_shared_pool.ss_metadata[i];
        }
    }
    // Create new metadata entry
    if (sp_meta_ensure_capacity(count + 1) != 0) {
        return NULL;
    }
    // RACE FIX: Read current count atomically (even under mutex for consistency)
    uint32_t current_count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    SharedSSMeta* meta = &g_shared_pool.ss_metadata[current_count];
    // RACE FIX: Store SuperSlab pointer atomically (visible to lock-free Stage 2)
    atomic_store_explicit(&meta->ss, ss, memory_order_relaxed);
    meta->total_slots = (uint8_t)ss_slabs_capacity(ss);
    meta->active_slots = 0;
    // Initialize all slots as UNUSED
    // P0-5: Use atomic store for state initialization
    for (int i = 0; i < meta->total_slots; i++) {
        atomic_store_explicit(&meta->slots[i].state, SLOT_UNUSED, memory_order_relaxed);
        meta->slots[i].class_idx = 0;
        meta->slots[i].slab_idx = (uint8_t)i;
    }
    // P0 Optimization: Cache the metadata pointer in SuperSlab for O(1) future lookups
    ss->shared_meta = meta;
    // RACE FIX: Atomic increment with release semantics.
    // This ensures all writes to ss_metadata[current_count] (the initialization just
    // above) are visible before the count increment is visible to lock-free Stage 2 readers.
    atomic_fetch_add_explicit(&g_shared_pool.ss_meta_count, 1, memory_order_release);
    return meta;
}
// Find UNUSED slot and claim it (UNUSED → ACTIVE) using lock-free CAS
// Returns: slot_idx on success, -1 if no UNUSED slots
int sp_slot_claim_lockfree(SharedSSMeta* meta, int class_idx) {
    (void)class_idx;
    if (!meta) return -1;
    // Optimization: Quick check if any unused slots exist?
    // For now, just iterate. Metadata size is small (max 32 slots).
    for (int i = 0; i < meta->total_slots; i++) {
        SharedSlot* slot = &meta->slots[i];
        SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
        if (state == SLOT_UNUSED) {
            // Attempt CAS: UNUSED → ACTIVE
            if (atomic_compare_exchange_strong_explicit(
                    &slot->state,
                    &state,
                    SLOT_ACTIVE,
                    memory_order_acq_rel,
                    memory_order_acquire)) {
                return i; // Success!
            }
            // CAS failed: someone else took it or state changed
        }
    }
    return -1;
}
// ---------- Layer 3: Free List Management ----------
// Push empty slot to per-class free list
// Caller must hold alloc_lock
// Returns: 0 on success, -1 if list is full
int sp_freelist_push_lockfree(int class_idx, SharedSSMeta* meta, int slot_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return -1;
    FreeSlotNode* node = node_alloc(class_idx);
    if (!node) {
        // Pool exhausted
        return -1;
    }
    node->meta = meta;
    node->slot_idx = slot_idx;
    // Lock-free push to stack (LIFO)
    FreeSlotNode* old_head = atomic_load_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        memory_order_relaxed);
    do {
        node->next = old_head;
    } while (!atomic_compare_exchange_weak_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        &old_head,
        node,
        memory_order_release,
        memory_order_relaxed));
    return 0;
}
// Pop empty slot from per-class free list
// Lock-free
// Returns: 1 on success, 0 if empty
int sp_freelist_pop_lockfree(int class_idx, SharedSSMeta** meta_out, int* slot_idx_out) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return 0;
    FreeSlotNode* head = atomic_load_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        memory_order_acquire);
    while (head) {
        FreeSlotNode* next = head->next;
        if (atomic_compare_exchange_weak_explicit(
                &g_shared_pool.free_slots_lockfree[class_idx].head,
                &head,
                next,
                memory_order_acquire,
                memory_order_acquire)) {
            // Success!
            *meta_out = head->meta;
            *slot_idx_out = head->slot_idx;
            // Recycle node (push to free_head list)
            FreeSlotNode* free_head = atomic_load_explicit(&g_node_free_head[class_idx], memory_order_relaxed);
            do {
                head->next = free_head;
            } while (!atomic_compare_exchange_weak_explicit(
                &g_node_free_head[class_idx],
                &free_head,
                head,
                memory_order_release,
                memory_order_relaxed));
            return 1;
        }
        // CAS failed: head updated, retry
    }
    return 0; // Empty list
}
// Allocator helper for SuperSlab (Phase 9-2 Task 1)
// NOTE: class_idx MUST be a valid tiny class (0-7). Passing an out-of-range
// value previously went through superslab_allocate(8), which overflowed
// g_ss_ace[] and could corrupt neighboring globals, leading to missing
// registry entries and TLS SLL header corruption.
SuperSlab*
sp_internal_allocate_superslab(int class_idx)
{
    do {
        static _Atomic uint32_t g_sp_alloc_log = 0;
        uint32_t shot = atomic_fetch_add_explicit(&g_sp_alloc_log, 1, memory_order_relaxed);
        if (shot < 4) {
            fprintf(stderr, "[SP_INTERNAL_ALLOC] class_idx=%d\n", class_idx);
            fflush(stderr);
        }
    } while (0);
    // Clamp to valid range to avoid out-of-bounds access inside superslab_allocate().
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        class_idx = TINY_NUM_CLASSES_SS - 1;
    }
    // Use legacy backend to allocate a SuperSlab (malloc-based)
    extern SuperSlab* superslab_allocate(uint8_t size_class);
    SuperSlab* ss = superslab_allocate((uint8_t)class_idx);
    if (!ss) {
        return NULL;
    }
    // Initialize basic fields if not done by superslab_alloc
    ss->active_slabs = 0;
    ss->slab_bitmap = 0;
    return ss;
}
// ============================================================================
// Public API (High-level)
// ============================================================================
SuperSlab*
shared_pool_acquire_superslab(void)
{
    // Phase 12: Legacy wrapper?
    // This function seems to be a direct allocation bypass.
    return sp_internal_allocate_superslab(0);
}
void sp_fix_geometry_if_needed(SuperSlab* ss, int slab_idx, int class_idx) {
    // Phase 9-1: For now, we assume geometry is compatible or set by caller.
    // This hook exists for future use when we support dynamic geometry resizing.
    (void)ss; (void)slab_idx; (void)class_idx;
}