hakmem/core/box/ss_allocation_box.c

// Box: Core Allocation
// Purpose: SuperSlab allocation/deallocation and slab initialization

#include "ss_allocation_box.h"
#include "ss_slab_meta_box.h"  // Phase 3d-A: SlabMeta Box boundary
#include "ss_os_acquire_box.h"
#include "ss_cache_box.h"
#include "ss_stats_box.h"
#include "ss_ace_box.h"
#include "ss_slab_management_box.h"
#include "hakmem_super_registry.h"
#include "ss_addr_map_box.h"
#include "hakmem_tiny_config.h"
#include "hakmem_policy.h"  // Phase E3-1: Access FrozenPolicy for never-free policy
#include "tiny_region_id.h"
#include "box/tiny_next_ptr_box.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>

// Global statistics (defined in ss_stats_box.c, declared here for access)
extern pthread_mutex_t g_superslab_lock;
extern uint64_t g_superslabs_freed;
extern uint64_t g_bytes_allocated;

// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
extern int g_ss_force_lg;

// g_ss_populate_once controls MAP_POPULATE flag
static _Atomic int g_ss_populate_once = 0;

// ============================================================================
// Remote Drain Helper
// ============================================================================

// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
{
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;

    // Atomically take the whole remote list
    uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
                                              memory_order_acq_rel);
    if (head == 0) return;

    // Convert remote stack (offset 0 next) into freelist encoding via Box API
    // and splice in front of current freelist preserving relative order.
    void* prev = meta->freelist;
    int cls = (int)meta->class_idx;
    uintptr_t cur = head;
    while (cur != 0) {
        uintptr_t next = *(uintptr_t*)cur;  // remote-next stored at offset 0
        // Restore header for header-classes (class 1-6) which were clobbered by remote push
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (cls != 0 && cls != 7) {
            uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
            *(uint8_t*)(uintptr_t)cur = expected;
        }
#endif
        // Rewrite next pointer to Box representation for this class
        tiny_next_write(cls, (void*)cur, prev);
        prev = (void*)cur;
        cur = next;
    }
    meta->freelist = prev;
    // Reset remote count after full drain
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);

    // Update freelist/nonempty visibility bits
    uint32_t bit = (1u << slab_idx);
    atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
    atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}

// ============================================================================
// SuperSlab Allocation (ACE-Aware)
// ============================================================================

SuperSlab* superslab_allocate(uint8_t size_class) {
    // Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗
    static int fault_rate = -1;  // -1=unparsed, 0=disabled, >0=rate
    static __thread unsigned long fault_tick = 0;
    if (__builtin_expect(fault_rate == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
        if (e && *e) {
            int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
        } else {
            fault_rate = 0;
        }
    }
    if (fault_rate > 0) {
        unsigned long t = ++fault_tick;
        if ((t % (unsigned long)fault_rate) == 0ul) {
            return NULL;  // simulate OOM
        }
    }
    // Optional env clamp for SuperSlab size
    static int env_parsed = 0;
    // Allow full ACE range [MIN..MAX] by default so 1MB/2MB の二択学習が有効になる。
    static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
    static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
    if (!env_parsed) {
        char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
        if (maxmb) {
            int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
        }
        char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
        if (minmb) {
            int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
        }
        if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
        const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
        if (force_lg_env && *force_lg_env) {
            int v = atoi(force_lg_env);
            if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
                g_ss_force_lg = v;
                g_ss_min_lg_env = g_ss_max_lg_env = v;
            }
        }
        size_t precharge_default = 0;
        const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
        if (precharge_env && *precharge_env) {
            long v = atol(precharge_env);
            if (v < 0) v = 0;
            precharge_default = (size_t)v;
            if (v > 0) {
                atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
            }
        }
        size_t cache_default = 0;
        const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
        if (cache_env && *cache_env) {
            long v = atol(cache_env);
            if (v < 0) v = 0;
            cache_default = (size_t)v;
        }
        // Initialize cache/precharge via direct manipulation (box API doesn't need init function)
        for (int i = 0; i < 8; i++) {
            extern size_t g_ss_cache_cap[8];
            extern size_t g_ss_precharge_target[8];
            g_ss_cache_cap[i] = cache_default;
            g_ss_precharge_target[i] = precharge_default;
        }

        for (int i = 0; i < 8; i++) {
            char name[64];
            snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
            char* cap_env = getenv(name);
            if (cap_env && *cap_env) {
                long v = atol(cap_env);
                if (v < 0) v = 0;
                tiny_ss_cache_set_class_cap(i, (size_t)v);
            }
            snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
            char* pre_env = getenv(name);
            if (pre_env && *pre_env) {
                long v = atol(pre_env);
                if (v < 0) v = 0;
                tiny_ss_precharge_set_class_target(i, (size_t)v);
            }
        }
        const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
        if (populate_env && atoi(populate_env) != 0) {
            atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
        }
        env_parsed = 1;
    }

    uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
    if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
    if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
    size_t ss_size = (size_t)1 << lg;  // 2^20 = 1MB, 2^21 = 2MB
    uintptr_t ss_mask = ss_size - 1;
    int from_cache = 0;
    void* ptr = NULL;

    // Debug logging flag (lazy init)
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif

    // Phase 9: Try LRU cache first (lazy deallocation)
    SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
    if (cached_ss) {
        ptr = (void*)cached_ss;
        from_cache = 1;
        // Debug logging for REFILL from LRU
        if (dbg == 1) {
            fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
                    size_class, (void*)cached_ss);
        }
        // Skip old cache path - LRU cache takes priority
    } else {
        // Fallback to old cache (will be deprecated)
        ss_cache_precharge(size_class, ss_size, ss_mask);
        void* old_cached = ss_cache_pop(size_class);
        if (old_cached) {
            ptr = old_cached;
            from_cache = 1;
            // Debug logging for REFILL from prewarm (old cache is essentially prewarm)
            if (dbg == 1) {
                fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
                        size_class, ptr);
            }
        }
    }

    if (!ptr) {
        int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
        ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
        if (!ptr) {
            return NULL;
        }
        // Debug logging for REFILL with new allocation
        if (dbg == 1) {
            fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
                    size_class, (void*)ptr);
        }
    }

    // Initialize SuperSlab header (Phase 12: no global size_class field)
    SuperSlab* ss = (SuperSlab*)ptr;
    ss->magic = SUPERSLAB_MAGIC;
    ss->active_slabs = 0;
    ss->lg_size = lg;  // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
    ss->slab_bitmap = 0;
    ss->nonempty_mask = 0;  // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
    ss->freelist_mask = 0;  // P1.1 FIX: Initialize freelist_mask
    ss->empty_mask = 0;     // P1.1 FIX: Initialize empty_mask
    ss->empty_count = 0;    // P1.1 FIX: Initialize empty_count
    ss->partial_epoch = 0;
    ss->publish_hint = 0xFF;

    // Initialize atomics explicitly
    atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
    atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
    atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
    ss->partial_next = NULL;

    // Phase 9: Initialize LRU fields
    ss->last_used_ns = 0;
    ss->generation = 0;
    ss->lru_prev = NULL;
    ss->lru_next = NULL;

    // Phase 3d-C: Initialize hot/cold fields
    ss->hot_count = 0;
    ss->cold_count = 0;
    memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
    memset(ss->cold_indices, 0, sizeof(ss->cold_indices));

    // Phase 12: Initialize next_chunk (legacy per-class chain)
    ss->next_chunk = NULL;

    // Initialize all slab metadata (only up to max slabs for this size)
    int max_slabs = (int)(ss_size / SLAB_SIZE);

    // DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
    // This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
    // Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
    memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
    memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
    memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
    memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));

    // P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
    // This ensures class_map is in a known state even before slabs are assigned
    memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));

    for (int i = 0; i < max_slabs; i++) {
        ss_slab_meta_freelist_set(ss, i, NULL);  // Explicit NULL (redundant after memset, but clear intent)
        ss_slab_meta_used_set(ss, i, 0);
        ss_slab_meta_capacity_set(ss, i, 0);
        ss_slab_meta_owner_tid_low_set(ss, i, 0);

        // Initialize remote queue atomics (memset already zeroed, but use proper atomic init)
        atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
        atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed);
        atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed);
    }

    if (from_cache) {
        ss_stats_cache_reuse();
    }

    // Phase 8.3: Update ACE current_lg to match allocated size
    g_ss_ace[size_class].current_lg = lg;

    // Phase 1: Register SuperSlab in global registry for fast lookup
    // CRITICAL: Register AFTER full initialization (ss structure is ready)
    uintptr_t base = (uintptr_t)ss;
    int reg_ok = hak_super_register(base, ss);
    if (!reg_ok) {
        // Registry full - this is a fatal error
        fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
        // Still return ss to avoid memory leak, but lookups may fail
    }
    do {
        static _Atomic uint32_t g_ss_reg_log_shot = 0;
        uint32_t shot = atomic_fetch_add_explicit(&g_ss_reg_log_shot, 1, memory_order_relaxed);
        if (shot < 4) {
            fprintf(stderr,
                    "[SS_REG_DEBUG] class=%u ss=%p reg_ok=%d map_count=%zu\n",
                    (unsigned)size_class,
                    (void*)ss,
                    reg_ok,
                    g_ss_addr_map.count);
            fflush(stderr);
        }
    } while (0);

    return ss;
}

// ============================================================================
// SuperSlab Deallocation
// ============================================================================

void superslab_free(SuperSlab* ss) {
    if (!ss || ss->magic != SUPERSLAB_MAGIC) {
        return;  // Invalid SuperSlab
    }

    // Guard: do not free while pinned by TLS/remote holders
    uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
    if (__builtin_expect(ss_refs != 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
        static _Atomic uint32_t g_ss_free_pinned = 0;
        uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
        if (shot < 8) {
            fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
        }
#endif
        return;
    }

    // ADD DEBUG LOGGING
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
                (void*)ss, ss->lg_size, ss->active_slabs);
    }

    // Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
    size_t ss_size = (size_t)1 << ss->lg_size;

    // Phase 1: Unregister SuperSlab from registry FIRST
    // CRITICAL: Must unregister BEFORE adding to LRU cache
    // Reason: Cached SuperSlabs should NOT be found by lookups
    uintptr_t base = (uintptr_t)ss;
    hak_super_unregister(base);

    // Memory fence to ensure unregister is visible
    atomic_thread_fence(memory_order_release);

    // Phase 9: Try LRU cache first (lazy deallocation)
    // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
    // Magic will be cleared on eviction or reuse
    int lru_cached = hak_ss_lru_push(ss);
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
    }
    if (lru_cached) {
        // Successfully cached in LRU - defer munmap
        return;
    }

    // LRU cache full or disabled - try old cache using head class_idx (if known)
    int old_cached = ss_cache_push(0, ss);
    if (old_cached) {
        ss_stats_cache_store();
        return;
    }

    // Phase E3-1: Check never-free policy before munmap
    // If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
    const FrozenPolicy* pol = hkm_policy_get();
    if (pol && pol->tiny_ss_never_free_global) {
        // Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
        // Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
                (void*)ss, ss_size);
#endif
        return;
    }

    // Both caches full - immediately free to OS (eager deallocation)
    // Clear magic to prevent use-after-free
    ss->magic = 0;

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
            (void*)ss, ss_size,
            atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif

    munmap(ss, ss_size);

    // Update statistics for actual release to OS
    pthread_mutex_lock(&g_superslab_lock);
    g_superslabs_freed++;
    // Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
    g_bytes_allocated -= ss_size;
    pthread_mutex_unlock(&g_superslab_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
            (unsigned long long)g_superslabs_freed);
#endif
}

// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================

void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    // Phase E1-CORRECT unified geometry:
    // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
    // - usable bytes are determined by slab index (slab0 vs others)
    // - capacity = usable / stride for ALL classes (including former C7)
    size_t usable_size = (slab_idx == 0)
                           ? SUPERSLAB_SLAB0_USABLE_SIZE
                           : SUPERSLAB_SLAB_USABLE_SIZE;
    size_t stride = block_size;
    uint16_t capacity = (uint16_t)(usable_size / stride);

#if !HAKMEM_BUILD_RELEASE
    if (slab_idx == 0) {
        fprintf(stderr,
                "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
                usable_size, stride, (unsigned)capacity);
    }
#endif

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    meta->freelist = NULL;          // NULL = linear allocation mode
    meta->used = 0;
    meta->active = 0;               // P1.3: blocks in use by user (starts at 0)
    meta->tls_cached = 0;           // P2.2: blocks cached in TLS SLL (starts at 0)
    meta->capacity = capacity;
    meta->carved = 0;
    // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
    // Fail-safe: stamp class_idx from geometry (stride → class).
    // This normalizes both legacy and shared pool paths.
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tiny_class_sizes[i] == stride) {
            meta->class_idx = (uint8_t)i;
            // P1.1: Update class_map for out-of-band lookup on free path
            ss->class_map[slab_idx] = (uint8_t)i;
            break;
        }
    }

    superslab_activate_slab(ss, slab_idx);
}