// hakmem/core/superslab_backend.c

// superslab_backend.c - Backend allocation paths for SuperSlab allocator
// Purpose: Legacy and shared pool backend implementations
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"

/*
 * superslab_return_block() - common exit for every SuperSlab allocation path
 *
 * Every backend in this file finishes an allocation by calling this helper,
 * so the "write a header or not" decision lives in exactly one place and no
 * path can forget it.
 *
 * Parameters:
 *   base      - start address of the block that was just carved out
 *   class_idx - tiny class index the block belongs to
 *
 * Returns:
 *   - HAKMEM_TINY_HEADER_CLASSIDX != 0: the value returned by
 *     tiny_region_id_write_header(base, class_idx), i.e. the user pointer
 *     (expected to be base + 1 -- confirm against that helper).
 *   - otherwise: base unchanged; class_idx is not used.
 */
static inline void* superslab_return_block(void* base, int class_idx) {
    void* user_ptr;
#if HAKMEM_TINY_HEADER_CLASSIDX
    user_ptr = tiny_region_id_write_header(base, class_idx);
#else
    (void)class_idx;  // only consumed when headers are compiled in
    user_ptr = base;
#endif
    return user_ptr;
}

/*
 * ss_legacy_carve_from_slab() - bump-allocate one raw block from a single slab
 *
 * If the slab has never been initialized (capacity == 0) it is initialized
 * for class_idx first, with the calling thread recorded as owner, and both
 * meta->class_idx and chunk->class_map[slab_idx] are set to class_idx.
 *
 * On success meta->used and chunk->total_active_blocks are each incremented.
 *
 * Returns: block start address (no header written yet), or NULL when the
 *          slab has no free block left.
 */
static uint8_t* ss_legacy_carve_from_slab(SuperSlab* chunk, int slab_idx, int class_idx)
{
    TinySlabMeta* meta = &chunk->slabs[slab_idx];

    // P1.2 FIX: initialize the slab on first use (like the shared backend),
    // so class_map is populated for every slab and not only for slab 0.
    if (meta->capacity == 0) {
        size_t block_size = g_tiny_class_sizes[class_idx];
        uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
        superslab_init_slab(chunk, slab_idx, block_size, owner_tid);
        meta = &chunk->slabs[slab_idx];  // re-read after init
        meta->class_idx = (uint8_t)class_idx;
        chunk->class_map[slab_idx] = (uint8_t)class_idx;
    }

    if (meta->used >= meta->capacity) {
        return NULL;
    }

    // Linear bump: block N lives N strides past the slab's data start.
    size_t stride = tiny_block_stride_for_class(class_idx);
    size_t offset = (size_t)meta->used * stride;
    uint8_t* base = (uint8_t*)chunk
                  + SUPERSLAB_SLAB0_DATA_OFFSET
                  + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE
                  + offset;

    meta->used++;
    atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
    return base;
}

/*
 * ss_legacy_carve_from_chunk() - first-fit scan over all slabs of one chunk
 *
 * filter_by_class != 0: slabs whose class_idx is neither class_idx nor 255
 *                       are skipped before any initialization is attempted.
 * filter_by_class == 0: every slab is considered.
 *
 * Returns: block start address, or NULL when no slab in the chunk can serve
 *          the request.
 */
static uint8_t* ss_legacy_carve_from_chunk(SuperSlab* chunk, int class_idx, int filter_by_class)
{
    int slab_count = ss_slabs_capacity(chunk);

    for (int slab_idx = 0; slab_idx < slab_count; slab_idx++) {
        if (filter_by_class) {
            const TinySlabMeta* meta = &chunk->slabs[slab_idx];
            if (meta->class_idx != (uint8_t)class_idx && meta->class_idx != 255) {
                continue;
            }
        }

        uint8_t* base = ss_legacy_carve_from_slab(chunk, slab_idx, class_idx);
        if (base) {
            return base;
        }
    }
    return NULL;
}

/*
 * Legacy backend for hak_tiny_alloc_superslab_box().
 *
 * Phase 12 Stage A/B:
 *  - Uses the per-class SuperSlabHead (g_superslab_heads) as the implementation.
 *  - Callers MUST use hak_tiny_alloc_superslab_box() and never touch this directly.
 *  - Later Stage C: this function will be replaced by a shared_pool backend.
 *
 * Algorithm:
 *  1. Lazily create the SuperSlabHead for class_idx.
 *  2. Walk chunks from head->current_chunk (or first_chunk if that is NULL)
 *     along next_chunk, taking the first slab with a free block.
 *  3. If none is found, expand the head once and scan the new current chunk.
 *
 * Parameters:
 *   class_idx - tiny class index; must be in [0, TINY_NUM_CLASSES_SS).
 *
 * Returns:
 *   User pointer produced by superslab_return_block(), or NULL when class_idx
 *   is out of range, the head cannot be created, expansion fails, or the new
 *   chunk has no usable block.
 *
 * NOTE(review): g_superslab_heads[] and the slab metadata are updated here
 * without any visible locking; confirm that callers serialize access.
 */
void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
{
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return NULL;
    }

    SuperSlabHead* head = g_superslab_heads[class_idx];
    if (!head) {
        head = init_superslab_head(class_idx);
        if (!head) {
            return NULL;
        }
        g_superslab_heads[class_idx] = head;
    }

    // Pass 1: existing chunks. Slabs bound to a different class are skipped.
    // NOTE(review): an uninitialized slab whose class_idx is neither class_idx
    // nor 255 is skipped here, but would be claimed by the unfiltered pass 2
    // below -- confirm this asymmetry is intended.
    SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk;
    for (; chunk; chunk = chunk->next_chunk) {
        uint8_t* base = ss_legacy_carve_from_chunk(chunk, class_idx, 1);
        if (base) {
            return superslab_return_block(base, class_idx);
        }
    }

    // Pass 2: nothing free anywhere -- grow by one chunk and use it.
    if (expand_superslab_head(head) < 0) {
        return NULL;
    }

    SuperSlab* new_chunk = head->current_chunk;
    if (!new_chunk) {
        return NULL;
    }

    uint8_t* base = ss_legacy_carve_from_chunk(new_chunk, class_idx, 0);
    if (!base) {
        return NULL;
    }
    return superslab_return_block(base, class_idx);
}

/*
 * Shared pool backend for hak_tiny_alloc_superslab_box().
 *
 * Phase 12-2:
 *  - Uses SharedSuperSlabPool (g_shared_pool) to obtain a SuperSlab/slab
 *    for the requested class_idx.
 *  - This backend EXPRESSLY owns only:
 *      - choosing (ss, slab_idx) via shared_pool_acquire_slab()
 *      - initializing that slab's TinySlabMeta via superslab_init_slab()
 *    and nothing else; all callers must go through hak_tiny_alloc_superslab_box().
 *
 *  - For now this is a minimal, conservative implementation:
 *      - One linear bump-run is carved from the acquired slab using
 *        tiny_block_stride_for_class().
 *      - No complex per-slab freelist or refill policy yet (Phase 12-3+).
 *      - This function never falls back by itself: on any failure it returns
 *        NULL and hak_tiny_alloc_superslab_box() decides whether to try the
 *        legacy backend.
 *
 * Parameters:
 *   class_idx - tiny class index; must be in [0, TINY_NUM_CLASSES_SS).
 *
 * Returns:
 *   User pointer produced by superslab_return_block(), or NULL when
 *     - class_idx is out of range,
 *     - the shared pool cannot provide a slab,
 *     - the pool reports a slab index outside [0, ss_slabs_capacity(ss)),
 *     - the slab is bound to a different class or its metadata is inconsistent,
 *     - the slab is already full.
 *   Contract violations are additionally logged to stderr in non-release
 *   builds (HAKMEM_BUILD_RELEASE == 0).
 */
void* hak_tiny_alloc_superslab_backend_shared(int class_idx)
{
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return NULL;
    }

    SuperSlab* ss = NULL;
    int slab_idx = -1;

    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss) {
        // Shared pool could not provide a slab; caller may choose to fall back.
        return NULL;
    }

    // Defensive: slab_idx starts as -1 and is only trusted because the pool
    // reported success. Validate it before it is used to index ss->slabs[]
    // and ss->class_map[], so a pool bug cannot turn into an out-of-bounds
    // access.
    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[HAKMEM][SS_SHARED] BUG: acquire_slab returned invalid slab_idx=%d cls=%d ss=%p\n",
                slab_idx, class_idx, (void*)ss);
#endif
        return NULL;
    }

    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Defensive: shared_pool must either hand us an UNASSIGNED slab (255) or
    // one already bound to this class. Anything else is a hard bug.
    if (meta->class_idx != 255 && meta->class_idx != (uint8_t)class_idx) {
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[HAKMEM][SS_SHARED] BUG: acquire_slab mismatch: cls=%d meta->class_idx=%u slab_idx=%d ss=%p\n",
                class_idx, (unsigned)meta->class_idx, slab_idx, (void*)ss);
#endif
        return NULL;
    }

    // Initialize slab geometry once for this class (capacity == 0 marks a
    // slab that has not been set up yet).
    if (meta->capacity == 0) {
        size_t block_size = g_tiny_class_sizes[class_idx];
        // LARSON FIX: pass the actual thread ID for cross-thread free detection.
        uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self();
        superslab_init_slab(ss, slab_idx, block_size, my_tid);
        meta = &ss->slabs[slab_idx];  // re-read after init

        // CRITICAL FIX: always set class_idx after init to avoid C0/C7 confusion.
        // New SuperSlabs start with meta->class_idx=0 (mmap zero-init), so the
        // requested class must be written explicitly, not only when the field
        // was 255.
        meta->class_idx = (uint8_t)class_idx;
        // P1.1: keep class_map in sync on the shared acquire path.
        ss->class_map[slab_idx] = (uint8_t)class_idx;
    }

    // Final contract check before computing addresses.
    if (meta->class_idx != (uint8_t)class_idx ||
        meta->capacity == 0 ||
        meta->used > meta->capacity) {
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[HAKMEM][SS_SHARED] BUG: invalid slab meta before alloc: "
                "cls=%d slab_idx=%d meta_cls=%u used=%u cap=%u ss=%p\n",
                class_idx, slab_idx,
                (unsigned)meta->class_idx,
                (unsigned)meta->used,
                (unsigned)meta->capacity,
                (void*)ss);
#endif
        return NULL;
    }

    // Simple bump allocation within this slab.
    if (meta->used >= meta->capacity) {
        // Slab exhausted: in the minimal Phase12-2 backend we do not loop;
        // the caller or future logic must acquire another slab.
        return NULL;
    }

    size_t stride = tiny_block_stride_for_class(class_idx);
    size_t offset = (size_t)meta->used * stride;

    // Phase 12-2 minimal geometry:
    //  - slab 0 data offset via SUPERSLAB_SLAB0_DATA_OFFSET
    //  - subsequent slabs at fixed SUPERSLAB_SLAB_USABLE_SIZE strides.
    size_t slab_base_off = SUPERSLAB_SLAB0_DATA_OFFSET
                         + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE;
    uint8_t* base = (uint8_t*)ss + slab_base_off + offset;

    meta->used++;
    atomic_fetch_add_explicit(&ss->total_active_blocks, 1, memory_order_relaxed);

    return superslab_return_block(base, class_idx);
}

/*
 * ss_backend_log_take_slot() - claim one of the first few diagnostic log slots
 *
 * Returns non-zero for the first 4 claims on *counter and zero afterwards.
 * Once the limit is reached the counter is only read, never incremented, so
 * the steady-state allocation path performs no atomic read-modify-write and
 * the counter cannot wrap around and re-enable logging.
 */
static inline int ss_backend_log_take_slot(_Atomic uint32_t* counter)
{
    const uint32_t log_limit = 4;

    if (atomic_load_explicit(counter, memory_order_relaxed) >= log_limit) {
        return 0;
    }
    // Racing callers may each pass the load above; fetch_add hands out unique
    // tickets, so at most log_limit of them see a value below the limit.
    return atomic_fetch_add_explicit(counter, 1, memory_order_relaxed) < log_limit;
}

/*
 * Box API entry:
 *  - Single front-door for tiny-side Superslab allocations.
 *
 * Phase 12 policy (environment variable HAKMEM_TINY_SS_SHARED, read once on
 * first call):
 *  - unset or empty          -> shared backend enabled (default)
 *  - parses (atoi) to 0      -> legacy backend only (for regression checks)
 *  - parses to non-zero      -> shared backend first; legacy only as fallback
 *                               when the shared backend returns NULL
 *
 * The first 4 calls overall print one "[SS_BACKEND] ..." line to stderr
 * describing which backend served (or failed) the request.
 *
 * Parameters:
 *   class_idx - tiny class index, forwarded unchanged to the backend(s).
 *
 * Returns:
 *   Pointer returned by the selected backend, or NULL if it (and, in shared
 *   mode, the legacy fallback) could not allocate.
 */
void* hak_tiny_alloc_superslab_box(int class_idx)
{
    // Both statics are touched by concurrent callers, hence _Atomic. Relaxed
    // ordering suffices: the mode is derived only from the environment, so
    // racing initializers normally store the same value, and the log counter
    // guards nothing but diagnostics.
    static _Atomic int g_ss_shared_mode = -1;
    static _Atomic uint32_t g_ss_backend_log = 0;

    int shared_mode = atomic_load_explicit(&g_ss_shared_mode, memory_order_relaxed);
    if (__builtin_expect(shared_mode == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SS_SHARED");
        if (!e || !*e) {
            shared_mode = 1;  // default: shared backend enabled
        } else {
            shared_mode = (atoi(e) != 0) ? 1 : 0;
        }
        atomic_store_explicit(&g_ss_shared_mode, shared_mode, memory_order_relaxed);
    }

    if (shared_mode == 1) {
        void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
        if (p != NULL) {
            if (ss_backend_log_take_slot(&g_ss_backend_log)) {
                fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p);
            }
            return p;
        }
        // Shared backend failed: fall back to legacy to stay on the safe side.
        if (ss_backend_log_take_slot(&g_ss_backend_log)) {
            fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx);
        }
        return hak_tiny_alloc_superslab_backend_legacy(class_idx);
    }

    // Shared mode is off: legacy backend only.
    if (ss_backend_log_take_slot(&g_ss_backend_log)) {
        fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx);
    }
    return hak_tiny_alloc_superslab_backend_legacy(class_idx);
}