/*
 * hakmem/core/superslab/superslab_inline.h
 * (Stray web-viewer chrome removed from this pasted copy; see VCS for history.)
 */
#ifndef SUPERSLAB_INLINE_H
#define SUPERSLAB_INLINE_H
#include "superslab_types.h"
/*
 * History note (2025-11-22): workset=8192 SEGV fix, iteration 1.
 * SuperSlab geometry used to be defined twice: the old tiny_slab_base_for()
 * computed SLAB0_OFFSET + idx * SLAB_SIZE, while Box3's
 * tiny_slab_base_for_geometry() applies the offset only for idx == 0.
 * slab_idx > 0 was therefore mis-based by +2048 bytes and the Unified Cache
 * carve stepped past the slab boundary. Fix: delegate all base calculations
 * to the single Box3 implementation.
 */
#include "../tiny_box_geometry.h" // Box 3 geometry helpers (stride/base/capacity)
// Forward declaration for unsafe remote drain used by refill/handle paths
// Implemented in hakmem_tiny_superslab.c
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta);
// Optional debug counter (defined in hakmem_tiny_superslab.c)
extern _Atomic uint64_t g_ss_active_dec_calls;
// ========== Fast SuperSlab Lookup via Registry (Phase 12 fix) ==========
// Purpose: Safe SuperSlab lookup that prevents SEGFAULT on arbitrary pointers
// Original Phase 12: Tried mask+dereference (5-10 cycles) but caused 12% crash rate
// Current Fix: Use registry-based lookup (50-100 cycles) for safety
//
// BUGFIX (2025-11-29): Replaced unsafe mask+dereference with safe registry lookup
// Root Cause: hak_tiny_free_fast_v2() can receive arbitrary pointers (stack, garbage, etc.)
// Mask calculation could produce unmapped address → SEGFAULT on ss->magic read
// Phase 1a: Tried range checks → insufficient (still 10-12% crash rate)
// Phase 1b: Use hak_super_lookup() registry → 0% crash rate expected
// Trade-off: Rollback Phase 12 optimization (5-10x slower) but crash-free
//
// Performance comparison:
// - Phase 12 (unsafe): ~5-10 cycles, 12% crash rate
// - Phase 1b (safe): ~50-100 cycles, 0% crash rate
// - Still faster than mincore() syscall (5000-10000 cycles)
//
// Note: Implemented as macro to avoid circular include dependency
// (superslab_inline.h ↔ hakmem_super_registry.h)
// hak_super_lookup() is defined in hakmem_super_registry.h
#define ss_fast_lookup(ptr) hak_super_lookup(ptr)
// Return maximum number of slabs for this SuperSlab based on lg_size.
// Number of slab slots in this SuperSlab, derived from its power-of-two size
// (1 << lg_size). Returns 0 for a NULL SuperSlab.
static inline int ss_slabs_capacity(SuperSlab* ss)
{
    if (ss == NULL) {
        return 0;
    }
    size_t total_bytes = (size_t)1 << ss->lg_size;
    return (int)(total_bytes / SLAB_SIZE);
}
// Compute slab base pointer for given (ss, slab_idx).
/* History note (2025-11-22): tiny_slab_base_for() formerly computed
 * SLAB0_OFFSET + idx * SLAB_SIZE itself; it now delegates to Box3 so every
 * layer shares one SuperSlab geometry. */
// Box 5 wrapper: delegate to Box 3 canonical geometry helper.
// Box 5 wrapper: data base pointer for (ss, slab_idx).
// Delegates to the canonical Box 3 helper so every layer (TLS, Unified
// Cache, free paths) agrees on SuperSlab geometry; a second, divergent
// formula here previously mis-based slab_idx > 0 and caused carve overruns.
// Returns NULL for a NULL SuperSlab or a negative slab index.
// NOTE(review): there is no upper-bound check on slab_idx here; callers are
// presumably expected to stay below ss_slabs_capacity(ss) — confirm.
static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx)
{
    if (!ss || slab_idx < 0) {
        return NULL;
    }
    return tiny_slab_base_for_geometry(ss, slab_idx);
}
// Compute slab index for a pointer inside ss.
/*
 * History note (2025-11-22, iteration 2): slab_index_for() rewritten as the
 * exact inverse of Box3 tiny_slab_base_for_geometry(). The old code
 * subtracted SUPERSLAB_SLAB0_DATA_OFFSET before dividing by SLAB_SIZE,
 * producing an off-by-one slab index for slab_idx > 0 (header/meta class
 * mismatches, TLS SLL corruption). Invariant:
 *   slab_index_for(ss, tiny_slab_base_for_geometry(ss, idx)) == idx
 */
// Box 5 wrapper: inverse of Box 3 geometry (tiny_slab_base_for_geometry).
// Layout (data regions):
// - Slab 0: [ss + SUPERSLAB_SLAB0_DATA_OFFSET, ss + SLAB_SIZE)
// - Slab 1: [ss + 1*SLAB_SIZE, ss + 2*SLAB_SIZE)
// - Slab k: [ss + k*SLAB_SIZE, ss + (k+1)*SLAB_SIZE)
// Box 5 wrapper: inverse of Box 3 geometry (tiny_slab_base_for_geometry).
// Data-region layout:
//   - Slab 0:          [ss + SUPERSLAB_SLAB0_DATA_OFFSET, ss + SLAB_SIZE)
//   - Slab k (k >= 1): [ss + k*SLAB_SIZE, ss + (k+1)*SLAB_SIZE)
// Returns the index of the slab containing ptr, or -1 when ptr is NULL,
// before the first data byte, or at/past the end of the SuperSlab.
static inline int slab_index_for(SuperSlab* ss, void* ptr)
{
    if (!ss || !ptr) {
        return -1;
    }
    uintptr_t base = (uintptr_t)ss;
    uintptr_t p = (uintptr_t)ptr;
    size_t ss_size = (size_t)1 << ss->lg_size;
    // Outside the overall SuperSlab data range.
    if (p < base + SUPERSLAB_SLAB0_DATA_OFFSET || p >= base + ss_size) {
        return -1;
    }
    // Slab 0 is special: its data starts after the SuperSlab metadata header.
    if (p < base + SLAB_SIZE) {
        return 0;
    }
    // Slabs 1+ are plain SLAB_SIZE strides from the SuperSlab base. Do NOT
    // subtract SUPERSLAB_SLAB0_DATA_OFFSET here — that was the off-by-one
    // that made blocks appear one slab lower than where they were allocated.
    size_t rel = p - base;
    int idx = (int)(rel / SLAB_SIZE);
    if (idx < 0 || idx >= SLABS_PER_SUPERSLAB_MAX) {
        return -1;
    }
    return idx;
}
/* History note (2025-11-28, P1.1): SuperSlab gained
 * uint8_t class_map[SLABS_PER_SUPERSLAB_MAX] (slab_idx -> class_idx,
 * 255 = unassigned) so the free path can resolve class_idx out-of-band
 * instead of reading the block header or cold TinySlabMeta. */
// P1.1: Get class_idx from class_map (out-of-band lookup, avoids reading TinySlabMeta)
// Purpose: Free path optimization - read class_idx without touching cold metadata
// Returns: class_idx (0-7) or 255 if slab is unassigned or invalid
// P1.1 out-of-band class lookup: map slab_idx -> class_idx via the
// SuperSlab-level class_map so the free path avoids touching cold metadata.
// Returns 255 for a NULL ss, an out-of-range index, or an unassigned slab
// (class_map entries start life as 255).
static inline int tiny_get_class_from_ss(SuperSlab* ss, int slab_idx)
{
    int in_range = (ss != NULL)
                && (slab_idx >= 0)
                && (slab_idx < SLABS_PER_SUPERSLAB_MAX);
    if (!in_range) {
        return 255; // invalid input mirrors the "unassigned" sentinel
    }
    return (int)ss->class_map[slab_idx];
}
// Simple ref helpers used by lifecycle paths.
// Read the current refcount with acquire ordering. A NULL SuperSlab reads 0.
static inline uint32_t superslab_ref_get(SuperSlab* ss)
{
    if (ss == NULL) {
        return 0;
    }
    return atomic_load_explicit(&ss->refcount, memory_order_acquire);
}
// Bump the refcount by one (acq_rel). No-op for a NULL SuperSlab.
static inline void superslab_ref_inc(SuperSlab* ss)
{
    if (ss == NULL) {
        return;
    }
    atomic_fetch_add_explicit(&ss->refcount, 1, memory_order_acq_rel);
}
// Drop the refcount by one (acq_rel). This is only the primitive: the caller
// decides when the SuperSlab is actually freed.
// NOTE(review): there is no underflow guard — decrementing a zero refcount
// wraps around; inc/dec must stay balanced at the call sites.
static inline void superslab_ref_dec(SuperSlab* ss)
{
    if (ss) {
        uint32_t prev = atomic_fetch_sub_explicit(&ss->refcount, 1, memory_order_acq_rel);
        (void)prev; // value unused here; lifecycle decision belongs to the caller
    }
}
// Ownership helpers (Box 3)
// Box 3 ownership: try to claim the slab meta for thread `tid`.
// The owner tag is bits 8..15 of tid; 0 encodes "unowned".
// Returns nonzero only on a successful 0 -> tag transition.
// NOTE(review): a tid whose bits 8..15 are all zero yields tag 0, which is
// indistinguishable from "unowned" — confirm upstream tids never do this.
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t tid)
{
    if (m == NULL) {
        return 0;
    }
    uint8_t tag = (uint8_t)((tid >> 8) & 0xFFu);
    uint8_t unowned = 0;
    return __atomic_compare_exchange_n(&m->owner_tid_low, &unowned, tag,
                                       false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}
// Box 3 ownership: release the slab meta, but only if `tid` still owns it.
// Compare-and-swap tag -> 0; a mismatched owner leaves the field untouched.
static inline void ss_owner_release(TinySlabMeta* m, uint32_t tid)
{
    if (m == NULL) {
        return;
    }
    uint8_t tag = (uint8_t)((tid >> 8) & 0xFFu);
    (void)__atomic_compare_exchange_n(&m->owner_tid_low, &tag, 0u,
                                      false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
// Box 3 ownership: does `tid` currently hold this slab meta?
// Relaxed load — presumably used as a fast-path hint; confirm callers
// tolerate a momentarily stale answer.
// NOTE(review): the 8-bit tag (tid bits 8..15) can collide between threads,
// so a true result is not a hard ownership guarantee.
static inline int ss_owner_is_mine(TinySlabMeta* m, uint32_t tid)
{
    if (!m) return 0;
    uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
    return cur == (uint8_t)((tid >> 8) & 0xFFu);
}
// Active block accounting (saturating dec by 1)
// Saturating decrement of ss->total_active_blocks: drops by one but never
// wraps below zero. Also ticks the debug counter g_ss_active_dec_calls.
static inline void ss_active_dec_one(SuperSlab* ss)
{
    if (ss == NULL) {
        return;
    }
    atomic_fetch_add_explicit(&g_ss_active_dec_calls, 1, memory_order_relaxed);
    uint32_t observed = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
    for (;;) {
        if (observed == 0) {
            return; // saturate at zero instead of wrapping
        }
        if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks,
                                                  &observed,
                                                  observed - 1u,
                                                  memory_order_acq_rel,
                                                  memory_order_relaxed)) {
            return;
        }
        // observed was refreshed by the failed CAS; retry with the new value
    }
}
// Remote push helper (Box 2):
// - Enqueue node to per-slab MPSC stack
// - Returns 1 if transition empty->nonempty, otherwise 0
// - Also decrements ss->total_active_blocks once (free completed)
// Box 2 remote free: push `node` onto the per-slab MPSC stack.
// Returns 1 when the stack transitioned empty -> nonempty, 0 otherwise, and
// -1 on invalid arguments. Also bumps remote_counts[slab_idx] and performs
// the single active-block decrement that accounts for this free.
// The node's first word is reused as the intrusive next link; consumers
// decode it via the tiny_next_ptr / tiny_nextptr helpers.
static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* node)
{
    int args_ok = (ss != NULL) && (node != NULL)
               && (slab_idx >= 0) && (slab_idx < SLABS_PER_SUPERSLAB_MAX);
    if (!args_ok) {
        return -1;
    }
    _Atomic uintptr_t* stack = &ss->remote_heads[slab_idx];
    uintptr_t prev;
    do {
        prev = atomic_load_explicit(stack, memory_order_acquire);
        // Link the node in front of the current head; the release CAS below
        // publishes this plain store to the consumer.
        *(uintptr_t*)node = prev;
    } while (!atomic_compare_exchange_weak_explicit(
                 stack, &prev, (uintptr_t)node,
                 memory_order_release, memory_order_relaxed));
    atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1, memory_order_acq_rel);
    // One completed free == one active block gone.
    ss_active_dec_one(ss);
    return (prev == 0) ? 1 : 0;
}
#endif // SUPERSLAB_INLINE_H