// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
// - superslab_refill(): Refill TLS slab via shared pool
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point

#include "box/superslab_expansion_box.h"      // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h"            // Box API: Next pointer read/write
#include "box/tiny_tls_carve_one_block_box.h" // Box: Shared TLS carve helper
#include "box/c7_meta_used_counter_box.h"     // Box: C7 meta->used telemetry
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h"                // Box 3: Geometry & Capacity Calculator
#include "tiny_debug_api.h"                   // Guard/failfast declarations
#include "hakmem_env_cache.h"                 // Priority-2: ENV cache (eliminate syscalls)
#if HAKMEM_TINY_SS_TLS_HINT
#include "box/tls_ss_hint_box.h"              // Phase 1: TLS SuperSlab Hint Cache for Headerless mode
#endif

// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================

static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Small hot classes (C0–C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                c7_meta_used_note(cls, C7_META_USED_SRC_FRONT);
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);

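    // Note (illustrative assumption): tiny_block_at_index() is modeled here as a
    // plain stride multiply, i.e. roughly
    //
    //   static inline void* block_at(uint8_t* base, uint32_t idx, size_t stride) {
    //       return base + (size_t)idx * stride;  // hypothetical model, not the real helper
    //   }
    //
    // so the bump path above costs one multiply-add per allocation, with no
    // freelist walk.
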
    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote", ss, slab_idx,
                                           (void*)head, 0xA243u, self_tid, 0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail", ss, slab_idx,
                                       meta, 0xA244u, self_tid, 0);
            }
            return NULL;
        }
    }

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending", ss, slab_idx,
                                   (void*)head_pending, 0xA247u, tiny_self_u32(), 1);
            return NULL;
        }
    }

    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        // Phase 1: Update TLS hint cache with this SuperSlab (fast free path optimization)
#if HAKMEM_TINY_SS_TLS_HINT
        {
            void* ss_base = (void*)ss;
            size_t ss_size = (size_t)1ULL << ss->lg_size;
            tls_ss_hint_update(ss, ss_base, ss_size);
        }
#endif
        return user;
    }

    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;

            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, (size_t)offset, meta->used, meta->capacity);

            // Misaligned freelist entry → drop this slab's freelist to force a new slab.
            if ((offset % blk) != 0) {
                fprintf(stderr, "[ALLOC_POP_MISALIGN] cls=%u slab=%d offset_mod=%zu blk=%zu base=%p ss=%p\n",
                        meta->class_idx, slab_idx, (size_t)(offset % blk), blk, block, (void*)ss);
                meta->freelist = NULL;
                return NULL;
            }

            // Alignment was verified above; only the index bound remains to check.
            if (offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }

        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }

        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }

        // CRITICAL FIX (Larson double-free): Write header for freelist allocations
        // Problem:  Freelist path was returning BASE without writing header
        // Result:   Stale headers from previous allocations → double-free on next free
        // Solution: Always write header before returning (same as linear carve path)
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block, meta->class_idx);
#else
            block;
#endif
        // Phase 1: Update TLS hint cache with this SuperSlab (fast free path optimization)
#if HAKMEM_TINY_SS_TLS_HINT
        {
            void* ss_base = (void*)ss;
            size_t ss_size = (size_t)1ULL << ss->lg_size;
            tls_ss_hint_update(ss, ss_base, ss_size);
        }
#endif
        return user;
    }

    return NULL;
}

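// Usage sketch (illustrative, not part of the API surface): callers try the
// bound slab first and treat NULL as "refill needed", e.g.:
//
//   void* p = superslab_alloc_from_slab(ss, idx);
//   if (!p) {
//       // slab exhausted, remote queue pending, or ownership lost:
//       // acquire a fresh slab via superslab_refill() and retry.
//   }
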
// ============================================================================
/*
 * Phase 12: Shared SuperSlab Pool based superslab_refill
 *
 * Policy:
 * - superslab_refill(int class_idx) goes through the shared pool and is the
 *   single entry point that binds one slab for class_idx to TLS.
 * - Callers may assume exactly this contract:
 *     * on success: TinyTLSSlab (g_tls_slabs[class_idx]) points to a valid
 *       ss/meta/slab_base;
 *     * on failure: NULL is returned, and TLS is either left unchanged or
 *       rolled back cleanly.
 * - shared_pool_acquire_slab() is treated as returning 0 on success and
 *   non-zero on failure; on success (*ss_out, *slab_idx_out) are set.
 * - By design, superslab_init_slab() / tiny_tls_bind_slab() never call
 *   superslab_refill() recursively (self-calls are forbidden); defensive
 *   checks are still performed here on the safe side.
 */

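// Contract sketch (illustrative, restating the policy above in code form):
//
//   SuperSlab* ss = NULL;
//   int idx = -1;
//   if (shared_pool_acquire_slab(cls, &ss, &idx) == 0) {
//       // success: ss and idx are valid and may be bound to TLS
//   } else {
//       // failure: ss and idx are undefined and must not be used
//   }
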
// TLS Bind Box Logic moved to core/box/ss_tls_bind_box.h
#include "box/ss_tls_bind_box.h"

SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif

    // Bounds check (defensive, should be enforced by callers too)
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return NULL;
    }

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Shared pool API:
    //   0 == success: (*ss_out, *slab_idx_out) receive valid values.
    //  !=0 == failure: the outputs must be treated as undefined.
    extern int shared_pool_acquire_slab(int class_idx,
                                        SuperSlab** ss_out,
                                        int* slab_idx_out);

    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss || slab_idx < 0) {
        return NULL;
    }

    if (!ss_tls_bind_one(class_idx, tls, ss, slab_idx, tiny_self_u32())) {
        return NULL;
    }

    return ss;
}

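// Usage sketch (illustrative): after a successful refill the TLS slab for
// class_idx is fully bound, so a caller can carve immediately:
//
//   if (superslab_refill(cls) != NULL) {
//       TinyTLSSlab* t = &g_tls_slabs[cls];
//       // t->ss, t->meta, t->slab_base now describe one slab owned by this thread
//   }
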
// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================

static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;

    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }

        // Drain remote if needed (ownership-checked elsewhere)
        // Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
        if (meta) {
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                HAK_ENV_TINY_ALLOC_REMOTE_RELAX() ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }

    // Fast path: linear carve from current TLS slab
    if (meta && tls->slab_base) {
        TinyTLSCarveOneResult carve = tiny_tls_carve_one_block(tls, class_idx);
        if (carve.block) {
#if !HAKMEM_BUILD_RELEASE
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                const char* tag = (carve.path == TINY_TLS_CARVE_PATH_FREELIST)
                                      ? "freelist_alloc"
                                      : "linear_alloc";
                tiny_remote_track_on_alloc(tls->ss, slab_idx, carve.block, tag, 0);
                tiny_remote_assert_not_remote(tls->ss, slab_idx, carve.block, tag, 0);
            }
#endif

#if HAKMEM_TINY_SS_TLS_HINT
            {
                void* ss_base = (void*)tls->ss;
                size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
                tls_ss_hint_update(tls->ss, ss_base, ss_size);
            }
#endif
            if (carve.path == TINY_TLS_CARVE_PATH_LINEAR) {
                ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
            } else if (carve.path == TINY_TLS_CARVE_PATH_FREELIST) {
                ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
            }
            HAK_RET_ALLOC(class_idx, carve.block);
        }
    }

    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }

    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }

    return NULL;
}
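
// Allocation-order summary (descriptive): hak_tiny_alloc_superslab() tries, in
// order, (1) the MidTC per-class cache, (2) a carve from the currently bound
// TLS slab (draining pending remote frees first), and (3) a shared-pool refill
// followed by a single linear carve from the freshly bound slab. A NULL return
// therefore means the shared pool itself could not supply a slab.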