Root Cause:
1. C7 stride was 1024B, unable to serve 1024B user requests (1025B needed with header)
2. New SuperSlabs start with meta->class_idx=0 (mmap zero-init)
3. superslab_init_slab() only sets class_idx if meta->class_idx==255
4. Multiple code paths used conditional assignment (if class_idx==255), leaving C7 slabs with class_idx=0
5. This caused C7 blocks to be misidentified as C0, leading to HDR_META_MISMATCH errors

Changes:
1. Upgrade C7 stride: 1024B → 2048B (can now serve 1024B requests)
2. Update blocks_per_slab[7]: 64 → 32 (64KB slab / 2048B stride)
3. Update size-to-class LUT: entries 513-2048 now map to C7
4. Fix superslab_init_slab() fail-safe: only reinitialize if class_idx==255 (not 0)
5. Add explicit class_idx assignment in 6 initialization paths:
   - tiny_superslab_alloc.inc.h: superslab_refill() after init
   - hakmem_tiny_superslab.c: backend_shared after init (main path)
   - ss_unified_backend_box.c: unconditional assignment
   - ss_legacy_backend_box.c: explicit assignment
   - superslab_expansion_box.c: explicit assignment
   - ss_allocation_box.c: fail-safe condition fix

Fix P0 refill bug:
- Update obsolete array access after Phase 3d-B TLS SLL unification
- g_tls_sll_head[cls] → g_tls_sll[cls].head
- g_tls_sll_count[cls] → g_tls_sll[cls].count

Results:
- HDR_META_MISMATCH: eliminated (0 errors in 100K iterations)
- 1024B allocations now routed to C7 (Tiny fast path)
- NXT_MISALIGN warnings remain (legacy 1024B SuperSlabs, separate issue)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
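For context, a minimal sketch of the arithmetic behind the C7 geometry change. This is an illustration only: the constant names, the 64KB usable-slab figure, and the 1-byte header size are assumptions, and the real stride table and size-to-class LUT live in other files of the tree.

```c
#include <assert.h>
#include <stddef.h>

/* Assumed values, for illustration only. */
enum { SLAB_USABLE_BYTES = 64 * 1024 };               /* usable bytes per slab (assumed) */
enum { C7_STRIDE_OLD = 1024, C7_STRIDE_NEW = 2048 };  /* stride before/after the fix */
enum { TINY_HEADER_BYTES = 1 };                       /* per-block header (assumed size) */

int main(void) {
    /* A 1024B user request needs 1024 + header bytes: it did not fit the old
     * 1024B C7 stride, but fits the new 2048B stride. */
    size_t need = 1024 + TINY_HEADER_BYTES;
    assert(need >  C7_STRIDE_OLD);
    assert(need <= C7_STRIDE_NEW);

    /* Doubling the stride halves the block count: blocks_per_slab[7] 64 -> 32. */
    assert(SLAB_USABLE_BYTES / C7_STRIDE_OLD == 64);
    assert(SLAB_USABLE_BYTES / C7_STRIDE_NEW == 32);
    return 0;
}
```

With the stride at 2048B, the size-to-class entries 513–2048 can all resolve to C7, which is what routes 1024B allocations onto the Tiny fast path.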
// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
// - superslab_refill(): Refill TLS slab via shared pool
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point

#include "box/superslab_expansion_box.h"  // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h"        // Box API: Next pointer read/write
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h"            // Box 3: Geometry & Capacity Calculator

// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================

static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Small hot classes (C0–C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);

    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote", ss, slab_idx,
                                           (void*)head, 0xA243u, self_tid, 0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail", ss, slab_idx,
                                       meta, 0xA244u, self_tid, 0);
            }
            return NULL;
        }
    }

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending", ss, slab_idx,
                                   (void*)head_pending, 0xA247u, tiny_self_u32(), 1);
            return NULL;
        }
    }

    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        return user;
    }

    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;

            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);

            // Misaligned freelist entry → drop this slab's freelist to force new slab.
            if ((offset % blk) != 0) {
                fprintf(stderr, "[ALLOC_POP_MISALIGN] cls=%u slab=%d offset_mod=%zu blk=%zu base=%p ss=%p\n",
                        meta->class_idx, slab_idx, (size_t)(offset % blk), blk, block, (void*)ss);
                meta->freelist = NULL;
                return NULL;
            }

            if (offset % blk != 0 ||
                offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }

        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }

        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }
        return block;
    }

    return NULL;
}

// ============================================================================
/*
 * Phase 12: Shared SuperSlab Pool based superslab_refill
 *
 * Policy:
 * - superslab_refill(int class_idx) goes through the shared pool and is the
 *   single entry point that binds one slab for class_idx to the TLS.
 * - Callers may rely on exactly this contract:
 *     * Success: TinyTLSSlab (g_tls_slabs[class_idx]) points to a valid
 *       ss/meta/slab_base.
 *     * Failure: NULL is returned and the TLS is either left unchanged or
 *       cleanly rolled back.
 * - shared_pool_acquire_slab() is treated as returning 0 on success and
 *   non-zero on failure; on success, (*ss_out, *slab_idx_out) are set.
 * - superslab_init_slab() / tiny_tls_bind_slab() are designed never to call
 *   superslab_refill() recursively (no self-calls). Defensive checks are
 *   still performed here on the safe side.
 */

SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif

    // Bounds check (defensive, should be enforced by callers too)
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return NULL;
    }

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Shared pool API:
    //   0 == success, (*ss_out, *slab_idx_out) receive valid values.
    //   !=0 == failure, outputs are treated as undefined.
    extern int shared_pool_acquire_slab(int class_idx,
                                        SuperSlab** ss_out,
                                        int* slab_idx_out);

    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss || slab_idx < 0) {
        return NULL;
    }

    // Initialize slab metadata for this class/thread.
    // NOTE:
    // - superslab_init_slab() must not call superslab_refill() recursively.
    // - class_idx is reflected into the slab meta's class_idx.
    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss, slab_idx, g_tiny_class_sizes[class_idx], my_tid);

    // CRITICAL FIX: Ensure class_idx is set after init.
    // New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
    // superslab_init_slab() only sets it if meta->class_idx==255.
    // We must explicitly set it to the requested class to avoid C0/C7 confusion.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
    uint8_t old_cls = meta->class_idx;
#endif
    meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
    if (class_idx == 7 && old_cls != class_idx) {
        fprintf(stderr, "[SUPERSLAB_REFILL_FIX_C7] ss=%p slab=%d old_cls=%u new_cls=%d\n",
                (void*)ss, slab_idx, old_cls, class_idx);
    }
#endif

    // Bind this slab to TLS for fast subsequent allocations.
    // tiny_tls_bind_slab() updates tls->ss, tls->slab_idx, tls->meta and
    // tls->slab_base consistently.
    tiny_tls_bind_slab(tls, ss, slab_idx);

    // Sanity: TLS must now describe this slab for this class.
    // On failure, roll back the TLS and return NULL (callers can safely retry).
    if (!(tls->ss == ss &&
          tls->slab_idx == (uint8_t)slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx &&
          tls->slab_base != NULL)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_base = NULL;
        tls->slab_idx = 0;
        return NULL;
    }

    return ss;
}

// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================

static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;

    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }

        // Drain remote if needed (ownership-checked elsewhere)
        if (meta) {
            static int g_alloc_remote_relax = -1;
            if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
                g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
            }
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }

    // Fast path: linear carve from current TLS slab
    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tls->slab_base;
        void* block = base + ((size_t)meta->used * block_size);
        meta->used++;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            uintptr_t base_ss = (uintptr_t)tls->ss;
            size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
            uintptr_t p = (uintptr_t)block;
            int in_range = (p >= base_ss) && (p < base_ss + ss_size);
            int aligned = ((p - (uintptr_t)base) % block_size) == 0;
            int idx_ok = (tls->slab_idx >= 0) &&
                         (tls->slab_idx < ss_slabs_capacity(tls->ss));
            if (!in_range || !aligned || !idx_ok || meta->used > meta->capacity) {
                tiny_failfast_abort_ptr("alloc_ret_align", tls->ss, tls->slab_idx,
                                        block, "superslab_tls_invariant");
            }
        }

        ss_active_inc(tls->ss);
        ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
        HAK_RET_ALLOC(class_idx, block);
    }

    // Freelist path from current TLS slab
    if (meta && meta->freelist) {
        void* block = meta->freelist;
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            size_t blk = tiny_stride_for_class(meta->class_idx);
            uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
            int align_ok = ((delta % blk) == 0);
            int range_ok = (delta / blk) < meta->capacity;
            if (!align_ok || !range_ok) {
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
                return NULL;
            }
        }
        void* next = tiny_next_read(class_idx, block);
        meta->freelist = next;
        meta->used++;
        ss_active_inc(tls->ss);
        ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
        HAK_RET_ALLOC(class_idx, block);
    }

    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }

    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }

    return NULL;
}