Files
hakmem/core/tiny_superslab_alloc.inc.h
Commit a78224123e by Moe Charm (CI): Fix C0/C7 class confusion: Upgrade C7 stride to 2048B and fix meta->class_idx initialization
Root Cause:
1. C7 stride was 1024B, which cannot serve a 1024B user request (1025B is needed including the header; see the sketch after this list)
2. New SuperSlabs start with meta->class_idx=0 (mmap zero-init)
3. superslab_init_slab() only sets class_idx if meta->class_idx==255
4. Multiple code paths used conditional assignment (if class_idx==255), leaving C7 slabs with class_idx=0
5. This caused C7 blocks to be misidentified as C0, leading to HDR_META_MISMATCH errors
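A minimal sketch of the two failure modes above, using illustrative names (FakeSlabMeta, stride_can_serve, old_init) and a 1-byte header assumption rather than the actual HAKMEM definitions:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define HEADER_BYTES 1   /* per-block class-index header (assumption) */

typedef struct { uint8_t class_idx; } FakeSlabMeta;

/* A stride can serve a request only if the payload plus header fits. */
static int stride_can_serve(size_t stride, size_t user_size) {
    return user_size + HEADER_BYTES <= stride;
}

/* Old fail-safe: assign only when the 255 sentinel is seen. Fresh mmap'd
 * metadata is all zeroes, so class_idx stays 0 (C0) instead of 7 (C7). */
static void old_init(FakeSlabMeta* m, uint8_t cls) {
    if (m->class_idx == 255) m->class_idx = cls;
}

int main(void) {
    assert(!stride_can_serve(1024, 1024));  /* old C7 stride: 1025B does not fit */
    assert( stride_can_serve(2048, 1024));  /* upgraded stride: fits */

    FakeSlabMeta meta;
    memset(&meta, 0, sizeof meta);          /* mmap zero-init */
    old_init(&meta, 7);
    assert(meta.class_idx == 0);            /* bug: C7 slab looks like C0 */
    return 0;
}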

Changes:
1. Upgrade C7 stride: 1024B → 2048B (can now serve 1024B requests)
2. Update blocks_per_slab[7]: 64 → 32 (2048B stride / 64KB slab)
3. Update size-to-class LUT: entries 513-2048 now map to C7 (illustrated after this list)
4. Fix superslab_init_slab() fail-safe: only reinitialize if class_idx==255 (not 0)
5. Add explicit class_idx assignment in 6 initialization paths:
   - tiny_superslab_alloc.inc.h: superslab_refill() after init
   - hakmem_tiny_superslab.c: backend_shared after init (main path)
   - ss_unified_backend_box.c: unconditional assignment
   - ss_legacy_backend_box.c: explicit assignment
   - superslab_expansion_box.c: explicit assignment
   - ss_allocation_box.c: fail-safe condition fix
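A small sketch of the updated C7 geometry; SLAB_BYTES, C7_STRIDE, and the size_to_class array below are illustrative stand-ins, and only the numbers (2048B stride, 32 blocks per 64KB slab, sizes 513-2048 mapped to class 7) come from this commit:

#include <assert.h>
#include <stdint.h>

#define SLAB_BYTES (64u * 1024u)
#define C7_STRIDE  2048u

int main(void) {
    /* blocks_per_slab[7]: 64KB slab / 2048B stride = 32 */
    assert(SLAB_BYTES / C7_STRIDE == 32);

    /* size-to-class LUT sketch: one entry per request size */
    static uint8_t size_to_class[2048 + 1];
    for (unsigned sz = 513; sz <= 2048; sz++)
        size_to_class[sz] = 7;              /* per this commit, 513..2048 -> C7 */

    assert(size_to_class[1024] == 7);       /* 1024B requests now routed to C7 */
    assert(size_to_class[2048] == 7);
    return 0;
}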

Fix P0 refill bug:
- Update obsolete array accesses after the Phase 3d-B TLS SLL unification (sketched after this list):
- g_tls_sll_head[cls] → g_tls_sll[cls].head
- g_tls_sll_count[cls] → g_tls_sll[cls].count
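A sketch of what the Phase 3d-B rename implies: parallel per-class arrays replaced by one array of per-class structs. TinyTLSSLL, tls_sll_pop, and the next-pointer handling are assumptions for illustration; only the g_tls_sll[cls].head / .count access pattern comes from the lines above.

#include <stddef.h>
#include <stdint.h>

#define TINY_NUM_CLASSES 8

/* Before: two parallel TLS arrays */
static __thread void*    g_tls_sll_head_old[TINY_NUM_CLASSES];
static __thread uint32_t g_tls_sll_count_old[TINY_NUM_CLASSES];

/* After: one struct per class, so head and count stay together */
typedef struct { void* head; uint32_t count; } TinyTLSSLL;
static __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];

static void* tls_sll_pop(int cls) {
    void* p = g_tls_sll[cls].head;          /* was g_tls_sll_head[cls] */
    if (p) {
        g_tls_sll[cls].head = *(void**)p;   /* illustrative next-pointer read */
        g_tls_sll[cls].count--;             /* was g_tls_sll_count[cls] */
    }
    return p;
}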

Results:
- HDR_META_MISMATCH: eliminated (0 errors in 100K iterations)
- 1024B allocations now routed to C7 (Tiny fast path)
- NXT_MISALIGN warnings remain (legacy 1024B SuperSlabs, separate issue)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-21 13:44:05 +09:00


// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
// - superslab_refill(): Refill TLS slab via shared pool
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point
#include "box/superslab_expansion_box.h" // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator"
// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];
    // Small hot classes (C0-C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);
    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote",
                                           ss,
                                           slab_idx,
                                           (void*)head,
                                           0xA243u,
                                           self_tid,
                                           0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail",
                                       ss,
                                       slab_idx,
                                       meta,
                                       0xA244u,
                                       self_tid,
                                       0);
            }
            return NULL;
        }
    }
    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending",
                                   ss,
                                   slab_idx,
                                   (void*)head_pending,
                                   0xA247u,
                                   tiny_self_u32(),
                                   1);
            return NULL;
        }
    }
    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        return user;
    }
    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;
            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);
            // Misaligned freelist entry → drop this slab's freelist to force new slab.
            if ((offset % blk) != 0) {
                fprintf(stderr, "[ALLOC_POP_MISALIGN] cls=%u slab=%d offset_mod=%zu blk=%zu base=%p ss=%p\n",
                        meta->class_idx, slab_idx, (size_t)(offset % blk), blk, block, (void*)ss);
                meta->freelist = NULL;
                return NULL;
            }
            if (offset % blk != 0 ||
                offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }
        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }
        return block;
    }
    return NULL;
}
// ============================================================================
/*
 * Phase 12: Shared SuperSlab Pool based superslab_refill
 *
 * Policy:
 * - superslab_refill(int class_idx) goes through the shared pool and is the
 *   single entry point that binds one slab for class_idx to TLS.
 * - Callers may rely on exactly this contract:
 *   * On success: TinyTLSSlab (g_tls_slabs[class_idx]) points to a valid ss/meta/slab_base.
 *   * On failure: NULL is returned and TLS is either left untouched or cleanly rolled back.
 * - shared_pool_acquire_slab() is assumed to return 0 on success / non-zero on failure,
 *   and to set (*ss_out, *slab_idx_out) on success.
 * - superslab_init_slab() / tiny_tls_bind_slab() are designed never to call
 *   superslab_refill() recursively (no self-invocation). Defensive checks are
 *   still performed here on the safe side.
 */
SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif
    // Bounds check (defensive, should be enforced by callers too)
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return NULL;
    }
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    // Shared pool API:
    //   0 == success, (*ss_out, *slab_idx_out) receive valid values.
    //   !=0 == failure, the outputs are treated as undefined.
    extern int shared_pool_acquire_slab(int class_idx,
                                        SuperSlab** ss_out,
                                        int* slab_idx_out);
    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss || slab_idx < 0) {
        return NULL;
    }
    // Initialize slab metadata for this class/thread.
    // NOTE:
    // - superslab_init_slab is designed not to call superslab_refill() recursively.
    // - class_idx is reflected into slab_meta->class_idx.
    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss,
                        slab_idx,
                        g_tiny_class_sizes[class_idx],
                        my_tid);
    // CRITICAL FIX: Ensure class_idx is set after init.
    // New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
    // superslab_init_slab() only sets it if meta->class_idx==255.
    // We must explicitly set it to the requested class to avoid C0/C7 confusion.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
    uint8_t old_cls = meta->class_idx;
#endif
    meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
    if (class_idx == 7 && old_cls != class_idx) {
        fprintf(stderr, "[SUPERSLAB_REFILL_FIX_C7] ss=%p slab=%d old_cls=%u new_cls=%d\n",
                (void*)ss, slab_idx, old_cls, class_idx);
    }
#endif
    // Bind this slab to TLS for fast subsequent allocations.
    // tiny_tls_bind_slab consistently updates:
    //   tls->ss, tls->slab_idx, tls->meta, tls->slab_base
    tiny_tls_bind_slab(tls, ss, slab_idx);
    // Sanity: TLS must now describe this slab for this class.
    // On failure, roll TLS back and return NULL (callers can safely retry).
    if (!(tls->ss == ss &&
          tls->slab_idx == (uint8_t)slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx &&
          tls->slab_base != NULL)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_base = NULL;
        tls->slab_idx = 0;
        return NULL;
    }
    return ss;
}
// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================
static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;
    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }
        // Drain remote if needed (ownership-checked elsewhere)
        if (meta) {
            static int g_alloc_remote_relax = -1;
            if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
                g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
            }
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }
    // Fast path: linear carve from current TLS slab
    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tls->slab_base;
        void* block = base + ((size_t)meta->used * block_size);
        meta->used++;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            uintptr_t base_ss = (uintptr_t)tls->ss;
            size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
            uintptr_t p = (uintptr_t)block;
            int in_range = (p >= base_ss) && (p < base_ss + ss_size);
            int aligned = ((p - (uintptr_t)base) % block_size) == 0;
            int idx_ok = (tls->slab_idx >= 0) &&
                         (tls->slab_idx < ss_slabs_capacity(tls->ss));
            if (!in_range || !aligned || !idx_ok || meta->used > meta->capacity) {
                tiny_failfast_abort_ptr("alloc_ret_align",
                                        tls->ss,
                                        tls->slab_idx,
                                        block,
                                        "superslab_tls_invariant");
            }
        }
        ss_active_inc(tls->ss);
        ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
        HAK_RET_ALLOC(class_idx, block);
    }
    // Freelist path from current TLS slab
    if (meta && meta->freelist) {
        void* block = meta->freelist;
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            size_t blk = tiny_stride_for_class(meta->class_idx);
            uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
            int align_ok = ((delta % blk) == 0);
            int range_ok = (delta / blk) < meta->capacity;
            if (!align_ok || !range_ok) {
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
                return NULL;
            }
        }
        void* next = tiny_next_read(class_idx, block);
        meta->freelist = next;
        meta->used++;
        ss_active_inc(tls->ss);
        ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
        HAK_RET_ALLOC(class_idx, block);
    }
    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }
    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }
    return NULL;
}