hakmem/core/tiny_superslab_alloc.inc.h
Moe Charm (CI) · e4868bf236 · Larson crash investigation: Add freelist header write + abort() on duplicate
## Changes

1. **TLS SLL duplicate detection** (core/box/tls_sll_box.h:381)
   - Changed `return true` to `abort()` so a duplicate push produces a backtrace on double-free
   - Enables precise root-cause identification

2. **Freelist header write fix** (core/tiny_superslab_alloc.inc.h:159-169)
   - Added a `tiny_region_id_write_header()` call to the freelist allocation path (see the sketch below)
   - Previously only the linear-carve path wrote headers, so blocks reused from the freelist kept stale headers
   - Now both paths write headers consistently
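
A minimal standalone sketch of what change 2 amounts to, assuming a 1-byte class-index header stored directly in front of the user payload; the helper names here are made up for illustration, and the real `tiny_region_id_write_header()` layout may differ:

```c
#include <stdint.h>
#include <stddef.h>

typedef struct Block { struct Block* next; } Block;   /* freelist link reuses the block body */

/* Hypothetical header helper: stamp the class index and return the user pointer. */
static void* write_class_header(void* base, uint8_t class_idx) {
    uint8_t* p = (uint8_t*)base;
    p[0] = class_idx;              /* refresh the header on every hand-out */
    return p + 1;
}

static void* freelist_pop(Block** head, uint8_t class_idx) {
    Block* blk = *head;
    if (!blk) return NULL;
    *head = blk->next;
    /* Before the fix this path returned the block base as-is, so whatever header
     * the previous owner left behind was reused; a later free() then classified
     * the block by that stale header, which surfaced as a double-free. */
    return write_class_header(blk, class_idx);
}

int main(void) {
    union { Block b; uint8_t raw[64]; } storage[2] = {{{0}}};
    Block* head = &storage[0].b;
    storage[0].b.next = &storage[1].b;
    storage[1].b.next = NULL;

    void* user = freelist_pop(&head, /*class_idx=*/3);
    return ((uint8_t*)user)[-1] == 3 ? 0 : 1;  /* header byte now holds the current class */
}
```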

## Root Cause Analysis

The backtrace revealed a true double-free pattern:
- `last_push_from=hak_tiny_free_fast_v2` (freed once)
- `last_pop_from=(null)` (never allocated)
- `where=hak_tiny_free_fast_v2` (freed again!)

The same pointer was freed twice WITHOUT a reallocation in between.
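
For reference, a sketch of the kind of duplicate-push check that now aborts. The provenance fields mirror the diagnostics above, but the list structure, names, and the linear scan are illustrative assumptions, not the actual core/box/tls_sll_box.h code:

```c
#include <stdio.h>
#include <stdlib.h>

typedef struct Node { struct Node* next; } Node;

static __thread Node*       tls_head;
static __thread const char* last_push_from;   /* site of the most recent push */
static __thread const char* last_pop_from;    /* site of the most recent pop, NULL if never */

/* Push a freed block onto the TLS singly-linked list; abort on a duplicate. */
static void tls_sll_push(void* ptr, const char* where) {
    for (Node* n = tls_head; n; n = n->next) {
        if ((void*)n == ptr) {
            fprintf(stderr, "[TLS_SLL DUP] ptr=%p last_push_from=%s last_pop_from=%s where=%s\n",
                    ptr,
                    last_push_from ? last_push_from : "(null)",
                    last_pop_from ? last_pop_from : "(null)",
                    where);
            abort();   /* previously the check just returned instead of aborting */
        }
    }
    Node* n = (Node*)ptr;
    n->next = tls_head;
    tls_head = n;
    last_push_from = where;
}

int main(void) {
    static Node block;                               /* stand-in for a freed allocation */
    tls_sll_push(&block, "hak_tiny_free_fast_v2");   /* first free */
    tls_sll_push(&block, "hak_tiny_free_fast_v2");   /* duplicate: prints and aborts */
    return 0;
}
```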

## Status

- Freelist header fix: implemented (necessary but not sufficient)
- Double-free still occurs: deeper investigation needed
- Possible causes: a user-code bug, a TLS drain race, or a remote-free issue

Next: investigate the allocation/free flow with enhanced tracing.
2025-11-27 05:57:22 +09:00

// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
// - superslab_refill(): Refill TLS slab via shared pool
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point
#include "box/superslab_expansion_box.h" // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator"
// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];
    // Small hot classes (C0-C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);
    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote",
                                           ss,
                                           slab_idx,
                                           (void*)head,
                                           0xA243u,
                                           self_tid,
                                           0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail",
                                       ss,
                                       slab_idx,
                                       meta,
                                       0xA244u,
                                       self_tid,
                                       0);
            }
            return NULL;
        }
    }
    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending",
                                   ss,
                                   slab_idx,
                                   (void*)head_pending,
                                   0xA247u,
                                   tiny_self_u32(),
                                   1);
            return NULL;
        }
    }
    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        return user;
    }
    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;
            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);
            // Misaligned freelist entry → drop this slab's freelist to force new slab.
            if ((offset % blk) != 0) {
                fprintf(stderr, "[ALLOC_POP_MISALIGN] cls=%u slab=%d offset_mod=%zu blk=%zu base=%p ss=%p\n",
                        meta->class_idx, slab_idx, (size_t)(offset % blk), blk, block, (void*)ss);
                meta->freelist = NULL;
                return NULL;
            }
            if (offset % blk != 0 ||
                offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }
        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }
        // CRITICAL FIX (Larson double-free): Write header for freelist allocations
        // Problem: Freelist path was returning BASE without writing header
        // Result: Stale headers from previous allocations → double-free on next free
        // Solution: Always write header before returning (same as linear carve path)
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block, meta->class_idx);
#else
            block;
#endif
        return user;
    }
    return NULL;
}
// ============================================================================
/*
 * Phase 12: Shared-SuperSlab-pool based superslab_refill
 *
 * Policy:
 * - superslab_refill(int class_idx) goes through the shared pool and is the
 *   single entry point for "binding one slab for class_idx to TLS".
 * - Callers may only assume that this function:
 *     * on success: leaves TinyTLSSlab (g_tls_slabs[class_idx]) pointing at a
 *       valid ss/meta/slab_base
 *     * on failure: returns NULL and either leaves TLS unchanged or rolls it
 *       back cleanly
 * - shared_pool_acquire_slab() is treated as returning 0 on success and
 *   non-zero on failure; on success (*ss_out, *slab_idx_out) are set.
 * - superslab_init_slab() / tiny_tls_bind_slab() are assumed not to call
 *   superslab_refill() recursively (no self-calls); defensive checks are still
 *   performed here on the safe side.
 * - An illustrative caller sketch follows the function body below.
 */
SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif
    // Bounds check (defensive, should be enforced by callers too)
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return NULL;
    }
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    // Shared pool API:
    //   0 == success, (*ss_out, *slab_idx_out) receive valid values.
    //   !=0 == failure, outputs are treated as undefined.
    extern int shared_pool_acquire_slab(int class_idx,
                                        SuperSlab** ss_out,
                                        int* slab_idx_out);
    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss || slab_idx < 0) {
        return NULL;
    }
    // Initialize slab metadata for this class/thread.
    // NOTE:
    // - superslab_init_slab is assumed not to call superslab_refill() recursively.
    // - class_idx is reflected into slab_meta->class_idx.
    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss,
                        slab_idx,
                        g_tiny_class_sizes[class_idx],
                        my_tid);
    // CRITICAL FIX: Ensure class_idx is set after init.
    // New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
    // superslab_init_slab() only sets it if meta->class_idx==255.
    // We must explicitly set it to the requested class to avoid C0/C7 confusion.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
    uint8_t old_cls = meta->class_idx;
#endif
    meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
    if (class_idx == 7 && old_cls != class_idx) {
        fprintf(stderr, "[SUPERSLAB_REFILL_FIX_C7] ss=%p slab=%d old_cls=%u new_cls=%d\n",
                (void*)ss, slab_idx, old_cls, class_idx);
    }
#endif
    // Bind this slab to TLS for fast subsequent allocations.
    // tiny_tls_bind_slab updates:
    //   tls->ss, tls->slab_idx, tls->meta, tls->slab_base
    // consistently.
    tiny_tls_bind_slab(tls, ss, slab_idx);
    // Sanity: TLS must now describe this slab for this class.
    // On failure, roll TLS back and return NULL (callers can safely retry).
    if (!(tls->ss == ss &&
          tls->slab_idx == (uint8_t)slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx &&
          tls->slab_base != NULL)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_base = NULL;
        tls->slab_idx = 0;
        return NULL;
    }
    return ss;
}
// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================
static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;
    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }
        // Drain remote if needed (ownership-checked elsewhere)
        if (meta) {
            static int g_alloc_remote_relax = -1;
            if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
                g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
            }
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }
    // Fast path: linear carve from current TLS slab
    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tls->slab_base;
        void* block = base + ((size_t)meta->used * block_size);
        meta->used++;
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            uintptr_t base_ss = (uintptr_t)tls->ss;
            size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
            uintptr_t p = (uintptr_t)block;
            int in_range = (p >= base_ss) && (p < base_ss + ss_size);
            int aligned = ((p - (uintptr_t)base) % block_size) == 0;
            int idx_ok = (tls->slab_idx >= 0) &&
                         (tls->slab_idx < ss_slabs_capacity(tls->ss));
            if (!in_range || !aligned || !idx_ok || meta->used > meta->capacity) {
                tiny_failfast_abort_ptr("alloc_ret_align",
                                        tls->ss,
                                        tls->slab_idx,
                                        block,
                                        "superslab_tls_invariant");
            }
        }
        ss_active_inc(tls->ss);
        ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
        HAK_RET_ALLOC(class_idx, block);
    }
    // Freelist path from current TLS slab
    if (meta && meta->freelist) {
        void* block = meta->freelist;
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            size_t blk = tiny_stride_for_class(meta->class_idx);
            uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
            int align_ok = ((delta % blk) == 0);
            int range_ok = (delta / blk) < meta->capacity;
            if (!align_ok || !range_ok) {
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
                return NULL;
            }
        }
        void* next = tiny_next_read(class_idx, block);
        meta->freelist = next;
        meta->used++;
        ss_active_inc(tls->ss);
        ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
        HAK_RET_ALLOC(class_idx, block);
    }
    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }
    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }
    return NULL;
}