## Summary
Implemented the Phase 12 Shared SuperSlab Pool (mimalloc-style) to reduce
SuperSlab allocation churn (from 877 SuperSlabs toward a 100-200 target).
## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
- Added class_idx to TinySlabMeta (per-slab dynamic class)
- Removed size_class from SuperSlab (no longer per-SuperSlab)
- Changed owner_tid (16-bit) → owner_tid_low (8-bit)
2. **Shared Pool** (hakmem_shared_pool.{h,c}); a hedged sketch of these interfaces follows this list:
- Global pool shared by all size classes
- shared_pool_acquire_slab() - Get free slab for class_idx
- shared_pool_release_slab() - Return slab when empty
- Per-class hints for fast path optimization
3. **Integration** (23 files modified):
- Updated all ss->size_class → meta->class_idx
- Updated all meta->owner_tid → meta->owner_tid_low
- superslab_refill() now uses shared pool
- Free path releases empty slabs back to pool
4. **Build system** (Makefile):
- Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
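
The sketch below summarizes the metadata change from item 1 and the pool API from item 2. The `shared_pool_acquire_slab()` signature matches the `extern` declaration used in `superslab_refill()` in the appended file; the field types, field order, and the `shared_pool_release_slab()` signature are illustrative assumptions, not the committed definitions.

```c
#include <stdint.h>

typedef struct SuperSlab SuperSlab;   /* real definition lives in superslab_types.h */

/* Per-slab metadata (sketch). class_idx and owner_tid_low are the Phase 12
 * additions; freelist/used/capacity are referenced by the allocation paths
 * in the appended file, but their exact types here are assumptions. */
typedef struct TinySlabMeta_sketch {
    void*    freelist;        /* slab-local free list head */
    uint16_t used;            /* blocks currently handed out */
    uint16_t capacity;        /* total blocks this slab can carve */
    uint8_t  class_idx;       /* per-slab dynamic size class (replaces per-SuperSlab size_class) */
    uint8_t  owner_tid_low;   /* low 8 bits of the owning thread id (was 16-bit owner_tid) */
} TinySlabMeta_sketch;

/* Returns 0 on success and fills *ss_out / *slab_idx_out with a free slab
 * suitable for class_idx; non-zero means the pool could not supply one. */
int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out);

/* Called from the free path once a slab drains to empty, so the SuperSlab
 * can be reused by any size class (assumed shape). */
void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
```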
## Status: ⚠️ Build OK, Runtime CRASH
**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)
**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
- SuperSlab physical layout integration
- slab_handle.h cleanup
- Remove old per-class head implementation
## Next Steps
1. Debug the SEGFAULT (gdb backtrace points at sll_refill_small_from_ss(); see the example session below)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)
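
To capture the backtrace for step 1, one option is to run the failing command from the status section under gdb (binary and arguments as reported above; the crashing frame is expected to be `sll_refill_small_from_ss()`):

```
$ gdb --args ./bench_random_mixed_hakmem 1000 256 42
(gdb) run
(gdb) bt full
(gdb) frame 0
(gdb) info locals
```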
## Files Changed (24 files)
- core/box/free_local_box.c
- core/box/free_remote_box.c
- core/box/front_gate_classifier.c
- core/hakmem_super_registry.c
- core/hakmem_tiny.c
- core/hakmem_tiny_bg_spill.c
- core/hakmem_tiny_free.inc
- core/hakmem_tiny_lifecycle.inc
- core/hakmem_tiny_magazine.c
- core/hakmem_tiny_query.c
- core/hakmem_tiny_refill.inc.h
- core/hakmem_tiny_superslab.c
- core/hakmem_tiny_superslab.h
- core/hakmem_tiny_tls_ops.h
- core/slab_handle.h
- core/superslab/superslab_inline.h
- core/superslab/superslab_types.h
- core/tiny_debug.h
- core/tiny_free_fast.inc.h
- core/tiny_free_magazine.inc.h
- core/tiny_remote.c
- core/tiny_superslab_alloc.inc.h
- core/tiny_superslab_free.inc.h
- Makefile
## New Files (3 files)
- PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
- core/hakmem_shared_pool.c
- core/hakmem_shared_pool.h
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
## core/tiny_superslab_alloc.inc.h (324 lines, 13 KiB, C)
```c
// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
// Public functions:
//   - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
//   - superslab_refill(): Refill TLS slab via shared pool
//   - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point

#include "box/superslab_expansion_box.h"  // Box E: Expansion with TLS state guarantee
#include "box/tiny_next_ptr_box.h"        // Box API: Next pointer read/write
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_box_geometry.h"            // Box 3: Geometry & Capacity Calculator

// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
// ============================================================================
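//
// Order of attempts in this function:
//   (1) bump-pointer fast path for hot classes C0-C3 when the slab has no
//       pending remote frees and no local freelist,
//   (2) full drain of the remote-free queue (needs slab ownership; returns
//       NULL if acquisition fails or blocks remain pending),
//   (3) linear carve of the next never-used block,
//   (4) pop from the slab-local freelist.
// Returning NULL tells the caller to refill or pick another slab.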
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Small hot classes (C0–C3): bump-only fast path if no remote/freelist
    do {
        uint8_t cls = meta->class_idx;
        if (__builtin_expect(cls <= 3, 1)) {
            if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
                break;
            if (meta->freelist == NULL && meta->used < meta->capacity) {
                size_t unit_sz = tiny_stride_for_class(cls);
                uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
                void* block = tiny_block_at_index(base, meta->used, unit_sz);
                meta->used++;
                ss_active_inc(ss);
                HAK_RET_ALLOC(cls, block);
            }
        }
    } while (0);

    // Drain remote queue if needed before handing blocks back to TLS
    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote", ss, slab_idx,
                                           (void*)head, 0xA243u, self_tid, 0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail", ss, slab_idx,
                                       meta, 0xA244u, self_tid, 0);
            }
            return NULL;
        }
    }

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending", ss, slab_idx,
                                   (void*)head_pending, 0xA247u, tiny_self_u32(), 1);
            return NULL;
        }
    }

    // Linear allocation mode
    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
        size_t unit_sz = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
        void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
            size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
            uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
            fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
                    meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
                    (unsigned long)dbg_off, dbg_usable);
            return NULL;
        }
#endif
        meta->used++;
        void* user =
#if HAKMEM_TINY_HEADER_CLASSIDX
            tiny_region_id_write_header(block_base, meta->class_idx);
#else
            block_base;
#endif
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
        }
        return user;
    }

    // Freelist mode
    if (__builtin_expect(meta->freelist != NULL, 0)) {
        void* block = meta->freelist;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            size_t blk = g_tiny_class_sizes[meta->class_idx];
            uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
            uintptr_t block_addr = (uintptr_t)block;
            uintptr_t slab_addr = (uintptr_t)slab_base;
            uintptr_t offset = block_addr - slab_addr;

            fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
                    meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);

            if (offset % blk != 0 ||
                offset / blk >= meta->capacity) {
                fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
                tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
            }
        }

        meta->freelist = tiny_next_read(meta->class_idx, block);
        meta->used++;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
            __builtin_expect(meta->used > meta->capacity, 0)) {
            fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
            tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
        }

        if (__builtin_expect(g_debug_remote_guard, 0)) {
            tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
            tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        }
        return block;
    }

    return NULL;
}

// ============================================================================
// Phase 12: Shared SuperSlab Pool based superslab_refill
// ============================================================================
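//
// superslab_refill() asks the Phase 12 shared pool for a free slab
// (shared_pool_acquire_slab() returns 0 on success), initializes that slab
// for class_idx, binds it into this thread's TLS slot, and re-checks the
// binding; on any mismatch it clears the TLS slot and returns NULL so the
// caller treats it like an OOM.
// NOTE: this is the refill path implicated in the current SEGFAULT
// (see "Known Issues" above).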
SuperSlab* superslab_refill(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    extern int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out);

    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0) {
        return NULL;
    }

    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss, slab_idx, g_tiny_class_sizes[class_idx], my_tid);

    tiny_tls_bind_slab(tls, ss, slab_idx);

    // Sanity: TLS must now describe this slab for this class.
    if (!(tls->ss == ss &&
          tls->slab_idx == slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_idx = -1;
        tls->slab_base = NULL;
        return NULL;
    }

    return ss;
}

// ============================================================================
// Phase 6.24: SuperSlab-based allocation using TLS slab
// ============================================================================
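//
// Order of attempts in this function: (1) pop from the MidTC cache,
// (2) validate the cached TLS slab (class match, canonical slab base) and
//     opportunistically drain its remote-free queue if ownership can be
//     acquired,
// (3) linear carve from the TLS slab, (4) pop from the TLS slab's freelist,
// (5) superslab_refill() via the shared pool plus one retry of the linear
//     carve; NULL is returned only if the refill fails or the fresh slab is
//     immediately unusable.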
static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // MidTC fast path
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;

    if (meta && slab_idx >= 0 && tls->ss) {
        // Ensure TLS metadata matches class and slab base
        if (tls->meta->class_idx != (uint8_t)class_idx) {
            tls->ss = NULL;
            tls->meta = NULL;
            tls->slab_idx = -1;
            tls->slab_base = NULL;
            meta = NULL;
        } else {
            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
            if (tls->slab_base != canonical) {
                tls->slab_base = canonical;
            }
        }

        // Drain remote if needed (ownership-checked elsewhere)
        if (meta) {
            static int g_alloc_remote_relax = -1;
            if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
                g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
            }
            uintptr_t pending = atomic_load_explicit(
                &tls->ss->remote_heads[slab_idx],
                g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire);
            if (__builtin_expect(pending != 0, 0)) {
                uint32_t self_tid = tiny_self_u32();
                if (ss_owner_try_acquire(meta, self_tid)) {
                    _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
                }
            }
        }
    }

    // Fast path: linear carve from current TLS slab
    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        uint8_t* base = tls->slab_base;
        void* block = base + ((size_t)meta->used * block_size);
        meta->used++;

        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            uintptr_t base_ss = (uintptr_t)tls->ss;
            size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
            uintptr_t p = (uintptr_t)block;
            int in_range = (p >= base_ss) && (p < base_ss + ss_size);
            int aligned = ((p - (uintptr_t)base) % block_size) == 0;
            int idx_ok = (tls->slab_idx >= 0) &&
                         (tls->slab_idx < ss_slabs_capacity(tls->ss));
            if (!in_range || !aligned || !idx_ok || meta->used > meta->capacity) {
                tiny_failfast_abort_ptr("alloc_ret_align", tls->ss, tls->slab_idx,
                                        block, "superslab_tls_invariant");
            }
        }

        ss_active_inc(tls->ss);
        ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
        HAK_RET_ALLOC(class_idx, block);
    }

    // Freelist path from current TLS slab
    if (meta && meta->freelist) {
        void* block = meta->freelist;
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            size_t blk = tiny_stride_for_class(meta->class_idx);
            uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
            int align_ok = ((delta % blk) == 0);
            int range_ok = (delta / blk) < meta->capacity;
            if (!align_ok || !range_ok) {
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
                return NULL;
            }
        }
        void* next = tiny_next_read(class_idx, block);
        meta->freelist = next;
        meta->used++;
        ss_active_inc(tls->ss);
        ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
        HAK_RET_ALLOC(class_idx, block);
    }

    // Slow path: acquire a new slab via shared pool
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) {
            fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
            log_oom++;
        }
        return NULL;
    }

    // Retry after refill
    tls = &g_tls_slabs[class_idx];
    meta = tls->meta;
    if (meta && meta->freelist == NULL &&
        meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
        meta->used++;
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block);
    }

    return NULL;
}
```