Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).
## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
- Added class_idx to TinySlabMeta (per-slab dynamic class)
- Removed size_class from SuperSlab (no longer per-SuperSlab)
- Changed owner_tid (16-bit) → owner_tid_low (8-bit)
2. **Shared Pool** (hakmem_shared_pool.{h,c}):
- Global pool shared by all size classes
- shared_pool_acquire_slab() - Get free slab for class_idx
- shared_pool_release_slab() - Return slab when empty
- Per-class hints for fast path optimization
3. **Integration** (23 files modified):
- Updated all ss->size_class → meta->class_idx
- Updated all meta->owner_tid → meta->owner_tid_low
- superslab_refill() now uses shared pool
- Free path releases empty slabs back to pool
4. **Build system** (Makefile):
- Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
## Status: ⚠️ Build OK, Runtime CRASH
**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)
**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
- SuperSlab physical layout integration
- slab_handle.h cleanup
- Remove old per-class head implementation
## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)
## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile
## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
This commit is contained in:
@ -114,7 +114,7 @@ static inline void tiny_debug_validate_node_base(int class_idx, void* node, cons
|
||||
fprintf(stderr, "[SLL_NODE_UNKNOWN] %s: node=%p cls=%d\n", where, node, class_idx);
|
||||
abort();
|
||||
}
|
||||
int ocls = ss->size_class;
|
||||
int ocls = meta ? meta->class_idx : -1;
|
||||
if (ocls == 7 || ocls != class_idx) {
|
||||
fprintf(stderr, "[SLL_NODE_CLASS_MISMATCH] %s: node=%p cls=%d owner_cls=%d\n", where, node, class_idx, ocls);
|
||||
abort();
|
||||
@ -257,9 +257,6 @@ static inline int quick_refill_from_mag(int class_idx) {
|
||||
return take;
|
||||
}
|
||||
|
||||
// P0 optimization: Batch refill(A/Bテスト用ランタイムゲートで呼び分け)
|
||||
// - デフォルトはOFF(環境変数 HAKMEM_TINY_P0_ENABLE=1 で有効化)
|
||||
#include "hakmem_tiny_refill_p0.inc.h"
|
||||
|
||||
// Box 3 wrapper: verify linear carve stays within slab usable bytes (Fail-Fast)
|
||||
// DEPRECATED: Use tiny_carve_guard_verbose() from Box 3 directly
|
||||
@ -269,7 +266,9 @@ static inline int tiny_linear_carve_guard(TinyTLSSlab* tls,
|
||||
uint32_t reserve,
|
||||
const char* stage) {
|
||||
if (!tls || !meta) return 0;
|
||||
int class_idx = tls->ss ? tls->ss->size_class : -1;
|
||||
int class_idx = (tls->meta && tls->meta->class_idx < TINY_NUM_CLASSES)
|
||||
? (int)tls->meta->class_idx
|
||||
: -1;
|
||||
return tiny_carve_guard_verbose(stage,
|
||||
class_idx,
|
||||
tls->slab_idx,
|
||||
@ -282,174 +281,75 @@ static inline int tiny_linear_carve_guard(TinyTLSSlab* tls,
|
||||
|
||||
// Refill a few nodes directly into TLS SLL from TLS-cached SuperSlab (owner-thread only)
|
||||
// Note: If HAKMEM_TINY_P0_BATCH_REFILL is enabled, sll_refill_batch_from_ss is used instead
|
||||
#if !HAKMEM_TINY_P0_BATCH_REFILL
|
||||
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
||||
// Note: Force non-inline to provide linkable definition for LTO
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
__attribute__((noinline)) int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
#else
|
||||
static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
#endif
|
||||
// PRIORITY 1: Bounds check before TLS array access
|
||||
HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
|
||||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||||
|
||||
// Phase E1-CORRECT: C7 now has headers, can use small refill
|
||||
if (!g_use_superslab || max_take <= 0)
|
||||
return 0;
|
||||
|
||||
if (!g_use_superslab || max_take <= 0) return 0;
|
||||
// ランタイムA/B: P0を有効化している場合はバッチrefillへ委譲
|
||||
do {
|
||||
// 既定: OFF(HAKMEM_TINY_P0_ENABLE=1 で有効化)
|
||||
static int g_p0_enable = -1;
|
||||
if (__builtin_expect(g_p0_enable == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_ENABLE");
|
||||
// 環境変数が'1'のときだけ有効、それ以外(未設定含む)は無効
|
||||
g_p0_enable = (e && *e && *e == '1') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(g_p0_enable, 0)) {
|
||||
return sll_refill_batch_from_ss(class_idx, max_take);
|
||||
}
|
||||
} while (0);
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
if (!tls->ss) {
|
||||
// Try to obtain a SuperSlab for this class
|
||||
if (superslab_refill(class_idx) == NULL) return 0;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx) {
|
||||
if (!superslab_refill(class_idx))
|
||||
return 0;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
if (!meta) return 0;
|
||||
|
||||
// Class 4/5/6/7 special-case: simple batch refill (favor linear carve, minimal branching)
|
||||
// Optional gate for class3 via env: HAKMEM_TINY_SIMPLE_REFILL_C3=1
|
||||
static int g_simple_c3 = -1;
|
||||
if (__builtin_expect(g_simple_c3 == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_SIMPLE_REFILL_C3");
|
||||
g_simple_c3 = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(class_idx >= 4 || (class_idx == 3 && g_simple_c3), 0)) {
|
||||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||||
if (room <= 0) return 0;
|
||||
int take = max_take < room ? max_take : room;
|
||||
int taken = 0;
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
for (; taken < take;) {
|
||||
// Linear first (LIKELY for class7)
|
||||
if (__builtin_expect(meta->freelist == NULL && meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "simple"), 0)) {
|
||||
abort();
|
||||
}
|
||||
// Box 3: Get slab base (handles Slab 0 offset)
|
||||
uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
void* p = tiny_block_at_index(base, meta->carved, bs);
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
|
||||
// Phase E1-CORRECT: Restore header BEFORE tls_sll_push
|
||||
// ROOT CAUSE: Simple refill path carves blocks but doesn't write headers.
|
||||
// tls_sll_push() expects headers at base to write next at base+1.
|
||||
// ALL classes (including C7) need headers restored!
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||||
#endif
|
||||
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
meta->used--; meta->carved--; // Rollback
|
||||
break;
|
||||
}
|
||||
ss_active_inc(tls->ss);
|
||||
taken++;
|
||||
continue;
|
||||
}
|
||||
// Freelist fallback
|
||||
if (__builtin_expect(meta->freelist != NULL, 0)) {
|
||||
void* p = meta->freelist;
|
||||
// BUG FIX: Use Box API to read next pointer at correct offset
|
||||
void* next = tiny_next_read(class_idx, p);
|
||||
meta->freelist = next;
|
||||
meta->used++;
|
||||
|
||||
// Phase E1-CORRECT: Restore header BEFORE tls_sll_push
|
||||
// Freelist stores next at base (offset 0), overwriting header.
|
||||
// Must restore header so tls_sll_push can write next at base+1 correctly.
|
||||
// ALL classes (including C7) need headers restored!
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||||
#endif
|
||||
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
// BUG FIX: Use Box API to write rollback next pointer
|
||||
tiny_next_write(class_idx, p, next); // Rollback freelist
|
||||
meta->freelist = p;
|
||||
meta->used--;
|
||||
break;
|
||||
}
|
||||
ss_active_inc(tls->ss);
|
||||
taken++;
|
||||
continue;
|
||||
}
|
||||
// Need another slab with space
|
||||
if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta; // refresh after refill
|
||||
}
|
||||
return taken;
|
||||
}
|
||||
|
||||
// Compute how many we can actually push into SLL without overflow
|
||||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||||
if (room <= 0) return 0;
|
||||
int take = max_take < room ? max_take : room;
|
||||
if (room <= 0)
|
||||
return 0;
|
||||
|
||||
int take = max_take < room ? max_take : room;
|
||||
int taken = 0;
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
|
||||
while (taken < take) {
|
||||
void* p = NULL;
|
||||
if (__builtin_expect(meta->freelist != NULL, 0)) {
|
||||
// BUG FIX: Use Box API to read next pointer at correct offset
|
||||
p = meta->freelist; meta->freelist = tiny_next_read(class_idx, p); meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
|
||||
if (meta->freelist) {
|
||||
p = meta->freelist;
|
||||
meta->freelist = tiny_next_read(class_idx, p);
|
||||
meta->used++;
|
||||
ss_active_inc(tls->ss);
|
||||
} else if (__builtin_expect(meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "general"), 0)) {
|
||||
} else if (meta->carved < meta->capacity) {
|
||||
if (!tiny_linear_carve_guard(tls, meta, bs, 1, "sll_refill_small"))
|
||||
abort();
|
||||
}
|
||||
// Box 3: Get slab base and calculate block address
|
||||
uint8_t* slab_start = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
p = tiny_block_at_index(slab_start, meta->carved, bs);
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
ss_active_inc(tls->ss);
|
||||
} else {
|
||||
// Move to another slab with space
|
||||
if (superslab_refill(class_idx) == NULL) break;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
if (!superslab_refill(class_idx))
|
||||
break;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta; // refresh after refill
|
||||
meta = tls->meta;
|
||||
if (!tls->ss || !meta || meta->class_idx != (uint8_t)class_idx)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
if (!p) break;
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
|
||||
if (!p)
|
||||
break;
|
||||
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
// Rollback: need to return block to meta (complex, just break)
|
||||
// SLL full; stop without complex rollback.
|
||||
break;
|
||||
}
|
||||
|
||||
taken++;
|
||||
}
|
||||
|
||||
return taken;
|
||||
}
|
||||
#endif // !HAKMEM_TINY_P0_BATCH_REFILL
|
||||
|
||||
// Ultra-Bump TLS shadow try: returns pointer when a TLS bump window is armed
|
||||
// or can be armed by reserving a small chunk from the current SuperSlab meta.
|
||||
@ -499,7 +399,7 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
|
||||
uint32_t chunk = (g_bump_chunk > 0 ? (uint32_t)g_bump_chunk : 1u);
|
||||
if (chunk > avail) chunk = avail;
|
||||
// Box 3: Get stride and slab base
|
||||
size_t bs = tiny_stride_for_class(tls->ss->size_class);
|
||||
size_t bs = tiny_stride_for_class(tls->meta ? tls->meta->class_idx : 0);
|
||||
uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, chunk, "tls_bump"), 0)) {
|
||||
abort();
|
||||
|
||||
Reference in New Issue
Block a user