Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).
## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
- Added class_idx to TinySlabMeta (per-slab dynamic class)
- Removed size_class from SuperSlab (no longer per-SuperSlab)
- Changed owner_tid (16-bit) → owner_tid_low (8-bit)
2. **Shared Pool** (hakmem_shared_pool.{h,c}):
- Global pool shared by all size classes
- shared_pool_acquire_slab() - Get free slab for class_idx
- shared_pool_release_slab() - Return slab when empty
- Per-class hints for fast path optimization
3. **Integration** (23 files modified):
- Updated all ss->size_class → meta->class_idx
- Updated all meta->owner_tid → meta->owner_tid_low
- superslab_refill() now uses shared pool
- Free path releases empty slabs back to pool
4. **Build system** (Makefile):
- Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
## Status: ⚠️ Build OK, Runtime CRASH
**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)
**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
- SuperSlab physical layout integration
- slab_handle.h cleanup
- Remove old per-class head implementation
## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)
## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile
## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
This commit is contained in:
@ -467,10 +467,9 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize SuperSlab header (Phase 1 Quick Win: removed memset for lazy init)
|
||||
// Initialize SuperSlab header (Phase 12: no global size_class field)
|
||||
SuperSlab* ss = (SuperSlab*)ptr;
|
||||
ss->magic = SUPERSLAB_MAGIC;
|
||||
ss->size_class = size_class;
|
||||
ss->active_slabs = 0;
|
||||
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
|
||||
ss->slab_bitmap = 0;
|
||||
@ -505,7 +504,7 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
|
||||
ss->slabs[i].freelist = NULL; // Explicit NULL (redundant after memset, but clear intent)
|
||||
ss->slabs[i].used = 0;
|
||||
ss->slabs[i].capacity = 0;
|
||||
ss->slabs[i].owner_tid = 0;
|
||||
ss->slabs[i].owner_tid_low = 0;
|
||||
|
||||
// Initialize remote queue atomics (memset already zeroed, but use proper atomic init)
|
||||
atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
|
||||
@ -726,8 +725,8 @@ void superslab_free(SuperSlab* ss) {
|
||||
return;
|
||||
}
|
||||
|
||||
// LRU cache full or disabled - try old cache
|
||||
int old_cached = ss_cache_push(ss->size_class, ss);
|
||||
// LRU cache full or disabled - try old cache using head class_idx (if known)
|
||||
int old_cached = ss_cache_push(0, ss);
|
||||
if (old_cached) {
|
||||
ss_stats_cache_store();
|
||||
return;
|
||||
@ -738,8 +737,8 @@ void superslab_free(SuperSlab* ss) {
|
||||
ss->magic = 0;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p class=%d size=%zu active=%u (LRU full)\n",
|
||||
(void*)ss, ss->size_class, ss_size,
|
||||
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
|
||||
(void*)ss, ss_size,
|
||||
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
|
||||
#endif
|
||||
|
||||
@ -748,9 +747,7 @@ void superslab_free(SuperSlab* ss) {
|
||||
// Update statistics for actual release to OS
|
||||
pthread_mutex_lock(&g_superslab_lock);
|
||||
g_superslabs_freed++;
|
||||
if (ss->size_class < 8) {
|
||||
g_ss_freed_by_class[ss->size_class]++;
|
||||
}
|
||||
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
|
||||
g_bytes_allocated -= ss_size;
|
||||
pthread_mutex_unlock(&g_superslab_lock);
|
||||
|
||||
@ -782,8 +779,8 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
|
||||
size_t stride = block_size;
|
||||
int capacity = (int)(usable_size / stride);
|
||||
|
||||
// Diagnostic: Verify capacity for class 7 slab 0 (one-shot)
|
||||
if (ss->size_class == 7 && slab_idx == 0) {
|
||||
// Diagnostic: Verify capacity for slab 0 of class 7 (one-shot)
|
||||
if (slab_idx == 0) {
|
||||
static _Atomic int g_cap_log_printed = 0;
|
||||
if (atomic_load(&g_cap_log_printed) == 0 &&
|
||||
atomic_exchange(&g_cap_log_printed, 1) == 0) {
|
||||
@ -808,8 +805,9 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
|
||||
meta->freelist = NULL; // NULL = linear allocation mode
|
||||
meta->used = 0;
|
||||
meta->capacity = (uint16_t)capacity;
|
||||
meta->carved = 0; // FIX: Initialize carved counter (monotonic carve progress)
|
||||
meta->owner_tid = (uint16_t)owner_tid; // FIX: Cast to uint16_t (changed from uint32_t)
|
||||
meta->carved = 0; // Initialize carved counter
|
||||
meta->owner_tid_low = (uint8_t)(owner_tid & 0xFFu);
|
||||
// Caller (refill) is responsible for setting meta->class_idx
|
||||
|
||||
// Store slab_start in SuperSlab for later use
|
||||
// (We need this for linear allocation)
|
||||
@ -872,15 +870,16 @@ void superslab_print_stats(SuperSlab* ss) {
|
||||
|
||||
printf("=== SuperSlab Stats ===\n");
|
||||
printf("Address: %p\n", (void*)ss);
|
||||
printf("Size class: %u\n", ss->size_class);
|
||||
// Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx.
|
||||
printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss));
|
||||
printf("Bitmap: 0x%08X\n", ss->slab_bitmap);
|
||||
printf("\nPer-slab details:\n");
|
||||
for (int i = 0; i < ss_slabs_capacity(ss); i++) {
|
||||
if (ss->slab_bitmap & (1u << i)) {
|
||||
TinySlabMeta* meta = &ss->slabs[i];
|
||||
printf(" Slab %2d: used=%u/%u freelist=%p owner=%u\n",
|
||||
i, meta->used, meta->capacity, meta->freelist, meta->owner_tid);
|
||||
printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n",
|
||||
i, meta->used, meta->capacity, meta->freelist,
|
||||
(unsigned)meta->class_idx, (unsigned)meta->owner_tid_low);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
@ -1016,7 +1015,7 @@ static void ace_observe_and_decide(int k) {
|
||||
|
||||
// Phase 8.4: Safety check - skip if ss pointer is invalid
|
||||
if (!e->ss) continue;
|
||||
if (e->ss->size_class != k) continue; // Wrong class
|
||||
// Phase 12: per-SS size_class removed; registry entries are per-class by construction.
|
||||
|
||||
ss_count++;
|
||||
// Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead)
|
||||
|
||||
Reference in New Issue
Block a user