Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented the Phase 12 Shared SuperSlab Pool (mimalloc-style) to reduce
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).
## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h; layout sketch after this list):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (no longer per-SuperSlab)
   - Changed owner_tid (16-bit) → owner_tid_low (8-bit)
2. **Shared Pool** (hakmem_shared_pool.{h,c}; usage sketch after this list):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab(): get a free slab for class_idx
   - shared_pool_release_slab(): return a slab once it drains empty
   - Per-class hints for fast-path optimization
3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses the shared pool
   - Free path releases empty slabs back to the pool
4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
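
The resulting metadata shape, as a minimal sketch. Field order, widths, `capacity`, and `SLABS_PER_SUPERSLAB` are illustrative assumptions; superslab_types.h is authoritative:

```c
#include <stdint.h>

#define SLABS_PER_SUPERSLAB 32  /* assumption; the real constant lives in superslab_types.h */

typedef struct TinySlabMeta {
    void*    freelist;       /* head of this slab's local free list */
    uint16_t used;           /* live blocks in this slab */
    uint16_t capacity;       /* illustrative: total blocks for class_idx */
    uint8_t  class_idx;      /* Phase 12: per-slab dynamic size class */
    uint8_t  owner_tid_low;  /* Phase 12: low 8 bits of owner tid, 0 = unowned */
} TinySlabMeta;

typedef struct SuperSlab {
    /* Phase 12: no size_class here; slabs are retyped per meta->class_idx */
    _Atomic uint32_t nonempty_mask;  /* one bit per slab that has free blocks */
    TinySlabMeta slabs[SLABS_PER_SUPERSLAB];
} SuperSlab;
```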
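
And a sketch of the intended acquire/release flow around superslab_refill(). The function names match the list above, but the exact signatures, the hint handling, and the retyping details are assumptions, not the committed API:

```c
/* Assumed signatures; see hakmem_shared_pool.h for the real ones. */
SuperSlab* shared_pool_acquire_slab(int class_idx, uint8_t* out_slab_idx);
void       shared_pool_release_slab(SuperSlab* ss, uint8_t slab_idx);

/* Illustrative refill: grab a free slab from the global pool and retype it. */
static TinySlabMeta* refill_slab_for_class(int class_idx, uint32_t tid) {
    uint8_t idx = 0;
    SuperSlab* ss = shared_pool_acquire_slab(class_idx, &idx);
    if (!ss) return NULL;  /* pool exhausted: caller falls back to a fresh SuperSlab */

    TinySlabMeta* m = &ss->slabs[idx];
    m->class_idx = (uint8_t)class_idx;  /* dynamic retyping: slab now serves this class */
    __atomic_store_n(&m->owner_tid_low, (uint8_t)tid, __ATOMIC_RELEASE);
    return m;
}

/* Free-path counterpart: when a slab drains to empty, hand it back. */
static void retire_slab(SuperSlab* ss, TinySlabMeta* m, uint8_t idx) {
    if (m->used == 0) {
        __atomic_store_n(&m->owner_tid_low, 0u, __ATOMIC_RELEASE);
        shared_pool_release_slab(ss, idx);
    }
}
```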
## Status: ⚠️ Build OK, Runtime CRASH
**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warning: superslab_allocate type mismatch (legacy code)
**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
## Known Issues
1. **SEGFAULT in refill path**: most likely in shared_pool_acquire_slab() (guard sketch after this list)
2. **Legacy superslab_allocate()** still exists (source of the type-mismatch warning)
3. **Remaining TODOs** from the design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove the old per-class head implementation
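
A debugging sketch for issue 1: a hypothetical check_acquired_slab() guard that could be dropped into sll_refill_small_from_ss() to turn the SEGV into an early, attributable abort. None of this is committed code:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical guard: validate whatever the pool handed back before
 * the refill path dereferences it. */
static void check_acquired_slab(SuperSlab* ss, TinySlabMeta* m, int class_idx) {
    if (!ss || !m) {
        fprintf(stderr, "[P12] acquire returned NULL (cls=%d)\n", class_idx);
        abort();
    }
    /* A stale per-class hint could hand back a slab already retyped elsewhere. */
    if (m->class_idx != (uint8_t)class_idx) {
        fprintf(stderr, "[P12] class mismatch: meta=%d want=%d\n",
                (int)m->class_idx, class_idx);
        abort();
    }
    /* Same alignment invariant the FREELIST_ALIGN debug check enforces. */
    if (((uintptr_t)m->freelist & (sizeof(void*) - 1)) != 0) {
        fprintf(stderr, "[P12] misaligned freelist %p\n", m->freelist);
        abort();
    }
}
```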
## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations; smoke-test sketch below)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)
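
For step 3, a self-contained smoke test approximating the failing invocation (1000 iterations, sizes up to 256 B, seed 42). It goes through plain malloc/free, so it only exercises hakmem when linked or interposed against it; it is a stand-in, not the bench_random_mixed harness:

```c
#include <stdlib.h>
#include <string.h>

int main(void) {
    enum { SLOTS = 128, ITERS = 1000, MAX_SZ = 256 };
    void* live[SLOTS] = {0};
    srand(42);
    for (int i = 0; i < ITERS; i++) {
        int s = rand() % SLOTS;
        free(live[s]);                      /* free(NULL) is a no-op */
        size_t sz = (size_t)(rand() % MAX_SZ) + 1;
        live[s] = malloc(sz);
        if (live[s]) memset(live[s], 0xAB, sz);  /* touch to force real refills */
    }
    for (int s = 0; s < SLOTS; s++) free(live[s]);
    return 0;
}
```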
## Files Changed (24 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile
## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
## Diff: core/slab_handle.h
Excerpt of the SlabHandle changes (owner_tid → owner_tid_low, ss->size_class → meta->class_idx):

```diff
@@ -20,9 +20,9 @@ typedef struct SlabHandle {
     SuperSlab* ss;         // SuperSlab pointer
     TinySlabMeta* meta;    // Cached metadata pointer
     uint8_t slab_idx;      // Slab index within SuperSlab
-    uint32_t owner_tid;    // Owner thread ID (cached)
+    uint8_t owner_tid_low; // Owner thread ID (low 8 bits, cached)
     uint8_t valid;         // 1=owned, 0=invalid/unowned
-    uint8_t _pad[3];       // Padding
+    uint8_t _pad[2];       // Padding
 } SlabHandle;
 
 // Core operations
@@ -44,7 +44,7 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
 
     TinySlabMeta* m = &ss->slabs[idx];
 
-    // Try to acquire ownership (Box 3: Ownership)
+    // Try to acquire ownership (Box 3: Ownership, Phase 12 uses owner_tid_low)
     if (!ss_owner_try_acquire(m, tid)) {
         return h; // Failed to acquire
     }
@@ -53,14 +53,14 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
     h.ss = ss;
     h.meta = m;
     h.slab_idx = (uint8_t)idx;
-    h.owner_tid = tid;
+    h.owner_tid_low = (uint8_t)tid;
     if (__builtin_expect(g_debug_remote_guard, 0)) {
-        uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
-        if (cur != tid || cur == 0) {
+        uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
+        if (cur != h.owner_tid_low || cur == 0) {
             tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
-                                   (uint16_t)ss->size_class,
+                                   (uint16_t)m->class_idx,
                                    m,
-                                   ((uintptr_t)cur << 32) | (uintptr_t)tid);
+                                   ((uintptr_t)cur << 32) | (uintptr_t)h.owner_tid_low);
             // Log the error but don't raise signal in debug builds by default to avoid hangs
 #if !HAKMEM_BUILD_RELEASE
             static _Atomic uint64_t g_invalid_owner_count = 0;
@@ -76,9 +76,9 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
             h.valid = 0;
             return h;
         }
-        uintptr_t aux = ((uintptr_t)h.slab_idx << 32) | (uintptr_t)tid;
+        uintptr_t aux = ((uintptr_t)h.slab_idx << 32) | (uintptr_t)h.owner_tid_low;
         tiny_debug_ring_record(TINY_RING_EVENT_OWNER_ACQUIRE,
-                               (uint16_t)ss->size_class,
+                               (uint16_t)m->class_idx,
                                m,
                                aux);
     }
@@ -108,11 +108,11 @@ static inline void slab_drain_remote(SlabHandle* h) {
     }
 
     if (__builtin_expect(g_debug_remote_guard, 0)) {
-        uint32_t cur_owner = __atomic_load_n(&h->meta->owner_tid, __ATOMIC_RELAXED);
-        if (cur_owner != h->owner_tid || cur_owner == 0) {
-            uintptr_t aux = ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid;
+        uint8_t cur_owner = __atomic_load_n(&h->meta->owner_tid_low, __ATOMIC_RELAXED);
+        if (cur_owner != h->owner_tid_low || cur_owner == 0) {
+            uintptr_t aux = ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid_low;
             tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
-                                   (uint16_t)h->ss->size_class,
+                                   (uint16_t)h->meta->class_idx,
                                    h->meta,
                                    aux);
 #if !HAKMEM_BUILD_RELEASE
@@ -149,7 +149,7 @@ static inline void slab_drain_remote_full(SlabHandle* h) {
                                h->slab_idx,
                                (void*)head,
                                0xA242u,
-                               h->owner_tid,
+                               h->owner_tid_low,
                                0);
         }
     }
@@ -169,17 +169,17 @@ static inline void slab_release(SlabHandle* h) {
     }
 
     if (__builtin_expect(g_debug_remote_guard, 0)) {
-        uint32_t cur_owner = __atomic_load_n(&h->meta->owner_tid, __ATOMIC_RELAXED);
+        uint8_t cur_owner = __atomic_load_n(&h->meta->owner_tid_low, __ATOMIC_RELAXED);
         uintptr_t aux = ((uintptr_t)h->slab_idx << 32) | (uintptr_t)cur_owner;
         tiny_debug_ring_record(TINY_RING_EVENT_OWNER_RELEASE,
-                               (uint16_t)(h->ss ? h->ss->size_class : 0u),
+                               (uint16_t)(h->meta ? h->meta->class_idx : 0xFFu),
                                h->meta,
                                aux);
-        if (cur_owner != h->owner_tid || cur_owner == 0) {
+        if (cur_owner != h->owner_tid_low || cur_owner == 0) {
             tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
-                                   (uint16_t)(h->ss ? h->ss->size_class : 0u),
+                                   (uint16_t)(h->meta ? h->meta->class_idx : 0xFFu),
                                    h->meta,
-                                   ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid);
+                                   ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid_low);
 #if !HAKMEM_BUILD_RELEASE
             static _Atomic uint64_t g_release_invalid_count = 0;
             uint64_t count = atomic_fetch_add(&g_release_invalid_count, 1);
@@ -194,10 +194,10 @@ static inline void slab_release(SlabHandle* h) {
         }
     }
 
-    // Release ownership (Box 3: Ownership)
-    __atomic_store_n(&h->meta->owner_tid, 0u, __ATOMIC_RELEASE);
+    // Release ownership (Box 3: Ownership, Phase 12)
+    __atomic_store_n(&h->meta->owner_tid_low, 0u, __ATOMIC_RELEASE);
     h->valid = 0;
-    h->owner_tid = 0;
+    h->owner_tid_low = 0;
 }
 
 // Check if handle is valid (owned and safe to use)
@@ -243,11 +243,11 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
         if ((pval & (sizeof(void*) - 1)) != 0 || (fval && (fval & (sizeof(void*) - 1)) != 0)) {
             fprintf(stderr,
                     "[SLAB_HANDLE] FREELIST_ALIGN cls=%u slab=%u ptr=%p freelist=%p owner=%u used=%u\n",
-                    h->ss ? h->ss->size_class : 0u,
+                    h->meta ? h->meta->class_idx : 0u,
                     (unsigned)h->slab_idx,
                     ptr,
                     h->meta->freelist,
-                    h->meta->owner_tid,
+                    h->meta->owner_tid_low,
                     (unsigned)h->meta->used);
         }
     }
@@ -255,7 +255,7 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
     // Ownership guaranteed by valid==1 → safe to modify freelist
     void* old_freelist = h->meta->freelist; // Store for empty→non-empty detection
     void* prev = h->meta->freelist;
-    tiny_next_write(h->ss->size_class, ptr, prev); // Box API: next pointer write
+    tiny_next_write(h->meta->class_idx, ptr, prev); // Box API: next pointer write (per-slab class)
     h->meta->freelist = ptr;
     // Optional freelist mask update (opt-in via env HAKMEM_TINY_FREELIST_MASK)
     do {
@@ -276,8 +276,8 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
         uint32_t bit = (1u << h->slab_idx);
         atomic_fetch_or_explicit(&h->ss->nonempty_mask, bit, memory_order_release);
     }
-    tiny_remote_watch_note("freelist_push", h->ss, h->slab_idx, ptr, 0xA236u, h->owner_tid, 0);
-    tiny_remote_track_on_local_free(h->ss, h->slab_idx, ptr, "freelist_push", h->owner_tid);
+    tiny_remote_watch_note("freelist_push", h->ss, h->slab_idx, ptr, 0xA236u, h->owner_tid_low, 0);
+    tiny_remote_track_on_local_free(h->ss, h->slab_idx, ptr, "freelist_push", h->owner_tid_low);
     return 1;
 }
 
@@ -296,7 +296,7 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
     if (__builtin_expect((uintptr_t)ptr == TINY_REMOTE_SENTINEL, 0)) {
         if (__builtin_expect(g_debug_remote_guard, 0)) {
             fprintf(stderr, "[FREELIST_POP] sentinel detected in freelist (cls=%u slab=%u) -> break chain\n",
-                    h->ss ? h->ss->size_class : 0u,
+                    h->meta ? h->meta->class_idx : 0u,
                     (unsigned)h->slab_idx);
         }
         h->meta->freelist = NULL; // break the chain to avoid propagating corruption
@@ -304,7 +304,7 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
         return NULL;
     }
     if (ptr) {
-        void* next = tiny_next_read(h->ss->size_class, ptr); // Box API: next pointer read
+        void* next = tiny_next_read(h->meta->class_idx, ptr); // Box API: next pointer read
         h->meta->freelist = next;
         h->meta->used++;
         // Optional freelist mask clear when freelist becomes empty
@@ -321,9 +321,9 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
     } while (0);
     // Keep nonempty_mask sticky to ensure subsequent frees remain discoverable.
     // Do NOT clear nonempty_mask on transient empty; adopt gate will verify safety.
-    tiny_remote_watch_note("freelist_pop", h->ss, h->slab_idx, ptr, 0xA237u, h->owner_tid, 0);
-    tiny_remote_assert_not_remote(h->ss, h->slab_idx, ptr, "freelist_pop_ret", h->owner_tid);
-    tiny_remote_track_on_alloc(h->ss, h->slab_idx, ptr, "freelist_pop", h->owner_tid);
+    tiny_remote_watch_note("freelist_pop", h->ss, h->slab_idx, ptr, 0xA237u, h->owner_tid_low, 0);
+    tiny_remote_assert_not_remote(h->ss, h->slab_idx, ptr, "freelist_pop_ret", h->owner_tid_low);
+    tiny_remote_track_on_alloc(h->ss, h->slab_idx, ptr, "freelist_pop", h->owner_tid_low);
     }
     return ptr;
 }
```