Files
hakmem/core/superslab_slab.c
Moe Charm (CI) 87b7d30998 Phase 9: SuperSlab optimization & EMPTY slab recycling (WIP)
Phase 9-1: O(1) SuperSlab lookup optimization
- Created ss_addr_map_box: Hash table (8192 buckets) for O(1) SuperSlab lookup
- Created ss_tls_hint_box: TLS caching layer for SuperSlab hints
- Integrated hash table into registry (init, insert, remove, lookup)
- Modified hak_super_lookup() to use new hash table
- Expected: 50-80 cycles → 10-20 cycles (not verified - SuperSlab disabled by default)

Phase 9-2: EMPTY slab recycling implementation
- Created slab_recycling_box: SLAB_TRY_RECYCLE() macro following Box pattern
- Integrated into remote drain (superslab_slab.c)
- Integrated into TLS SLL drain (tls_sll_drain_box.h) with touched slab tracking
- Observable: Debug tracing via HAKMEM_SLAB_RECYCLE_TRACE
- Updated Makefile: Added new box objects to 3 build targets

Known Issues:
- SuperSlab registry exhaustion still occurs (unregistration not working)
- shared_pool_release_slab() may not be removing from g_super_reg[]
- Needs investigation before Phase 9-2 can be completed

Expected Impact (when fixed):
- Stage 1 hit rate: 0% → 80%
- shared_fail events: 4 → 0
- Kernel overhead: 55% → 15%
- Throughput: 16.5M → 25-30M ops/s (+50-80%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 07:16:50 +09:00

216 lines
8.7 KiB
C

// superslab_slab.c - Slab initialization and management
// Purpose: Slab lifecycle and bitmap management within SuperSlabs
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
#include "box/slab_recycling_box.h"
// ============================================================================
// Remote Drain (MPSC stack to freelist conversion)
// ============================================================================
// Drain remote MPSC stack into freelist (ownership already verified by caller)
//
// Detaches the whole remote-free list of slab `slab_idx` with a single atomic
// exchange, relinks every detached block in front of meta->freelist, resets
// the slab's remote counter, attempts slab recycling and finally sets the
// slab's bit in ss->freelist_mask and ss->nonempty_mask.
//
// Parameters:
//   ss       - SuperSlab that owns the slab; NULL makes the call a no-op.
//   slab_idx - slab index, must be in [0, ss_slabs_capacity(ss)).
//   meta     - metadata of that slab; NULL makes the call a no-op.
//
// The function performs no locking or ownership check of its own.
// Diagnostics go to stderr and are rate limited by static counters.
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
{
// Reject NULL arguments and out-of-range slab indices.
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;
// Shared budget for the two diagnostic messages inside the diag branch below
// (at most 8 messages in total across both kinds).
static _Atomic uint32_t g_remote_drain_diag_once = 0;
// Tri-state diagnostics switch: -1 = not yet decided, 0 = off, 1 = on.
// NOTE(review): this is a plain (non-atomic) static int that is lazily
// written inside the loop below - confirm concurrent first calls are acceptable.
static int g_remote_drain_diag_en = -1;
// Atomically take the whole remote list
uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
memory_order_acq_rel);
// Nothing was queued remotely: leave freelist, counters and masks untouched.
if (head == 0) return;
// Convert remote stack (offset 0 next) into freelist encoding via Box API
// and splice in front of current freelist.
// Each block is linked in front of the list built so far, so the drained
// blocks end up in the reverse of their remote-stack order, followed by the
// previous freelist contents.
void* prev = meta->freelist;
int cls = (int)meta->class_idx;
// NOTE(review): HAK_CHECK_CLASS_IDX is defined elsewhere; presumably a
// debug-time validation of cls - confirm against its definition.
HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
// Hard guard against an out-of-range class index; only the first occurrence
// is logged.
// NOTE(review): the list was already detached above and is not put back
// before returning, so the blocks reachable from `head` are dropped on this
// path - confirm that is intended.
if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
static _Atomic int g_remote_drain_cls_oob = 0;
if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) {
fprintf(stderr,
"[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n",
(void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head);
}
return;
}
// Walk the detached list. `next` must be read before anything is written to
// the block, because both the header byte and the rewritten link written
// below may overlap the remote-next word at offset 0.
uintptr_t cur = head;
while (cur != 0) {
uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0
// Lazily resolve the diagnostics switch: non-release builds consult the
// HAKMEM_TINY_SLL_DIAG environment variable (any non-empty value that does
// not start with '0' enables it); release builds force it off.
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0;
}
#else
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
g_remote_drain_diag_en = 0;
}
#endif
// Optional diagnostics: report only, the drain continues regardless.
if (__builtin_expect(g_remote_drain_diag_en, 0)) {
uintptr_t addr = (uintptr_t)next;
// Flag a non-NULL next pointer that lies below 4096 or above
// 0x00007fffffffffff. The "count" field in the message prints meta->used.
if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n",
cls,
slab_idx,
(void*)cur,
(void*)next,
(unsigned long)head,
prev,
(unsigned)meta->used);
}
}
#if HAKMEM_TINY_HEADER_CLASSIDX
// NOTE(review): tiny_region_id_read_header is given cur + 1; presumably it
// reads the byte just before its argument (i.e. the byte at cur) and returns
// a negative value when no valid header is found - confirm.
int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls >= 0 && hdr_cls != cls) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n",
cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head);
}
}
#endif
}
#if HAKMEM_TINY_HEADER_CLASSIDX
// Cross-check header vs meta before writing next (even if diag is off)
// Uses its own counter and logs at most 16 mismatches; the block is still
// linked into the freelist afterwards.
{
int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) {
static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed);
if (n < 16) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n",
cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx);
}
}
}
#endif
// Restore header for header-classes (class 1-6) which were clobbered by remote push
// The header byte is written at offset 0 of the block for every class except 0.
#if HAKMEM_TINY_HEADER_CLASSIDX
if (cls != 0) {
uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
*(uint8_t*)(uintptr_t)cur = expected;
}
#endif
// Rewrite next pointer to Box representation for this class
tiny_next_write(cls, (void*)cur, prev);
// The block just linked becomes the new list head; advance using the
// next pointer saved at the top of the iteration.
prev = (void*)cur;
cur = next;
}
// Publish the combined list as the slab's freelist.
meta->freelist = prev;
// Reset remote count after full drain
atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
// Phase 9-2: Try to recycle slab if EMPTY after remote drain
// This fixes the bug where EMPTY slabs accumulate and never get returned to freelist
SLAB_TRY_RECYCLE(ss, slab_idx, meta);
// Update freelist/nonempty visibility bits
// NOTE(review): both bits are set unconditionally, i.e. also when
// SLAB_TRY_RECYCLE above did recycle the slab - confirm that marking a
// recycled slab as having a freelist / being non-empty is intended.
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
// Initialize the metadata of slab `slab_idx` inside `ss` and mark the slab
// active in the SuperSlab bitmap.
//
// Parameters:
//   ss         - owning SuperSlab; NULL makes the call a no-op.
//   slab_idx   - slab index, must be in [0, ss_slabs_capacity(ss)).
//   block_size - TOTAL stride of one block for the slab's size class
//                (expected to equal one entry of g_tiny_class_sizes[]).
//                A value of 0 is rejected and the slab is left untouched.
//   owner_tid  - thread id of the owner; only bits 8-15 are stored.
//
// Invalid arguments cause an early return without modifying the slab.
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    // A zero stride would turn the capacity computation below into an integer
    // division by zero (undefined behaviour). Reject it like the other invalid
    // arguments instead of trapping or activating a slab with a garbage capacity.
    if (block_size == 0) {
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr,
                "[SUPERSLAB_INIT] rejected block_size=0 for slab %d\n",
                slab_idx);
#endif
        return;
    }

    // Phase E1-CORRECT unified geometry:
    // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
    // - usable bytes are determined by slab index (slab0 vs others)
    // - capacity = usable / stride for ALL classes (including former C7)
    size_t usable_size = (slab_idx == 0)
        ? SUPERSLAB_SLAB0_USABLE_SIZE
        : SUPERSLAB_SLAB_USABLE_SIZE;
    size_t stride = block_size;
    // NOTE(review): the quotient is narrowed to 16 bits; confirm that
    // usable_size / smallest stride always fits in uint16_t.
    uint16_t capacity = (uint16_t)(usable_size / stride);

#if !HAKMEM_BUILD_RELEASE
    if (slab_idx == 0) {
        fprintf(stderr,
                "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
                usable_size, stride, (unsigned)capacity);
    }
#endif

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    meta->freelist = NULL;   // NULL = linear allocation mode
    meta->used = 0;
    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
    meta->tls_cached = 0;    // P2.2: blocks cached in TLS SLL (starts at 0)
    meta->capacity = capacity;
    meta->carved = 0;
    // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);

    // Fail-safe: stamp class_idx from geometry (stride -> class).
    // This ensures legacy/shared/legacy-refill paths all end with a correct class.
    // If no class size equals the stride, class_idx and class_map[slab_idx]
    // keep whatever value they had before this call.
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tiny_class_sizes[i] == stride) {
            meta->class_idx = (uint8_t)i;
            // P1.1: Update class_map for out-of-band lookup on free path
            ss->class_map[slab_idx] = (uint8_t)i;
            break;
        }
    }

    superslab_activate_slab(ss, slab_idx);
}
// ============================================================================
// Slab Bitmap Management
// ============================================================================
// Mark slab `slab_idx` of `ss` as active.
// Sets the slab's bit in ss->slab_bitmap, bumps ss->active_slabs and refreshes
// the hot/cold indices. Does nothing when `ss` is NULL, the index is out of
// range, or the slab is already marked active.
void superslab_activate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss) {
        return;
    }
    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    const uint32_t slab_bit = 1u << slab_idx;
    if ((ss->slab_bitmap & slab_bit) != 0) {
        return;  // already active: keep the counter and indices as they are
    }

    ss->slab_bitmap |= slab_bit;
    ss->active_slabs++;
    // Phase 3d-C: hot/cold indices must be rebuilt once a new slab is active
    ss_update_hot_cold_indices(ss);
}
// Mark slab `slab_idx` of `ss` as inactive.
// Clears the slab's bit in ss->slab_bitmap and decrements ss->active_slabs.
// Does nothing when `ss` is NULL, the index is out of range, or the slab is
// not currently marked active.
void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss) {
        return;
    }
    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    const uint32_t slab_bit = 1u << slab_idx;
    if ((ss->slab_bitmap & slab_bit) == 0) {
        return;  // not active: nothing to undo
    }

    ss->slab_bitmap &= ~slab_bit;
    ss->active_slabs--;
}
// Return the lowest slab index whose bit is clear in ss->slab_bitmap.
// Returns -1 when `ss` is NULL, when ss->active_slabs already reaches the
// slab capacity, or when no clear bit exists below the capacity.
int superslab_find_free_slab(SuperSlab* ss) {
    if (!ss) {
        return -1;
    }

    const int slab_count = ss_slabs_capacity(ss);
    if ((int)ss->active_slabs >= slab_count) {
        return -1;  // every slab is already in use
    }

    // Scan upwards for the first unset bit.
    int idx = 0;
    while (idx < slab_count) {
        if (!(ss->slab_bitmap & (1u << idx))) {
            return idx;
        }
        idx++;
    }
    return -1;
}