Phase 9-1: O(1) SuperSlab lookup optimization
- Created ss_addr_map_box: Hash table (8192 buckets) for O(1) SuperSlab lookup
- Created ss_tls_hint_box: TLS caching layer for SuperSlab hints
- Integrated hash table into registry (init, insert, remove, lookup)
- Modified hak_super_lookup() to use new hash table
- Expected: 50-80 cycles → 10-20 cycles (not verified - SuperSlab disabled by default)

Phase 9-2: EMPTY slab recycling implementation
- Created slab_recycling_box: SLAB_TRY_RECYCLE() macro following Box pattern
- Integrated into remote drain (superslab_slab.c)
- Integrated into TLS SLL drain (tls_sll_drain_box.h) with touched-slab tracking
- Observable: Debug tracing via HAKMEM_SLAB_RECYCLE_TRACE
- Updated Makefile: Added new box objects to 3 build targets

Known Issues:
- SuperSlab registry exhaustion still occurs (unregistration not working)
- shared_pool_release_slab() may not be removing entries from g_super_reg[]
- Needs investigation before Phase 9-2 can be completed

Expected Impact (when fixed):
- Stage 1 hit rate: 0% → 80%
- shared_fail events: 4 → 0
- Kernel overhead: 55% → 15%
- Throughput: 16.5M → 25-30M ops/s (+50-80%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
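For reference, a minimal sketch of the O(1) lookup idea behind ss_addr_map_box, assuming SuperSlabs sit at a fixed power-of-two alignment. Only the 8192-bucket count comes from this change; the entry layout, hash, alignment shift, and names (SS_ADDR_MAP_BUCKETS, SS_ALIGN_SHIFT, SsAddrMapEntry, ss_addr_map_lookup) are illustrative, not the actual box API:

    /* Illustrative sketch only - real ss_addr_map_box internals may differ. */
    #include <stdint.h>
    #include <stddef.h>

    struct SuperSlab;                            /* provided by the allocator */

    #define SS_ADDR_MAP_BUCKETS 8192             /* bucket count from this change */
    #define SS_ALIGN_SHIFT      21               /* assumed 2 MiB SuperSlab alignment */

    typedef struct SsAddrMapEntry {
        uintptr_t               base;            /* SuperSlab base address */
        struct SuperSlab*       ss;
        struct SsAddrMapEntry*  next;            /* short collision chain */
    } SsAddrMapEntry;

    static inline size_t ss_addr_map_hash(uintptr_t base) {
        /* Bits below the alignment carry no information, so hash what is left. */
        return (size_t)(base >> SS_ALIGN_SHIFT) & (SS_ADDR_MAP_BUCKETS - 1);
    }

    static inline struct SuperSlab* ss_addr_map_lookup(SsAddrMapEntry** table, void* p) {
        uintptr_t base = (uintptr_t)p & ~(((uintptr_t)1 << SS_ALIGN_SHIFT) - 1);
        for (SsAddrMapEntry* e = table[ss_addr_map_hash(base)]; e; e = e->next) {
            if (e->base == base) return e->ss;   /* expected O(1): chains stay short */
        }
        return NULL;                             /* caller falls back to a registry scan */
    }

The ss_tls_hint_box layer would then cache the most recent (base, SuperSlab) pair per thread, so repeated hits on the same SuperSlab skip even the hash probe.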
// superslab_slab.c - Slab initialization and management
// Purpose: Slab lifecycle and bitmap management within SuperSlabs
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "box/slab_recycling_box.h"

// ============================================================================
// Remote Drain (MPSC queue to freelist conversion)
// ============================================================================

// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
{
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;

    static _Atomic uint32_t g_remote_drain_diag_once = 0;
    static int g_remote_drain_diag_en = -1;

    // Atomically take the whole remote list
    uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
                                              memory_order_acq_rel);
    if (head == 0) return;

    // Convert remote stack (offset-0 next) into freelist encoding via the Box API
    // and splice in front of the current freelist, preserving relative order.
    void* prev = meta->freelist;
    int cls = (int)meta->class_idx;
    HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
    if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
        static _Atomic int g_remote_drain_cls_oob = 0;
        if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) {
            fprintf(stderr,
                    "[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n",
                    (void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head);
        }
        return;
    }
    uintptr_t cur = head;
    while (cur != 0) {
        uintptr_t next = *(uintptr_t*)cur;  // remote-next stored at offset 0
#if !HAKMEM_BUILD_RELEASE
        if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
            g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0;
        }
#else
        if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
            g_remote_drain_diag_en = 0;
        }
#endif
        if (__builtin_expect(g_remote_drain_diag_en, 0)) {
            uintptr_t addr = (uintptr_t)next;
            if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) {
                uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
                if (shot < 8) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n",
                            cls,
                            slab_idx,
                            (void*)cur,
                            (void*)next,
                            (unsigned long)head,
                            prev,
                            (unsigned)meta->used);
                }
            }
#if HAKMEM_TINY_HEADER_CLASSIDX
            int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1);
            if (hdr_cls >= 0 && hdr_cls != cls) {
                uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
                if (shot < 8) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n",
                            cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head);
                }
            }
#endif
        }
#if HAKMEM_TINY_HEADER_CLASSIDX
        // Cross-check header vs meta before writing next (even if diag is off)
        {
            int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1);
            if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) {
                static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0;
                uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed);
                if (n < 16) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n",
                            cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx);
                }
            }
        }
#endif
        // Restore the header for header classes (classes 1-6), which was clobbered by the remote push
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (cls != 0) {
            uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
            *(uint8_t*)(uintptr_t)cur = expected;
        }
#endif
        // Rewrite the next pointer to the Box representation for this class
        tiny_next_write(cls, (void*)cur, prev);
        prev = (void*)cur;
        cur = next;
    }
    meta->freelist = prev;
    // Reset remote count after full drain
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);

    // Phase 9-2: Try to recycle the slab if it is EMPTY after the remote drain.
    // This fixes the bug where EMPTY slabs accumulate and never get returned to the freelist.
    SLAB_TRY_RECYCLE(ss, slab_idx, meta);

    // Update freelist/nonempty visibility bits
    uint32_t bit = (1u << slab_idx);
    atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
    atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}
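
/*
 * Phase 9-2 reference sketch (illustrative only, kept under "#if 0" so it never
 * compiles): the real SLAB_TRY_RECYCLE() comes from box/slab_recycling_box.h and
 * may differ. One plausible shape, using only fields visible in this file, is to
 * detect a slab with no live blocks and nothing parked in TLS caches, reset it to
 * linear-allocation mode, and return it to the SuperSlab free set.
 */
#if 0
#define SLAB_TRY_RECYCLE(ss, slab_idx, meta)                               \
    do {                                                                   \
        if ((meta)->used == 0 && (meta)->active == 0 &&                    \
            (meta)->tls_cached == 0) {                                     \
            (meta)->freelist = NULL;  /* back to linear allocation mode */ \
            (meta)->carved   = 0;                                          \
            superslab_deactivate_slab((ss), (slab_idx));                   \
        }                                                                  \
    } while (0)
#endif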

// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================

void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    // Phase E1-CORRECT unified geometry:
    // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
    // - usable bytes are determined by slab index (slab0 vs others)
    // - capacity = usable / stride for ALL classes (including former C7)
    size_t usable_size = (slab_idx == 0)
                             ? SUPERSLAB_SLAB0_USABLE_SIZE
                             : SUPERSLAB_SLAB_USABLE_SIZE;
    size_t stride = block_size;
    uint16_t capacity = (uint16_t)(usable_size / stride);

#if !HAKMEM_BUILD_RELEASE
    if (slab_idx == 0) {
        fprintf(stderr,
                "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
                usable_size, stride, (unsigned)capacity);
    }
#endif

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    meta->freelist = NULL;   // NULL = linear allocation mode
    meta->used = 0;
    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
    meta->tls_cached = 0;    // P2.2: blocks cached in TLS SLL (starts at 0)
    meta->capacity = capacity;
    meta->carved = 0;
    // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
    // Fail-safe: stamp class_idx from geometry (stride → class).
    // This ensures legacy/shared/legacy-refill paths all end with a correct class.
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tiny_class_sizes[i] == stride) {
            meta->class_idx = (uint8_t)i;
            // P1.1: Update class_map for out-of-band lookup on free path
            ss->class_map[slab_idx] = (uint8_t)i;
            break;
        }
    }

    superslab_activate_slab(ss, slab_idx);
}

// ============================================================================
// Slab Bitmap Management
// ============================================================================

void superslab_activate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }
    uint32_t mask = 1u << slab_idx;
    if ((ss->slab_bitmap & mask) == 0) {
        ss->slab_bitmap |= mask;
        ss->active_slabs++;

        // Phase 3d-C: Update hot/cold indices after activating new slab
        ss_update_hot_cold_indices(ss);
    }
}

void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }
    uint32_t mask = 1u << slab_idx;
    if (ss->slab_bitmap & mask) {
        ss->slab_bitmap &= ~mask;
        ss->active_slabs--;
    }
}

int superslab_find_free_slab(SuperSlab* ss) {
    if (!ss) return -1;
    if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) {
        return -1;  // No free slabs
    }
    // Find first 0 bit in bitmap
    int cap = ss_slabs_capacity(ss);
    for (int i = 0; i < cap; i++) {
        if ((ss->slab_bitmap & (1u << i)) == 0) {
            return i;
        }
    }
    return -1;
}