Phase 9: SuperSlab optimization & EMPTY slab recycling (WIP)
Phase 9-1: O(1) SuperSlab lookup optimization
- Created ss_addr_map_box: hash table (8192 buckets) for O(1) SuperSlab lookup
- Created ss_tls_hint_box: TLS caching layer for SuperSlab hints
- Integrated the hash table into the registry (init, insert, remove, lookup)
- Modified hak_super_lookup() to use the new hash table
- Expected: 50-80 cycles → 10-20 cycles (not verified; SuperSlab is disabled by default)

Phase 9-2: EMPTY slab recycling implementation
- Created slab_recycling_box: SLAB_TRY_RECYCLE() macro following the Box pattern
- Integrated into remote drain (superslab_slab.c)
- Integrated into TLS SLL drain (tls_sll_drain_box.h) with touched-slab tracking
- Observable: debug tracing via HAKMEM_SLAB_RECYCLE_TRACE
- Updated Makefile: added the new box objects to 3 build targets

Known Issues:
- SuperSlab registry exhaustion still occurs (unregistration is not working)
- shared_pool_release_slab() may not be removing entries from g_super_reg[]
- Needs investigation before Phase 9-2 can be completed

Expected Impact (when fixed):
- Stage 1 hit rate: 0% → 80%
- shared_fail events: 4 → 0
- Kernel overhead: 55% → 15%
- Throughput: 16.5M → 25-30M ops/s (+50-80%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -18,6 +18,7 @@
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include "hakmem_tiny_superslab.h" // For SuperSlab and SUPERSLAB_MAGIC
|
||||
#include "box/ss_addr_map_box.h" // Phase 9-1: O(1) hash table lookup
|
||||
|
||||
// Registry configuration
|
||||
// Increased from 4096 to 32768 to avoid registry exhaustion under
|
||||
@ -115,10 +116,14 @@ static inline int hak_super_hash(uintptr_t base, int lg_size) {
|
||||
|
||||
// Lookup SuperSlab by pointer (lock-free, thread-safe)
|
||||
// Returns: SuperSlab* if found, NULL otherwise
|
||||
// Phase 8.3: ACE - Supports both 1MB and 2MB SuperSlabs
|
||||
// Phase 9-1: Optimized with hash table O(1) lookup (replaced linear probing)
|
||||
static inline SuperSlab* hak_super_lookup(void* ptr) {
|
||||
if (!g_super_reg_initialized) return NULL;
|
||||
|
||||
// Phase 9-1: Use new O(1) hash table lookup
|
||||
// Replaces old linear probing (expected: 50-80 cycles → 10-20 cycles; not yet measured)
|
||||
SuperSlab* ss = ss_map_lookup(&g_ss_addr_map, ptr);
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug logging (ENV-gated)
|
||||
static __thread int s_dbg = -1;
|
||||
@ -126,68 +131,26 @@ static inline SuperSlab* hak_super_lookup(void* ptr) {
|
||||
const char* e = getenv("HAKMEM_SUPER_LOOKUP_DEBUG");
|
||||
s_dbg = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
#else
|
||||
static const int s_dbg = 0;
|
||||
#endif
|
||||
|
||||
// Try both 1MB and 2MB alignments (1MB first for Step 1 default)
|
||||
// ACE will use both sizes dynamically in Step 3
|
||||
for (int lg = 20; lg <= 21; lg++) {
|
||||
uintptr_t mask = (1UL << lg) - 1;
|
||||
uintptr_t base = (uintptr_t)ptr & ~mask;
|
||||
int h = hak_super_hash(base, lg);
|
||||
|
||||
if (s_dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] ptr=%p lg=%d aligned_base=%p hash=%d\n",
|
||||
ptr, lg, (void*)base, h);
|
||||
}
|
||||
|
||||
// Linear probing with acquire semantics
|
||||
for (int i = 0; i < SUPER_MAX_PROBE; i++) {
|
||||
SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
|
||||
uintptr_t b = atomic_load_explicit(&e->base, memory_order_acquire);
|
||||
|
||||
if (s_dbg == 1 && b != 0) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] probe[%d] entry_base=%p entry_lg=%d (match=%d)\n",
|
||||
i, (void*)b, e->lg_size, (b == base && e->lg_size == lg));
|
||||
}
|
||||
|
||||
// Match both base address AND lg_size
|
||||
if (b == base && e->lg_size == lg) {
|
||||
// Atomic load to prevent TOCTOU race with unregister
|
||||
SuperSlab* ss = atomic_load_explicit(&e->ss, memory_order_acquire);
|
||||
if (!ss) {
|
||||
if (s_dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] MATCH but ss=NULL (unregistered)\n");
|
||||
}
|
||||
return NULL; // Entry cleared by unregister
|
||||
}
|
||||
|
||||
// CRITICAL: Check magic BEFORE returning pointer to prevent TOCTOU
|
||||
// Race scenario: lookup → free (clear magic, munmap) → caller checks magic
|
||||
// Fix: Check magic HERE while we're certain ss is still registered
|
||||
if (ss->magic != SUPERSLAB_MAGIC) {
|
||||
if (s_dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] MATCH but bad magic=%llx (being freed)\n",
|
||||
(unsigned long long)ss->magic);
|
||||
}
|
||||
return NULL; // Being freed
|
||||
}
|
||||
|
||||
if (s_dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] FOUND: ss=%p magic=%llx\n",
|
||||
(void*)ss, (unsigned long long)ss->magic);
|
||||
}
|
||||
return ss;
|
||||
}
|
||||
if (b == 0) break; // Empty slot, try next lg_size
|
||||
}
|
||||
}
|
||||
|
||||
if (s_dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] NOT FOUND (all lg sizes exhausted)\n");
|
||||
if (ss) {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] ptr=%p -> ss=%p (hash table hit)\n", ptr, (void*)ss);
|
||||
} else {
|
||||
fprintf(stderr, "[SUPER_LOOKUP] ptr=%p -> NULL (hash table miss)\n", ptr);
|
||||
}
|
||||
}
|
||||
return NULL; // Not found
|
||||
#endif
|
||||
|
||||
// Magic check for safety (same as before)
|
||||
if (ss && ss->magic != SUPERSLAB_MAGIC) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
fprintf(stderr, "[SUPER_LOOKUP] WARNING: ss=%p has bad magic=%llx (being freed)\n",
|
||||
(void*)ss, (unsigned long long)ss->magic);
|
||||
#endif
|
||||
return NULL; // Being freed
|
||||
}
|
||||
|
||||
return ss;
|
||||
}
|
||||
|
||||
// Register SuperSlab (mutex-protected, called after SuperSlab initialization)
|
||||
|
||||
Reference in New Issue
Block a user