Phase 9: SuperSlab optimization & EMPTY slab recycling (WIP)

Phase 9-1: O(1) SuperSlab lookup optimization
- Created ss_addr_map_box: Hash table (8192 buckets) for O(1) SuperSlab lookup
- Created ss_tls_hint_box: TLS caching layer for SuperSlab hints
- Integrated hash table into registry (init, insert, remove, lookup)
- Modified hak_super_lookup() to use new hash table
- Expected: 50-80 cycles → 10-20 cycles (not verified - SuperSlab disabled by default)

Phase 9-2: EMPTY slab recycling implementation
- Created slab_recycling_box: SLAB_TRY_RECYCLE() macro following Box pattern
- Integrated into remote drain (superslab_slab.c)
- Integrated into TLS SLL drain (tls_sll_drain_box.h) with touched slab tracking
- Observable: Debug tracing via HAKMEM_SLAB_RECYCLE_TRACE
- Updated Makefile: Added new box objects to 3 build targets

Known Issues:
- SuperSlab registry exhaustion still occurs (unregistration not working)
- shared_pool_release_slab() may not be removing from g_super_reg[]
- Needs investigation before Phase 9-2 can be completed

Expected Impact (when fixed):
- Stage 1 hit rate: 0% → 80%
- shared_fail events: 4 → 0
- Kernel overhead: 55% → 15%
- Throughput: 16.5M → 25-30M ops/s (+50-80%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-30 07:16:50 +09:00
parent 4ad3223f5b
commit 87b7d30998
12 changed files with 957 additions and 64 deletions

225
core/box/ss_tls_hint_box.h Normal file
View File

@ -0,0 +1,225 @@
// ss_tls_hint_box.h - Phase 9-1-4: TLS Hints for SuperSlab Lookup
// Purpose: Cache last-used SuperSlab per class to eliminate hash table lookups
//
// Box Pattern:
// - Single Responsibility: TLS caching layer for SuperSlab lookups
// - Clear Contract: O(1) hint check, fallback to hash table on miss
// - Observable: Debug macros log hit/miss rates
// - Composable: Wraps ss_addr_map_box for fallback
//
// Performance Target:
// - Hit case: 5-10 cycles (TLS load + range check)
// - Miss case: 15-25 cycles (TLS update + hash table lookup)
// - Expected hit rate: 80-95% (locality of reference)
// - Net improvement: 50-80 cycles → 10-15 cycles average
//
// Design:
// - __thread SuperSlab* g_tls_ss_hint[TINY_NUM_CLASSES]
// - Each allocation/free updates hint for its size class
// - Quick range check: ptr >= base && ptr < base + size
// - Fallback to hash table on miss, update hint
#ifndef HAK_BOX_SS_TLS_HINT_H
#define HAK_BOX_SS_TLS_HINT_H
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_superslab.h"
#include "../hakmem_tiny_superslab_constants.h"
#include "ss_addr_map_box.h"
// Forward declaration
struct SuperSlab;
// ============================================================================
// TLS Hint Cache
// ============================================================================
// Per-thread hint cache: one SuperSlab pointer per size class.
// - Written by ss_tls_hint_lookup() when the hash-table fallback succeeds and
//   by ss_tls_hint_update(); cleared by ss_tls_hint_init() and
//   ss_tls_hint_invalidate().
// - Declared __thread, so every thread has its own array.
// - Author's target: 80-95% fewer hash table lookups (locality of reference).
// - NOT invalidated automatically in this header: the only invalidation path
//   is an explicit ss_tls_hint_invalidate() call, which clears the calling
//   thread's slot only. Automatic invalidation on SuperSlab free is noted by
//   the author as future work (Phase 9-2).
// NOTE(review): the fallback below must match the value used where
// g_tls_ss_hint is defined, otherwise the array sizes disagree -- confirm.
#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8 // Fallback if hakmem_tiny.h not included
#endif
extern __thread struct SuperSlab* g_tls_ss_hint[TINY_NUM_CLASSES];
// ============================================================================
// Statistics (Debug builds only)
// ============================================================================
#if !HAKMEM_BUILD_RELEASE
// Per-thread counters maintained by ss_tls_hint_lookup(); compiled only when
// HAKMEM_BUILD_RELEASE is 0. All fields are zeroed by ss_tls_hint_init().
typedef struct {
uint64_t total_lookups; // Bumped on every ss_tls_hint_lookup() call, before the class_idx bounds check
uint64_t hint_hits; // Lookups answered directly from the cached TLS hint
uint64_t hint_misses; // Lookups that fell through to the hash table
uint64_t hash_hits; // Fallback lookups where ss_map_lookup() returned non-NULL
uint64_t hash_misses; // Fallback lookups where ss_map_lookup() returned NULL
} SSTLSHintStats;
// One instance per thread (__thread); the definition lives outside this header.
extern __thread SSTLSHintStats g_tls_hint_stats;
// Print statistics (for profiling)
static inline void ss_tls_hint_print_stats(void) {
fprintf(stderr, "\n[SS_TLS_HINT_STATS] Thread-local SuperSlab Lookup Statistics:\n");
fprintf(stderr, " Total lookups: %lu\n", g_tls_hint_stats.total_lookups);
fprintf(stderr, " TLS hint hits: %lu (%.1f%%)\n",
g_tls_hint_stats.hint_hits,
100.0 * g_tls_hint_stats.hint_hits / (g_tls_hint_stats.total_lookups + 1));
fprintf(stderr, " TLS hint misses: %lu (%.1f%%)\n",
g_tls_hint_stats.hint_misses,
100.0 * g_tls_hint_stats.hint_misses / (g_tls_hint_stats.total_lookups + 1));
fprintf(stderr, " Hash table hits: %lu\n", g_tls_hint_stats.hash_hits);
fprintf(stderr, " Hash table misses: %lu\n", g_tls_hint_stats.hash_misses);
uint64_t total_misses = g_tls_hint_stats.hint_misses + g_tls_hint_stats.hash_misses;
fprintf(stderr, " Overall hit rate: %.1f%%\n",
100.0 * (g_tls_hint_stats.hint_hits + g_tls_hint_stats.hash_hits) /
(g_tls_hint_stats.total_lookups + 1));
}
#endif
// ============================================================================
// API Functions
// ============================================================================
// Initialize TLS hints (call once per thread)
static inline void ss_tls_hint_init(void) {
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
g_tls_ss_hint[i] = NULL;
}
#if !HAKMEM_BUILD_RELEASE
g_tls_hint_stats.total_lookups = 0;
g_tls_hint_stats.hint_hits = 0;
g_tls_hint_stats.hint_misses = 0;
g_tls_hint_stats.hash_hits = 0;
g_tls_hint_stats.hash_misses = 0;
#endif
}
// Report whether `ptr` falls inside the address range covered by `ss`.
//
// The range is [ss, ss + 2^lg_size), where lg_size is read from the SuperSlab
// header itself; `ss` is therefore dereferenced and must point at a live
// SuperSlab when non-NULL.
// - ss:  SuperSlab to test against; NULL yields 0
// - ptr: address to test (never dereferenced)
// Returns: 1 if ptr is inside the range, 0 otherwise.
static inline int ss_contains(struct SuperSlab* ss, void* ptr) {
    if (!ss) {
        return 0;
    }
    const uintptr_t base = (uintptr_t)ss;
    // Shift in uintptr_t so the operand is always pointer-width; `1UL` is only
    // 32 bits wide on LLP64 targets.
    const uintptr_t size = (uintptr_t)1 << ss->lg_size;
    // One unsigned comparison covers both bounds: when ptr < base the
    // subtraction wraps to a huge offset and fails the test, and no
    // `base + size` sum is formed that could overflow near the top of the
    // address space.
    return ((uintptr_t)ptr - base) < size;
}
// Look up the SuperSlab containing `ptr`, trying the per-thread hint first.
//
// - class_idx: size class index; values outside [0, TINY_NUM_CLASSES) make the
//              function return NULL without consulting hint or hash table
// - ptr:       pointer to resolve
// Returns: the cached hint when ss_contains(hint, ptr) holds; otherwise the
//          result of ss_map_lookup(&g_ss_addr_map, ptr), which may be NULL.
//
// Side effects:
// - On a successful hash-table fallback the result is stored in
//   g_tls_ss_hint[class_idx] so the next lookup for this class can hit.
// - Non-release builds update g_tls_hint_stats and, when the environment
//   variable HAKMEM_SS_TLS_HINT_TRACE is set to a non-empty value that does
//   not start with '0', log every step to stderr.
//
// Author's design targets (not verified here): 5-10 cycles on a hint hit,
// 15-25 cycles on the hash-table path.
// NOTE(review): a cached hint is dereferenced by ss_contains(); nothing in
// this header clears hints held by other threads when a SuperSlab is freed --
// confirm callers guarantee the hint is still live.
static inline struct SuperSlab* ss_tls_hint_lookup(int class_idx, void* ptr) {
#if !HAKMEM_BUILD_RELEASE
    // Single per-thread trace flag shared by every path below.
    // -1 = environment not read yet, 0 = tracing off, 1 = tracing on.
    static __thread int s_trace = -1;
    g_tls_hint_stats.total_lookups++;
#endif

    // Bounds check: an invalid class has no hint slot to consult or update.
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        return NULL;
    }

#if !HAKMEM_BUILD_RELEASE
    // Read the environment once per thread, on the first in-range lookup.
    if (__builtin_expect(s_trace == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_TLS_HINT_TRACE");
        s_trace = (e && *e && *e != '0') ? 1 : 0;
    }
#endif

    // Fast path: the cached SuperSlab for this class already covers ptr.
    struct SuperSlab* hint = g_tls_ss_hint[class_idx];
    if (__builtin_expect(hint != NULL, 1) &&
        __builtin_expect(ss_contains(hint, ptr), 1)) {
#if !HAKMEM_BUILD_RELEASE
        g_tls_hint_stats.hint_hits++;
        if (s_trace) {
            fprintf(stderr, "[SS_TLS_HINT] HIT: class=%d ptr=%p ss=%p\n",
                    class_idx, ptr, (void*)hint);
        }
#endif
        return hint;
    }

    // Slow path: hint absent or pointing at a different SuperSlab.
#if !HAKMEM_BUILD_RELEASE
    g_tls_hint_stats.hint_misses++;
    if (s_trace) {
        fprintf(stderr, "[SS_TLS_HINT] MISS: class=%d ptr=%p (hint=%p)\n",
                class_idx, ptr, (void*)hint);
    }
#endif

    struct SuperSlab* ss = ss_map_lookup(&g_ss_addr_map, ptr);
    if (ss) {
        // Remember the result so the next lookup for this class can hit.
        g_tls_ss_hint[class_idx] = ss;
#if !HAKMEM_BUILD_RELEASE
        g_tls_hint_stats.hash_hits++;
        if (s_trace) {
            fprintf(stderr, "[SS_TLS_HINT] HASH_HIT: class=%d ptr=%p ss=%p (hint updated)\n",
                    class_idx, ptr, (void*)ss);
        }
#endif
    } else {
        // Not found: the existing hint is left untouched.
#if !HAKMEM_BUILD_RELEASE
        g_tls_hint_stats.hash_misses++;
        if (s_trace) {
            fprintf(stderr, "[SS_TLS_HINT] HASH_MISS: class=%d ptr=%p (not found)\n",
                    class_idx, ptr);
        }
#endif
    }
    return ss;
}
// Drop the calling thread's hint for `class_idx` if it currently equals `ss`.
//
// Meant to be called when a SuperSlab is being freed so this thread does not
// keep a dangling hint. Only the calling thread's slot is examined.
// - class_idx: size class index; out-of-range values are ignored
// - ss:        SuperSlab being retired; the slot is cleared only on an exact
//              pointer match
// Non-release builds log the invalidation to stderr when
// HAKMEM_SS_TLS_HINT_TRACE is set to a non-empty value not starting with '0'.
static inline void ss_tls_hint_invalidate(int class_idx, struct SuperSlab* ss) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return;
    }

    struct SuperSlab** slot = &g_tls_ss_hint[class_idx];
    if (*slot != ss) {
        return;  // Hint refers to some other SuperSlab; leave it alone.
    }
    *slot = NULL;

#if !HAKMEM_BUILD_RELEASE
    // -1 = environment not read yet; resolved once per thread.
    static __thread int trace_on = -1;
    if (__builtin_expect(trace_on == -1, 0)) {
        const char* env = getenv("HAKMEM_SS_TLS_HINT_TRACE");
        trace_on = (env != NULL && env[0] != '\0' && env[0] != '0') ? 1 : 0;
    }
    if (trace_on) {
        fprintf(stderr, "[SS_TLS_HINT] INVALIDATE: class=%d ss=%p\n",
                class_idx, (void*)ss);
    }
#endif
}
// Store `ss` as the calling thread's hint for `class_idx`.
//
// Intended to be called after allocating from a SuperSlab so the next lookup
// for that class can hit the hint. The call is a no-op when `ss` is NULL or
// `class_idx` is outside [0, TINY_NUM_CLASSES).
static inline void ss_tls_hint_update(int class_idx, struct SuperSlab* ss) {
    if (ss == NULL) {
        return;
    }
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return;
    }
    g_tls_ss_hint[class_idx] = ss;
}
#endif // HAK_BOX_SS_TLS_HINT_H