Files
hakmem/core/hakmem_super_registry.h
Moe Charm (CI) f8b0f38f78 ENV Cleanup Step 8: Gate HAKMEM_SUPER_LOOKUP_DEBUG in header
Gate HAKMEM_SUPER_LOOKUP_DEBUG environment variable behind
#if !HAKMEM_BUILD_RELEASE in hakmem_super_registry.h inline function.

Changes:
- Wrap s_dbg initialization in conditional compilation
- Release builds use constant s_dbg = 0 for complete elimination
- Debug logging in hak_super_lookup() now fully compiled out in release

Performance: 30.3M ops/s Larson (stable, no regression)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 01:45:45 +09:00

209 lines
8.7 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <stdio.h>
#include <stdlib.h>
// Phase 1: SuperSlab Registry - Thread-safe O(1) lookup for SuperSlab ownership
//
// Purpose: Replace mincore() syscall (50-100ns) with userspace hash table lookup
// Performance: ~5-10ns per lookup, 10-20x faster than mincore()
//
// Thread Safety:
// - Readers: Lock-free with acquire semantics
// - Writers: Mutex-protected with release semantics
// - Publish order: ss initialization → release fence → base write
// - Unpublish order: base = 0 (release) → munmap (prevents reader deref)
#include <stdatomic.h>
#include <pthread.h>
#include <stdint.h>
#include "hakmem_tiny_superslab.h" // For SuperSlab and SUPERSLAB_MAGIC
// Registry configuration
// Grown over time (4096 → 32768 → 262144) to avoid registry exhaustion under
// high-churn microbenchmarks (e.g., larson with many active SuperSlabs).
// Still a power of two for fast masking.
#define SUPER_REG_SIZE 262144 // Slot count of g_super_reg; power of 2 so masking replaces modulo (8x larger for workloads)
#define SUPER_REG_MASK (SUPER_REG_SIZE - 1) // Index mask; only valid while SUPER_REG_SIZE is a power of 2
#define SUPER_MAX_PROBE 32 // Linear probing limit (increased from 8 for Phase 15 fix)
// Per-class registry for fast refill scan (Phase 6: Registry Optimization)
// Purpose: Avoid 262K linear scan by indexing SuperSlabs by size class
// - Each class has 16384 slots (total: 8 classes × 16384 = 128K entries)
// - Refill scan: O(class_size) instead of O(262144)
// - Expected speedup: +200-300% for Larson (2.59M → 7.8M ops/s)
#define SUPER_REG_PER_CLASS 16384 // Per-class registry capacity (increased for high-churn workloads)
// Registry entry: base address → SuperSlab pointer mapping
// One slot of the g_super_reg table. hak_super_lookup() reads `base` and `ss`
// with acquire loads and compares `lg_size` with a plain (non-atomic) read.
// NOTE(review): `lg_size` is not atomic; presumably the writer stores it before
// publishing `base` so readers that observe a non-zero base also observe the
// matching lg_size — confirm in hak_super_register().
typedef struct {
_Atomic(uintptr_t) base; // Aligned base address (1MB or 2MB, 0 = empty slot) [atomic for proper sync]
_Atomic(SuperSlab*) ss; // Atomic SuperSlab pointer (MT-safe, prevents TOCTOU race)
uint8_t lg_size; // Phase 8.3: ACE - SuperSlab size (20=1MB, 21=2MB)
uint8_t _pad[7]; // Padding to 24 bytes (assumes 8-byte uintptr_t and pointers)
} SuperRegEntry;
// Global registry (lock-free reads, mutex-protected writes)
// Declared here, defined in the implementation file.
extern SuperRegEntry g_super_reg[SUPER_REG_SIZE]; // Hash table probed by hak_super_lookup()
extern pthread_mutex_t g_super_reg_lock; // Writer lock (also covers the per-class registry)
extern int g_super_reg_initialized; // hak_super_lookup() returns NULL while this is 0
// Per-class registry for fast refill scan (Phase 6: Registry Optimization)
// Note: TINY_NUM_CLASSES is defined in hakmem_tiny.h (typically 8 for 16B-1KB)
// - g_super_reg_by_class[class][i] = SuperSlab pointer (NULL = empty slot)
// - g_super_reg_class_size[class] = number of active SuperSlabs for this class
// - Protected by g_super_reg_lock (shared with main registry)
// NOTE(review): the fallback below only takes effect when TINY_NUM_CLASSES is
// not yet defined. If hakmem_tiny.h is included afterwards with a different
// value, the array extents declared here would disagree with it — confirm
// include order.
#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8 // Fallback if hakmem_tiny.h not included yet
#endif
extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
// ============================================================================
// Phase 9: Lazy Deallocation - LRU Cache Manager
// ============================================================================
// Global LRU cache for empty SuperSlabs (lazy deallocation)
// Holds the list endpoints plus the limits consulted by the eviction policy
// (see hak_ss_lru_evict: TTL, max_cached, max_memory_mb).
// NOTE(review): no lock for this structure is declared in this header —
// confirm how g_ss_lru_cache is synchronized in the implementation.
typedef struct {
SuperSlab* lru_head; // LRU list head (most recently used)
SuperSlab* lru_tail; // LRU list tail (least recently used)
uint32_t total_count; // Total SuperSlabs in cache
uint32_t max_cached; // Maximum cached SuperSlabs (default: 256)
uint64_t total_memory_mb; // Total memory in cache (MB)
uint64_t max_memory_mb; // Maximum memory limit (MB, default: 512)
uint64_t ttl_ns; // Time-to-live (nanoseconds, default: 60s)
uint32_t generation; // Current generation counter
} SuperSlabLRUCache;
// Single process-wide cache instance (defined in the implementation file)
extern SuperSlabLRUCache g_ss_lru_cache;
// Initialize LRU cache (called once at startup)
void hak_ss_lru_init(void);
// Try to reuse a cached SuperSlab (returns NULL if cache is empty)
// size_class: presumably the tiny size class the caller intends to use the
// SuperSlab for — confirm against the implementation.
// NOTE(review): size_class is uint8_t here but int in hak_ss_prewarm_class().
SuperSlab* hak_ss_lru_pop(uint8_t size_class);
// Add SuperSlab to LRU cache (returns 1 if cached, 0 if evicted immediately)
int hak_ss_lru_push(SuperSlab* ss);
// Evict old SuperSlabs based on policy (TTL, max_cached, max_memory_mb)
void hak_ss_lru_evict(void);
// Mark SuperSlab as recently used (update timestamp, move to head)
void hak_ss_lru_touch(SuperSlab* ss);
// ============================================================================
// Phase 11: SuperSlab Prewarm - Eliminate mmap/munmap bottleneck
// ============================================================================
// Prewarm: Allocate SuperSlabs at startup and add to LRU cache
void hak_ss_prewarm_init(void);
// Prewarm specific size class with count SuperSlabs
// size_class: presumably in [0, TINY_NUM_CLASSES) — confirm range checking in
// the implementation.
void hak_ss_prewarm_class(int size_class, uint32_t count);
// Prewarm all classes (counts[i] = number of SuperSlabs for class i)
// The array parameter decays to a pointer; callers must supply at least
// TINY_NUM_CLASSES elements.
void hak_ss_prewarm_all(const uint32_t counts[TINY_NUM_CLASSES]);
// Initialize registry (call once at startup)
// hak_super_lookup() returns NULL for every pointer while
// g_super_reg_initialized is 0; presumably this call sets it — confirm.
void hak_super_registry_init(void);
// Map an aligned SuperSlab base address to its home slot in g_super_reg.
//
// base:    aligned base address of the (candidate) SuperSlab
// lg_size: log2 of the SuperSlab size (Phase 8.3: ACE - 20 for 1MB, 21 for 2MB)
//
// Returns: slot index in [0, SUPER_REG_MASK]; the alignment bits are shifted
//          out first so that consecutive SuperSlabs land in consecutive slots.
static inline int hak_super_hash(uintptr_t base, int lg_size) {
    const uintptr_t slab_number = base >> lg_size;
    const uintptr_t slot = slab_number & SUPER_REG_MASK;
    return (int)slot;
}
// Lookup SuperSlab by pointer (lock-free, thread-safe)
//
// Rounds `ptr` down to a 1MB boundary and then to a 2MB boundary. For each
// candidate base it linearly probes at most SUPER_MAX_PROBE slots of
// g_super_reg, starting at hak_super_hash(base, lg). No lock is taken; slot
// fields `base` and `ss` are read with acquire loads.
//
// Returns: SuperSlab* when a slot matches both base and lg_size, holds a
//          non-NULL ss, and ss->magic == SUPERSLAB_MAGIC.
//          NULL otherwise, including when the registry is not yet initialized,
//          when the matching entry has been cleared, or when the magic check
//          fails.
//
// Phase 8.3: ACE - Supports both 1MB and 2MB SuperSlabs
//
// Debug: in non-release builds, setting HAKMEM_SUPER_LOOKUP_DEBUG to anything
// other than empty / leading '0' traces every probe to stderr.
static inline SuperSlab* hak_super_lookup(void* ptr) {
    if (!g_super_reg_initialized) return NULL;

#if !HAKMEM_BUILD_RELEASE
    // Debug logging (ENV-gated); getenv() is consulted once per thread and the
    // result cached in a thread-local.
    static __thread int s_dbg = -1;
    if (__builtin_expect(s_dbg == -1, 0)) {
        const char* env = getenv("HAKMEM_SUPER_LOOKUP_DEBUG");
        s_dbg = (env && *env && *env != '0') ? 1 : 0;
    }
#else
    // Release builds: constant 0 so every debug branch below is dead code.
    static const int s_dbg = 0;
#endif

    // Try both 1MB and 2MB alignments (1MB first for Step 1 default)
    // ACE will use both sizes dynamically in Step 3
    for (int lg = 20; lg <= 21; lg++) {
        // Build the mask in uintptr_t so the shift width never depends on the
        // platform's `unsigned long`.
        const uintptr_t mask = ((uintptr_t)1 << lg) - 1;
        const uintptr_t base = (uintptr_t)ptr & ~mask;
        const int h = hak_super_hash(base, lg);

        if (s_dbg == 1) {
            fprintf(stderr, "[SUPER_LOOKUP] ptr=%p lg=%d aligned_base=%p hash=%d\n",
                    ptr, lg, (void*)base, h);
        }

        // Linear probing with acquire semantics
        for (int i = 0; i < SUPER_MAX_PROBE; i++) {
            SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
            uintptr_t b = atomic_load_explicit(&e->base, memory_order_acquire);

            // base == 0 is the empty-slot sentinel, so it must be rejected
            // BEFORE the match test: a pointer below 1MB/2MB (e.g. NULL) rounds
            // down to base 0 and would otherwise compare equal to an empty or
            // not-yet-published slot.
            if (b == 0) break; // Empty slot, try next lg_size

            const int entry_lg = e->lg_size;
            const int is_match = (b == base && entry_lg == lg);

            if (s_dbg == 1) {
                fprintf(stderr, "[SUPER_LOOKUP] probe[%d] entry_base=%p entry_lg=%d (match=%d)\n",
                        i, (void*)b, entry_lg, is_match);
            }

            // Match both base address AND lg_size
            if (!is_match) continue;

            // Atomic load to prevent TOCTOU race with unregister
            SuperSlab* ss = atomic_load_explicit(&e->ss, memory_order_acquire);
            if (!ss) {
                if (s_dbg == 1) {
                    fprintf(stderr, "[SUPER_LOOKUP] MATCH but ss=NULL (unregistered)\n");
                }
                return NULL; // Entry cleared by unregister
            }

            // CRITICAL: Check magic BEFORE returning pointer to prevent TOCTOU
            // Race scenario: lookup → free (clear magic, munmap) → caller checks magic
            // Fix: Check magic HERE while we're certain ss is still registered
            if (ss->magic != SUPERSLAB_MAGIC) {
                if (s_dbg == 1) {
                    fprintf(stderr, "[SUPER_LOOKUP] MATCH but bad magic=%llx (being freed)\n",
                            (unsigned long long)ss->magic);
                }
                return NULL; // Being freed
            }

            if (s_dbg == 1) {
                fprintf(stderr, "[SUPER_LOOKUP] FOUND: ss=%p magic=%llx\n",
                        (void*)ss, (unsigned long long)ss->magic);
            }
            return ss;
        }
    }

    if (s_dbg == 1) {
        fprintf(stderr, "[SUPER_LOOKUP] NOT FOUND (all lg sizes exhausted)\n");
    }
    return NULL; // Not found
}
// Register SuperSlab (mutex-protected, called after SuperSlab initialization)
// base: presumably the aligned SuperSlab base address that hak_super_lookup()
//       will later match against — confirm in the implementation.
// ss:   the SuperSlab to publish under that base.
// Returns: 1 on success, 0 if registry is full
int hak_super_register(uintptr_t base, SuperSlab* ss);
// Unregister SuperSlab (mutex-protected, MUST call before munmap)
// Critical: base = 0 happens BEFORE munmap to prevent reader segfault
// NOTE(review): hak_super_lookup() stops probing at the first slot whose base
// is 0, so clearing a slot in the middle of a probe chain could hide entries
// stored after it — confirm how the implementation handles this.
void hak_super_unregister(uintptr_t base);
// Debug: Get registry statistics
// Field meanings below are inferred from their names — confirm against
// hak_super_registry_stats() in the implementation.
typedef struct {
int total_slots; // presumably the table capacity (SUPER_REG_SIZE)
int used_slots; // presumably the number of occupied slots
int max_probe_depth; // presumably the longest probe sequence observed
} SuperRegStats;
// Fills *stats with the current registry statistics (output parameter).
void hak_super_registry_stats(SuperRegStats* stats);