#include "hakmem_super_registry.h"
#include "hakmem_tiny_superslab.h"
// NOTE(review): the bare `#include` directives that followed had lost their
// header names. The headers below cover what this file calls directly
// (fprintf, getenv, memset) - confirm against the original file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Global registry storage (open-addressed hash table: base address -> SuperSlab)
SuperRegEntry g_super_reg[SUPER_REG_SIZE];
pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER;
int g_super_reg_initialized = 0;

// Per-class registry storage (Phase 6: Registry Optimization)
SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
int g_super_reg_class_size[TINY_NUM_CLASSES];

// Returns 1 when HAKMEM_SUPER_REG_DEBUG is set to a non-empty value that does
// not start with '0', else 0. The environment is read once and cached.
// Both callers in this file invoke it while holding g_super_reg_lock, which
// serializes the one-time getenv().
static int super_reg_debug_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached == -1, 0)) {
        const char* v = getenv("HAKMEM_SUPER_REG_DEBUG");
        cached = (v && *v && *v != '0');
    }
    return cached;
}

// Zero both registries and mark the registry initialized.
// Caller must hold g_super_reg_lock. Doing the flag check and the memset under
// the lock guarantees the tables are never wiped after an entry has been
// published by another thread.
static void super_registry_init_locked(void) {
    if (g_super_reg_initialized) return;

    // Zero-initialize all entries (hash table)
    memset(g_super_reg, 0, sizeof(g_super_reg));

    // Zero-initialize per-class registry (Phase 6: Registry Optimization)
    memset(g_super_reg_by_class, 0, sizeof(g_super_reg_by_class));
    memset(g_super_reg_class_size, 0, sizeof(g_super_reg_class_size));

    // Memory fence to ensure initialization is visible before the flag
    atomic_thread_fence(memory_order_release);
    g_super_reg_initialized = 1;
}

// Initialize registry (call once at startup).
// Idempotent: later calls return without touching the tables.
// Takes g_super_reg_lock, so it must not be called with that lock held.
void hak_super_registry_init(void) {
    if (g_super_reg_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);
    super_registry_init_locked();
    pthread_mutex_unlock(&g_super_reg_lock);
}

// Register SuperSlab (mutex-protected)
// CRITICAL: Call AFTER SuperSlab is fully initialized
// Publish order: ss init -> release fence -> base write
// Phase 8.3: ACE - lg_size aware registration
// Phase 6: Registry Optimization - Also add to per-class registry for fast refill scan
//
// Parameters:
//   base - base address used as the hash key; 0 is rejected because a zero
//          base marks an empty slot
//   ss   - SuperSlab to publish; NULL is rejected
// Returns 1 on success (including a repeated registration of the same
// base/lg_size), 0 on invalid arguments or when no free slot exists within
// SUPER_MAX_PROBE probes.
int hak_super_register(uintptr_t base, SuperSlab* ss) {
    if (ss == NULL || base == 0) return 0;

    pthread_mutex_lock(&g_super_reg_lock);

    // Lazy initialization happens under the lock (see super_registry_init_locked)
    super_registry_init_locked();

    int lg = ss->lg_size;  // Phase 8.3: Get lg_size from SuperSlab
    int debug = super_reg_debug_enabled();
    int h = hak_super_hash(base, lg);

    // Step 1: Register in hash table (for address -> SuperSlab lookup).
    // Unregister leaves holes (base == 0) in the middle of probe chains, so the
    // whole probe window is checked for an existing entry before the first free
    // slot is used; otherwise the same base could be stored twice.
    int hash_registered = 0;
    int free_slot = -1;
    for (int i = 0; i < SUPER_MAX_PROBE; i++) {
        int slot = (h + i) & SUPER_REG_MASK;
        SuperRegEntry* e = &g_super_reg[slot];
        uintptr_t cur = atomic_load_explicit(&e->base, memory_order_acquire);

        if (cur == base && e->lg_size == lg) {
            // Already registered (duplicate registration)
            hash_registered = 1;
            break;
        }
        if (cur == 0 && free_slot < 0) {
            free_slot = slot;  // remember the first empty slot
        }
    }

    if (!hash_registered && free_slot >= 0) {
        SuperRegEntry* e = &g_super_reg[free_slot];

        // Write SuperSlab pointer and lg_size (atomic for MT-safety)
        atomic_store_explicit(&e->ss, ss, memory_order_release);
        e->lg_size = lg;  // Phase 8.3: Store lg_size for fast lookup

        // Release fence (ensures ss/lg_size write is visible before base)
        atomic_thread_fence(memory_order_release);

        // Publish base address (makes entry visible to readers)
        atomic_store_explicit(&e->base, base, memory_order_release);

        hash_registered = 1;

        if (debug == 1) {
            fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d class=%d magic=%llx\n",
                    (void*)base, lg, free_slot, ss->size_class,
                    (unsigned long long)ss->magic);
        }
    }

    if (!hash_registered) {
        // Hash table full (probing limit reached)
        pthread_mutex_unlock(&g_super_reg_lock);
        fprintf(stderr, "HAKMEM: SuperSlab registry full! Increase SUPER_REG_SIZE\n");
        return 0;
    }

    // Step 2: Register in per-class registry (Phase 6: Registry Optimization)
    // Purpose: Enable O(class_size) refill scan instead of O(262K)
    int class_idx = ss->size_class;
    if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
        int size = g_super_reg_class_size[class_idx];

        // Check for duplicate registration first, so a full array does not
        // produce a "registry full" message for a slab it already holds.
        int already_in_class = 0;
        for (int i = 0; i < size; i++) {
            if (g_super_reg_by_class[class_idx][i] == ss) {
                already_in_class = 1;
                break;
            }
        }

        if (already_in_class) {
            // Nothing to do
        } else if (size < SUPER_REG_PER_CLASS) {
            // Add to per-class registry
            g_super_reg_by_class[class_idx][size] = ss;
            g_super_reg_class_size[class_idx]++;
        } else {
            // Per-class registry full (rare). Suppressed when HAKMEM_QUIET is set.
            const char* q = getenv("HAKMEM_QUIET");
            if (!(q && *q && *q != '0')) {
                fprintf(stderr, "HAKMEM: Per-class registry full for class %d! "
                                "Increase SUPER_REG_PER_CLASS\n", class_idx);
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
    return 1;
}

// Unregister SuperSlab (mutex-protected)
// CRITICAL: Call BEFORE munmap to prevent reader segfault
// Unpublish order: ss = NULL -> base = 0 (release) -> munmap outside this function
// Phase 8.3: ACE - Try both lg_sizes (we don't know which one was used)
// Phase 6: Registry Optimization - Also remove from per-class registry
//
// Not finding `base` is not an error (could be a duplicate unregister).
void hak_super_unregister(uintptr_t base) {
    if (!g_super_reg_initialized) return;

    pthread_mutex_lock(&g_super_reg_lock);

    // Step 1: Find and remove from hash table
    SuperSlab* ss = NULL;  // Saved for per-class removal
    for (int lg = 20; lg <= 21 && ss == NULL; lg++) {
        int h = hak_super_hash(base, lg);

        // Probe the whole window. An empty slot does NOT end the search:
        // earlier removals leave holes in probe chains, and stopping at one
        // would leave this entry published after its memory is unmapped.
        for (int i = 0; i < SUPER_MAX_PROBE; i++) {
            SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
            if (atomic_load_explicit(&e->base, memory_order_acquire) != base ||
                e->lg_size != lg) {
                continue;
            }

            // Save SuperSlab pointer BEFORE clearing (for per-class removal)
            ss = atomic_load_explicit(&e->ss, memory_order_acquire);

            // Clear SuperSlab pointer (atomic, prevents TOCTOU race)
            atomic_store_explicit(&e->ss, NULL, memory_order_release);

            // Unpublish base (makes entry invisible to readers)
            atomic_store_explicit(&e->base, 0, memory_order_release);

            // Clear lg_size (optional cleanup)
            e->lg_size = 0;

            if (super_reg_debug_enabled() == 1) {
                fprintf(stderr, "[SUPER_REG] unregister base=%p\n", (void*)base);
            }

            // Found in hash table, continue to per-class removal
            goto hash_removed;
        }
    }

hash_removed:
    // Step 2: Remove from per-class registry (Phase 6: Registry Optimization)
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int class_idx = ss->size_class;
        if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
            int size = g_super_reg_class_size[class_idx];

            // Linear scan to find and remove SuperSlab from per-class array
            for (int i = 0; i < size; i++) {
                if (g_super_reg_by_class[class_idx][i] != ss) continue;

                // Move the last element into the vacated position
                // (O(1) removal, order doesn't matter)
                int last = size - 1;
                g_super_reg_class_size[class_idx] = last;
                if (i != last) {
                    g_super_reg_by_class[class_idx][i] =
                        g_super_reg_by_class[class_idx][last];
                }
                g_super_reg_by_class[class_idx][last] = NULL;
                break;
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}

// Debug: Get registry statistics
// Fills total_slots, used_slots and max_probe_depth of *stats under the
// registry lock. Does nothing when stats is NULL.
void hak_super_registry_stats(SuperRegStats* stats) {
    if (!stats) return;

    stats->total_slots = SUPER_REG_SIZE;
    stats->used_slots = 0;
    stats->max_probe_depth = 0;

    pthread_mutex_lock(&g_super_reg_lock);

    for (int i = 0; i < SUPER_REG_SIZE; i++) {
        uintptr_t base = atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire);
        if (base == 0) continue;

        // Count used slots
        stats->used_slots++;

        // Probe depth: distance from the hashed home slot to the first slot in
        // the probe sequence holding this base/lg_size pair.
        int lg = g_super_reg[i].lg_size;  // Phase 8.3: Use stored lg_size
        int h = hak_super_hash(base, lg);

        for (int j = 0; j < SUPER_MAX_PROBE; j++) {
            int idx = (h + j) & SUPER_REG_MASK;
            if (atomic_load_explicit(&g_super_reg[idx].base, memory_order_acquire) == base &&
                g_super_reg[idx].lg_size == lg) {
                if (j > stats->max_probe_depth) {
                    stats->max_probe_depth = j;
                }
                break;
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}