// hakmem/core/hakmem_super_registry.c
// (web-view listing: 231 lines, 8.3 KiB, C)

#include "hakmem_super_registry.h"
#include "hakmem_tiny_superslab.h"
#include <string.h>
#include <stdio.h>
// Global registry storage
// Hash table of registered SuperSlabs. Slots are selected with
// hak_super_hash(base, lg_size) plus linear probing (masked by SUPER_REG_MASK);
// a slot whose base field is 0 is treated as empty.
SuperRegEntry g_super_reg[SUPER_REG_SIZE];
// Held by register/unregister while they modify the tables below, and by the
// stats walk while it reads them.
pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER;
// Set to 1 by hak_super_registry_init() after the tables have been zeroed.
int g_super_reg_initialized = 0;
// Per-class registry storage (Phase 6: Registry Optimization)
// g_super_reg_by_class[c] lists the SuperSlabs registered for size class c;
// g_super_reg_class_size[c] is how many leading slots of that row are in use.
SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
int g_super_reg_class_size[TINY_NUM_CLASSES];
// Initialize registry (call once at startup)
void hak_super_registry_init(void) {
if (g_super_reg_initialized) return;
// Zero-initialize all entries (hash table)
memset(g_super_reg, 0, sizeof(g_super_reg));
// Zero-initialize per-class registry (Phase 6: Registry Optimization)
memset(g_super_reg_by_class, 0, sizeof(g_super_reg_by_class));
memset(g_super_reg_class_size, 0, sizeof(g_super_reg_class_size));
// Memory fence to ensure initialization is visible to all threads
atomic_thread_fence(memory_order_release);
g_super_reg_initialized = 1;
}
// Register SuperSlab (mutex-protected)
// CRITICAL: Call AFTER SuperSlab is fully initialized
// Publish order: ss init → release fence → base write
// Phase 8.3: ACE - lg_size aware registration
// Phase 6: Registry Optimization - Also add to per-class registry for fast refill scan
//
// Parameters:
//   base - key under which the SuperSlab is published; must be non-zero
//          because 0 marks an empty hash slot.
//   ss   - SuperSlab to publish; its lg_size and size_class fields are read.
//
// Returns 1 when the SuperSlab is present in the hash table on return (newly
// inserted or already registered), 0 when the arguments are unusable or no
// free slot exists within SUPER_MAX_PROBE probes. A full per-class array only
// produces a warning; the function still returns 1 in that case.
int hak_super_register(uintptr_t base, SuperSlab* ss) {
    // Reject inputs that cannot be published: ss is dereferenced below, and a
    // zero base would be indistinguishable from an empty slot.
    if (ss == NULL || base == 0) {
        return 0;
    }

    if (!g_super_reg_initialized) {
        hak_super_registry_init();
    }

    pthread_mutex_lock(&g_super_reg_lock);

    int lg = ss->lg_size;  // Phase 8.3: Get lg_size from SuperSlab
    int h = hak_super_hash(base, lg);

    // Step 1: Register in hash table (for address → SuperSlab lookup)
    //
    // Scan the whole probe window before inserting. hak_super_unregister()
    // empties slots in place, so an empty slot may precede an existing entry
    // for this base; taking the first empty slot immediately would create a
    // second entry for the same SuperSlab.
    SuperRegEntry* free_slot = NULL;
    int hash_registered = 0;
    for (int i = 0; i < SUPER_MAX_PROBE; i++) {
        SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];

        if (e->base == 0) {
            if (free_slot == NULL) {
                free_slot = e;  // remember the earliest reusable slot
            }
            continue;
        }

        if (e->base == base && e->lg_size == lg) {
            // Already registered (duplicate registration)
            hash_registered = 1;
            break;
        }
    }

    if (!hash_registered && free_slot != NULL) {
        // Write SuperSlab pointer and lg_size first (atomic for MT-safety)
        atomic_store_explicit(&free_slot->ss, ss, memory_order_release);
        free_slot->lg_size = lg;  // Phase 8.3: Store lg_size for fast lookup

        // Release fence (ensures ss/lg_size write is visible before base)
        atomic_thread_fence(memory_order_release);

        // Publish base address (makes entry visible to readers)
        atomic_store_explicit((_Atomic uintptr_t*)&free_slot->base, base,
                              memory_order_release);
        hash_registered = 1;
    }

    if (!hash_registered) {
        // Hash table full (probing limit reached)
        pthread_mutex_unlock(&g_super_reg_lock);
        fprintf(stderr, "HAKMEM: SuperSlab registry full! Increase SUPER_REG_SIZE\n");
        return 0;
    }

    // Step 2: Register in per-class registry (Phase 6: Registry Optimization)
    // Purpose: Enable O(class_size) refill scan instead of O(262K)
    int class_idx = ss->size_class;
    if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
        int size = g_super_reg_class_size[class_idx];

        // Check for duplicate registration before looking at capacity, so a
        // SuperSlab that is already listed never triggers the "full" warning.
        int already_in_class = 0;
        for (int i = 0; i < size; i++) {
            if (g_super_reg_by_class[class_idx][i] == ss) {
                already_in_class = 1;
                break;
            }
        }

        if (!already_in_class) {
            if (size < SUPER_REG_PER_CLASS) {
                // Add to per-class registry
                g_super_reg_by_class[class_idx][size] = ss;
                g_super_reg_class_size[class_idx]++;
            } else {
                // Per-class registry full (rare). Suppress when HAKMEM_QUIET
                // is set to a non-empty value that does not start with '0'.
                const char* q = getenv("HAKMEM_QUIET");
                if (!(q && *q && *q != '0')) {
                    fprintf(stderr, "HAKMEM: Per-class registry full for class %d! "
                                    "Increase SUPER_REG_PER_CLASS\n", class_idx);
                }
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
    return 1;
}
// Unregister SuperSlab (mutex-protected)
// CRITICAL: Call BEFORE munmap to prevent reader segfault
// Unpublish order: ss = NULL → base = 0 (release) → munmap outside this function
// Phase 8.3: ACE - Try both lg_sizes (we don't know which one was used)
// Phase 6: Registry Optimization - Also remove from per-class registry
//
// Parameters:
//   base - the key that was passed to hak_super_register().
//
// Not finding the entry is not an error (could be a duplicate unregister).
void hak_super_unregister(uintptr_t base) {
    if (!g_super_reg_initialized) return;
    if (base == 0) return;  // 0 marks an empty slot; nothing can be stored under it

    pthread_mutex_lock(&g_super_reg_lock);

    // Step 1: Find and remove from hash table
    SuperSlab* ss = NULL;  // Saved SuperSlab pointer for per-class removal
    int removed = 0;
    for (int lg = 20; lg <= 21 && !removed; lg++) {
        int h = hak_super_hash(base, lg);

        // Probe the complete window. Removal empties slots in place, so an
        // empty slot does NOT prove that the entry is absent further along
        // the chain; stopping there would leave a stale entry published.
        // Every matching slot is cleared so no duplicate survives either.
        for (int i = 0; i < SUPER_MAX_PROBE; i++) {
            SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
            if (e->base != base || e->lg_size != lg) {
                continue;
            }

            // Save SuperSlab pointer BEFORE clearing (for per-class removal)
            SuperSlab* found = atomic_load_explicit(&e->ss, memory_order_acquire);
            if (ss == NULL) {
                ss = found;
            }

            // Clear SuperSlab pointer first (atomic, prevents TOCTOU race)
            atomic_store_explicit(&e->ss, NULL, memory_order_release);

            // Unpublish base (makes entry invisible to readers)
            atomic_store_explicit((_Atomic uintptr_t*)&e->base, 0,
                                  memory_order_release);

            // Clear lg_size (optional cleanup)
            e->lg_size = 0;
            removed = 1;
        }
    }

    // Step 2: Remove from per-class registry (Phase 6: Registry Optimization)
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int class_idx = ss->size_class;
        if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
            int size = g_super_reg_class_size[class_idx];

            // Linear scan to find and remove SuperSlab from per-class array
            for (int i = 0; i < size; i++) {
                if (g_super_reg_by_class[class_idx][i] == ss) {
                    g_super_reg_class_size[class_idx]--;
                    int new_size = g_super_reg_class_size[class_idx];

                    // Move the last element into the gap (O(1) removal,
                    // order doesn't matter)
                    if (i != new_size) {
                        g_super_reg_by_class[class_idx][i] =
                            g_super_reg_by_class[class_idx][new_size];
                    }
                    g_super_reg_by_class[class_idx][new_size] = NULL;
                    break;
                }
            }
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}
// Debug: Get registry statistics
//
// Fills *stats with the table capacity, the number of occupied hash slots and
// the largest probe distance at which an occupied entry is found from its
// hash position. A NULL stats pointer is ignored. The table walk is done
// while holding g_super_reg_lock.
void hak_super_registry_stats(SuperRegStats* stats) {
    if (stats == NULL) {
        return;
    }

    stats->total_slots = SUPER_REG_SIZE;
    stats->used_slots = 0;
    stats->max_probe_depth = 0;

    pthread_mutex_lock(&g_super_reg_lock);

    // Single pass: count the slot and measure its probe depth together.
    for (int slot = 0; slot < SUPER_REG_SIZE; slot++) {
        uintptr_t key = g_super_reg[slot].base;
        if (key == 0) {
            continue;  // empty slot
        }
        stats->used_slots++;

        int key_lg = g_super_reg[slot].lg_size;  // Phase 8.3: Use stored lg_size
        int home = hak_super_hash(key, key_lg);

        // Walk the probe sequence from the hash position until the first
        // slot holding this (base, lg_size) pair; that offset is the depth.
        int depth = 0;
        while (depth < SUPER_MAX_PROBE) {
            int at = (home + depth) & SUPER_REG_MASK;
            if (g_super_reg[at].base == key && g_super_reg[at].lg_size == key_lg) {
                if (depth > stats->max_probe_depth) {
                    stats->max_probe_depth = depth;
                }
                break;
            }
            depth++;
        }
    }

    pthread_mutex_unlock(&g_super_reg_lock);
}