Files
hakmem/core/hakmem_tiny_sfc.c

314 lines
11 KiB
C
Raw Normal View History

CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
// hakmem_tiny_sfc.c - Box 5-NEW: Super Front Cache (SFC) Implementation
// Purpose: Slow path (refill/spill/config/stats), not inline
// Fast path is in tiny_alloc_fast_sfc.inc.h (inline)
#include "tiny_alloc_fast_sfc.inc.h"
#include "hakmem_tiny.h"
#include "hakmem_tiny_config.h"
#include "hakmem_tiny_superslab.h"
#include "tiny_tls.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
// ============================================================================
// Box 5-NEW: TLS Variables (defined here, extern in header)
// ============================================================================
// Per-thread SFC state: one slot per tiny size class, explicitly zero-initialized.
// NOTE(review): presumably the list head and cached-block count of each class as
// used by the inline fast path in tiny_alloc_fast_sfc.inc.h — confirm there.
__thread void* g_sfc_head[TINY_NUM_CLASSES] = {NULL};
__thread uint32_t g_sfc_count[TINY_NUM_CLASSES] = {0};
// Non-TLS: per-class capacity shared by all threads.
// Written by sfc_init() and sfc_set_config(); read by sfc_get_config() and sfc_print_stats().
uint32_t g_sfc_capacity[TINY_NUM_CLASSES] = {0};
// ============================================================================
// Box 5-NEW: Statistics (compile-time gated)
// ============================================================================
#if HAKMEM_DEBUG_COUNTERS
// Per-class counters, process-wide (not __thread). Only compiled in when
// HAKMEM_DEBUG_COUNTERS is non-zero; sfc_refill()/sfc_spill() bump the call counters.
sfc_stats_t g_sfc_stats[TINY_NUM_CLASSES] = {0};
#endif
// ============================================================================
// Box 5-NEW: Global Config (from ENV)
// ============================================================================
// Master switch. Defaults to 0 (OFF, for A/B testing); sfc_init() sets it to 1 when
// HAKMEM_SFC_ENABLE is set to a non-empty value that does not start with '0'.
int g_sfc_enabled = 0;
// Defaults applied to every class; capacity and refill may be replaced by sfc_init()
// from HAKMEM_SFC_CAPACITY / HAKMEM_SFC_REFILL_COUNT. The spill threshold has no
// ENV knob in this file and is only reported by sfc_get_config().
static int g_sfc_default_capacity = SFC_DEFAULT_CAPACITY;
static int g_sfc_default_refill = SFC_DEFAULT_REFILL_COUNT;
static int g_sfc_default_spill_thresh = SFC_DEFAULT_SPILL_THRESH;
// Per-class overrides (0 = use default).
// Capacity overrides come from ENV in sfc_init(); refill overrides come from ENV in
// sfc_init() or from sfc_set_config().
static int g_sfc_capacity_override[TINY_NUM_CLASSES] = {0};
static int g_sfc_refill_override[TINY_NUM_CLASSES] = {0};
// ============================================================================
// Box 5-NEW: Initialization
// ============================================================================
// Read an integer from the environment variable `name` and store it in *out
// when it lies within [lo, hi].
//
// Returns 1 if *out was updated, 0 if the variable is unset, empty, or out of range
// (in which case *out is left untouched).
//
// Parsing is as lenient as atoi(): leading whitespace and trailing non-digit
// characters are tolerated and a non-numeric string parses as 0. Unlike atoi(),
// overflow is well-defined: strtol() saturates and the range check rejects it.
static int sfc_env_int_in_range(const char* name, int lo, int hi, int* out) {
    const char* text = getenv(name);
    if (!text || !*text) {
        return 0;
    }
    long value = strtol(text, NULL, 10);
    if (value < lo || value > hi) {
        return 0;
    }
    *out = (int)value;
    return 1;
}

// Initialize the Super Front Cache configuration from the environment.
//
// ENV variables read:
//   HAKMEM_SFC_ENABLE                  - non-empty and not starting with '0' enables SFC
//   HAKMEM_SFC_CAPACITY                - default capacity, [SFC_MIN_CAPACITY, SFC_MAX_CAPACITY]
//   HAKMEM_SFC_REFILL_COUNT            - default refill count, [8, 256]
//   HAKMEM_SFC_CAPACITY_CLASS<N>       - per-class capacity override, N in [0, TINY_NUM_CLASSES)
//   HAKMEM_SFC_REFILL_COUNT_CLASS<N>   - per-class refill override, N in [0, TINY_NUM_CLASSES)
//   HAKMEM_SFC_DEBUG                   - non-empty and not starting with '0' prints a one-shot summary
//
// Out-of-range or unset values are ignored and the previous setting is kept.
// When SFC stays disabled the function returns before touching any other state.
// Safe to call more than once: the debug summary and the atexit() hook are one-shot.
void sfc_init(void) {
    // Accepted range for refill counts (same bounds sfc_set_config() enforces).
    const int refill_min = 8;
    const int refill_max = 256;

    // Parse ENV: HAKMEM_SFC_ENABLE
    const char* env_enable = getenv("HAKMEM_SFC_ENABLE");
    if (env_enable && *env_enable && *env_enable != '0') {
        g_sfc_enabled = 1;
    }
    if (!g_sfc_enabled) {
        // SFC disabled, skip initialization
        return;
    }

    // Defaults for all classes.
    (void)sfc_env_int_in_range("HAKMEM_SFC_CAPACITY",
                               SFC_MIN_CAPACITY, SFC_MAX_CAPACITY,
                               &g_sfc_default_capacity);
    (void)sfc_env_int_in_range("HAKMEM_SFC_REFILL_COUNT",
                               refill_min, refill_max,
                               &g_sfc_default_refill);

    // Per-class overrides, then resolve the effective capacity for each class.
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        char var[64];

        snprintf(var, sizeof(var), "HAKMEM_SFC_CAPACITY_CLASS%d", cls);
        (void)sfc_env_int_in_range(var, SFC_MIN_CAPACITY, SFC_MAX_CAPACITY,
                                   &g_sfc_capacity_override[cls]);

        snprintf(var, sizeof(var), "HAKMEM_SFC_REFILL_COUNT_CLASS%d", cls);
        (void)sfc_env_int_in_range(var, refill_min, refill_max,
                                   &g_sfc_refill_override[cls]);

        // Override wins when set (> 0); otherwise fall back to the default.
        if (g_sfc_capacity_override[cls] > 0) {
            g_sfc_capacity[cls] = g_sfc_capacity_override[cls];
        } else {
            g_sfc_capacity[cls] = g_sfc_default_capacity;
        }
    }

    // One-shot debug log
    static int debug_printed = 0;
    if (!debug_printed) {
        debug_printed = 1;
        const char* env_debug = getenv("HAKMEM_SFC_DEBUG");
        if (env_debug && *env_debug && *env_debug != '0') {
            fprintf(stderr, "[SFC] Initialized: enabled=%d, default_cap=%d, default_refill=%d\n",
                    g_sfc_enabled, g_sfc_default_capacity, g_sfc_default_refill);
            for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
                if (g_sfc_capacity_override[cls] > 0 || g_sfc_refill_override[cls] > 0) {
                    fprintf(stderr, "[SFC] Class %d: cap=%u, refill_override=%d\n",
                            cls, g_sfc_capacity[cls], g_sfc_refill_override[cls]);
                }
            }
        }
    }

    // Ensure stats (if requested) are printed at process exit.
    // Register the hook only once: atexit() would otherwise queue sfc_shutdown
    // again on every repeated sfc_init(), duplicating the stats dump and eating
    // into the limited number of atexit slots. If registration fails, a later
    // call gets another chance.
    static int shutdown_registered = 0;
    if (!shutdown_registered && atexit(sfc_shutdown) == 0) {
        shutdown_registered = 1;
    }
}
// Exit hook (registered with atexit() by sfc_init).
// When built with HAKMEM_DEBUG_COUNTERS and HAKMEM_SFC_STATS_DUMP is set to a
// non-empty value that does not begin with '0', dumps the SFC statistics.
// In every other case this is a no-op.
void sfc_shutdown(void) {
#if HAKMEM_DEBUG_COUNTERS
    const char* dump_flag = getenv("HAKMEM_SFC_STATS_DUMP");
    const int dump_requested =
        dump_flag && dump_flag[0] != '\0' && dump_flag[0] != '0';
    if (dump_requested) {
        sfc_print_stats();
    }
#endif
    // Nothing to release here (original note: TLS memory is freed by the OS).
}
// ============================================================================
// Box 5-NEW: Refill (Slow Path) - STUB (real logic in hakmem.c)
// ============================================================================
// Refill slow path - placeholder only.
// Per the original note, the real refill logic lives inline in hakmem.c malloc()
// (to avoid LTO issues); this entry point exists for future modular refactoring.
//
// cls          - size class index; out-of-range values are ignored
// target_count - unused
// Returns 0 always. With HAKMEM_DEBUG_COUNTERS, a call with a valid class while
// SFC is enabled bumps that class's refill_calls counter.
int sfc_refill(int cls, int target_count) {
    (void)target_count;

    const int cls_in_range = (cls >= 0) && (cls < TINY_NUM_CLASSES);
    if (cls_in_range && g_sfc_enabled) {
#if HAKMEM_DEBUG_COUNTERS
        g_sfc_stats[cls].refill_calls++;
#endif
    }
    return 0;  // Actual refill happens inline in hakmem.c
}
// ============================================================================
// Box 5-NEW: Spill (Slow Path) - STUB (real logic in hakmem.c)
// ============================================================================
// Spill slow path - placeholder only.
// Per the original note, the real spill logic lives inline in hakmem.c free()
// (to avoid LTO issues); this entry point exists for future modular refactoring.
//
// cls         - size class index; out-of-range values are ignored
// spill_count - unused
// Returns 0 always. With HAKMEM_DEBUG_COUNTERS, a call with a valid class while
// SFC is enabled bumps that class's spill_calls counter.
int sfc_spill(int cls, int spill_count) {
    (void)spill_count;

    const int cls_in_range = (cls >= 0) && (cls < TINY_NUM_CLASSES);
    if (cls_in_range && g_sfc_enabled) {
#if HAKMEM_DEBUG_COUNTERS
        g_sfc_stats[cls].spill_calls++;
#endif
    }
    return 0;  // Actual spill happens inline in hakmem.c
}
// ============================================================================
// Box 5-NEW: Configuration API
// ============================================================================
// Return the effective SFC configuration for size class `cls`.
// An out-of-range class yields an all-zero config.
// refill_count is the per-class override when one is set (> 0), otherwise the
// global default; spill_thresh is always the global default.
sfc_config_t sfc_get_config(int cls) {
    sfc_config_t result = {0};

    if (cls < 0 || cls >= TINY_NUM_CLASSES) {
        return result;
    }

    int refill = g_sfc_refill_override[cls];
    if (refill <= 0) {
        refill = g_sfc_default_refill;
    }

    result.capacity = g_sfc_capacity[cls];
    result.refill_count = refill;
    result.spill_thresh = g_sfc_default_spill_thresh;
    return result;
}
// Apply a configuration to size class `cls`.
// Each field is validated independently; a field outside its accepted range is
// silently ignored and the current setting is kept:
//   capacity     - must be within [SFC_MIN_CAPACITY, SFC_MAX_CAPACITY]
//   refill_count - must be within [8, 256]
//   spill_thresh - accepted but currently not stored anywhere
// Out-of-range `cls` makes the whole call a no-op.
void sfc_set_config(int cls, sfc_config_t cfg) {
    const int cls_in_range = (cls >= 0) && (cls < TINY_NUM_CLASSES);
    if (!cls_in_range) {
        return;
    }

    const int capacity_ok =
        cfg.capacity >= SFC_MIN_CAPACITY && cfg.capacity <= SFC_MAX_CAPACITY;
    const int refill_ok =
        cfg.refill_count >= 8 && cfg.refill_count <= 256;

    if (capacity_ok) {
        g_sfc_capacity[cls] = cfg.capacity;
    }
    if (refill_ok) {
        g_sfc_refill_override[cls] = cfg.refill_count;
    }

    // Spill threshold is reserved for future use; nothing consumes it yet.
    (void)cfg.spill_thresh;
}
// ============================================================================
// Box 5-NEW: Statistics API
// ============================================================================
#if HAKMEM_DEBUG_COUNTERS
// Return a copy of the counters for size class `cls`.
// An out-of-range class yields an all-zero snapshot.
sfc_stats_t sfc_get_stats(int cls) {
    if (cls < 0 || cls >= TINY_NUM_CLASSES) {
        sfc_stats_t empty = {0};
        return empty;
    }
    return g_sfc_stats[cls];
}
void sfc_reset_stats(int cls) {
if (cls >= 0 && cls < TINY_NUM_CLASSES) {
memset(&g_sfc_stats[cls], 0, sizeof(sfc_stats_t));
}
}
// Write a human-readable SFC statistics report to stderr.
// Prints one line per size class that recorded at least one allocation
// (hits + misses > 0), then - if any allocations were recorded at all - a summary
// with overall hit rate, refill frequency, and pass/fail against the stated targets
// (hit rate >= 95%, refill frequency < 0.03%). Classes with no allocations are
// skipped and do not contribute to the totals.
void sfc_print_stats(void) {
    uint64_t sum_hits = 0;
    uint64_t sum_misses = 0;
    uint64_t sum_refill_calls = 0;
    uint64_t sum_refill_blocks = 0;

    fprintf(stderr, "\n=== SFC Statistics (Box 5-NEW) ===\n");

    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        sfc_stats_t* st = &g_sfc_stats[cls];
        uint64_t allocs = st->alloc_hits + st->alloc_misses;

        if (allocs != 0) {
            sum_hits += st->alloc_hits;
            sum_misses += st->alloc_misses;
            sum_refill_calls += st->refill_calls;
            sum_refill_blocks += st->refill_blocks;

            // Percentages relative to this class's allocation count.
            double hit_pct = (double)st->alloc_hits / allocs * 100.0;
            double refill_pct = (double)st->refill_calls / allocs * 100.0;

            fprintf(stderr, "Class %d (%3zu B): allocs=%llu, hit_rate=%.2f%%, "
                            "refills=%llu (%.4f%%), spills=%llu, cap=%u\n",
                    cls, g_tiny_class_sizes[cls],
                    (unsigned long long)allocs, hit_pct,
                    (unsigned long long)st->refill_calls, refill_pct,
                    (unsigned long long)st->spill_calls,
                    g_sfc_capacity[cls]);
        }
    }

    // Summary
    uint64_t all_allocs = sum_hits + sum_misses;
    if (all_allocs > 0) {
        double overall_hit_pct = (double)sum_hits / all_allocs * 100.0;
        double overall_refill_pct = (double)sum_refill_calls / all_allocs * 100.0;

        // Average refill size; 0.0 when no refill was ever recorded.
        double blocks_per_refill = 0.0;
        if (sum_refill_calls > 0) {
            blocks_per_refill = (double)sum_refill_blocks / sum_refill_calls;
        }

        fprintf(stderr, "\n=== SFC Summary ===\n");
        fprintf(stderr, "Total allocs: %llu\n", (unsigned long long)all_allocs);
        fprintf(stderr, "Overall hit rate: %.2f%% (target: >95%%)\n", overall_hit_pct);
        fprintf(stderr, "Refill frequency: %.4f%% (target: <0.03%%)\n", overall_refill_pct);
        fprintf(stderr, "Refill calls: %llu (target: <50K for 4M ops/s workload)\n",
                (unsigned long long)sum_refill_calls);
        fprintf(stderr, "Refill blocks: %llu (avg %.1f blocks/refill)\n",
                (unsigned long long)sum_refill_blocks,
                blocks_per_refill);

        // Check targets
        if (overall_hit_pct >= 95.0) {
            fprintf(stderr, "✅ Hit rate target achieved!\n");
        } else {
            fprintf(stderr, "⚠️ Hit rate below target (increase capacity?)\n");
        }
        if (overall_refill_pct < 0.03) {
            fprintf(stderr, "✅ Refill frequency target achieved (-98.5%% reduction)!\n");
        } else {
            fprintf(stderr, "⚠️ Refill frequency above target (increase refill_count?)\n");
        }
    }

    fprintf(stderr, "===========================\n\n");
}
#endif // HAKMEM_DEBUG_COUNTERS
// ============================================================================
// End of hakmem_tiny_sfc.c
// ============================================================================