Files
hakmem/core/box/hak_kpi_util.inc.h

73 lines
2.6 KiB
C
Raw Normal View History

CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
// hak_kpi_util.inc.h — KPI measurement helpers (Linux / non-Linux)
#ifndef HAK_KPI_UTIL_INC_H
#define HAK_KPI_UTIL_INC_H
#ifdef __linux__
// Latency histogram (simple buckets for P50/P95/P99).
// calculate_percentile() walks these buckets in index order and reports
// bucket i as the value i * 10.
// NOTE(review): nothing in the visible part of this file writes to the
// histogram or the sample counter; presumably the code that includes this
// fragment records samples — confirm.
#define LATENCY_BUCKETS 100
static uint64_t g_latency_histogram[LATENCY_BUCKETS];
static uint64_t g_latency_samples = 0;
// Baseline readings (page faults and RSS) that hak_get_kpi() subtracts from
// the current values so that it reports deltas.
// NOTE(review): described as captured "at init"; the assignment is not
// visible in this file — confirm where the baselines are set.
static uint64_t g_baseline_soft_pf = 0;
static uint64_t g_baseline_hard_pf = 0;
static uint64_t g_baseline_rss_kb = 0;
// Read the process's cumulative page-fault counters from /proc/self/stat.
//
//   soft_pf  receives minflt (10th field of the stat record)
//   hard_pf  receives majflt (12th field of the stat record)
//
// Both outputs are set to 0 when the file cannot be opened, read or parsed.
//
// The comm field (2nd field) is wrapped in parentheses and may itself contain
// spaces or ')' characters, so splitting on whitespace from the start of the
// record is unreliable. Every field after comm is numeric or a single state
// letter, so the LAST ')' in the record marks the end of comm and parsing
// resumes from there.
static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf) {
    *soft_pf = 0;
    *hard_pf = 0;

    FILE* f = fopen("/proc/self/stat", "r");
    if (!f) return;

    // The fields of interest sit near the start of the record, so a bounded
    // read is sufficient even if the tail of the record gets truncated.
    char buf[1024];
    size_t len = fread(buf, 1, sizeof(buf) - 1, f);
    fclose(f);
    buf[len] = '\0';

    const char* comm_end = strrchr(buf, ')');
    if (!comm_end) return;

    // Fields after comm:
    //   state ppid pgrp session tty_nr tpgid flags minflt cminflt majflt ...
    unsigned long minflt = 0;
    unsigned long majflt = 0;
    int parsed = sscanf(comm_end + 1,
                        " %*c %*d %*d %*d %*d %*d %*u %lu %*lu %lu",
                        &minflt, &majflt);
    if (parsed != 2) return;

    *soft_pf = minflt;
    *hard_pf = majflt;
}
// Return the resident set size of the current process in KB, computed from
// the second field of /proc/self/statm (resident pages) times the page size.
//
// Returns 0 when the file cannot be opened or parsed, or when the page size
// cannot be determined.
static uint64_t get_rss_kb(void) {
    FILE* f = fopen("/proc/self/statm", "r");
    if (!f) return 0;

    unsigned long size = 0;
    unsigned long resident = 0;
    int parsed = fscanf(f, "%lu %lu", &size, &resident);
    fclose(f);
    // Do not use `resident` unless both leading fields were converted.
    if (parsed != 2) return 0;

    // sysconf() reports failure as -1; never feed that into the product.
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) return 0;

    // Multiply in 64 bits so the product cannot wrap where long is 32 bits.
    return ((uint64_t)resident * (uint64_t)page_size) / 1024;
}
// Estimate a latency percentile from g_latency_histogram.
//
//   percentile  fraction of samples, e.g. 0.95 for P95; values outside
//               [0, 1] (including NaN) are clamped into that range.
//
// Returns the lower edge (bucket index * 10) of the first bucket at which
// the cumulative count reaches the requested rank. Returns 0 when no samples
// have been recorded, and the lower edge of the last bucket when the
// histogram holds fewer counts than the requested rank.
static uint64_t calculate_percentile(double percentile) {
    if (g_latency_samples == 0) return 0;

    // Rank of the requested sample, kept within [1, g_latency_samples].
    // A rank of 0 would be satisfied by the very first bucket even when that
    // bucket is empty, reporting 0 no matter where the samples actually are.
    uint64_t target;
    if (!(percentile > 0.0)) {
        target = 1;                      // also catches NaN and negatives
    } else if (percentile >= 1.0) {
        target = g_latency_samples;
    } else {
        target = (uint64_t)((double)g_latency_samples * percentile);
        if (target == 0) target = 1;
    }

    uint64_t cumulative = 0;
    for (size_t i = 0; i < LATENCY_BUCKETS; i++) {
        cumulative += g_latency_histogram[i];
        if (cumulative >= target) return (uint64_t)i * 10;
    }
    return (uint64_t)(LATENCY_BUCKETS - 1) * 10;
}
// hak_get_kpi() — Linux implementation.
// Zero-fills *out, then stores the P50/P95/P99 latency estimates, the
// page-fault counts relative to the baseline, and the RSS change in MB.
void hak_get_kpi(hak_kpi_t* out) {
    memset(out, 0, sizeof(*out));

    // Latency percentiles taken from the histogram.
    out->p50_alloc_ns = calculate_percentile(0.50);
    out->p95_alloc_ns = calculate_percentile(0.95);
    out->p99_alloc_ns = calculate_percentile(0.99);

    // Page faults: current counters minus the baseline.
    uint64_t minor_now = 0;
    uint64_t major_now = 0;
    get_page_faults(&minor_now, &major_now);
    out->soft_page_faults = minor_now - g_baseline_soft_pf;
    out->hard_page_faults = major_now - g_baseline_hard_pf;

    // RSS: signed difference against the baseline in KB, then scaled to MB
    // (the result is negative when the current RSS is below the baseline).
    const int64_t rss_now_kb = (int64_t)get_rss_kb();
    const int64_t rss_base_kb = (int64_t)g_baseline_rss_kb;
    out->rss_delta_mb = (rss_now_kb - rss_base_kb) / 1024;
}
#else
// Non-Linux stub: hak_get_kpi() only zero-fills *out.
void hak_get_kpi(hak_kpi_t* out) {
    memset(out, 0, sizeof(*out));
}
#endif
#endif // HAK_KPI_UTIL_INC_H