**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
73 lines
2.6 KiB
C
73 lines
2.6 KiB
C
// hak_kpi_util.inc.h — KPI measurement helpers (Linux / non-Linux)
|
|
#ifndef HAK_KPI_UTIL_INC_H
|
|
#define HAK_KPI_UTIL_INC_H
|
|
|
|
#ifdef __linux__
|
|
// Latency histogram backing the P50/P95/P99 estimates.
// calculate_percentile() walks the buckets in index order and reports
// (bucket index * 10) as the latency in ns.
#define LATENCY_BUCKETS 100
static uint64_t g_latency_histogram[LATENCY_BUCKETS];  // per-bucket sample counts
static uint64_t g_latency_samples = 0;                 // total sample count used to derive percentile ranks

// Baseline readings that hak_get_kpi() subtracts from the current values.
// NOTE(review): expected to be captured at init; the code that stores them
// is not in this section of the file.
static uint64_t g_baseline_soft_pf = 0;  // minor (soft) page faults
static uint64_t g_baseline_hard_pf = 0;  // major (hard) page faults
static uint64_t g_baseline_rss_kb = 0;   // resident set size, in KB
|
|
|
|
// Read this process's cumulative minor (soft) and major (hard) page-fault
// counters from /proc/self/stat.
//
// soft_pf, hard_pf: output pointers. Both are always written; they are set to
// 0 when the file cannot be opened, read, or parsed.
static void get_page_faults(uint64_t* soft_pf, uint64_t* hard_pf) {
    *soft_pf = 0;
    *hard_pf = 0;

    FILE* f = fopen("/proc/self/stat", "r");
    if (!f) return;

    // The counters we need are among the first dozen fields, so a fixed
    // buffer is enough even if the tail of the record gets truncated.
    char buf[1024];
    size_t len = fread(buf, 1, sizeof buf - 1, f);
    fclose(f);
    if (len == 0) return;
    buf[len] = '\0';

    // Field 2 (comm) is wrapped in parentheses and may itself contain spaces
    // or ')' characters, so a plain "%s" would mis-split the record (and an
    // unbounded "%s" could overrun its buffer). Resume parsing after the LAST
    // ')' instead.
    const char* rest = NULL;
    for (const char* p = buf; *p != '\0'; p++) {
        if (*p == ')') rest = p + 1;
    }
    if (!rest) return;

    // Fields following comm:
    //   state ppid pgrp session tty_nr tpgid flags minflt cminflt majflt
    unsigned long minflt = 0;
    unsigned long majflt = 0;
    int parsed = sscanf(rest, " %*c %*d %*d %*d %*d %*d %*u %lu %*u %lu",
                        &minflt, &majflt);
    if (parsed != 2) return;

    *soft_pf = minflt;
    *hard_pf = majflt;
}
|
|
|
|
// Return this process's resident set size in KB, computed from the second
// field of /proc/self/statm (resident pages) times the system page size.
//
// Returns 0 when the file cannot be opened or parsed, or when the page size
// cannot be determined.
static uint64_t get_rss_kb(void) {
    FILE* f = fopen("/proc/self/statm", "r");
    if (!f) return 0;

    unsigned long size = 0;      // field 1: total program size in pages (unused)
    unsigned long resident = 0;  // field 2: resident pages
    int parsed = fscanf(f, "%lu %lu", &size, &resident);
    fclose(f);
    (void)size;
    // Without this check a failed parse would read an indeterminate value.
    if (parsed != 2) return 0;

    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) return 0;  // sysconf() returns -1 on error

    // Widen before multiplying so the product cannot wrap where
    // unsigned long is 32 bits.
    return ((uint64_t)resident * (uint64_t)page_size) / 1024;  // bytes -> KB
}
|
|
|
|
static uint64_t calculate_percentile(double percentile) {
|
|
if (g_latency_samples == 0) return 0;
|
|
uint64_t target = (uint64_t)(g_latency_samples * percentile);
|
|
uint64_t cumulative = 0;
|
|
for (size_t i = 0; i < LATENCY_BUCKETS; i++) {
|
|
cumulative += g_latency_histogram[i];
|
|
if (cumulative >= target) return i * 10; // Return bucket midpoint (ns)
|
|
}
|
|
return (LATENCY_BUCKETS - 1) * 10;
|
|
}
|
|
|
|
// Implement hak_get_kpi()
|
|
void hak_get_kpi(hak_kpi_t* out) {
|
|
memset(out, 0, sizeof(hak_kpi_t));
|
|
// Latency (from histogram)
|
|
out->p50_alloc_ns = calculate_percentile(0.50);
|
|
out->p95_alloc_ns = calculate_percentile(0.95);
|
|
out->p99_alloc_ns = calculate_percentile(0.99);
|
|
// Page Faults (delta from baseline)
|
|
uint64_t soft_pf, hard_pf; get_page_faults(&soft_pf, &hard_pf);
|
|
out->soft_page_faults = soft_pf - g_baseline_soft_pf;
|
|
out->hard_page_faults = hard_pf - g_baseline_hard_pf;
|
|
// RSS (delta from baseline, in MB)
|
|
uint64_t rss_kb = get_rss_kb();
|
|
int64_t rss_delta_kb = (int64_t)rss_kb - (int64_t)g_baseline_rss_kb;
|
|
out->rss_delta_mb = rss_delta_kb / 1024;
|
|
}
|
|
|
|
#else
|
|
// Non-Linux: stub implementation
|
|
void hak_get_kpi(hak_kpi_t* out) { memset(out, 0, sizeof(hak_kpi_t)); }
|
|
#endif
|
|
|
|
#endif // HAK_KPI_UTIL_INC_H
|
|
|