Files
hakmem/core/tiny_ready.h
Moe Charm (CI) 1da8754d45 CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消
**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV

**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (ゴミ値、未初期化TLS)
```

Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV

**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`

**効果:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV解消)
```

**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00

86 lines
3.3 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_ready.h - Ready List box (per-class, slab-entry hints)
// Purpose: O(1)-ish adopt candidate discovery to bypass deep scans in refill.
// Design: Lock-free ring of encoded slab entries (ss+slab_idx). Best-effort hints.
// Boundary:
// - Producer: pushes at the publish boundary (ss_partial_publish), on the first remote arrival, and on first-free (prev==NULL)
// - Consumer: at the refill boundary (start of tiny_refill_try_fast): pop -> acquire owner -> bind
// A/B: set ENV HAKMEM_TINY_READY=0 to disable
#pragma once
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include "hakmem_tiny.h"
#ifndef TINY_READY_RING
#define TINY_READY_RING 128
#endif
// Per-class ring buffer of encoded slab entries.
// A slot holding 0 is empty: tiny_ready_push() only claims slots whose current
// value is 0, and tiny_ready_pop() swaps 0 back in when it takes an entry.
// Static storage duration means every slot starts out as 0 (empty).
// NOTE(review): this is a `static` definition inside a header, so each
// translation unit that includes this file gets its own private ring —
// confirm the header is only ever included from a single TU.
static _Atomic(uintptr_t) g_ready_ring[TINY_NUM_CLASSES][TINY_READY_RING];
// Per-class rotating counter; tiny_ready_push() bumps it to pick the slot at
// which its probe sequence starts.
static _Atomic(uint32_t) g_ready_rr[TINY_NUM_CLASSES];
// Reports whether the ready list is active (1) or switched off (0).
// The answer is derived from the HAKMEM_TINY_READY environment variable the
// first time this is called and then served from a function-local cache.
// Only a value whose first character is '0' disables the feature; an unset
// variable or any other value leaves it on.
static inline int tiny_ready_enabled(void) {
    static int cached_flag = -1;  // -1: environment has not been consulted yet

    // Common case: the flag was already resolved by an earlier call.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char *env = getenv("HAKMEM_TINY_READY");
    if (env && env[0] == '0') {
        cached_flag = 0;  // explicitly disabled
    } else {
        cached_flag = 1;  // default: enabled
    }
    return cached_flag;
}
// Optional: limit scan width (ENV: HAKMEM_TINY_READY_WIDTH, default TINY_READY_RING)
//
// Returns the number of ring slots to scan, always in [1, TINY_READY_RING]
// (given TINY_READY_RING >= 1). The environment variable is read once, on the
// first call, and the result is cached in a function-local static.
//
// Parsing rules:
//   - unset or empty                      -> TINY_READY_RING
//   - non-numeric, zero or negative       -> TINY_READY_RING
//   - larger than TINY_READY_RING         -> TINY_READY_RING
//   - otherwise                           -> the parsed value
static inline int tiny_ready_width(void) {
    static int w = -1;  // -1: not resolved yet
    if (__builtin_expect(w == -1, 0)) {
        int v = TINY_READY_RING;
        const char *e = getenv("HAKMEM_TINY_READY_WIDTH");
        if (e && *e) {
            // strtol saturates at LONG_MIN/LONG_MAX when the text is out of
            // range, whereas atoi has undefined behavior in that case. The
            // saturated value falls outside (0, TINY_READY_RING) and therefore
            // maps to the default, same as any other out-of-range request.
            long parsed = strtol(e, NULL, 10);
            if (parsed > 0 && parsed < (long)TINY_READY_RING) {
                v = (int)parsed;
            }
        }
        w = v;
    }
    return w;
}
// Encode helpers are declared in main TU; forward here
// Only prototypes appear in this header: the translation unit that includes it
// must also supply the definitions of these static inline functions.
// slab_entry_make() produces the uintptr_t value that tiny_ready_push() stores
// in the ring.
// NOTE(review): slab_entry_ss()/slab_entry_idx() presumably recover the
// SuperSlab pointer and slab index from such a value — confirm against their
// definitions.
static inline uintptr_t slab_entry_make(SuperSlab* ss, int slab_idx);
static inline SuperSlab* slab_entry_ss(uintptr_t ent);
static inline int slab_entry_idx(uintptr_t ent);
// Push: best-effort, tries a few slots, drops when they are occupied (hint-only)
//
// Publishes (ss, slab_idx) as a ready hint for class_idx.
//   class_idx  size-class index; values outside [0, TINY_NUM_CLASSES) are ignored
//   ss         owning SuperSlab; NULL is ignored
//   slab_idx   slab index inside ss; must be in [0, ss_slabs_capacity(ss))
// Does nothing when the ready list is disabled (tiny_ready_enabled() == 0).
// Returns nothing: the entry is silently dropped when no probed slot is free.
static inline void tiny_ready_push(int class_idx, SuperSlab* ss, int slab_idx) {
    if (!tiny_ready_enabled()) return;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) return;
    if (__builtin_expect(ss == NULL || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss), 0)) return;

    uintptr_t ent = slab_entry_make(ss, slab_idx);

    // Confine pushes to the window that tiny_ready_pop() scans. Wrapping at the
    // full ring size would park entries in slots beyond tiny_ready_width(),
    // where they are never consumed and keep the slot occupied forever.
    // With the default width (== TINY_READY_RING) this is the full ring.
    uint32_t width = (uint32_t)tiny_ready_width();
    uint32_t start = atomic_fetch_add_explicit(&g_ready_rr[class_idx], 1u, memory_order_relaxed);

    // Try up to 4 slots to reduce collisions
    for (uint32_t k = 0; k < 4u; k++) {
        uint32_t idx = (start + k) % width;
        uintptr_t expected = 0;  // only claim a slot that is currently empty
        // Strong CAS: each slot is attempted once with no retry loop, so a
        // spurious failure of the weak form would skip a slot that is free.
        // Release ordering on success makes prior writes visible to whoever
        // pops the entry with an acquire exchange.
        if (atomic_compare_exchange_strong_explicit(&g_ready_ring[class_idx][idx], &expected, ent,
                                                    memory_order_release, memory_order_relaxed)) {
            return;
        }
    }
    // Drop if all tried slots were busy (hint ring, loss is acceptable)
}
// Pop any entry; scans ring once (only on refill miss, not on hot path)
//
// Walks the first tiny_ready_width() slots of the ring for class_idx in index
// order, atomically swapping each slot with 0, and returns the first non-zero
// value obtained. Returns 0 when the ready list is disabled, class_idx is out
// of range, or every scanned slot was empty.
static inline uintptr_t tiny_ready_pop(int class_idx) {
    const uintptr_t none = (uintptr_t)0;

    if (!tiny_ready_enabled()) {
        return none;
    }
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        return none;
    }

    const int limit = tiny_ready_width();
    _Atomic(uintptr_t) *row = g_ready_ring[class_idx];
    int slot = 0;
    while (slot < limit) {
        // Take whatever the slot holds and leave it empty in one atomic step.
        uintptr_t taken = atomic_exchange_explicit(&row[slot], (uintptr_t)0, memory_order_acq_rel);
        if (taken != 0) {
            return taken;
        }
        ++slot;
    }
    return none;
}