**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After:  1T: 2.41M ✅ | 4T: 4.19M ✅ (1T: +15%、4T: SEGV 解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
99 lines
4.2 KiB
C++
99 lines
4.2 KiB
C++
// hakmem_tiny_slow.inc
|
|
// Slow path allocation implementation
|
|
|
|
// Slow path allocation function
|
|
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
|
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
|
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx) {
|
|
#else
|
|
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx) {
|
|
#endif
|
|
(void)size; // size is already validated by caller
|
|
|
|
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
|
return NULL;
|
|
}
|
|
|
|
// Try refilling from HotMag
|
|
if (g_hotmag_enable && class_idx <= 3) {
|
|
TinyHotMag* hm = &g_tls_hot_mag[class_idx];
|
|
hotmag_try_refill(class_idx, hm);
|
|
void* ptr = hotmag_pop(class_idx);
|
|
if (ptr) { HAK_RET_ALLOC(class_idx, ptr); }
|
|
}
|
|
|
|
// Try TLS list refill
|
|
if (g_tls_list_enable) {
|
|
TinyTLSList* tls = &g_tls_lists[class_idx];
|
|
if (tls->count > 0) {
|
|
void* ptr = tls_list_pop(tls);
|
|
if (ptr) { HAK_RET_ALLOC(class_idx, ptr); }
|
|
// ptr が NULL の場合でも、ここで終了せず後段の Superslab 経路へフォールバックする
|
|
}
|
|
|
|
// Try refilling TLS list from slab
|
|
uint32_t want = tls->refill_low > 0 ? tls->refill_low : 32;
|
|
if (tls_refill_from_tls_slab(class_idx, tls, want) > 0) {
|
|
void* ptr = tls_list_pop(tls);
|
|
if (ptr) { HAK_RET_ALLOC(class_idx, ptr); }
|
|
// ここでも NULL の場合は続行(後段へフォールバック)
|
|
}
|
|
}
|
|
|
|
// Background coalescing/aggregation (ENV gated, very lightweight)
|
|
do {
|
|
// BG Remote Drain (coalescer)
|
|
static int bg_en = -1, bg_period = -1, bg_budget = -1;
|
|
static __thread uint32_t bg_tick[8];
|
|
if (__builtin_expect(bg_en == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_TINY_BG_REMOTE");
|
|
bg_en = (e && *e && *e != '0') ? 1 : 0;
|
|
const char* p = getenv("HAKMEM_TINY_BG_REMOTE_PERIOD");
|
|
bg_period = p ? atoi(p) : 1024;
|
|
if (bg_period <= 0) bg_period = 1024;
|
|
const char* b = getenv("HAKMEM_TINY_BG_REMOTE_BATCH");
|
|
bg_budget = b ? atoi(b) : 4;
|
|
if (bg_budget < 0) bg_budget = 0; if (bg_budget > 64) bg_budget = 64;
|
|
}
|
|
if (bg_en) {
|
|
if ((++bg_tick[class_idx] % (uint32_t)bg_period) == 0u) {
|
|
extern void tiny_remote_bg_drain_step(int class_idx, int budget);
|
|
tiny_remote_bg_drain_step(class_idx, bg_budget);
|
|
}
|
|
}
|
|
// Ready Aggregator (mailbox → ready push)
|
|
static int rdy_en = -1, rdy_period = -1, rdy_budget = -1;
|
|
static __thread uint32_t rdy_tick[8];
|
|
if (__builtin_expect(rdy_en == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_TINY_BG_READY");
|
|
rdy_en = (e && *e && *e != '0') ? 1 : 0;
|
|
const char* p = getenv("HAKMEM_TINY_BG_READY_PERIOD");
|
|
rdy_period = p ? atoi(p) : 1024;
|
|
if (rdy_period <= 0) rdy_period = 1024;
|
|
const char* b = getenv("HAKMEM_TINY_BG_READY_BUDGET");
|
|
rdy_budget = b ? atoi(b) : 1;
|
|
if (rdy_budget < 0) rdy_budget = 0; if (rdy_budget > 8) rdy_budget = 8;
|
|
}
|
|
if (rdy_en) {
|
|
if ((++rdy_tick[class_idx] % (uint32_t)rdy_period) == 0u) {
|
|
extern void tiny_ready_bg_aggregate_step(int class_idx, int mail_budget);
|
|
tiny_ready_bg_aggregate_step(class_idx, rdy_budget);
|
|
}
|
|
}
|
|
} while (0);
|
|
|
|
// Final fallback: allocate from superslab
|
|
void* ss_ptr = hak_tiny_alloc_superslab(class_idx);
|
|
if (ss_ptr) { HAK_RET_ALLOC(class_idx, ss_ptr); }
|
|
tiny_alloc_dump_tls_state(class_idx, "slow_fail", &g_tls_slabs[class_idx]);
|
|
// Optional one-shot debug when final slow path fails
|
|
static int g_alloc_dbg = -1; if (__builtin_expect(g_alloc_dbg == -1, 0)) { const char* e=getenv("HAKMEM_TINY_ALLOC_DEBUG"); g_alloc_dbg = (e && atoi(e)!=0)?1:0; }
|
|
if (g_alloc_dbg) {
|
|
static _Atomic int printed[8]; int exp=0;
|
|
if (atomic_compare_exchange_strong(&printed[class_idx], &exp, 1)) {
|
|
fprintf(stderr, "[ALLOC-SLOW] hak_tiny_alloc_superslab returned NULL class=%d size=%zu\n", class_idx, size);
|
|
}
|
|
}
|
|
return ss_ptr;
|
|
}
|