**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
150 lines
5.5 KiB
C
150 lines
5.5 KiB
C
// hakmem_tiny_refill_p0.inc.h
// ChatGPT Pro P0: Complete Batch Refill (for SLL)
//
// Purpose: Optimize sll_refill_small_from_ss with batch carving
// Based on: tls_refill_from_tls_slab (hakmem_tiny_tls_ops.h:115-126)
//
// Key optimization: ss_active_inc × 64 → ss_active_add × 1
//
// Maintains: Existing g_tls_sll_head fast path (no changes to hot path!)
//
// Enable P0 by default for testing. A definition supplied earlier (for example
// -DHAKMEM_TINY_P0_BATCH_REFILL=0 on the compiler command line) takes precedence;
// set it to 0 to disable.
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
#define HAKMEM_TINY_P0_BATCH_REFILL 1
#endif
|
||
|
||
#ifndef HAKMEM_TINY_REFILL_P0_INC_H
|
||
#define HAKMEM_TINY_REFILL_P0_INC_H
|
||
|
||
// Debug counters (compile-time gated by HAKMEM_DEBUG_COUNTERS).
// All arrays are defined elsewhere and are indexed by class_idx in
// sll_refill_batch_from_ss().
#if HAKMEM_DEBUG_COUNTERS
// Incremented once per call that reaches the end of sll_refill_batch_from_ss()
extern unsigned long long g_rf_hit_slab[];
// Diagnostic counters for refill early returns
extern unsigned long long g_rf_early_no_ss[];     // early return: !g_use_superslab
extern unsigned long long g_rf_early_no_meta[];   // early return: tls->meta is NULL
extern unsigned long long g_rf_early_no_room[];   // early return: room <= 0
extern unsigned long long g_rf_early_want_zero[]; // early return: want == 0
#endif
|
||
|
||
// Refill TLS SLL from SuperSlab with batch carving (P0 optimization)
|
||
#include "tiny_refill_opt.h"
|
||
static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||
if (!g_use_superslab || max_take <= 0) {
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
if (!g_use_superslab) g_rf_early_no_ss[class_idx]++;
|
||
#endif
|
||
return 0;
|
||
}
|
||
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
if (!tls->ss) {
|
||
// Try to obtain a SuperSlab for this class
|
||
if (superslab_refill(class_idx) == NULL) return 0;
|
||
}
|
||
TinySlabMeta* meta = tls->meta;
|
||
if (!meta) {
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_rf_early_no_meta[class_idx]++;
|
||
#endif
|
||
return 0;
|
||
}
|
||
|
||
// Compute how many we can actually push into SLL without overflow
|
||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||
if (room <= 0) {
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_rf_early_no_room[class_idx]++;
|
||
#endif
|
||
return 0;
|
||
}
|
||
|
||
// For hot tiny classes (0..3), allow an env override to increase batch size
|
||
uint32_t want = (uint32_t)max_take;
|
||
if (class_idx <= 3) {
|
||
static int g_hot_override = -2; // -2 = uninitialized, -1 = no override, >0 = value
|
||
if (__builtin_expect(g_hot_override == -2, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_HOT");
|
||
int v = (e && *e) ? atoi(e) : -1;
|
||
if (v < 0) v = -1; if (v > 256) v = 256; // clamp
|
||
g_hot_override = v;
|
||
}
|
||
if (g_hot_override > 0) want = (uint32_t)g_hot_override;
|
||
} else {
|
||
// Mid classes (>=4): optional override for batch size
|
||
static int g_mid_override = -2; // -2 = uninitialized, -1 = no override, >0 = value
|
||
if (__builtin_expect(g_mid_override == -2, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_MID");
|
||
int v = (e && *e) ? atoi(e) : -1;
|
||
if (v < 0) v = -1; if (v > 256) v = 256; // clamp
|
||
g_mid_override = v;
|
||
}
|
||
if (g_mid_override > 0) want = (uint32_t)g_mid_override;
|
||
}
|
||
if (want > (uint32_t)room) want = (uint32_t)room;
|
||
if (want == 0) {
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_rf_early_want_zero[class_idx]++;
|
||
#endif
|
||
return 0;
|
||
}
|
||
|
||
size_t bs = g_tiny_class_sizes[class_idx];
|
||
int total_taken = 0;
|
||
|
||
// === P0 Batch Carving Loop ===
|
||
while (want > 0) {
|
||
// Handle freelist items first (usually 0)
|
||
TinyRefillChain chain;
|
||
uint32_t from_freelist = trc_pop_from_freelist(meta, want, &chain);
|
||
if (from_freelist > 0) {
|
||
trc_splice_to_sll(class_idx, &chain, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
|
||
// NOTE: from_freelist は既に used/active 計上済みのブロックの再循環。active 追加や
|
||
// nonempty_mask クリアは不要(クリアすると後続freeで立たない)。
|
||
extern unsigned long long g_rf_freelist_items[];
|
||
g_rf_freelist_items[class_idx] += from_freelist;
|
||
total_taken += from_freelist;
|
||
want -= from_freelist;
|
||
if (want == 0) break;
|
||
}
|
||
|
||
// === Linear Carve (P0 Key Optimization!) ===
|
||
if (meta->used >= meta->capacity) {
|
||
// Slab exhausted, try to get another
|
||
if (superslab_refill(class_idx) == NULL) break;
|
||
meta = tls->meta;
|
||
if (!meta) break;
|
||
continue;
|
||
}
|
||
|
||
uint32_t available = meta->capacity - meta->used;
|
||
uint32_t batch = want;
|
||
if (batch > available) batch = available;
|
||
if (batch == 0) break;
|
||
|
||
// Get slab base
|
||
uint8_t* slab_base = tls->slab_base ? tls->slab_base
|
||
: tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||
TinyRefillChain carve;
|
||
trc_linear_carve(slab_base, bs, meta, batch, &carve);
|
||
trc_splice_to_sll(class_idx, &carve, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
|
||
// FIX: Update SuperSlab active counter (was missing!)
|
||
ss_active_add(tls->ss, batch);
|
||
extern unsigned long long g_rf_carve_items[];
|
||
g_rf_carve_items[class_idx] += batch;
|
||
|
||
total_taken += batch;
|
||
want -= batch;
|
||
}
|
||
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
// Track successful SLL refills from SuperSlab (compile-time gated)
|
||
// NOTE: Increment unconditionally to verify counter is working
|
||
g_rf_hit_slab[class_idx]++;
|
||
#endif
|
||
|
||
return total_taken;
|
||
}
|
||
|
||
#endif // HAKMEM_TINY_REFILL_P0_INC_H
|