Files
hakmem/core/box/pool_mid_tc.inc.h
Moe Charm (CI) 1da8754d45 CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消
**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV

**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (ゴミ値、未初期化TLS)
```

Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV

**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`

**効果:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV解消)
```

**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00

98 lines
3.7 KiB
C

// pool_mid_tc.inc.h — Box: Mid Transfer Cache (per-thread inbox)
#ifndef POOL_MID_TC_INC_H
#define POOL_MID_TC_INC_H
// Per-thread inbox: one lock-free singly-linked LIFO head per size class.
// Remote threads push freed blocks here (mid_tc_push); the owning thread
// detaches the whole list at once in mid_tc_drain_into_tls.
typedef struct MidTC {
atomic_uintptr_t inbox[POOL_NUM_CLASSES]; // list head cast from PoolBlock* (0 = empty)
} MidTC;
#define MID_TC_BUCKETS 1024 // registry bucket count; must remain a power of two (masked in mid_tc_hash)
// Registry node mapping a thread id to its MidTC; nodes are chained per bucket
// and are never removed once inserted.
typedef struct MidTCEntry { uint64_t tid; MidTC* tc; struct MidTCEntry* next; } MidTCEntry;
static pthread_mutex_t g_mid_tc_mu[MID_TC_BUCKETS]; // one mutex per registry bucket
static MidTCEntry* g_mid_tc_head[MID_TC_BUCKETS]; // per-bucket chain of registered threads
static __thread MidTC* t_mid_tc = NULL; // this thread's inbox, lazily created by mid_tc_get()
static int g_tc_enabled = 1; // env: HAKMEM_TC_ENABLE (default 1)
static int g_tc_drain_unbounded = 1; // env: HAKMEM_TC_UNBOUNDED (default 1)
static int g_tc_drain_max = 0; // env: HAKMEM_TC_DRAIN_MAX (0=unbounded)
static int g_tc_drain_trigger = 2; // env: HAKMEM_TC_DRAIN_TRIGGER (ring->top < trigger)
// Map a thread id to a registry bucket. Uses the MurmurHash3 64-bit finalizer
// to scramble the id, then masks down to MID_TC_BUCKETS (a power of two).
static inline uint32_t mid_tc_hash(uint64_t tid) {
    uint64_t x = tid;
    x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdULL;
    x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return (uint32_t)(x & (uint64_t)(MID_TC_BUCKETS - 1));
}
// One-time, thread-safe registry initialization via pthread_once.
static pthread_once_t mid_tc_init_once_control = PTHREAD_ONCE_INIT;

// pthread_once body: initialize every bucket mutex and empty every chain.
static void mid_tc_init_impl(void) {
    for (int b = MID_TC_BUCKETS - 1; b >= 0; b--) {
        g_mid_tc_head[b] = NULL;
        pthread_mutex_init(&g_mid_tc_mu[b], NULL);
    }
}

// Idempotent entry point; safe to call from any thread, any number of times.
static void mid_tc_init_once(void) {
    pthread_once(&mid_tc_init_once_control, mid_tc_init_impl);
}
// Return the calling thread's MidTC, creating and registering it on first use.
// Returns NULL only if the inbox allocation itself fails.
// NOTE(review): neither MidTC nor MidTCEntry is ever freed at thread exit, so
// heavy thread churn leaks one inbox per dead thread — consider a pthread_key
// destructor; confirm against the allocator's thread-lifecycle expectations.
static MidTC* mid_tc_get(void) {
    if (t_mid_tc) return t_mid_tc; // fast path: already created for this thread
    mid_tc_init_once();
    MidTC* tc = (MidTC*)hkm_libc_calloc(1, sizeof(MidTC)); // P0 Fix: Use libc malloc (calloc zeroes all inbox heads)
    if (!tc) return NULL;
    uint64_t tid = (uint64_t)(uintptr_t)pthread_self();
    uint32_t h = mid_tc_hash(tid);
    // Allocate the registry entry BEFORE taking the bucket lock so the critical
    // section shrinks to the two-pointer prepend (the original allocated while
    // holding the mutex, serializing registrations behind the allocator).
    MidTCEntry* e = (MidTCEntry*)hkm_libc_malloc(sizeof(MidTCEntry)); // P0 Fix: Use libc malloc
    if (e) {
        e->tid = tid;
        e->tc = tc;
        pthread_mutex_lock(&g_mid_tc_mu[h]);
        e->next = g_mid_tc_head[h];
        g_mid_tc_head[h] = e;
        pthread_mutex_unlock(&g_mid_tc_mu[h]);
    }
    // Best-effort on entry-allocation failure: the inbox still works for this
    // thread, but remote threads cannot find it via mid_tc_lookup_by_tid.
    t_mid_tc = tc;
    return tc;
}
static MidTC* mid_tc_lookup_by_tid(uint64_t tid) {
mid_tc_init_once();
uint32_t h = mid_tc_hash(tid);
MidTCEntry* e = g_mid_tc_head[h];
while (e) { if (e->tid == tid) return e->tc; e = e->next; }
return NULL;
}
// Lock-free push of `b` onto the inbox list for `class_idx`.
// Push-only CAS has no ABA hazard here: individual nodes are never popped,
// the owner only detaches the whole list via atomic_exchange in the drain.
static inline void mid_tc_push(MidTC* tc, int class_idx, PoolBlock* b) {
    // Load the head once. On CAS failure, compare_exchange_weak writes the
    // current head back into old_head (failure order = acquire, matching the
    // original's acquire load), so the per-iteration atomic_load the original
    // performed — discarding that updated value — is redundant work.
    uintptr_t old_head = atomic_load_explicit(&tc->inbox[class_idx], memory_order_acquire);
    do {
        b->next = (PoolBlock*)old_head;
    } while (!atomic_compare_exchange_weak_explicit(&tc->inbox[class_idx], &old_head, (uintptr_t)b,
                                                    memory_order_release, memory_order_acquire));
}
// Drain this thread's inbox for `class_idx` into its TLS ring, spilling into
// the TLS bin once the ring is full. Returns the number of blocks moved
// (0 when the inbox is empty or the MidTC could not be allocated).
// The whole pending list is detached with a single atomic_exchange; blocks
// beyond the drain limit are pushed back one at a time, which reverses their
// order and may interleave with concurrent remote pushes (harmless for a
// free list, but worth knowing when reading allocation traces).
static inline int mid_tc_drain_into_tls(int class_idx, PoolTLSRing* ring, PoolTLSBin* bin) {
MidTC* tc = mid_tc_get();
if (!tc) return 0;
HKM_TIME_START(t_tc);
// Atomically take ownership of the entire list; inbox becomes empty for pushers.
uintptr_t head = atomic_exchange_explicit(&tc->inbox[class_idx], (uintptr_t)0, memory_order_acq_rel);
if (!head) { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc); return 0; }
int moved = 0;
// Unbounded mode (or max <= 0) moves everything; otherwise cap at g_tc_drain_max.
int limit = (g_tc_drain_unbounded || g_tc_drain_max <= 0) ? INT32_MAX : g_tc_drain_max;
PoolBlock* cur = (PoolBlock*)head;
while (cur && moved < limit) {
PoolBlock* nxt = cur->next;
if (ring->top < POOL_L2_RING_CAP) {
// Fast path: ring has capacity.
ring->items[ring->top++] = cur; moved++;
} else {
// Ring full: prepend to the bin's lo list instead.
cur->next = bin->lo_head; bin->lo_head = cur; bin->lo_count++; moved++;
}
cur = nxt;
}
// Re-publish any leftovers past the limit back into the inbox for a later drain.
while (cur) { PoolBlock* nxt = cur->next; mid_tc_push(tc, class_idx, cur); cur = nxt; }
HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc);
return moved;
}
static inline int mid_tc_has_items(int class_idx) {
MidTC* tc = t_mid_tc; // do not allocate on peek
if (!tc) return 0;
return atomic_load_explicit(&tc->inbox[class_idx], memory_order_relaxed) != 0;
}
#endif // POOL_MID_TC_INC_H