**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
98 lines
3.7 KiB
C
// pool_mid_tc.inc.h — Box: Mid Transfer Cache (per-thread inbox)
|
|
#ifndef POOL_MID_TC_INC_H
|
|
#define POOL_MID_TC_INC_H
|
|
|
|
// Per-thread transfer cache ("inbox"): one lock-free singly linked list head
// per size class, stored as an atomic uintptr_t. Remote threads push freed
// blocks here (mid_tc_push); the owning thread drains them into its TLS
// caches (mid_tc_drain_into_tls).
typedef struct MidTC {
    atomic_uintptr_t inbox[POOL_NUM_CLASSES];
} MidTC;
|
|
|
|
// Registry mapping thread id -> MidTC, so remote threads can locate a victim
// thread's inbox by tid. Fixed-size hash table with per-bucket chaining;
// entries are inserted at the head and (from what is visible here) never
// removed.
#define MID_TC_BUCKETS 1024
typedef struct MidTCEntry { uint64_t tid; MidTC* tc; struct MidTCEntry* next; } MidTCEntry;
static pthread_mutex_t g_mid_tc_mu[MID_TC_BUCKETS];  // one lock per bucket
static MidTCEntry* g_mid_tc_head[MID_TC_BUCKETS];    // bucket chain heads
static __thread MidTC* t_mid_tc = NULL;              // this thread's TC, created lazily by mid_tc_get()

// Tunables. Defaults are set here; the env names suggest they are overridden
// from the environment elsewhere — the parsing code is not in this file.
static int g_tc_enabled = 1; // env: HAKMEM_TC_ENABLE (default 1)
static int g_tc_drain_unbounded = 1; // env: HAKMEM_TC_UNBOUNDED (default 1)
static int g_tc_drain_max = 0; // env: HAKMEM_TC_DRAIN_MAX (0=unbounded)
static int g_tc_drain_trigger = 2; // env: HAKMEM_TC_DRAIN_TRIGGER (ring->top < trigger)
|
|
|
|
static inline uint32_t mid_tc_hash(uint64_t tid) {
|
|
tid ^= tid >> 33; tid *= 0xff51afd7ed558ccdULL; tid ^= tid >> 33; tid *= 0xc4ceb9fe1a85ec53ULL; tid ^= tid >> 33;
|
|
return (uint32_t)(tid & (MID_TC_BUCKETS - 1));
|
|
}
|
|
|
|
// Thread-safe initialization using pthread_once
|
|
static pthread_once_t mid_tc_init_once_control = PTHREAD_ONCE_INIT;
|
|
static void mid_tc_init_impl(void) {
|
|
for (int i = 0; i < MID_TC_BUCKETS; i++) {
|
|
pthread_mutex_init(&g_mid_tc_mu[i], NULL);
|
|
g_mid_tc_head[i] = NULL;
|
|
}
|
|
}
|
|
static void mid_tc_init_once(void) {
|
|
pthread_once(&mid_tc_init_once_control, mid_tc_init_impl);
|
|
}
|
|
|
|
static MidTC* mid_tc_get(void) {
|
|
if (t_mid_tc) return t_mid_tc;
|
|
mid_tc_init_once();
|
|
MidTC* tc = (MidTC*)hkm_libc_calloc(1, sizeof(MidTC)); // P0 Fix: Use libc malloc
|
|
if (!tc) return NULL;
|
|
uint64_t tid = (uint64_t)(uintptr_t)pthread_self();
|
|
uint32_t h = mid_tc_hash(tid);
|
|
pthread_mutex_lock(&g_mid_tc_mu[h]);
|
|
MidTCEntry* e = (MidTCEntry*)hkm_libc_malloc(sizeof(MidTCEntry)); // P0 Fix: Use libc malloc
|
|
if (e) { e->tid = tid; e->tc = tc; e->next = g_mid_tc_head[h]; g_mid_tc_head[h] = e; }
|
|
pthread_mutex_unlock(&g_mid_tc_mu[h]);
|
|
t_mid_tc = tc;
|
|
return tc;
|
|
}
|
|
|
|
static MidTC* mid_tc_lookup_by_tid(uint64_t tid) {
|
|
mid_tc_init_once();
|
|
uint32_t h = mid_tc_hash(tid);
|
|
MidTCEntry* e = g_mid_tc_head[h];
|
|
while (e) { if (e->tid == tid) return e->tc; e = e->next; }
|
|
return NULL;
|
|
}
|
|
|
|
// Lock-free push of block `b` onto `tc`'s per-class inbox (Treiber-style
// stack). Safe to call from any thread; the owner drains the whole list
// with a single atomic exchange. The release order on success publishes
// b->next before the new head becomes visible.
static inline void mid_tc_push(MidTC* tc, int class_idx, PoolBlock* b) {
    uintptr_t expected = atomic_load_explicit(&tc->inbox[class_idx], memory_order_acquire);
    for (;;) {
        b->next = (PoolBlock*)expected;
        // On failure the weak CAS refreshes `expected` with the current head
        // (acquire, matching the original re-load), so we just retry.
        if (atomic_compare_exchange_weak_explicit(&tc->inbox[class_idx],
                                                  &expected, (uintptr_t)b,
                                                  memory_order_release,
                                                  memory_order_acquire)) {
            return;
        }
    }
}
|
|
|
|
// Drain this thread's inbox for `class_idx` into its TLS caches.
//
// The entire lock-free inbox list is taken in one atomic exchange, then up
// to `limit` blocks are moved: into the L2 ring while it has room, and onto
// the TLS bin's lo-list once the ring is full. Any blocks beyond the limit
// are pushed back onto the inbox one by one (this reverses their order —
// presumably acceptable for a free list; confirm if ordering ever matters).
// Returns the number of blocks moved (0 if the inbox was empty or no TC
// could be created).
static inline int mid_tc_drain_into_tls(int class_idx, PoolTLSRing* ring, PoolTLSBin* bin) {
    // May allocate the TC on first use; NULL means allocation failed.
    MidTC* tc = mid_tc_get();
    if (!tc) return 0;
    HKM_TIME_START(t_tc);  // NOTE(review): looks like a timing/stats macro pair
    // Take ownership of the whole list; concurrent pushers start a new one.
    uintptr_t head = atomic_exchange_explicit(&tc->inbox[class_idx], (uintptr_t)0, memory_order_acq_rel);
    if (!head) { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc); return 0; }
    int moved = 0;
    // Unbounded unless explicitly capped via g_tc_drain_max.
    int limit = (g_tc_drain_unbounded || g_tc_drain_max <= 0) ? INT32_MAX : g_tc_drain_max;
    PoolBlock* cur = (PoolBlock*)head;
    while (cur && moved < limit) {
        PoolBlock* nxt = cur->next;
        if (ring->top < POOL_L2_RING_CAP) {
            // Fast path: refill the L2 ring.
            ring->items[ring->top++] = cur; moved++;
        } else {
            // Ring full: overflow onto the bin's lo-list (push-front).
            cur->next = bin->lo_head; bin->lo_head = cur; bin->lo_count++; moved++;
        }
        cur = nxt;
    }
    // Re-queue anything beyond the limit back onto the inbox.
    while (cur) { PoolBlock* nxt = cur->next; mid_tc_push(tc, class_idx, cur); cur = nxt; }
    HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc);
    return moved;
}
|
|
|
|
static inline int mid_tc_has_items(int class_idx) {
|
|
MidTC* tc = t_mid_tc; // do not allocate on peek
|
|
if (!tc) return 0;
|
|
return atomic_load_explicit(&tc->inbox[class_idx], memory_order_relaxed) != 0;
|
|
}
|
|
|
|
#endif // POOL_MID_TC_INC_H
|
|
|