Files
hakmem/core/box/mailbox_box.c
Moe Charm (CI) 1da8754d45 CRITICAL FIX: Completely eliminate the 4T SEGV caused by uninitialized TLS
**Problem:**
- Larson 4T: 100% SEGV (1T completes at 2.09M ops/s)
- System/mimalloc run 4T normally at 33.52M ops/s
- SEGV at 4T even with SS OFF + Remote OFF

**Root cause (Task agent ultrathink investigation):**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (garbage value, i.e. uninitialized TLS)
```

TLS variables in worker threads were uninitialized:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← no initializer
- Threads spawned by pthread_create() did not get these arrays zero-initialized
- The NULL check passed (0x6261 != NULL) → dereference → SEGV
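
To make the failure mode concrete, here is a minimal sketch of the crash pattern; `tls_pop` and its body are illustrative stand-ins, not the actual hakmem free-list code:

```c
// Hypothetical sketch: a TLS free-list pop under the failure described above.
static __thread void* g_tls_sll_head[TINY_NUM_CLASSES]; // no explicit initializer

void* tls_pop(int cls) {
    void* head = g_tls_sll_head[cls];
    if (head == NULL) return NULL;       // garbage like 0x6261 passes this check
    g_tls_sll_head[cls] = *(void**)head; // dereferencing the garbage → SEGV
    return head;
}
```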

**Fix:**
Added an explicit `= {0}` initializer to every TLS array (a minimal before/after sketch follows the list below):

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
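
The pattern is identical for every array above; a before/after sketch using the first one as the example:

```c
// Before: no initializer; the commit found these holding garbage in worker threads
__thread void* g_tls_sll_head[TINY_NUM_CLASSES];

// After: explicit zero initializer guarantees a defined (all-NULL) initial state
__thread void* g_tls_sll_head[TINY_NUM_CLASSES] = {0};
```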

**Results:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV eliminated)
```

**Tests:**
```bash
# 1 thread: completes
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s

# 4 threads: completes (previously SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s
```

**Investigation credit:** Task agent (ultrathink mode), which pinpointed the exact root cause

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00


// mailbox_box.c - Publish Mailbox box (fully separated)
#include "mailbox_box.h"
#include "hakmem_tiny.h"
#include "tiny_debug_ring.h"
#include <stdatomic.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#ifndef MAILBOX_SHARDS
#define MAILBOX_SHARDS 64
#endif
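// NOTE: MAILBOX_SHARDS must remain a power of two: the register path wraps the
// round-robin index with `idx &= (MAILBOX_SHARDS - 1u)` rather than a modulo.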
// Shared state (per class)
static _Atomic(uintptr_t) g_pub_mailbox_entries[TINY_NUM_CLASSES][MAILBOX_SHARDS];
static _Atomic(uint32_t) g_pub_mailbox_claimed[TINY_NUM_CLASSES][MAILBOX_SHARDS];
static _Atomic(uint32_t) g_pub_mailbox_rr[TINY_NUM_CLASSES];
static _Atomic(uint32_t) g_pub_mailbox_used[TINY_NUM_CLASSES];
static _Atomic(uint32_t) g_pub_mailbox_scan[TINY_NUM_CLASSES];
static __thread uint8_t g_tls_mailbox_registered[TINY_NUM_CLASSES];
static __thread uint8_t g_tls_mailbox_slot[TINY_NUM_CLASSES];
static int g_mailbox_trace_en = -1;
static int g_mailbox_trace_limit = 4;
static _Atomic int g_mailbox_trace_seen[TINY_NUM_CLASSES];
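// NOTE: g_mailbox_trace_en/_limit are parsed in mailbox_box_fetch and
// g_mailbox_trace_seen is declared, but nothing in this file consumes them yet;
// they appear to be scaffolding for a future trace path.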
// Optional: periodic slow discovery to widen 'used' even when >0 (A/B)
static int g_mailbox_slowdisc_en = -1; // env: HAKMEM_TINY_MAILBOX_SLOWDISC (default ON)
static int g_mailbox_slowdisc_period = -1; // env: HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD (default 256)
static __thread uint32_t g_mailbox_fetch_tick[TINY_NUM_CLASSES];
// Thread-exit hook to release claimed slots
static pthread_once_t g_mailbox_tls_once = PTHREAD_ONCE_INIT;
static pthread_key_t g_mailbox_tls_key;
static void mailbox_box_unregister_class(int class_idx);
static void mailbox_tls_cleanup(void* key) {
    (void)key;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tls_mailbox_registered[i]) {
            mailbox_box_unregister_class(i);
        }
    }
}
static void mailbox_tls_init(void) {
    (void)pthread_key_create(&g_mailbox_tls_key, mailbox_tls_cleanup);
}
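// pthread key destructors only run for threads whose key value is non-NULL;
// mailbox_box_register stores (void*)1 so mailbox_tls_cleanup fires at thread exit.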
// Counters (extern from main module)
extern unsigned long long g_pub_mail_hits[];
extern unsigned long long g_rf_hit_mail[];
extern unsigned long long g_mailbox_register_calls[];
extern unsigned long long g_mailbox_slow_discoveries[];
void mailbox_box_register(int class_idx) {
    if (g_tls_mailbox_registered[class_idx]) return;
    g_mailbox_register_calls[class_idx]++;
    // One-shot visibility trace (env: HAKMEM_TINY_RF_TRACE)
    static int trace_en = -1;
    if (__builtin_expect(trace_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_RF_TRACE");
        trace_en = (e && atoi(e) != 0) ? 1 : 0;
    }
    pthread_once(&g_mailbox_tls_once, mailbox_tls_init);
    pthread_setspecific(g_mailbox_tls_key, (void*)1);
    uint32_t chosen = MAILBOX_SHARDS;
    for (int attempt = 0; attempt < MAILBOX_SHARDS; attempt++) {
        uint32_t idx = atomic_fetch_add_explicit(&g_pub_mailbox_rr[class_idx], 1u, memory_order_relaxed);
        idx &= (MAILBOX_SHARDS - 1u);
        uint32_t expected_claim = 0;
        if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_claimed[class_idx][idx],
                                                  &expected_claim, 1u,
                                                  memory_order_release, memory_order_relaxed)) {
            chosen = idx;
            break;
        }
    }
    if (chosen == MAILBOX_SHARDS) {
        // All shards already claimed: fall back to sharing slot 0 (last writer wins).
        atomic_store_explicit(&g_pub_mailbox_claimed[class_idx][0], 1u, memory_order_release);
        chosen = 0;
    }
    g_tls_mailbox_slot[class_idx] = (uint8_t)chosen;
    g_tls_mailbox_registered[class_idx] = 1;
    atomic_store_explicit(&g_pub_mailbox_entries[class_idx][chosen], (uintptr_t)0, memory_order_release);
    // Monotonic raise of used to cover chosen index
    uint32_t target = chosen + 1u;
    while (1) {
        uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
        if (used >= target) break;
        if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &used, target,
                                                  memory_order_acq_rel, memory_order_relaxed)) {
            break;
        }
    }
    if (trace_en) {
        static _Atomic int printed[TINY_NUM_CLASSES]; // sized to class count so class_idx stays in bounds
        int expected = 0;
        if (atomic_compare_exchange_strong(&printed[class_idx], &expected, 1)) {
            fprintf(stderr, "[MBTRACE] register class=%d slot=%u used=%u\n", class_idx, (unsigned)chosen,
                    (unsigned)atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_relaxed));
        }
    }
}
static void mailbox_box_unregister_class(int class_idx) {
    if (!g_tls_mailbox_registered[class_idx]) return;
    uint32_t slot = g_tls_mailbox_slot[class_idx];
    atomic_store_explicit(&g_pub_mailbox_claimed[class_idx][slot], 0u, memory_order_release);
    g_tls_mailbox_registered[class_idx] = 0;
}
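// Note: unregistering releases the claim but leaves any published entry in place;
// a later fetch can still drain it, and the next thread to claim the slot resets
// the entry to 0 in mailbox_box_register.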
void mailbox_box_publish(int class_idx, SuperSlab* ss, int slab_idx) {
    mailbox_box_register(class_idx);
    // Encode entry locally (align >=1MB, lower 6 bits carry slab_idx)
    uintptr_t ent = ((uintptr_t)ss) | ((uintptr_t)slab_idx & 0x3Fu);
    uint32_t slot = g_tls_mailbox_slot[class_idx];
    tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_PUBLISH,
                           (uint16_t)class_idx,
                           ss,
                           ((uintptr_t)slot << 32) | (uintptr_t)slab_idx);
    atomic_store_explicit(&g_pub_mailbox_entries[class_idx][slot], ent, memory_order_release);
    g_pub_mail_hits[class_idx]++;
}
uintptr_t mailbox_box_peek_one(int class_idx) {
    // Optional slow-discovery (triage only) to expand used when >0
    int slow_en, period;
    if (__builtin_expect(g_mailbox_slowdisc_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC");
        g_mailbox_slowdisc_en = (!e || atoi(e) != 0) ? 1 : 0; // default ON
    }
    slow_en = g_mailbox_slowdisc_en;
    if (slow_en) {
        uint32_t tick = ++g_mailbox_fetch_tick[class_idx];
        if (__builtin_expect(g_mailbox_slowdisc_period == -1, 0)) {
            const char* p = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD");
            int v = p ? atoi(p) : 256;
            g_mailbox_slowdisc_period = (v > 0) ? v : 256; // reject <=0 to avoid mod-by-zero below
        }
        period = g_mailbox_slowdisc_period;
        if ((tick % (uint32_t)period) == 0u) {
            // Widen used by one slot (best-effort)
            uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
            if (used < MAILBOX_SHARDS) {
                atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &used, used + 1u,
                                                      memory_order_acq_rel, memory_order_relaxed);
                g_mailbox_slow_discoveries[class_idx]++;
            }
        }
    }
    // Non-destructive peek of first non-zero entry
    uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
    for (uint32_t i = 0; i < used; i++) {
        uintptr_t ent = atomic_load_explicit(&g_pub_mailbox_entries[class_idx][i], memory_order_acquire);
        if (ent) return ent;
    }
    return (uintptr_t)0;
}
uintptr_t mailbox_box_fetch(int class_idx) {
    if (__builtin_expect(g_mailbox_trace_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_MAILBOX_TRACE");
        g_mailbox_trace_en = (e && atoi(e) != 0) ? 1 : 0;
        const char* l = getenv("HAKMEM_TINY_MAILBOX_TRACE_LIMIT");
        int v = l ? atoi(l) : 0;
        if (v > 0) g_mailbox_trace_limit = v;
    }
    uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
    // Optional slow discovery
    if (__builtin_expect(g_mailbox_slowdisc_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC");
        g_mailbox_slowdisc_en = (e ? ((atoi(e) != 0) ? 1 : 0) : 1);
    }
    if (__builtin_expect(g_mailbox_slowdisc_period == -1, 0)) {
        const char* p = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD");
        int v = p ? atoi(p) : 256;
        g_mailbox_slowdisc_period = (v > 0) ? v : 256; // reject <=0 to avoid mod-by-zero below
    }
    if (g_mailbox_slowdisc_en && used < MAILBOX_SHARDS) {
        uint32_t t = ++g_mailbox_fetch_tick[class_idx];
        int period = g_mailbox_slowdisc_period;
        if ((t % (uint32_t)period) == 0u) {
            if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &used, used + 1u,
                                                      memory_order_acq_rel, memory_order_relaxed)) {
                g_mailbox_slow_discoveries[class_idx]++;
                used = used + 1u;
            } else {
                used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
            }
        }
    }
    // Destructive fetch of first available entry (0..used-1)
    for (uint32_t i = 0; i < used; i++) {
        uintptr_t ent = atomic_exchange_explicit(&g_pub_mailbox_entries[class_idx][i], (uintptr_t)0,
                                                 memory_order_acq_rel);
        if (ent) {
            g_rf_hit_mail[class_idx]++;
            tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_FETCH, (uint16_t)class_idx, (void*)ent, (uintptr_t)i);
            return ent;
        }
    }
    tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_FETCH_NULL, (uint16_t)class_idx, 0, 0);
    return (uintptr_t)0;
}
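
For orientation, a hedged usage sketch of the publish/consume cycle; the caller context is hypothetical, and the decode simply inverts the encoding documented in mailbox_box_publish (>=1MB-aligned SuperSlab base, slab_idx in the low 6 bits):

```c
// Producer: advertise a slab that has free blocks (hypothetical call site).
mailbox_box_publish(class_idx, ss, slab_idx);

// Consumer: destructively fetch one entry, then decode it.
uintptr_t ent = mailbox_box_fetch(class_idx);
if (ent) {
    SuperSlab* ss_hit   = (SuperSlab*)(ent & ~(uintptr_t)0x3F); // aligned base
    int        slab_hit = (int)(ent & 0x3F);                    // low 6 bits
    /* ... refill the local cache from ss_hit / slab_hit ... */
}
```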