Files
hakmem/core/box/pool_tls_ring.inc.h
Moe Charm (CI) 1da8754d45 CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消
**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV

**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (ゴミ値、未初期化TLS)
```

Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV

**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`

**効果:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV解消)
```

**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00

104 lines
4.6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// pool_tls_ring.inc.h — Box: L2 Pool TLS ring/shard helpers
#ifndef POOL_TLS_RING_INC_H
#define POOL_TLS_RING_INC_H
// Write the allocation header for a Mid (pool) allocation.
//
//   hdr      - header to fill in; must be non-NULL (it is dereferenced unchecked).
//   class_sz - value stored in hdr->size.
//   site_id  - value stored in hdr->alloc_site (full mode only).
//
// Behaviour is selected by the global g_hdr_light_enabled:
//   >= 1 : nothing is written at all.
//   == 0 : magic/method/size plus alloc_site, class_bytes (= 0) and owner_tid.
//   <  0 : only magic/method/size are written.
static inline void mid_set_header(AllocHeader* hdr, size_t class_sz, uintptr_t site_id) {
    if (g_hdr_light_enabled < 1) {
        // Core fields, written whenever the header is not skipped entirely.
        hdr->magic = HAKMEM_MAGIC;
        hdr->method = ALLOC_METHOD_POOL;
        hdr->size = class_sz;

        // NOTE(review): given the guard above, this test only excludes negative
        // flag values; confirm whether the outer threshold was meant to be higher.
        if (g_hdr_light_enabled == 0) {
            hdr->alloc_site = site_id;
            hdr->class_bytes = 0;
            hdr->owner_tid = (uintptr_t)pthread_self();
        }
    }
}
// Map a request size to a pool class index.
//
// Lookup order:
//   1. Exact match against g_class_sizes[0..POOL_NUM_CLASSES-1]; entries equal
//      to 0 are skipped. This runs first, so a size that exactly equals a
//      configured class size resolves to that class even if it lies outside
//      the LUT range below.
//   2. Otherwise the size is rounded up to whole KiB and looked up in
//      SIZE_TO_CLASS (53 entries, i.e. 0..52 KiB; defined elsewhere).
//
// Returns the class index, or -1 when the size is beyond the LUT range. The
// LUT entry is returned as stored (a uint8_t widened to int), so any "no
// class" marker kept inside the table comes back as a non-negative value.
static inline int hak_pool_get_class_index(size_t size) {
    for (int i = 0; i < POOL_NUM_CLASSES; i++) {
        size_t cs = g_class_sizes[i];
        if (cs != 0 && size == cs) return i;
    }

    extern const uint8_t SIZE_TO_CLASS[53];
    const size_t lut_entries = sizeof SIZE_TO_CLASS / sizeof SIZE_TO_CLASS[0];

    // Range-check the full-width size BEFORE rounding. The previous code
    // rounded first and truncated the KiB count to 32 bits, so (a) sizes near
    // SIZE_MAX wrapped in `size + 1023` and (b) sizes >= 4 TiB lost their high
    // bits; both could alias to a small LUT slot instead of yielding -1.
    if (size > (lut_entries - 1) * 1024) return -1;

    // size <= 52 KiB here, so the rounded KiB count is in 0..52.
    return SIZE_TO_CLASS[(size + 1023) >> 10];
}
// Map an allocation site id to a shard index in [0, POOL_NUM_SHARDS).
//
// With g_shard_mix_enabled off, the shard is simply bits 4.. of site_id masked
// by (POOL_NUM_SHARDS - 1). With it on, site_id is first XORed with the calling
// thread's id (shifted left by one) and then scrambled with a 64-bit
// multiply/xor-shift mixer (the constants and shift amounts are those of the
// SplitMix64 output step) before masking.
//
// The masking assumes POOL_NUM_SHARDS is a power of two - TODO confirm at the
// definition site.
static inline int hak_pool_get_shard_index(uintptr_t site_id) {
    if (g_shard_mix_enabled) {
        const uint64_t thread_bits = (uint64_t)(uintptr_t)pthread_self();

        uint64_t h = (uint64_t)site_id ^ (thread_bits << 1);
        h += 0x9e3779b97f4a7c15ULL;
        h ^= h >> 30;
        h *= 0xbf58476d1ce4e5b9ULL;
        h ^= h >> 27;
        h *= 0x94d049bb133111ebULL;
        h ^= h >> 31;

        return (int)((uint32_t)h & (POOL_NUM_SHARDS - 1));
    }

    // Plain mode: drop the low 4 bits of the site id and mask.
    return (int)((site_id >> 4) & (POOL_NUM_SHARDS - 1));
}
// Bitmap helper (O(1)): atomically set bit `shard_idx` in
// g_pool.nonempty_mask[class_idx].
// shard_idx must be in 0..63; the shift is undefined otherwise.
static inline void set_nonempty_bit(int class_idx, int shard_idx) {
    const uint64_t bit = 1ULL << shard_idx;
    atomic_fetch_or(&g_pool.nonempty_mask[class_idx], bit);
}
// Bitmap helper (O(1)): atomically clear bit `shard_idx` in
// g_pool.nonempty_mask[class_idx], leaving every other bit untouched.
// shard_idx must be in 0..63; the shift is undefined otherwise.
static inline void clear_nonempty_bit(int class_idx, int shard_idx) {
    const uint64_t keep = ~(uint64_t)(1ULL << shard_idx);
    atomic_fetch_and(&g_pool.nonempty_mask[class_idx], keep);
}
// Bitmap helper (O(1)): return 1 if bit `shard_idx` is set in an atomic
// snapshot of g_pool.nonempty_mask[class_idx], 0 otherwise.
// shard_idx must be in 0..63; the shift is undefined otherwise.
static inline int is_shard_nonempty(int class_idx, int shard_idx) {
    const uint64_t bit = 1ULL << shard_idx;
    const uint64_t snapshot = atomic_load(&g_pool.nonempty_mask[class_idx]);
    return (snapshot & bit) ? 1 : 0;
}
// Move every block queued on the remote list of (class_idx, shard_idx) onto
// that shard's freelist.
//
// The remote list head is detached in one atomic exchange (acq_rel), then each
// detached block is pushed onto the front of g_pool.freelist[class][shard].
// If anything was moved, remote_count is decremented by that amount (relaxed)
// and the shard's non-empty bit is set when the freelist is non-NULL.
//
// The freelist itself is modified with plain stores and no lock is taken
// here; per the function name and the original comment, the caller is expected
// to hold the lock protecting this shard's freelist.
static inline void drain_remote_locked(int class_idx, int shard_idx) {
    uintptr_t cursor = atomic_exchange_explicit(&g_pool.remote_head[class_idx][shard_idx],
                                                (uintptr_t)0, memory_order_acq_rel);
    unsigned moved = 0;

    for (; cursor != 0; moved++) {
        PoolBlock* blk = (PoolBlock*)cursor;
        // Read the successor before `next` is overwritten by the push below.
        cursor = (uintptr_t)blk->next;
        blk->next = g_pool.freelist[class_idx][shard_idx];
        g_pool.freelist[class_idx][shard_idx] = blk;
    }

    if (moved == 0) return;

    atomic_fetch_sub_explicit(&g_pool.remote_count[class_idx][shard_idx], moved, memory_order_relaxed);
    if (g_pool.freelist[class_idx][shard_idx]) {
        set_nonempty_bit(class_idx, shard_idx);
    }
}
// Pick a shard near `preferred` whose non-empty bit is set for `class_idx`.
//
// Takes an acquire snapshot of g_pool.nonempty_mask[class_idx]. If no bit is
// set, `preferred` is returned unchanged. Otherwise the mask is rotated right
// so that bit (preferred & 63) lands at position 0; the lowest set bit of the
// rotated value is then the distance to the first non-empty shard at or after
// `preferred`, wrapping around the 64-bit mask. The result is masked with
// (POOL_NUM_SHARDS - 1).
//
// The snapshot may be stale by the time the caller uses the result; this
// function only reads the mask.
static inline int choose_nonempty_shard(int class_idx, int preferred) {
    uint64_t mask = atomic_load_explicit(&g_pool.nonempty_mask[class_idx], memory_order_acquire);
    if (!mask) return preferred;

    const unsigned shift = (unsigned)preferred & 63u;

    // Rotate right by `shift`. The left-shift count is reduced modulo 64 so
    // that shift == 0 yields `mask << 0` rather than `mask << 64`, which is
    // undefined behaviour for a 64-bit operand.
    const uint64_t rot = (mask >> shift) | (mask << ((64u - shift) & 63u));

    // A rotation of a non-zero value is non-zero, so ctz is well defined here.
    const int off = __builtin_ctzll(rot);
    return (preferred + off) & (POOL_NUM_SHARDS - 1);
}
// Map a fresh page for a thread-local active page and set it up for bump
// allocation.
//
//   class_idx - pool class; selects the block size via g_class_sizes[].
//   ap        - active-page descriptor to fill in (page, bump, end, count).
//
// Returns 1 on success. Returns 0, leaving *ap untouched, when not even one
// block (HEADER_SIZE + class size) fits in POOL_PAGE_SIZE or when mmap fails.
//
// On success the page is registered through mid_desc_register() with the
// calling thread's id, and the g_pool counters (refills, total_pages_allocated,
// pages_by_class, total_bytes_allocated) are bumped with plain, non-atomic
// increments.
static inline int alloc_tls_page(int class_idx, PoolTLSPage* ap) {
    size_t user_size = g_class_sizes[class_idx];
    size_t block_size = HEADER_SIZE + user_size;
    int blocks_per_page = POOL_PAGE_SIZE / block_size;
    if (blocks_per_page <= 0) return 0;

    void* page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap reports failure as MAP_FAILED ((void*)-1), never as NULL; testing
    // `!page` would let a failed mapping through as if it were a valid page.
    if (page == MAP_FAILED) return 0;

    ap->page = page;
    ap->bump = (char*)page;
    ap->end = (char*)page + POOL_PAGE_SIZE;
    ap->count = blocks_per_page;

    mid_desc_register(page, class_idx, (uint64_t)(uintptr_t)pthread_self());

    g_pool.refills[class_idx]++;
    g_pool.total_pages_allocated++;
    g_pool.pages_by_class[class_idx]++;
    g_pool.total_bytes_allocated += POOL_PAGE_SIZE;
    return 1;
}
// Carve up to `need` blocks off the active page `ap` and hand them to the
// thread-local caches.
//
// Each block starts at ap->bump and is HEADER_SIZE + g_class_sizes[class_idx]
// bytes long. A block goes into `ring` (pointer stored, block not linked) when
// g_tls_ring_enabled is set and the ring holds fewer than POOL_L2_RING_CAP
// items; otherwise it is pushed onto the front of bin->lo_head and
// bin->lo_count is incremented.
//
// Returns the number of blocks moved. Returns 0 immediately when `ap` is NULL,
// has no page, has no blocks left, or its bump pointer has reached the end.
// `ring` and `bin` are dereferenced without NULL checks.
static inline int refill_tls_from_active_page(int class_idx, PoolTLSRing* ring, PoolTLSBin* bin, PoolTLSPage* ap, int need) {
    if (ap == NULL || ap->page == NULL) return 0;
    if (ap->count <= 0 || ap->bump >= ap->end) return 0;

    const size_t stride = HEADER_SIZE + g_class_sizes[class_idx];
    int taken = 0;

    for (; taken < need && ap->bump < ap->end && ap->count > 0; taken++) {
        PoolBlock* fresh = (PoolBlock*)(void*)ap->bump;

        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            ring->items[ring->top] = fresh;
            ring->top++;
        } else {
            fresh->next = bin->lo_head;
            bin->lo_head = fresh;
            bin->lo_count++;
        }

        // Advance the bump cursor past the block just handed out.
        ap->bump += stride;
        ap->count--;
    }

    return taken;
}
#endif // POOL_TLS_RING_INC_H