Files
hakmem/core/box/pool_tls_core.inc.h

98 lines
4.0 KiB
C
Raw Normal View History

CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
// pool_tls_core.inc.h — Box: L2 Pool TLS helpers (no public symbol collisions)
#ifndef POOL_TLS_CORE_INC_H
#define POOL_TLS_CORE_INC_H
// This box provides internal helpers used by hakmem_pool.c.
// It intentionally does NOT define the public symbol hak_pool_get_shard_index.
// Bitmap helpers (O(1) empty class detection)
// Atomically sets bit `shard_idx` in g_pool.nonempty_mask[class_idx].
//   class_idx: index into g_pool.nonempty_mask.
//   shard_idx: bit position to set; must be in [0, 63] because the value is
//              built with a 64-bit shift.
static inline void set_nonempty_bit(int class_idx, int shard_idx) {
    const uint64_t shard_bit = (uint64_t)(1ULL << shard_idx);
    atomic_fetch_or(&g_pool.nonempty_mask[class_idx], shard_bit);
}
// Atomically clears bit `shard_idx` in g_pool.nonempty_mask[class_idx].
//   class_idx: index into g_pool.nonempty_mask.
//   shard_idx: bit position to clear; must be in [0, 63] because the value is
//              built with a 64-bit shift.
static inline void clear_nonempty_bit(int class_idx, int shard_idx) {
    // Every bit set except the shard's own, so the AND drops only that bit.
    const uint64_t keep_bits = ~(uint64_t)(1ULL << shard_idx);
    atomic_fetch_and(&g_pool.nonempty_mask[class_idx], keep_bits);
}
// Reports whether bit `shard_idx` is set in g_pool.nonempty_mask[class_idx].
//   class_idx: index into g_pool.nonempty_mask.
//   shard_idx: bit position to test; must be in [0, 63].
// Returns 1 if the bit is set, 0 otherwise. The result is a snapshot of a
// single atomic load and may be stale by the time the caller acts on it.
static inline int is_shard_nonempty(int class_idx, int shard_idx) {
    const uint64_t snapshot = atomic_load(&g_pool.nonempty_mask[class_idx]);
    return (int)((snapshot >> shard_idx) & 1ULL);
}
// Moves every block queued on g_pool.remote_head[class_idx][shard_idx] onto
// g_pool.freelist[class_idx][shard_idx].
//
// The remote list is detached in one atomic exchange (acq_rel) that leaves the
// remote head at 0; each detached block is then pushed onto the front of the
// freelist. The freelist itself is modified with plain (non-atomic) stores and
// no lock is taken here, so, as the `_locked` suffix indicates, the caller is
// expected to already hold whatever lock protects that freelist.
//
// If at least one block was moved, remote_count is decremented by the number
// moved and the shard's non-empty bit is set when the freelist is non-NULL.
static inline void drain_remote_locked(int class_idx, int shard_idx) {
    uintptr_t cursor = atomic_exchange_explicit(&g_pool.remote_head[class_idx][shard_idx],
                                                (uintptr_t)0, memory_order_acq_rel);
    unsigned moved = 0;

    for (; cursor != 0; moved++) {
        PoolBlock* node = (PoolBlock*)cursor;
        // Read the successor before overwriting node->next below.
        cursor = (uintptr_t)node->next;
        node->next = g_pool.freelist[class_idx][shard_idx];
        g_pool.freelist[class_idx][shard_idx] = node;
    }

    if (moved == 0) {
        return;
    }

    atomic_fetch_sub_explicit(&g_pool.remote_count[class_idx][shard_idx], moved, memory_order_relaxed);
    if (g_pool.freelist[class_idx][shard_idx]) {
        set_nonempty_bit(class_idx, shard_idx);
    }
}
// Picks a shard whose non-empty bit is set, scanning upward (with wrap-around)
// from `preferred`.
//   class_idx: index into g_pool.nonempty_mask.
//   preferred: shard to start from; only its low 6 bits select the start bit.
// Returns `preferred` unchanged when the mask is all zero; otherwise
// (preferred + distance to the first set bit) & (POOL_NUM_SHARDS - 1).
// The final AND assumes POOL_NUM_SHARDS is a power of two no larger than 64
// (TODO confirm against its definition).
// The mask is a snapshot from one acquire load; the chosen shard may have been
// emptied by the time the caller looks at it.
static inline int choose_nonempty_shard(int class_idx, int preferred) {
    uint64_t mask = atomic_load_explicit(&g_pool.nonempty_mask[class_idx], memory_order_acquire);
    if (!mask) return preferred;

    unsigned shift = (unsigned)preferred & 63u;
    // Rotate right by `shift` so bit `shift` of the mask lands at bit 0.
    // The left-shift count is reduced mod 64: a plain `64 - shift` would be a
    // 64-bit shift of a 64-bit value when shift == 0, which is undefined.
    uint64_t rot = (mask >> shift) | (mask << ((64u - shift) & 63u));

    // rot is a rotation of a non-zero mask, so it is non-zero and ctz is defined.
    int off = __builtin_ctzll(rot);
    return (preferred + off) & (POOL_NUM_SHARDS - 1);
}
// Maps a fresh page for the calling thread and sets `ap` up as a bump-run
// over it (blocks are carved off later; no per-block links are built here).
//   class_idx: size class; selects g_class_sizes[class_idx] and the stats slots.
//   ap:        TLS page descriptor to fill in; must be non-NULL.
// Returns 1 on success. Returns 0, leaving `ap` and the counters untouched,
// when not even one block (header + payload) fits in POOL_PAGE_SIZE or when
// mmap fails.
static inline int alloc_tls_page(int class_idx, PoolTLSPage* ap) {
    size_t user_size = g_class_sizes[class_idx];
    size_t block_size = HEADER_SIZE + user_size;
    int blocks_per_page = (int)(POOL_PAGE_SIZE / block_size);
    if (blocks_per_page <= 0) return 0;

    void* page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap signals failure with MAP_FAILED ((void*)-1), never NULL.
    if (page == MAP_FAILED) return 0;

    // Bump-run initialization (no per-block linking)
    ap->page = page;
    ap->bump = (char*)page;
    ap->end = (char*)page + POOL_PAGE_SIZE;
    ap->count = blocks_per_page;

    // Register the page with this thread's id as owner.
    // NOTE(review): presumably used for owner-fast free detection; confirm
    // against mid_desc_register.
    mid_desc_register(page, class_idx, (uint64_t)(uintptr_t)pthread_self());

    // Statistics. These are plain increments on g_pool fields.
    g_pool.refills[class_idx]++;
    g_pool.total_pages_allocated++;
    g_pool.pages_by_class[class_idx]++;
    g_pool.total_bytes_allocated += POOL_PAGE_SIZE;
    return 1;
}
// Carves up to `need` blocks off the active page's bump region and hands each
// one to the TLS ring or, failing that, to the bin's LIFO list.
//   class_idx: size class; block stride is HEADER_SIZE + g_class_sizes[class_idx].
//   ring:      destination ring; only touched while g_tls_ring_enabled is set.
//   bin:       destination LIFO (lo_head / lo_count), used when the ring is
//              disabled or already holds POOL_L2_RING_CAP items.
//   ap:        active page descriptor; may be NULL or already exhausted.
//   need:      maximum number of blocks to move.
// Returns the number of blocks moved (0 if `ap` is NULL, has no page, or is
// exhausted). When the page runs out, `ap` is reset: page = NULL,
// bump = end, count = 0.
static inline int refill_tls_from_active_page(int class_idx, PoolTLSRing* ring, PoolTLSBin* bin, PoolTLSPage* ap, int need) {
    if (ap == NULL || ap->page == NULL) {
        return 0;
    }
    if (ap->count <= 0 || ap->bump >= ap->end) {
        return 0;
    }

    const size_t stride = HEADER_SIZE + g_class_sizes[class_idx];
    int handed_out = 0;

    for (int remaining = need;
         remaining > 0 && ap->bump < ap->end && ap->count > 0;
         remaining--) {
        PoolBlock* fresh = (PoolBlock*)(void*)ap->bump;

        // The enable flag is tested first so `ring` is not dereferenced when
        // the ring is disabled.
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            ring->items[ring->top] = fresh;
            ring->top++;
        } else {
            fresh->next = bin->lo_head;
            bin->lo_head = fresh;
            bin->lo_count++;
        }

        ap->bump += stride;
        ap->count--;
        handed_out++;
    }

    // Retire the descriptor once the bump region is used up.
    if (ap->bump >= ap->end || ap->count <= 0) {
        ap->page = NULL;
        ap->bump = ap->end;
        ap->count = 0;
    }
    return handed_out;
}
#endif // POOL_TLS_CORE_INC_H