hakmem/core/box/pool_mid_desc.inc.h

CRITICAL FIX: Completely eliminate the 4T SEGV caused by uninitialized TLS

**Problem:**
- Larson 4T SEGVs 100% of the time (1T completes at 2.09M ops/s)
- System/mimalloc handles 4T normally at 33.52M ops/s
- SEGV persists at 4T even with SS OFF + Remote OFF

**Root cause (Task agent ultrathink investigation):**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (garbage value, uninitialized TLS)
```
Worker threads' TLS variables were uninitialized:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← no initializer
- Threads spawned via pthread_create() did not see these zero-initialized
- The NULL check passes (0x6261 != NULL) → dereference → SEGV

**Fix:**
Add an explicit `= {0}` initializer to every TLS array:
1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`

**Result:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After:  1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV eliminated)
```

**Testing:**
```bash
# 1 thread: runs to completion
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅

# 4 threads: runs to completion (previously SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```

**Investigation credit:** root cause pinpointed by Task agent (ultrathink mode)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
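To make the fix pattern concrete, here is a minimal, self-contained sketch; `TINY_NUM_CLASSES`, `g_tls_sll_head`, and `tls_sll_pop` are stand-ins for the real hakmem symbols, not the actual definitions:

```c
/* Hypothetical reduction of the pattern fixed in this commit. */
#include <stddef.h>

#define TINY_NUM_CLASSES 8 /* placeholder value */

/* Before: no initializer -- the commit found such TLS slots holding
 * garbage (e.g. 0x6261) in pthread_create()-spawned worker threads:
 *   __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
 * After: explicit zero initializer, so the NULL check below is reliable. */
__thread void* g_tls_sll_head[TINY_NUM_CLASSES] = {0};

static void* tls_sll_pop(int class_idx) {
    void* head = g_tls_sll_head[class_idx];
    if (head == NULL)                           /* garbage != NULL slipped past this */
        return NULL;
    g_tls_sll_head[class_idx] = *(void**)head;  /* the dereference that SEGV'd */
    return head;
}
```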
// pool_mid_desc.inc.h — Box: Mid Page Descriptor Registry (64KiB pages)
#ifndef POOL_MID_DESC_INC_H
#define POOL_MID_DESC_INC_H
#define MID_DESC_BUCKETS 2048
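// NOTE: This .inc.h is included textually; it expects the including TU to
// provide <stdint.h>, <stdatomic.h>, <pthread.h>, hkm_libc_malloc(), and
// POOL_PAGE_SIZE (64 KiB, matching the >>16 in mid_desc_hash below).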
typedef struct MidPageDesc {
    void* page;               // 64KiB-aligned page base
    uint8_t class_idx;        // size class served by this page
    uint8_t _pad0;
    uint16_t _pad1;
    uint64_t owner_tid;       // owning thread id; 0 = unowned
    atomic_int in_use;        // live allocations on this page
    int blocks_per_page;      // total blocks on this page
    atomic_int pending_dn;    // background DONTNEED enqueued
    struct MidPageDesc* next; // bucket chain (prepend-only)
} MidPageDesc;
static pthread_mutex_t g_mid_desc_mu[MID_DESC_BUCKETS];
static MidPageDesc* g_mid_desc_head[MID_DESC_BUCKETS];
static inline uint32_t mid_desc_hash(void* page) {
    uintptr_t x = (uintptr_t)page >> 16; // 64KiB alignment granularity
    // 64-bit avalanche mix (MurmurHash3 fmix64 constants)
    x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return (uint32_t)(x & (MID_DESC_BUCKETS - 1)); // buckets are a power of two
}
// Thread-safe initialization using pthread_once
static pthread_once_t mid_desc_init_once_control = PTHREAD_ONCE_INIT;
static void mid_desc_init_impl(void) {
    for (int i = 0; i < MID_DESC_BUCKETS; i++) {
        pthread_mutex_init(&g_mid_desc_mu[i], NULL);
        g_mid_desc_head[i] = NULL;
    }
}
static void mid_desc_init_once(void) {
    pthread_once(&mid_desc_init_once_control, mid_desc_init_impl);
}
static void mid_desc_register(void* page, int class_idx, uint64_t owner_tid) {
    mid_desc_init_once();
    uint32_t h = mid_desc_hash(page);
    pthread_mutex_lock(&g_mid_desc_mu[h]);
    MidPageDesc* d = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
    if (d) {
        d->page = page;
        d->class_idx = (uint8_t)class_idx;
        d->owner_tid = owner_tid;
        d->next = g_mid_desc_head[h];
        atomic_store(&d->in_use, 0);
        d->blocks_per_page = 0; // optional; not used for emptiness in P0
        atomic_store(&d->pending_dn, 0);
        g_mid_desc_head[h] = d;
    }
    pthread_mutex_unlock(&g_mid_desc_mu[h]);
}
static MidPageDesc* mid_desc_lookup(void* addr) {
    mid_desc_init_once();
    void* page = (void*)((uintptr_t)addr & ~((uintptr_t)POOL_PAGE_SIZE - 1));
    uint32_t h = mid_desc_hash(page);
    // Lockless traversal: buckets are prepend-only and nodes are never freed
    // in this file, so a racing reader sees a consistent (if slightly stale) chain.
    for (MidPageDesc* d = g_mid_desc_head[h]; d; d = d->next) {
        if (d->page == page) return d;
    }
    return NULL;
}
static void mid_desc_adopt(void* addr, int class_idx, uint64_t owner_tid) {
    if (owner_tid == 0) return;
    void* page = (void*)((uintptr_t)addr & ~((uintptr_t)POOL_PAGE_SIZE - 1));
    uint32_t h = mid_desc_hash(page);
    pthread_mutex_lock(&g_mid_desc_mu[h]);
    MidPageDesc* d = g_mid_desc_head[h];
    while (d) {
        if (d->page == page) break;
        d = d->next;
    }
    if (d) {
        if (d->owner_tid == 0) d->owner_tid = owner_tid; // claim an unowned page
    } else {
        MidPageDesc* nd = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
        if (nd) {
            nd->page = page;
            nd->class_idx = (uint8_t)class_idx;
            nd->owner_tid = owner_tid;
            // Initialize counters: hkm_libc_malloc memory is not zeroed, and
            // mid_page_inuse_inc/dec below read these atomics.
            atomic_store(&nd->in_use, 0);
            nd->blocks_per_page = 0;
            atomic_store(&nd->pending_dn, 0);
            nd->next = g_mid_desc_head[h];
            g_mid_desc_head[h] = nd;
        }
    }
    pthread_mutex_unlock(&g_mid_desc_mu[h]);
}
// Increment page in-use counter for given raw block pointer
static inline void mid_page_inuse_inc(void* raw) {
    MidPageDesc* d = mid_desc_lookup(raw);
    if (d) atomic_fetch_add_explicit(&d->in_use, 1, memory_order_relaxed);
}
// Decrement page in-use counter and enqueue DONTNEED when it drops to 0
extern int hak_batch_add_page(void* page, size_t size);
static inline void mid_page_inuse_dec_and_maybe_dn(void* raw) {
    MidPageDesc* d = mid_desc_lookup(raw);
    if (!d) return;
    int nv = atomic_fetch_sub_explicit(&d->in_use, 1, memory_order_relaxed) - 1;
    if (nv <= 0) {
        // Fire at most once per page: the exchange lets only the first
        // emptying thread enqueue the page (pending_dn is presumably re-armed
        // by the batch worker after processing; that code is outside this file).
        if (atomic_exchange_explicit(&d->pending_dn, 1, memory_order_acq_rel) == 0) {
            hak_batch_add_page(d->page, POOL_PAGE_SIZE);
        }
    }
}
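// Usage sketch (assumed call sites; the pool allocator that drives this
// registry lives elsewhere in hakmem, and pool_alloc_page is hypothetical):
//
//   void* page = pool_alloc_page();              // obtain a fresh 64KiB page
//   mid_desc_register(page, class_idx, my_tid);  // once per fresh page
//   ...
//   mid_page_inuse_inc(block);                   // on each block allocation
//   ...
//   mid_page_inuse_dec_and_maybe_dn(block);      // on each free; the last one
//                                                // enqueues the page for DONTNEED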
#endif // POOL_MID_DESC_INC_H