Files
hakmem/core/tiny_ready.h

92 lines
3.6 KiB
C
Raw Normal View History

CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
// tiny_ready.h - Ready List box (per-class, slab-entry hints)
// Purpose: O(1)-ish adopt candidate discovery to bypass deep scans in refill.
// Design: Lock-free ring of encoded slab entries (ss+slab_idx). Best-effort hints.
// Boundary:
// - Producer: pushes at the publish boundary (ss_partial_publish), on the first remote arrival, and on first-free (prev==NULL)
// - Consumer: at the refill boundary (the start of tiny_refill_try_fast): pop -> acquire owner -> bind
// A/B: set ENV HAKMEM_TINY_READY=0 to disable
#pragma once
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include "hakmem_tiny.h"
// Number of slots in each per-class ring. Can be overridden at compile time by
// defining TINY_READY_RING before this header is included.
#ifndef TINY_READY_RING
#define TINY_READY_RING 128
#endif
// Per-class ring buffer of encoded slab entries.
// A slot value of 0 means "empty": tiny_ready_push only claims slots that hold 0,
// and tiny_ready_pop resets a slot to 0 when it takes the entry.
// NOTE(review): both arrays are `static` in a header, so every translation unit
// that includes this file gets its own private copy -- confirm the header is only
// included from a single TU.
static _Atomic(uintptr_t) g_ready_ring[TINY_NUM_CLASSES][TINY_READY_RING];
// Per-class counter that tiny_ready_push advances to choose its starting slot.
static _Atomic(uint32_t) g_ready_rr[TINY_NUM_CLASSES];
// Report whether the ready ring is enabled (returns 1) or disabled (returns 0).
// The answer is resolved from the environment on the first call and cached in a
// function-local static, so later calls do not touch getenv():
//   - HAKMEM_TINY_DISABLE_READY parses to a non-zero integer -> disabled
//     (hard gate for isolation runs; checked first, so it wins).
//   - HAKMEM_TINY_READY starts with '0'                       -> disabled.
//   - anything else, including both variables unset           -> enabled.
// NOTE(review): the cache is a plain (non-atomic) static int. Concurrent first
// calls would race on it; confirm the first call happens before worker threads
// start, or that this is acceptable.
static inline int tiny_ready_enabled(void) {
    static int g_ready_en = -1;  // -1 = not resolved yet
    if (__builtin_expect(g_ready_en == -1, 0)) {
        // Hard disable gate for isolation runs
        const char* dis = getenv("HAKMEM_TINY_DISABLE_READY");
        // strtol instead of atoi: atoi has undefined behavior when the value
        // does not fit in an int, strtol saturates.
        if (dis && strtol(dis, NULL, 10) != 0) {
            g_ready_en = 0;
            return g_ready_en;
        }
        const char* e = getenv("HAKMEM_TINY_READY");
        // Default ON unless explicitly disabled
        g_ready_en = (e && *e == '0') ? 0 : 1;
    }
    return g_ready_en;
}
// Optional: limit scan width (ENV: HAKMEM_TINY_READY_WIDTH, default TINY_READY_RING).
// Returns the number of ring slots to use, always in [1, TINY_READY_RING] for a
// positive TINY_READY_RING. The environment is read on the first call only; the
// result is cached in a function-local static.
//   - unset, empty, or a value <= 0      -> TINY_READY_RING
//   - a value above TINY_READY_RING      -> TINY_READY_RING
//   - otherwise                          -> the parsed value
// NOTE(review): the cache is a plain (non-atomic) static int; concurrent first
// calls would race on it -- confirm this is acceptable.
static inline int tiny_ready_width(void) {
    static int w = -1;  // -1 = not resolved yet
    if (__builtin_expect(w == -1, 0)) {
        int width = TINY_READY_RING;
        const char* e = getenv("HAKMEM_TINY_READY_WIDTH");
        if (e && *e) {
            // strtol instead of atoi: atoi has undefined behavior when the
            // value does not fit in an int; strtol saturates at LONG_MIN /
            // LONG_MAX, which the range check below maps to the default.
            long v = strtol(e, NULL, 10);
            if (v > 0 && v < TINY_READY_RING) {
                width = (int)v;
            }
        }
        w = width;
    }
    return w;
}
// Encode helpers for ring entries. Only forward declarations live here; per the
// original note the helpers belong to the main translation unit, so the
// definitions must be visible in any TU that uses this header.
// slab_entry_make packs (ss, slab_idx) into the uintptr_t stored in the ring.
// slab_entry_ss / slab_entry_idx presumably recover the SuperSlab pointer and the
// slab index from such an entry -- confirm against their definitions.
static inline uintptr_t slab_entry_make(SuperSlab* ss, int slab_idx);
static inline SuperSlab* slab_entry_ss(uintptr_t ent);
static inline int slab_entry_idx(uintptr_t ent);
// Push: best-effort publish of a (ss, slab_idx) hint into the ring of class_idx.
// Tries up to 4 consecutive slots and silently drops the hint if none is empty.
// Does nothing when the ring is disabled, class_idx is outside
// [0, TINY_NUM_CLASSES), ss is NULL, or slab_idx is outside
// [0, ss_slabs_capacity(ss)).
// NOTE(review): assumes slab_entry_make never returns 0 for a valid pair, since
// 0 is the "empty slot" marker -- confirm against its definition.
static inline void tiny_ready_push(int class_idx, SuperSlab* ss, int slab_idx) {
    if (!tiny_ready_enabled()) return;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) return;
    if (__builtin_expect(ss == NULL || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss), 0)) return;

    uintptr_t ent = slab_entry_make(ss, slab_idx);

    // tiny_ready_pop only scans slots [0, tiny_ready_width()). Wrapping on the
    // same width keeps every published entry reachable; wrapping on the full
    // ring size would strand entries (and permanently occupy slots) beyond the
    // scan window whenever HAKMEM_TINY_READY_WIDTH is smaller than the ring.
    uint32_t width = (uint32_t)tiny_ready_width();
    uint32_t start = atomic_fetch_add_explicit(&g_ready_rr[class_idx], 1u, memory_order_relaxed);

    // Try up to 4 slots to reduce collisions
    for (int k = 0; k < 4; k++) {
        uint32_t idx = (start + (uint32_t)k) % width;
        uintptr_t expected = 0;
        // Strong CAS: each slot is attempted exactly once, so a spurious
        // failure of the weak form would skip a slot that is actually empty.
        // Release ordering publishes the entry to the acquiring pop.
        if (atomic_compare_exchange_strong_explicit(&g_ready_ring[class_idx][idx], &expected, ent,
                                                    memory_order_release, memory_order_relaxed)) {
            return;
        }
    }
    // Drop if all tried slots were busy (hint ring, loss is acceptable)
}
// Pop any entry: scans slots [0, tiny_ready_width()) of the class_idx ring once,
// in index order, and returns the first non-zero entry after clearing its slot.
// Returns 0 when the ring is disabled, class_idx is outside
// [0, TINY_NUM_CLASSES), or no entry was found.
// Per the original note this is meant for the refill-miss path, not the hot path.
static inline uintptr_t tiny_ready_pop(int class_idx) {
    if (!tiny_ready_enabled()) return (uintptr_t)0;
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) return (uintptr_t)0;

    int scan = tiny_ready_width();
    for (int i = 0; i < scan; i++) {
        _Atomic(uintptr_t)* slot = &g_ready_ring[class_idx][i];
        // Peek with a plain load first so empty slots are skipped without an
        // atomic read-modify-write (which would write to every slot's cache
        // line on each scan).
        if (atomic_load_explicit(slot, memory_order_relaxed) == (uintptr_t)0) continue;
        // Claim the slot. Another consumer may have emptied it since the peek,
        // in which case the exchange yields 0 and the scan moves on.
        uintptr_t ent = atomic_exchange_explicit(slot, (uintptr_t)0, memory_order_acq_rel);
        if (ent) return ent;
    }
    return (uintptr_t)0;
}