Files
hakmem/core/hakmem_tiny_slow.inc

90 lines
3.2 KiB
PHP
Raw Normal View History

// hakmem_tiny_slow.inc
// Slow path allocation implementation
// Slow path allocation function
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx) {
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx) {
#endif
(void)size; // size is already validated by caller
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return NULL;
}
// Try refilling from HotMag
if (g_hotmag_enable && class_idx <= 3) {
TinyHotMag* hm = &g_tls_hot_mag[class_idx];
hotmag_try_refill(class_idx, hm);
void* ptr = hotmag_pop(class_idx);
if (ptr) { HAK_RET_ALLOC(class_idx, ptr); }
}
// Try TLS SLL via Box (Phase12 正式経路)
// C7 は headerless: 既存仕様通り TLS/SLL をスキップ
if (g_tls_sll_enable && class_idx != 7) {
// Box: 単一APIで TLS SLL を扱う(内部で head/count/next を管理)
void* ptr = NULL;
if (tls_sll_pop(class_idx, &ptr)) {
return ptr;
}
}
// Try TLS list (legacy small-mag) via既存API構造体直接触らない
if (g_tls_list_enable && class_idx != 7) {
TinyTLSList* tls = &g_tls_lists[class_idx];
// Fail-Fast: guard against poisoned head (remote sentinel)
if (__builtin_expect((uintptr_t)tls->head == TINY_REMOTE_SENTINEL, 0)) {
tls->head = NULL;
tls->count = 0;
}
if (tls->count > 0) {
void* ptr = tls_list_pop(tls, class_idx);
if (ptr) {
HAK_RET_ALLOC(class_idx, ptr);
}
}
// Try refilling TLS list from TLS-cached Superslab slab
uint32_t want = tls->refill_low > 0 ? tls->refill_low : 32;
if (tls_refill_from_tls_slab(class_idx, tls, want) > 0) {
void* ptr = tls_list_pop(tls, class_idx);
if (ptr) {
HAK_RET_ALLOC(class_idx, ptr);
}
}
}
// Background coalescing/aggregation (ENV removed, fixed OFF)
CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
do {
(void)class_idx; // Background steps disabled
CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
} while (0);
// Final fallback: allocate from superslab via Box API wrapper (Stage A)
// NOTE:
// - hak_tiny_alloc_superslab_box() is a thin façade over the legacy
// per-class SuperslabHead backend in Phase 12 Stage A.
// - Callers (slow path) no longer depend on internal Superslab layout.
void* ss_ptr = hak_tiny_alloc_superslab_box(class_idx);
if (ss_ptr) {
HAK_RET_ALLOC(class_idx, ss_ptr);
}
tiny_alloc_dump_tls_state(class_idx, "slow_fail", &g_tls_slabs[class_idx]);
// Optional one-shot debug when final slow path fails
static int g_alloc_dbg = -1; if (__builtin_expect(g_alloc_dbg == -1, 0)) { const char* e=getenv("HAKMEM_TINY_ALLOC_DEBUG"); g_alloc_dbg = (e && atoi(e)!=0)?1:0; }
if (g_alloc_dbg) {
static _Atomic int printed[8]; int exp=0;
if (atomic_compare_exchange_strong(&printed[class_idx], &exp, 1)) {
fprintf(stderr, "[ALLOC-SLOW] hak_tiny_alloc_superslab returned NULL class=%d size=%zu\n", class_idx, size);
}
}
return ss_ptr;
}