Files
hakmem/core/hakmem_tiny_bump.inc.h
Moe Charm (CI) 1da8754d45 CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消
**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV

**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (ゴミ値、未初期化TLS)
```

Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV

**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`

**効果:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV解消)
```

**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00

108 lines
3.4 KiB
C

// hakmem_tiny_bump.inc.h
// Layer 1: TLS Bump Allocator (Ultra-fast path)
//
// Purpose: 2-3 instruction allocation for hot classes (8B, 16B, 32B)
// Design: next = bcur + size; if (next <= bend) { bcur = next; return old bcur; } else return NULL;
//
// Part of 3-layer architecture simplification (2025-11-01)
// Based on ChatGPT Pro UltraThink recommendations
#ifndef HAKMEM_TINY_BUMP_INC_H
#define HAKMEM_TINY_BUMP_INC_H
// Branch-prediction hints. Each macro collapses its argument to 0/1 with `!!`
// and forwards it to __builtin_expect together with the expected outcome.
// The guards keep any definition that was already in effect before this point.
#if !defined(likely)
#define likely(cond) __builtin_expect(!!(cond), 1)
#endif
#if !defined(unlikely)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#endif
// ============================================================================
// Data Structure
// ============================================================================
// Bump region descriptor: blocks are carved out of the range [bcur, bend).
typedef struct {
    void *bcur;  // Current bump pointer: next address to hand out
    void *bend;  // Bump end (exclusive): first address past the region
} TinyBump;
// Per-class bump allocator (hot classes only: class 0, 1, 2 = 8B, 16B, 32B)
static __thread TinyBump g_tiny_bump[3] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}};
// ============================================================================
// Layer 1: Ultra-fast bump allocation (2-3 instructions/op)
// ============================================================================
// Class 0: 8B
__attribute__((always_inline))
static inline void* tiny_bump_alloc_8B(void) {
void* old = g_tiny_bump[0].bcur;
void* new_cur = (char*)old + 8;
if (likely(new_cur <= g_tiny_bump[0].bend)) {
g_tiny_bump[0].bcur = new_cur;
return old;
}
return NULL; // Exhausted, fallback to Layer 2
}
// Class 1: 16B
__attribute__((always_inline))
static inline void* tiny_bump_alloc_16B(void) {
void* old = g_tiny_bump[1].bcur;
void* new_cur = (char*)old + 16;
if (likely(new_cur <= g_tiny_bump[1].bend)) {
g_tiny_bump[1].bcur = new_cur;
return old;
}
return NULL; // Exhausted, fallback to Layer 2
}
// Class 2: 32B
__attribute__((always_inline))
static inline void* tiny_bump_alloc_32B(void) {
void* old = g_tiny_bump[2].bcur;
void* new_cur = (char*)old + 32;
if (likely(new_cur <= g_tiny_bump[2].bend)) {
g_tiny_bump[2].bcur = new_cur;
return old;
}
return NULL; // Exhausted, fallback to Layer 2
}
// Generic bump alloc (for use in slow path): routes class_idx to the matching
// fixed-size allocator (0 -> 8B, 1 -> 16B, 2 -> 32B) and returns its result.
// Any other index is not a hot class and yields NULL.
__attribute__((always_inline))
static inline void* tiny_bump_alloc(int class_idx) {
    switch (class_idx) {
    case 0:
        return tiny_bump_alloc_8B();
    case 1:
        return tiny_bump_alloc_16B();
    case 2:
        return tiny_bump_alloc_32B();
    default:
        return NULL;  // Not a hot class
    }
}
// ============================================================================
// Bump refill (called from Layer 3: slow path)
// ============================================================================
__attribute__((noinline, unused))
static void tiny_bump_refill(int class_idx, void* base, size_t size) {
if (class_idx < 0 || class_idx > 2) return; // Only hot classes
g_tiny_bump[class_idx].bcur = base;
g_tiny_bump[class_idx].bend = (char*)base + size;
}
// Reset bump allocator (e.g., thread shutdown)
static void tiny_bump_reset(int class_idx) {
if (class_idx < 0 || class_idx > 2) return;
g_tiny_bump[class_idx].bcur = NULL;
g_tiny_bump[class_idx].bend = NULL;
}
// Reset all bump allocators
static __attribute__((unused)) void tiny_bump_reset_all(void) {
for (int i = 0; i < 3; i++) {
tiny_bump_reset(i);
}
}
#endif // HAKMEM_TINY_BUMP_INC_H