**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドではゼロ初期化されていなかった
- 0x6261 は NULL ではないため NULL チェックを通過 → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
108 lines
3.4 KiB
C
108 lines
3.4 KiB
C
// hakmem_tiny_bump.inc.h
|
|
// Layer 1: TLS Bump Allocator (Ultra-fast path)
|
|
//
|
|
// Purpose: 2-3 instruction allocation for hot classes (8B, 16B, 32B)
|
|
// Design: bcur += size; if (bcur <= bend) return old;
|
|
//
|
|
// Part of 3-layer architecture simplification (2025-11-01)
|
|
// Based on ChatGPT Pro UltraThink recommendations
|
|
|
|
#ifndef HAKMEM_TINY_BUMP_INC_H
|
|
#define HAKMEM_TINY_BUMP_INC_H
|
|
|
|
// Branch-prediction hint macros built on __builtin_expect.
// Each one is defined only when no macro of that name already exists, so an
// earlier definition from the including translation unit takes precedence.
// Both normalise their argument with !! so the result is always 0 or 1.
#if !defined(likely)
#define likely(x) __builtin_expect(!!(x), 1)
#endif

#if !defined(unlikely)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
|
|
|
|
// ============================================================================
|
|
// Data Structure
|
|
// ============================================================================
|
|
|
|
// Bump region descriptor: the bytes in [bcur, bend) have not been handed out yet.
typedef struct {
    void* bcur;  // Current bump pointer (next address to return)
    void* bend;  // Bump end (exclusive)
} TinyBump;
|
|
|
|
// Per-class bump allocator (hot classes only: class 0, 1, 2 = 8B, 16B, 32B)
|
|
static __thread TinyBump g_tiny_bump[3] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}};
|
|
|
|
// ============================================================================
|
|
// Layer 1: Ultra-fast bump allocation (2-3 instructions/op)
|
|
// ============================================================================
|
|
|
|
// Class 0: 8B
|
|
__attribute__((always_inline))
|
|
static inline void* tiny_bump_alloc_8B(void) {
|
|
void* old = g_tiny_bump[0].bcur;
|
|
void* new_cur = (char*)old + 8;
|
|
if (likely(new_cur <= g_tiny_bump[0].bend)) {
|
|
g_tiny_bump[0].bcur = new_cur;
|
|
return old;
|
|
}
|
|
return NULL; // Exhausted, fallback to Layer 2
|
|
}
|
|
|
|
// Class 1: 16B
|
|
__attribute__((always_inline))
|
|
static inline void* tiny_bump_alloc_16B(void) {
|
|
void* old = g_tiny_bump[1].bcur;
|
|
void* new_cur = (char*)old + 16;
|
|
if (likely(new_cur <= g_tiny_bump[1].bend)) {
|
|
g_tiny_bump[1].bcur = new_cur;
|
|
return old;
|
|
}
|
|
return NULL; // Exhausted, fallback to Layer 2
|
|
}
|
|
|
|
// Class 2: 32B
|
|
__attribute__((always_inline))
|
|
static inline void* tiny_bump_alloc_32B(void) {
|
|
void* old = g_tiny_bump[2].bcur;
|
|
void* new_cur = (char*)old + 32;
|
|
if (likely(new_cur <= g_tiny_bump[2].bend)) {
|
|
g_tiny_bump[2].bcur = new_cur;
|
|
return old;
|
|
}
|
|
return NULL; // Exhausted, fallback to Layer 2
|
|
}
|
|
|
|
// Generic bump alloc (for use in slow path)
//
// Dispatches class_idx 0/1/2 to the matching fixed-size bump allocator and
// returns whatever it returns. Any other index is not a hot class and
// yields NULL.
__attribute__((always_inline))
static inline void* tiny_bump_alloc(int class_idx) {
    switch (class_idx) {
    case 0:
        return tiny_bump_alloc_8B();
    case 1:
        return tiny_bump_alloc_16B();
    case 2:
        return tiny_bump_alloc_32B();
    default:
        return NULL;  // Not a hot class
    }
}
|
|
|
|
// ============================================================================
|
|
// Bump refill (called from Layer 3: slow path)
|
|
// ============================================================================
|
|
|
|
__attribute__((noinline, unused))
|
|
static void tiny_bump_refill(int class_idx, void* base, size_t size) {
|
|
if (class_idx < 0 || class_idx > 2) return; // Only hot classes
|
|
g_tiny_bump[class_idx].bcur = base;
|
|
g_tiny_bump[class_idx].bend = (char*)base + size;
|
|
}
|
|
|
|
// Reset bump allocator (e.g., thread shutdown)
|
|
static void tiny_bump_reset(int class_idx) {
|
|
if (class_idx < 0 || class_idx > 2) return;
|
|
g_tiny_bump[class_idx].bcur = NULL;
|
|
g_tiny_bump[class_idx].bend = NULL;
|
|
}
|
|
|
|
// Reset all bump allocators
//
// Calls tiny_bump_reset() for each of the three hot classes (0, 1, 2).
static __attribute__((unused)) void tiny_bump_reset_all(void) {
    int cls = 0;
    while (cls < 3) {
        tiny_bump_reset(cls);
        cls++;
    }
}
|
|
|
|
#endif // HAKMEM_TINY_BUMP_INC_H
|