**問題:**
- Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走)
- System/mimalloc は 4T で 33.52M ops/s 正常動作
- SS OFF + Remote OFF でも 4T で SEGV
**根本原因: (Task agent ultrathink 調査結果)**
```
CRASH: mov (%r15),%r13
R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS)
```
Worker スレッドの TLS 変数が未初期化:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし
- pthread_create() で生成されたスレッドでゼロ初期化されない
- NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV
**修正内容:**
全 TLS 配列に明示的初期化子 `= {0}` を追加:
1. **core/hakmem_tiny.c:**
- `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
- `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
- `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
- `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
- `g_tls_bend[TINY_NUM_CLASSES] = {0}`
2. **core/tiny_fastcache.c:**
- `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
- `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`
3. **core/hakmem_tiny_magazine.c:**
- `g_tls_mags[TINY_NUM_CLASSES] = {0}`
4. **core/tiny_sticky.c:**
- `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
- `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
**効果:**
```
Before: 1T: 2.09M ✅ | 4T: SEGV 💀
After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消)
```
**テスト:**
```bash
# 1 thread: 完走
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s ✅
# 4 threads: 完走(以前は SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s ✅
```
**調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
102 lines
3.2 KiB
C
// tiny_refill_opt.h - Inline helpers to batch and splice refill chains
|
|
// Box: Refill Boundary optimization helpers (kept header-only)
|
|
#pragma once
|
|
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdatomic.h>
|
|
#include <stdlib.h>
|
|
|
|
#ifndef HAKMEM_TINY_REFILL_OPT
|
|
#define HAKMEM_TINY_REFILL_OPT 1
|
|
#endif
|
|
|
|
// Locally built, singly linked chain of nodes gathered during a refill.
// Each node keeps the pointer to its successor in its first pointer-sized word.
typedef struct TinyRefillChain {
    void *head;      // first node of the chain; NULL while the chain is empty
    void *tail;      // last node of the chain
    uint32_t count;  // number of nodes currently linked into the chain
} TinyRefillChain;
|
|
|
|
// Reset a chain to the empty state: no head, no tail, zero nodes.
static inline void trc_init(TinyRefillChain* c) {
    *c = (TinyRefillChain){ .head = NULL, .tail = NULL, .count = 0 };
}
|
|
|
|
// One-shot debug trace for the refill-optimisation path.
//
// When HAKMEM_TINY_REFILL_OPT is non-zero and the environment variable
// HAKMEM_TINY_REFILL_OPT_DEBUG is set to a non-empty value that does not start
// with '0', the first call to reach the print emits a single line on stderr;
// every other call is silent. When HAKMEM_TINY_REFILL_OPT is zero the function
// compiles to a no-op.
//
//   stage     - label for the call site; NULL is printed as "(null)"
//   class_idx - size-class index to report
//   n         - node count to report
static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) {
#if HAKMEM_TINY_REFILL_OPT
    // -1 = environment not consulted yet, 0 = disabled, 1 = enabled.
    // Atomic because several threads may hit the lazy initialisation at once;
    // a plain int here would be a data race. All racers compute the same
    // value, so relaxed ordering is sufficient.
    static _Atomic int en = -1;
    static _Atomic int printed = 0;

    int enabled = atomic_load_explicit(&en, memory_order_relaxed);
    if (__builtin_expect(enabled == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
        enabled = (e && *e && *e != '0') ? 1 : 0;
        atomic_store_explicit(&en, enabled, memory_order_relaxed);
    }
    if (!enabled) return;

    // Cheap read first: once the message is out, later calls must not keep
    // issuing a CAS on the shared flag.
    if (atomic_load_explicit(&printed, memory_order_relaxed) != 0) return;

    int expected = 0;
    if (atomic_compare_exchange_strong(&printed, &expected, 1)) {
        fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n);
        fflush(stderr);
    }
#else
    (void)stage; (void)class_idx; (void)n;
#endif
}
|
|
|
|
// Prepend `node` to the chain. The node's first pointer-sized word is
// overwritten with the link to the previous head (NULL if the chain was empty).
static inline void trc_push_front(TinyRefillChain* c, void* node) {
    void* const prev_head = c->head;

    *(void**)node = prev_head;
    c->head = node;

    if (prev_head != NULL) {
        c->count++;
        return;
    }

    // First node in the chain: it is also the tail, and the count restarts at 1.
    c->tail = node;
    c->count = 1;
}
|
|
|
|
// Splice local chain into TLS SLL (single meta write)
|
|
static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
|
|
void** sll_head, uint32_t* sll_count) {
|
|
if (!c || c->head == NULL) return;
|
|
if (c->tail) {
|
|
*(void**)c->tail = *sll_head;
|
|
}
|
|
*sll_head = c->head;
|
|
if (sll_count) *sll_count += c->count;
|
|
}
|
|
|
|
// Pop up to 'want' nodes from freelist into local chain
|
|
static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
|
|
uint32_t want,
|
|
TinyRefillChain* out) {
|
|
if (!out || want == 0) return 0;
|
|
trc_init(out);
|
|
uint32_t taken = 0;
|
|
while (taken < want && meta->freelist) {
|
|
void* p = meta->freelist;
|
|
meta->freelist = *(void**)p;
|
|
trc_push_front(out, p);
|
|
taken++;
|
|
}
|
|
// DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
|
|
return taken;
|
|
}
|
|
|
|
// Carve a contiguous batch of size 'batch' from linear area, return as chain
|
|
static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
|
|
struct TinySlabMeta* meta,
|
|
uint32_t batch,
|
|
TinyRefillChain* out) {
|
|
if (!out || batch == 0) return 0;
|
|
trc_init(out);
|
|
uint8_t* cursor = base + ((size_t)meta->used * bs);
|
|
void* head = (void*)cursor;
|
|
for (uint32_t i = 1; i < batch; i++) {
|
|
uint8_t* next = cursor + bs;
|
|
*(void**)cursor = (void*)next;
|
|
cursor = next;
|
|
}
|
|
void* tail = (void*)cursor;
|
|
meta->used += batch;
|
|
out->head = head;
|
|
out->tail = tail;
|
|
out->count = batch;
|
|
// DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
|
|
return batch;
|
|
}
|