Files
hakmem/core/hakmem_tiny_lifecycle.inc
Moe Charm (CI) 1da8754d45 CRITICAL FIX: completely eliminate the 4T SEGV caused by uninitialized TLS
**Problem:**
- Larson at 4T SEGVs 100% of the time (1T completes at 2.09M ops/s)
- System/mimalloc runs correctly at 4T (33.52M ops/s)
- SEGV at 4T persists even with SS OFF + Remote OFF

**Root cause (Task agent ultrathink investigation):**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (garbage value, uninitialized TLS)
```

TLS variables in worker threads were uninitialized:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← no initializer
- threads created via pthread_create() did not see these zero-initialized
- the NULL check passed (0x6261 != NULL) → dereference → SEGV
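
A minimal sketch of the failure mode, assuming a hypothetical fast-path pop over the `g_tls_sll_head` array (the crash site `mov (%r15),%r13` corresponds to the head dereference):

```c
#include <stddef.h>

#define TINY_NUM_CLASSES 8            /* illustrative value, not the real constant */

/* No initializer: the commit reports worker threads observing garbage here. */
__thread void* g_tls_sll_head[TINY_NUM_CLASSES];

/* Hypothetical fast-path pop, shaped like the crash site. */
static void* tiny_sll_pop(int cls) {
    void* head = g_tls_sll_head[cls];
    if (head == NULL) return NULL;        /* garbage such as 0x6261 passes this check */
    g_tls_sll_head[cls] = *(void**)head;  /* SEGV: dereferences the garbage pointer */
    return head;
}
```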

**Fix:**
Added an explicit initializer `= {0}` to every TLS array (minimal sketch after the list):

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
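
A minimal sketch of the pattern applied above, using the first two arrays from the list; the element types other than the list heads are assumed, since the commit lists only the names:

```c
#include <stdint.h>

#define TINY_NUM_CLASSES 8   /* illustrative value, not the real constant */

/* Explicit `= {0}` gives each TLS array a zero-initialized image in every thread. */
__thread void*    g_tls_sll_head[TINY_NUM_CLASSES]  = {0};
__thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES] = {0};  /* assumed element type */
```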

**Effect:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% at 1T, SEGV resolved)
```

**Tests:**
```bash
# 1 thread: completes
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: completes (previously SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**Investigation credit:** flawless root-cause identification by the Task agent (ultrathink mode)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00


// hakmem_tiny_lifecycle.inc
// Phase 2D-3: Lifecycle management functions extraction
//
// This file contains lifecycle management functions extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by ~226 lines (16%).
//
// Functions:
// - hak_tiny_trim(): Trim and cleanup operations
// - tiny_tls_cache_drain(): TLS cache draining
// - tiny_apply_mem_diet(): Memory diet mode application
//
// Cold/maintenance path - not performance critical.
#include "tiny_tls_guard.h"
void hak_tiny_trim(void) {
    static _Atomic int g_trim_call_count = 0;
    int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
    if (call_count < 5) { // First 5 calls only
        fprintf(stderr, "[DEBUG hak_tiny_trim] Call #%d\n", call_count + 1);
    }
    if (!g_tiny_initialized) return;
    // Lazy init for SS reserve env (expected to run once; -1 means uninitialized)
    if (__builtin_expect(g_empty_reserve == -1, 0)) {
        char* er = getenv("HAKMEM_TINY_SS_RESERVE");
        int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
        if (v < 0) {
            v = 0;
        } else if (v > 4) {
            v = 4; // guardrails
        }
        g_empty_reserve = v;
    }
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        tiny_tls_cache_drain(class_idx);
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        pthread_mutex_lock(lock);
        TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
        TinySlab* prev = NULL;
        TinySlab* slab = *head;
        while (slab) {
            TinySlab* next = slab->next;
            if (slab->free_count == slab->total_count) {
                if (prev) prev->next = next; else *head = next;
                release_slab(slab);
                slab = next;
                continue;
            }
            prev = slab;
            slab = next;
        }
        pthread_mutex_unlock(lock);
    }
    // Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
    static int g_trim_ss_enabled = -1;
    static int g_ss_partial_env = -1;
    if (g_trim_ss_enabled == -1) {
        char* env = getenv("HAKMEM_TINY_TRIM_SS");
        if (env) {
            g_trim_ss_enabled = (atoi(env) != 0) ? 1 : 0;
        } else {
            g_trim_ss_enabled = 1; // default ON for better memory efficiency
        }
    }
    if (g_ss_partial_env == -1) {
        char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
        if (env) {
            g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
        }
        char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
        if (interval) {
            int v = atoi(interval);
            if (v < 1) v = 1;
            g_ss_partial_interval = (uint32_t)v;
        }
        g_ss_partial_env = 1;
    }
    if (!g_trim_ss_enabled) return;
    uint32_t partial_epoch = 0;
    if (g_ss_partial_enable) {
        partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
    }
    // Walk the registry and collect empty SuperSlabs by class
    for (int i = 0; i < SUPER_REG_SIZE; i++) {
        SuperRegEntry* e = &g_super_reg[i];
        uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
        if (base == 0) continue;
        SuperSlab* ss = e->ss;
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
        // Only consider completely empty SuperSlabs
        uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
        static _Atomic int g_debug_ss_scan = 0;
        int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
        if (scan_count < 20) { // First 20 SS scans
            fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
                    (void*)ss, ss->size_class, active);
        }
        if (active != 0) continue;
        int k = ss->size_class;
        if (k < 0 || k >= TINY_NUM_CLASSES) continue;
        // Do not free if current thread still caches this SS in TLS
        if (g_tls_slabs[k].ss == ss) continue;
        // Keep up to EMPTY_SUPERSLAB_RESERVE per class as reserve; free extras
        pthread_mutex_lock(&g_empty_lock);
        if (g_empty_reserve == 0) {
            pthread_mutex_unlock(&g_empty_lock);
            if (superslab_ref_get(ss) == 0) {
                superslab_free(ss);
            }
            continue;
        }
        if (g_empty_superslabs[k] == NULL) {
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        // If same as reserved, nothing to do
        if (g_empty_superslabs[k] == ss) {
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        int can_free = (g_empty_counts[k] >= g_empty_reserve);
        if (!can_free) {
            // Replace reserve with this newer SS
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        pthread_mutex_unlock(&g_empty_lock);
        // Free outside of the empty lock (conservative: only when refcount == 0)
        if (superslab_ref_get(ss) == 0) {
            superslab_free(ss);
        }
    }
}
static void tiny_tls_cache_drain(int class_idx) {
    TinyTLSList* tls = &g_tls_lists[class_idx];
    // Drain TLS SLL cache
    void* sll = g_tls_sll_head[class_idx];
    g_tls_sll_head[class_idx] = NULL;
    g_tls_sll_count[class_idx] = 0;
    while (sll) {
        void* next = *(void**)sll;
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll);
        sll = next;
    }
    // Drain fast tier cache
    void* fast = g_fast_head[class_idx];
    g_fast_head[class_idx] = NULL;
    g_fast_count[class_idx] = 0;
    while (fast) {
        void* next = *(void**)fast;
        tiny_tls_list_guard_push(class_idx, tls, fast);
        tls_list_push(tls, fast);
        fast = next;
    }
    // Spill TLS list back to owners
    void* head = NULL;
    void* tail = NULL;
    while (1) {
        uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail);
        if (taken == 0u || head == NULL) break;
        void* cur = head;
        while (cur) {
            void* next = *(void**)cur;
            SuperSlab* ss = hak_super_lookup(cur);
            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                hak_tiny_free_superslab(cur, ss);
            } else {
                TinySlab* slab = hak_tiny_owner_slab(cur);
                if (slab) {
                    int cls = slab->class_idx;
                    size_t block_size = g_tiny_class_sizes[cls];
                    int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
                    pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
                    pthread_mutex_lock(lock);
                    if (hak_tiny_is_used(slab, block_idx)) {
                        hak_tiny_set_free(slab, block_idx);
                        int was_full = (slab->free_count == 0);
                        slab->free_count++;
                        g_tiny_pool.free_count[cls]++;
                        if (was_full) {
                            move_to_free_list(cls, slab);
                        }
                        if (slab->free_count == slab->total_count) {
                            TinySlab** headp = &g_tiny_pool.free_slabs[cls];
                            TinySlab* prev = NULL;
                            for (TinySlab* s = *headp; s; prev = s, s = s->next) {
                                if (s == slab) {
                                    if (prev) prev->next = s->next;
                                    else *headp = s->next;
                                    break;
                                }
                            }
                            release_slab(slab);
                        }
                    }
                    pthread_mutex_unlock(lock);
                }
            }
            cur = next;
        }
    }
    // Release TLS-bound SuperSlab reference when caches are empty
    TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
    SuperSlab* held_ss = tls_slab->ss;
    if (held_ss) {
        int keep_binding = 0;
        if (tls_slab->meta && tls_slab->meta->used > 0) {
            keep_binding = 1;
        }
        if (!keep_binding) {
            tls_slab->ss = NULL;
            tls_slab->meta = NULL;
            tls_slab->slab_base = NULL;
            tls_slab->slab_idx = 0;
            superslab_ref_dec(held_ss);
        }
    }
    g_tls_active_slab_a[class_idx] = NULL;
    g_tls_active_slab_b[class_idx] = NULL;
}
static void tiny_apply_mem_diet(void) {
    g_mag_cap_limit = 64;
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        if (g_fast_cap[class_idx] > 0) {
            uint16_t limit = (class_idx <= 3) ? 48 : 32;
            if (limit < 16) limit = 16;
            if (g_fast_cap[class_idx] > limit) {
                g_fast_cap[class_idx] = limit;
            }
        }
        TinyTLSList* tls = &g_tls_lists[class_idx];
        uint32_t new_cap = tls->cap;
        if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
        if (new_cap < 16u) new_cap = 16u;
        tls->cap = new_cap;
        tls->refill_low = tiny_tls_default_refill(new_cap);
        tls->spill_high = tiny_tls_default_spill(new_cap);
        tiny_tls_publish_targets(class_idx, new_cap);
    }
}