# Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented the Phase 12 Shared SuperSlab Pool (mimalloc-style) to reduce
SuperSlab allocation churn (currently 877 SuperSlabs; target 100-200).

## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h; see the sketch after this list):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (the class is no longer fixed per SuperSlab)
   - Narrowed owner_tid (16-bit) → owner_tid_low (8-bit)

2. **Shared Pool** (hakmem_shared_pool.{h,c}):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab() - Get free slab for class_idx
   - shared_pool_release_slab() - Return slab when empty
   - Per-class hints for fast path optimization

3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses shared pool
   - Free path releases empty slabs back to pool

4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
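
The sketch below illustrates items 1-3. Field widths, the placement of the
`used` field, and the exact signatures are illustrative assumptions, not the
real definitions in superslab_types.h / hakmem_shared_pool.h:

```c
/* Illustrative sketch only (assumed layout and signatures); the real
 * definitions live in superslab_types.h and hakmem_shared_pool.h. */
#include <stdint.h>

typedef struct TinySlabMeta {
    uint8_t  class_idx;      /* NEW: size class is per-slab now, not per-SuperSlab */
    uint8_t  owner_tid_low;  /* was 16-bit owner_tid; narrowed to the low 8 bits   */
    uint16_t used;           /* live blocks (width assumed); other fields omitted  */
} TinySlabMeta;

/* One global pool serves all size classes (return types assumed). */
TinySlabMeta* shared_pool_acquire_slab(int class_idx);      /* get a free slab      */
void          shared_pool_release_slab(TinySlabMeta* meta); /* return an empty slab */

/* Item 3 call-site migration pattern:
 *   before: g_tiny_class_sizes[ss->size_class]
 *   after:  g_tiny_class_sizes[meta->class_idx]
 */
```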

## Status: ⚠️ Build OK, Runtime CRASH

**Build**: SUCCESS
- All 23 modified files compile without errors
- Only warning: a superslab_allocate type mismatch (legacy code)

**Runtime**: SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
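
A minimal repro under gdb (assuming the benchmark binary was built as above):

```
$ gdb --args ./bench_random_mixed_hakmem 1000 256 42
(gdb) run
(gdb) bt    # backtrace lands in sll_refill_small_from_ss()
```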

## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove old per-class head implementation

## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)

## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile

## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>

## File: core/hakmem_tiny_lifecycle.inc

// hakmem_tiny_lifecycle.inc
// Phase 2D-3: Lifecycle management functions extraction
//
// This file contains lifecycle management functions extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by ~226 lines (16%).
//
// Functions:
// - hak_tiny_trim(): Trim and cleanup operations
// - tiny_tls_cache_drain(): TLS cache draining
// - tiny_apply_mem_diet(): Memory diet mode application
//
// Cold/maintenance path - not performance critical.
#include "tiny_tls_guard.h"
// Phase 12: Helper to derive a representative class index for a SuperSlab
// from per-slab metadata (all slabs are empty when used in trim).
static inline int superslab_any_class_idx(SuperSlab* ss) {
    if (!ss) return -1;
    int cap = ss_slabs_capacity(ss);
    for (int s = 0; s < cap; s++) {
        uint8_t cls = ss->slabs[s].class_idx;
        if (cls < TINY_NUM_CLASSES) return (int)cls;
    }
    return -1;
}
void hak_tiny_trim(void) {
    static _Atomic int g_trim_call_count = 0;
    int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
    if (call_count < 5) { // Log the first 5 calls only
        fprintf(stderr, "[DEBUG hak_tiny_trim] Call #%d\n", call_count + 1);
    }
    if (!g_tiny_initialized) return;

    // Lazy init of the SuperSlab reserve count from HAKMEM_TINY_SS_RESERVE
    if (__builtin_expect(g_empty_reserve == -1, 0)) {
        char* er = getenv("HAKMEM_TINY_SS_RESERVE");
        int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
        if (v < 0) {
            v = 0;
        } else if (v > 4) {
            v = 4; // guardrails
        }
        g_empty_reserve = v;
    }
    // Drain TLS caches, then release fully-free TinySlabs for every class
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        tiny_tls_cache_drain(class_idx);
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        pthread_mutex_lock(lock);
        TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
        TinySlab* prev = NULL;
        TinySlab* slab = *head;
        while (slab) {
            TinySlab* next = slab->next;
            if (slab->free_count == slab->total_count) {
                // Fully free: unlink from the free list and release
                if (prev) prev->next = next; else *head = next;
                release_slab(slab);
                slab = next;
                continue;
            }
            prev = slab;
            slab = next;
        }
        pthread_mutex_unlock(lock);
    }
    // Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
    static int g_trim_ss_enabled = -1;
    static int g_ss_partial_env = -1;
    if (g_trim_ss_enabled == -1) {
        char* env = getenv("HAKMEM_TINY_TRIM_SS");
        if (env) {
            g_trim_ss_enabled = (atoi(env) != 0) ? 1 : 0;
        } else {
            g_trim_ss_enabled = 1; // default ON for better memory efficiency
        }
    }
    if (g_ss_partial_env == -1) {
        char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
        if (env) {
            g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
        }
        char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
        if (interval) {
            int v = atoi(interval);
            if (v < 1) v = 1;
            g_ss_partial_interval = (uint32_t)v;
        }
        g_ss_partial_env = 1;
    }
    if (!g_trim_ss_enabled) return;

    uint32_t partial_epoch = 0;
    if (g_ss_partial_enable) {
        partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
    }
    // Walk the registry and collect empty SuperSlabs by class
    for (int i = 0; i < SUPER_REG_SIZE; i++) {
        SuperRegEntry* e = &g_super_reg[i];
        uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
        if (base == 0) continue;
        SuperSlab* ss = e->ss;
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;

        // Only consider completely empty SuperSlabs
        uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
        static _Atomic int g_debug_ss_scan = 0;
        int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
        if (scan_count < 20) { // Log the first 20 SS scans only
            int log_cls = superslab_any_class_idx(ss);
            fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
                    (void*)ss, log_cls, active);
        }
        if (active != 0) continue;

        int k = superslab_any_class_idx(ss);
        if (k < 0 || k >= TINY_NUM_CLASSES) continue;
        // Do not free if the current thread still caches this SS in TLS
        if (g_tls_slabs[k].ss == ss) continue;

        // Keep up to g_empty_reserve empty SuperSlabs per class; free extras
        pthread_mutex_lock(&g_empty_lock);
        if (g_empty_reserve == 0) {
            pthread_mutex_unlock(&g_empty_lock);
            if (superslab_ref_get(ss) == 0) {
                superslab_free(ss);
            }
            continue;
        }
        if (g_empty_superslabs[k] == NULL) {
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        // If same as the reserved one, nothing to do
        if (g_empty_superslabs[k] == ss) {
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        int can_free = (g_empty_counts[k] >= g_empty_reserve);
        if (!can_free) {
            // Replace the reserve with this newer SS
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        pthread_mutex_unlock(&g_empty_lock);
        // Free outside of the empty lock (conservative: only when refcount == 0)
        if (superslab_ref_get(ss) == 0) {
            superslab_free(ss);
        }
    }
}
static void tiny_tls_cache_drain(int class_idx) {
    TinyTLSList* tls = &g_tls_lists[class_idx];

    // Phase E1-CORRECT: Drain the TLS SLL cache for ALL classes
    void* sll = g_tls_sll_head[class_idx];
    g_tls_sll_head[class_idx] = NULL;
    g_tls_sll_count[class_idx] = 0;
    while (sll) {
        void* next = tiny_next_read(class_idx, sll);
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll, class_idx);
        sll = next;
    }

    // Phase E1-CORRECT: Drain the fast-tier cache for ALL classes
    void* fast = g_fast_head[class_idx];
    g_fast_head[class_idx] = NULL;
    g_fast_count[class_idx] = 0;
    while (fast) {
        void* next = tiny_next_read(class_idx, fast);
        tiny_tls_list_guard_push(class_idx, tls, fast);
        tls_list_push(tls, fast, class_idx);
        fast = next;
    }
    // Spill the TLS list back to the owning slabs
    void* head = NULL;
    void* tail = NULL;
    while (1) {
        uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail, class_idx);
        if (taken == 0u || head == NULL) break;
        void* cur = head;
        while (cur) {
            void* next = tiny_next_read(class_idx, cur);
            SuperSlab* ss = hak_super_lookup(cur);
            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                hak_tiny_free_superslab(cur, ss);
            } else {
                TinySlab* slab = hak_tiny_owner_slab(cur);
                if (slab) {
                    int cls = slab->class_idx;
                    size_t block_size = g_tiny_class_sizes[cls];
                    int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
                    pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
                    pthread_mutex_lock(lock);
                    if (hak_tiny_is_used(slab, block_idx)) {
                        hak_tiny_set_free(slab, block_idx);
                        int was_full = (slab->free_count == 0);
                        slab->free_count++;
                        g_tiny_pool.free_count[cls]++;
                        if (was_full) {
                            move_to_free_list(cls, slab);
                        }
                        if (slab->free_count == slab->total_count) {
                            TinySlab** headp = &g_tiny_pool.free_slabs[cls];
                            TinySlab* prev = NULL;
                            for (TinySlab* s = *headp; s; prev = s, s = s->next) {
                                if (s == slab) {
                                    if (prev) prev->next = s->next;
                                    else *headp = s->next;
                                    break;
                                }
                            }
                            release_slab(slab);
                        }
                    }
                    pthread_mutex_unlock(lock);
                }
            }
            cur = next;
        }
    }
    // Release the TLS-bound SuperSlab reference when the caches are empty
    TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
    SuperSlab* held_ss = tls_slab->ss;
    if (held_ss) {
        int keep_binding = 0;
        if (tls_slab->meta && tls_slab->meta->used > 0) {
            keep_binding = 1;
        }
        if (!keep_binding) {
            tls_slab->ss = NULL;
            tls_slab->meta = NULL;
            tls_slab->slab_base = NULL;
            tls_slab->slab_idx = 0;
            superslab_ref_dec(held_ss);
        }
    }
    g_tls_active_slab_a[class_idx] = NULL;
    g_tls_active_slab_b[class_idx] = NULL;
}
static void tiny_apply_mem_diet(void) {
    g_mag_cap_limit = 64;
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        // Cap the fast-tier cache: 48 blocks for the smallest classes, 32 otherwise
        if (g_fast_cap[class_idx] > 0) {
            uint16_t limit = (class_idx <= 3) ? 48 : 32;
            if (limit < 16) limit = 16;
            if (g_fast_cap[class_idx] > limit) {
                g_fast_cap[class_idx] = limit;
            }
        }
        // Shrink the TLS list capacity and republish refill/spill thresholds
        TinyTLSList* tls = &g_tls_lists[class_idx];
        uint32_t new_cap = tls->cap;
        if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
        if (new_cap < 16u) new_cap = 16u;
        tls->cap = new_cap;
        tls->refill_low = tiny_tls_default_refill(new_cap);
        tls->spill_high = tiny_tls_default_spill(new_cap);
        tiny_tls_publish_targets(class_idx, new_cap);
    }
}