Files
hakmem/core/hakmem_tiny_magazine.c
Moe Charm (CI) 03df05ec75 Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).

## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (no longer per-SuperSlab)
   - Changed owner_tid (16-bit) → owner_tid_low (8-bit)

2. **Shared Pool** (hakmem_shared_pool.{h,c}):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab() - Get free slab for class_idx
   - shared_pool_release_slab() - Return slab when empty
   - Per-class hints for fast path optimization

3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses shared pool
   - Free path releases empty slabs back to pool

4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE

## Status: ⚠️ Build OK, Runtime CRASH

**Build**: SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)

**Runtime**: SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42

## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove old per-class head implementation

## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)

## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile

## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-13 16:33:03 +09:00

153 lines
5.6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "hakmem_tiny_magazine.h"
#include "hakmem_tiny_config.h" // Centralized configuration
#include "hakmem_tiny.h" // For TINY_NUM_CLASSES
#include "hakmem_tiny_superslab.h"
#include "hakmem_super_registry.h" // Phase 1: For hak_super_lookup()
#include "tiny_remote.h"
#include "hakmem_prof.h"
#include "hakmem_internal.h"
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
#include <pthread.h>
/*
 * Return a 32-bit identifier for the calling thread.
 *
 * The value is the pthread_self() handle converted to an integer and
 * truncated to its low 32 bits.
 */
static inline uint32_t tiny_self_u32_guard(void) {
    const uintptr_t self_bits = (uintptr_t)pthread_self();
    return (uint32_t)self_bits;
}
/*
 * Decrement ss->total_active_blocks by one without letting it drop below zero.
 *
 * A NULL ss is ignored. The counter is updated with a relaxed
 * compare-exchange loop: if the observed value is already 0 nothing is
 * written; otherwise the loop retries until the decrement is installed.
 */
static inline void superslab_dec_active_safe(SuperSlab* ss) {
    if (ss == NULL) {
        return;
    }

    uint32_t seen = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
    for (;;) {
        if (seen == 0u) {
            return; /* already at zero: never wrap around */
        }
        /* On failure the CAS refreshes `seen` with the current counter value. */
        if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks,
                                                  &seen,
                                                  seen - 1u,
                                                  memory_order_relaxed,
                                                  memory_order_relaxed)) {
            return;
        }
    }
}
// Per-thread magazine state: one TinyTLSMag slot per tiny size class.
// Slots start zeroed; the init functions in this file treat cap == 0 as
// "capacity not chosen yet".
__thread TinyTLSMag g_tls_mags[TINY_NUM_CLASSES] = {0};
// Global cap limiter (can be reduced via env HAKMEM_TINY_MAG_CAP).
// The init functions in this file clamp every magazine cap to this value.
// NOTE(review): the env parsing itself is not visible in this file.
int g_mag_cap_limit = TINY_TLS_MAG_CAP;
// Normal-path per-class overrides (env tunables HAKMEM_TINY_MAG_CAP_C{0..7}).
// A value > 0 takes precedence in tiny_effective_cap(); 0 means "no override".
// Also written at runtime by hkm_ace_set_tls_capacity().
int g_mag_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_MAG_CAP_C{0..7}
// Per-thread flag: set to 1 once tiny_small_mags_init_once() has run on this thread.
__thread int g_tls_small_mags_inited = 0;
// tiny_default_cap() and tiny_cap_max_for_class() are inline functions
// provided by hakmem_tiny_config.h (centralized configuration).

/*
 * Return the magazine capacity to use for class_idx.
 *
 * A positive entry in g_mag_cap_override[] wins; otherwise the centralized
 * default from tiny_default_cap() is returned.
 * class_idx is used as an array index without range checking.
 */
int tiny_effective_cap(int class_idx) {
    const int override_cap = g_mag_cap_override[class_idx];
    return (override_cap > 0) ? override_cap : tiny_default_cap(class_idx);
}
/*
 * One-time, per-thread capacity setup for the magazines of classes 0..3.
 *
 * Returns immediately when the thread-local g_tls_small_mags_inited flag is
 * already set. Otherwise every class 0..3 whose cap is still 0 gets
 * min(tiny_effective_cap(class), TINY_TLS_MAG_CAP, g_mag_cap_limit) as its
 * cap and an empty stack (top = 0); finally the flag is set.
 */
void tiny_small_mags_init_once(void) {
    if (__builtin_expect(g_tls_small_mags_inited, 1)) {
        return;
    }

    for (int cls = 0; cls < 4; cls++) {
        TinyTLSMag* slot = &g_tls_mags[cls];
        if (slot->cap != 0) {
            continue; /* capacity already chosen for this class */
        }

        int chosen = tiny_effective_cap(cls);
        if (!(chosen < TINY_TLS_MAG_CAP)) {
            chosen = TINY_TLS_MAG_CAP;
        }
        if (g_mag_cap_limit < chosen) {
            chosen = g_mag_cap_limit;
        }

        slot->cap = chosen;
        slot->top = 0;
    }

    g_tls_small_mags_inited = 1;
}
/*
 * Lazily choose the capacity of the calling thread's magazine for class_idx.
 *
 * Does nothing when the magazine already has a non-zero cap. Otherwise sets
 * cap to min(tiny_effective_cap(class_idx), TINY_TLS_MAG_CAP, g_mag_cap_limit)
 * and resets top to 0.
 * class_idx is used as an array index without range checking.
 */
void tiny_mag_init_if_needed(int class_idx) {
    TinyTLSMag* slot = &g_tls_mags[class_idx];
    if (slot->cap != 0) {
        return;
    }

    int chosen = tiny_effective_cap(class_idx);
    if (!(chosen < TINY_TLS_MAG_CAP)) {
        chosen = TINY_TLS_MAG_CAP;
    }
    if (g_mag_cap_limit < chosen) {
        chosen = g_mag_cap_limit;
    }

    slot->cap = chosen;
    slot->top = 0;
}
// ============================================================================
// ACE Learning Layer: Runtime TLS Capacity Adjustment
// ============================================================================

/*
 * Record a per-class magazine capacity override.
 *
 * The request is silently ignored unless class_idx is in
 * [0, TINY_NUM_CLASSES) and capacity is in
 * [16, tiny_cap_max_for_class(class_idx)].
 *
 * Only g_mag_cap_override[class_idx] is written here; the value is picked up
 * by tiny_effective_cap() the next time a magazine capacity is initialized.
 * Note: the lazy sync implementation is in hakmem_tiny_magazine.h (inlined).
 */
void hkm_ace_set_tls_capacity(int class_idx, uint32_t capacity) {
    const int class_in_range = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    if (!class_in_range) {
        return;
    }

    if (capacity >= 16 && capacity <= (uint32_t)tiny_cap_max_for_class(class_idx)) {
        g_mag_cap_override[class_idx] = (int)capacity;
    }
}
// ============================================================================
// Phase 7.7: Magazine Flush API
// ============================================================================

/*
 * Flush the calling thread's magazine for one size class.
 *
 * Every cached block (not just half, as a normal spill would) is popped from
 * the magazine and handed back to the slab that owns it, so that empty
 * SuperSlabs can later be detected and released.
 *
 * For each block:
 *   - the owning SuperSlab is found with hak_super_lookup();
 *   - if tiny_remote_guard_allow_local_push() refuses a local push, the block
 *     goes through ss_remote_push() instead;
 *   - otherwise it is linked onto the slab's freelist.
 * In both cases the slab's `used` counter is decremented, never below zero.
 *
 * The whole flush runs under g_tiny_class_locks[class_idx] and is profiled
 * as HKP_TINY_SPILL.
 *
 * NOTE(review): blocks whose lookup fails (no SuperSlab, bad magic, or an
 * invalid slab index) are removed from the magazine without being returned
 * anywhere, as in the original code — confirm this is intended.
 *
 * @param class_idx  size class to flush; out-of-range values are ignored.
 */
void hak_tiny_magazine_flush(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return;

    // Make sure the magazine has a capacity before inspecting it.
    tiny_mag_init_if_needed(class_idx);
    TinyTLSMag* mag = &g_tls_mags[class_idx];
    if (mag->top == 0) return; // Nothing to flush

    // Lock and flush the entire magazine to the slab freelists.
    pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
    struct timespec tss; int ss_time = hkm_prof_begin(&tss);
    pthread_mutex_lock(lock);

    int flush_count = mag->top;
    uint32_t self_tid = tiny_self_u32_guard();
    for (int i = 0; i < flush_count; i++) {
        TinyMagItem it = mag->items[--mag->top];

        SuperSlab* owner_ss = hak_super_lookup(it.ptr);
        if (!owner_ss || owner_ss->magic != SUPERSLAB_MAGIC) continue;

        int slab_idx = slab_index_for(owner_ss, it.ptr);
        // The index is used directly on owner_ss->slabs[]; never index with a
        // negative value if the lookup could not place the pointer.
        if (slab_idx < 0) continue;
        TinySlabMeta* meta = &owner_ss->slabs[slab_idx];

        if (!tiny_remote_guard_allow_local_push(owner_ss, slab_idx, meta, it.ptr, "mag_flush", self_tid)) {
            (void)ss_remote_push(owner_ss, slab_idx, it.ptr);
            if (meta->used > 0) meta->used--;
            continue;
        }

        // Prefer the slab's own class; fall back to the flushed class when the
        // stored class_idx is out of range.
        uint8_t cls = (meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : (uint8_t)class_idx;
        tiny_next_write(cls, it.ptr, meta->freelist);
        meta->freelist = it.ptr;
        // Same underflow guard as the remote path above: a stale or
        // double-counted block must not wrap the counter.
        if (meta->used > 0) meta->used--;
        // Active was decremented at free time.
        // Empty-slab detection and release are delegated to the flush-style
        // APIs (kept off the hot path).
    }

    pthread_mutex_unlock(lock);
    hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
}
/*
 * Flush the calling thread's magazines for every size class, then call
 * hak_tiny_trim().
 *
 * Intended for moments when memory should be handed back, e.g. right before
 * measuring RSS.
 */
void hak_tiny_magazine_flush_all(void) {
    int cls = 0;
    while (cls < TINY_NUM_CLASSES) {
        hak_tiny_magazine_flush(cls);
        cls++;
    }
    hak_tiny_trim();
}