Files
hakmem/core/tiny_free_magazine.inc.h

505 lines
23 KiB
C
Raw Normal View History

// tiny_free_magazine.inc.h - Magazine Layer for hak_tiny_free_with_slab()
// Purpose: TLS caching (TinyQuickSlot, TLS SLL, Magazine) and spill logic
// Extracted from: hakmem_tiny_free.inc lines 208-620
// Box Theory: Box 5 (TLS Cache) integration
//
// Context: This file is #included within hak_tiny_free_with_slab() function body
// Prerequisites: ss, meta, class_idx, ptr variables must be defined in calling scope
#if !HAKMEM_BUILD_RELEASE
// SuperSlab uses Magazine for TLS caching (same as TinySlab)
tiny_small_mags_init_once();
if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: SLL優先mag優先は無効化
// Prefer TinyQuickSlot (compile-out if HAKMEM_TINY_NO_QUICK)
#if !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
qs->items[qs->top++] = HAK_BASE_TO_RAW(base_ptr);
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
// Fast path: TLS SLL push for hottest classes
if (!g_tls_list_enable && g_tls_sll_enable && g_tls_sll[class_idx].count < sll_cap_for_class(class_idx, (uint32_t)cap)) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap);
if (tls_sll_push(class_idx, base_ptr, sll_cap)) {
// BUGFIX: Decrement used counter (was missing, causing Fail-Fast on next free)
meta->used--;
// Active → Inactive: count down immediately (TLS保管中は"使用中"ではない)
ss_active_dec_one(ss);
HAK_TP1(sll_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
}
// Next: Magazine push必要ならmag→SLLへバルク転送で空きを作る
// Hysteresis: allow slight overfill before deciding to spill under lock
if (mag->top >= cap && g_spill_hyst > 0) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
if (mag->top < cap + g_spill_hyst) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = NULL; // SuperSlab owner not a TinySlab; leave NULL
#endif
mag->top++;
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Active → Inactive: decrement nowアプリ解放時に非アクティブ扱い
ss_active_dec_one(ss);
HAK_TP1(mag_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 2);
HAK_STAT_FREE(class_idx);
return;
}
// Background spill: queue to BG thread instead of locking (when enabled)
if (g_bg_spill_enable) {
uint32_t qlen = atomic_load_explicit(&g_bg_spill_len[class_idx], memory_order_relaxed);
if ((int)qlen < g_bg_spill_target) {
// Build a small chain: include current ptr and pop from mag up to limit
int limit = g_bg_spill_max_batch;
if (limit > cap/2) limit = cap/2;
if (limit > 32) limit = 32; // keep free-path bounded
// Phase 10: Use hak_base_ptr_t
void* head = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
#if HAKMEM_TINY_HEADER_CLASSIDX
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
const size_t next_off = 1; // Phase E1-CORRECT: Always 1
#else
const size_t next_off = 0;
#endif
// Build single-linked list via Box next-ptr API (per-class)
tiny_next_write(class_idx, head, NULL);
void* tail = head; // current tail
int taken = 1;
while (taken < limit && mag->top > 0) {
void* p2 = mag->items[--mag->top].ptr;
#if HAKMEM_TINY_HEADER_CLASSIDX
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
const size_t next_off2 = 1; // Phase E1-CORRECT: Always 1
#else
const size_t next_off2 = 0;
#endif
tiny_next_write(class_idx, p2, head);
head = p2;
taken++;
}
// Push chain to spill queue (single CAS)
bg_spill_push_chain(class_idx, head, tail, taken);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
}
// Spill half (SuperSlab version - simpler than TinySlab)
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
// Profiling fix for debug build
struct timespec tss;
int ss_time = hkm_prof_begin(&tss);
pthread_mutex_lock(lock);
// Batch spill: reduce lock frequency and work per call
int spill = cap / 2;
int over = mag->top - (cap + g_spill_hyst);
if (over > 0 && over < spill) spill = over;
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: SuperSlab spill - return to freelist
SuperSlab* owner_ss = hak_super_lookup(it.ptr);
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
// Direct freelist push (same as old hak_tiny_free_superslab)
// Phase 10: it.ptr is BASE.
// FIX: it.ptr is BASE, use it directly (do not subtract 1)
void* base = it.ptr;
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
int slab_idx = slab_index_for(owner_ss, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB)
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(owner_ss)) {
continue; // Skip invalid index
}
TinySlabMeta* meta = &owner_ss->slabs[slab_idx];
Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash) ## Summary Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address SuperSlab allocation churn (877 SuperSlabs → 100-200 target). ## Implementation (ChatGPT + Claude) 1. **Metadata changes** (superslab_types.h): - Added class_idx to TinySlabMeta (per-slab dynamic class) - Removed size_class from SuperSlab (no longer per-SuperSlab) - Changed owner_tid (16-bit) → owner_tid_low (8-bit) 2. **Shared Pool** (hakmem_shared_pool.{h,c}): - Global pool shared by all size classes - shared_pool_acquire_slab() - Get free slab for class_idx - shared_pool_release_slab() - Return slab when empty - Per-class hints for fast path optimization 3. **Integration** (23 files modified): - Updated all ss->size_class → meta->class_idx - Updated all meta->owner_tid → meta->owner_tid_low - superslab_refill() now uses shared pool - Free path releases empty slabs back to pool 4. **Build system** (Makefile): - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE ## Status: ⚠️ Build OK, Runtime CRASH **Build**: ✅ SUCCESS - All 23 files compile without errors - Only warnings: superslab_allocate type mismatch (legacy code) **Runtime**: ❌ SEGFAULT - Crash location: sll_refill_small_from_ss() - Exit code: 139 (SIGSEGV) - Test case: ./bench_random_mixed_hakmem 1000 256 42 ## Known Issues 1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue 2. **Legacy superslab_allocate()** still exists (type mismatch warning) 3. **Remaining TODOs** from design doc: - SuperSlab physical layout integration - slab_handle.h cleanup - Remove old per-class head implementation ## Next Steps 1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss) 2. Fix shared_pool_acquire_slab() or superslab_init_slab() 3. Basic functionality test (1K → 100K iterations) 4. Measure SuperSlab count reduction (877 → 100-200) 5. Performance benchmark (+650-860% expected) ## Files Changed (25 files) core/box/free_local_box.c core/box/free_remote_box.c core/box/front_gate_classifier.c core/hakmem_super_registry.c core/hakmem_tiny.c core/hakmem_tiny_bg_spill.c core/hakmem_tiny_free.inc core/hakmem_tiny_lifecycle.inc core/hakmem_tiny_magazine.c core/hakmem_tiny_query.c core/hakmem_tiny_refill.inc.h core/hakmem_tiny_superslab.c core/hakmem_tiny_superslab.h core/hakmem_tiny_tls_ops.h core/slab_handle.h core/superslab/superslab_inline.h core/superslab/superslab_types.h core/tiny_debug.h core/tiny_free_fast.inc.h core/tiny_free_magazine.inc.h core/tiny_remote.c core/tiny_superslab_alloc.inc.h core/tiny_superslab_free.inc.h Makefile ## New Files (3 files) PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md core/hakmem_shared_pool.c core/hakmem_shared_pool.h 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-13 16:33:03 +09:00
// Use per-slab class for freelist linkage (Phase 12)
tiny_next_write(meta->class_idx, it.ptr, meta->freelist);
meta->freelist = it.ptr;
meta->used--;
// Decrement SuperSlab active counter (spill returns blocks to SS)
ss_active_dec_one(owner_ss);
// Phase 8.4: Empty SuperSlab detection (will use meta->used scan)
// TODO: Implement scan-based empty detection
// Empty SuperSlab detection/munmapは別途フラッシュAPIで実施ホットパスから除外
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
// Finally, try FastCache push first (≤128B) — compile-out if HAKMEM_TINY_NO_FRONT_CACHE
#if !defined(HAKMEM_TINY_NO_FRONT_CACHE)
if (g_fastcache_enable && class_idx <= 4) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (fastcache_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
HAK_TP1(front_push, class_idx);
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
// Then TLS SLL if room, else magazine
if (g_tls_sll_enable && g_tls_sll[class_idx].count < sll_cap_for_class(class_idx, (uint32_t)mag->cap)) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
// fallback to magazine
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
HAK_STAT_FREE(class_idx);
return;
#endif // HAKMEM_BUILD_RELEASE
// Phase 7.6: TinySlab path (original)
//g_tiny_free_with_slab_count++; // Phase 7.6: Track calls - DISABLED due to segfault
// Same-thread → TLS magazine; remote-thread → MPSC stack
if (slab && pthread_equal(slab->owner_tid, tiny_self_pt())) {
int class_idx = slab->class_idx;
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
// Phase E1-CORRECT: C7 now has headers, can use TLS list like other classes
if (g_tls_list_enable) {
TinyTLSList* tls = &g_tls_lists[class_idx];
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
// TinyHotMag front push8/16/32B, A/B
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
if (hotmag_push(class_idx, base)) {
HAK_STAT_FREE(class_idx);
return;
}
}
if (tls->count < tls->cap) {
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push_fast(tls, base, class_idx);
HAK_STAT_FREE(class_idx);
return;
}
seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
{
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push_fast(tls, base, class_idx);
}
if (tls_list_should_spill(tls)) {
tls_list_spill_excess(class_idx, tls);
}
HAK_STAT_FREE(class_idx);
return;
}
tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: SLL優先mag優先は無効化
// Fast path: FastCache push (preferred for ≤128B), then TLS SLL
if (g_fastcache_enable && class_idx <= 4) {
if (fastcache_push(class_idx, ptr)) {
HAK_STAT_FREE(class_idx);
return;
}
}
// Fast path: TLS SLL push (preferred)
if (!g_tls_list_enable && g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap);
if (g_tls_sll[class_idx].count < sll_cap) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (tls_sll_push(class_idx, base_ptr, sll_cap)) {
HAK_STAT_FREE(class_idx);
return;
}
}
}
// Next: if magazine has room, push immediately and return満杯ならmag→SLLへバルク
if (mag->top >= cap) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
// Remote-drain can be handled opportunistically on future calls.
if (mag->top < cap) {
{
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
}
// Magazine full: before spilling, opportunistically drain remotes once under lock.
if (atomic_load_explicit(&slab->remote_count, memory_order_relaxed) >= (unsigned)g_remote_drain_thresh_per_class[class_idx] || atomic_load_explicit(&slab->remote_head, memory_order_acquire)) {
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
HAK_TP1(remote_drain, class_idx);
tiny_remote_drain_locked(slab);
pthread_mutex_unlock(lock);
}
// Spill half under class lock
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
// Profiling fix
struct timespec tss;
int ss_time = hkm_prof_begin(&tss);
pthread_mutex_lock(lock);
int spill = cap / 2;
// Phase 4.2: High-water threshold for gating Phase 4 logic
int high_water = (cap * 3) / 4; // 75% of capacity
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: Check for SuperSlab first (mixed Magazine support)
SuperSlab* ss_owner = hak_super_lookup(it.ptr);
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
// SuperSlab spill - return to freelist
// FIX: it.ptr is BASE, use directly
void* base = it.ptr;
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
int slab_idx = slab_index_for(ss_owner, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB)
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) {
HAK_STAT_FREE(class_idx);
continue; // Skip invalid index
}
TinySlabMeta* meta = &ss_owner->slabs[slab_idx];
Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash) ## Summary Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address SuperSlab allocation churn (877 SuperSlabs → 100-200 target). ## Implementation (ChatGPT + Claude) 1. **Metadata changes** (superslab_types.h): - Added class_idx to TinySlabMeta (per-slab dynamic class) - Removed size_class from SuperSlab (no longer per-SuperSlab) - Changed owner_tid (16-bit) → owner_tid_low (8-bit) 2. **Shared Pool** (hakmem_shared_pool.{h,c}): - Global pool shared by all size classes - shared_pool_acquire_slab() - Get free slab for class_idx - shared_pool_release_slab() - Return slab when empty - Per-class hints for fast path optimization 3. **Integration** (23 files modified): - Updated all ss->size_class → meta->class_idx - Updated all meta->owner_tid → meta->owner_tid_low - superslab_refill() now uses shared pool - Free path releases empty slabs back to pool 4. **Build system** (Makefile): - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE ## Status: ⚠️ Build OK, Runtime CRASH **Build**: ✅ SUCCESS - All 23 files compile without errors - Only warnings: superslab_allocate type mismatch (legacy code) **Runtime**: ❌ SEGFAULT - Crash location: sll_refill_small_from_ss() - Exit code: 139 (SIGSEGV) - Test case: ./bench_random_mixed_hakmem 1000 256 42 ## Known Issues 1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue 2. **Legacy superslab_allocate()** still exists (type mismatch warning) 3. **Remaining TODOs** from design doc: - SuperSlab physical layout integration - slab_handle.h cleanup - Remove old per-class head implementation ## Next Steps 1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss) 2. Fix shared_pool_acquire_slab() or superslab_init_slab() 3. Basic functionality test (1K → 100K iterations) 4. Measure SuperSlab count reduction (877 → 100-200) 5. Performance benchmark (+650-860% expected) ## Files Changed (25 files) core/box/free_local_box.c core/box/free_remote_box.c core/box/front_gate_classifier.c core/hakmem_super_registry.c core/hakmem_tiny.c core/hakmem_tiny_bg_spill.c core/hakmem_tiny_free.inc core/hakmem_tiny_lifecycle.inc core/hakmem_tiny_magazine.c core/hakmem_tiny_query.c core/hakmem_tiny_refill.inc.h core/hakmem_tiny_superslab.c core/hakmem_tiny_superslab.h core/hakmem_tiny_tls_ops.h core/slab_handle.h core/superslab/superslab_inline.h core/superslab/superslab_types.h core/tiny_debug.h core/tiny_free_fast.inc.h core/tiny_free_magazine.inc.h core/tiny_remote.c core/tiny_superslab_alloc.inc.h core/tiny_superslab_free.inc.h Makefile ## New Files (3 files) PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md core/hakmem_shared_pool.c core/hakmem_shared_pool.h 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-13 16:33:03 +09:00
// Use per-slab class for freelist linkage (Phase 12)
tiny_next_write(meta->class_idx, it.ptr, meta->freelist);
meta->freelist = it.ptr;
meta->used--;
// 空SuperSlab処理はフラッシュ/バックグラウンドで対応(ホットパス除外)
HAK_STAT_FREE(class_idx);
continue; // Skip TinySlab processing
}
TinySlab* owner =
#if HAKMEM_TINY_MAG_OWNER
it.owner;
#else
NULL;
#endif
if (!owner) {
owner = tls_active_owner_for_ptr(class_idx, it.ptr);
}
if (!owner) {
owner = hak_tiny_owner_slab(it.ptr);
}
if (!owner) continue;
// Phase 4.2: Adaptive gating - skip Phase 4 when TLS Magazine is high-water
// Rationale: When mag->top >= 75%, next alloc will come from TLS anyway
// so pushing to mini-mag is wasted work
int is_high_water = (mag->top >= high_water);
if (!is_high_water) {
// Low-water: Phase 4.1 logic (try mini-magazine first)
uint8_t cidx = owner->class_idx; // Option A: 1回だけ読む
TinySlab* tls_a = g_tls_active_slab_a[cidx];
TinySlab* tls_b = g_tls_active_slab_b[cidx];
// Option B: Branch prediction hint (spill → TLS-active への戻りが likely)
if (__builtin_expect((owner == tls_a || owner == tls_b) &&
!mini_mag_is_full(&owner->mini_mag), 1)) {
// Fast path: mini-magazineに戻すbitmap触らない
mini_mag_push(&owner->mini_mag, it.ptr);
HAK_TP1(spill_tiny, cidx);
HAK_STAT_FREE(cidx);
continue; // bitmap操作スキップ
}
}
// High-water or Phase 4.1 mini-mag full: fall through to bitmap
// Slow path: bitmap直接書き込み既存ロジック
size_t bs = g_tiny_class_sizes[owner->class_idx];
int idx = ((uintptr_t)it.ptr - (uintptr_t)owner->base) / bs;
if (hak_tiny_is_used(owner, idx)) {
hak_tiny_set_free(owner, idx);
int was_full = (owner->free_count == 0);
owner->free_count++;
if (was_full) move_to_free_list(owner->class_idx, owner);
if (owner->free_count == owner->total_count) {
// If this slab is TLS-active for this thread, clear the pointer before releasing
if (g_tls_active_slab_a[owner->class_idx] == owner) g_tls_active_slab_a[owner->class_idx] = NULL;
if (g_tls_active_slab_b[owner->class_idx] == owner) g_tls_active_slab_b[owner->class_idx] = NULL;
TinySlab** headp = &g_tiny_pool.free_slabs[owner->class_idx];
TinySlab* prev = NULL;
for (TinySlab* s = *headp; s; prev = s, s = s->next) {
if (s == owner) { if (prev) prev->next = s->next; else *headp = s->next; break; }
}
release_slab(owner);
}
HAK_TP1(spill_tiny, owner->class_idx);
HAK_STAT_FREE(owner->class_idx);
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
// Finally: prefer TinyQuickSlot → SLL → UltraFront → HotMag → Magazine順序で局所性を確保
#if !HAKMEM_BUILD_RELEASE && !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
qs->items[qs->top++] = HAK_BASE_TO_RAW(base_ptr);
} else if (g_tls_sll_enable) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll[class_idx].count < sll_cap2) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else
#endif
{
if (g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll[class_idx].count < sll_cap2) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
} else if (slab) {
Phase E1-CORRECT: Fix USER/BASE pointer conversion bugs in slab_index_for calls CRITICAL BUG FIX: Phase E1 introduced 1-byte headers for ALL size classes (C0-C7), changing the pointer contract. However, many locations still called slab_index_for() with USER pointers (storage+1) instead of BASE pointers (storage), causing off-by-one slab index calculations that corrupted memory. Root Cause: - USER pointer = BASE + 1 (returned by malloc, points past header) - BASE pointer = storage start (where 1-byte header is written) - slab_index_for() expects BASE pointer for correct slab boundary calculations - Passing USER pointer → wrong slab_idx → wrong metadata → freelist corruption Impact Before Fix: - bench_random_mixed crashes at ~14K iterations with SEGV - Massive C7 alignment check failures (wrong slab classification) - Memory corruption from writing to wrong slab freelists Fixes Applied (8 locations): 1. core/hakmem_tiny_free.inc:137 - Added USER→BASE conversion before slab_index_for() 2. core/hakmem_tiny_ultra_simple.inc:148 - Added USER→BASE conversion before slab_index_for() 3. core/tiny_free_fast.inc.h:220 - Added USER→BASE conversion before slab_index_for() 4-5. core/tiny_free_magazine.inc.h:126,315 - Added USER→BASE conversion before slab_index_for() (2 locations) 6. core/box/free_local_box.c:14,22,62 - Added USER→BASE conversion before slab_index_for() - Fixed delta calculation to use BASE instead of USER - Fixed debug logging to use BASE instead of USER 7. core/hakmem_tiny.c:448,460,473 (tiny_debug_track_alloc_ret) - Added USER→BASE conversion before slab_index_for() (2 calls) - Fixed delta calculation to use BASE instead of USER - This function is called on EVERY allocation in debug builds Results After Fix: ✅ bench_random_mixed stable up to 66K iterations (~4.7x improvement) ✅ C7 alignment check failures eliminated (was: 100% failure rate) ✅ Front Gate "Unknown" classification dropped to 0% (was: 1.67%) ✅ No segfaults for workloads up to ~33K allocations Remaining Issue: ❌ Segfault still occurs at iteration 66152 (allocs=33137, frees=33014) - Different bug from USER/BASE conversion issues - Likely capacity/boundary condition (further investigation needed) Testing: - bench_random_mixed_hakmem 1K-66K iterations: PASS - bench_random_mixed_hakmem 67K+ iterations: FAIL (different bug) - bench_fixed_size_hakmem 200K iterations: PASS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 05:21:36 +09:00
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
// FIX: Use ptr_user_to_base to get correct base
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_remote_push(slab, base);
}
}