Fix critical SuperSlab accounting bug + ACE improvements
Critical Bug Fix (OOM Root Cause): - ss_remote_push() was missing ss_active_dec_one() call - Cross-thread frees did not decrement total_active_blocks - SuperSlabs appeared "full" even when empty - hak_tiny_trim() could never free SuperSlabs → OOM - Result: alloc=49,123 freed=0 bytes=103GB One-Line Fix (core/hakmem_tiny_superslab.h:360): + ss_active_dec_one(ss); // Decrement on cross-thread free Impact: - OOM eliminated (167GB VmSize → clean exit) - SuperSlabs now properly freed - Performance maintained: 4.19M ops/s (±0%) - Memory leak fixed (freed: 0 → expected ~45,000+) ACE Improvements: - Set SUPERSLAB_LG_DEFAULT = 21 (2MB, was 1MB) - g_ss_min_lg_env now uses SUPERSLAB_LG_DEFAULT - hak_tiny_superslab_next_lg() fallback to default if uninitialized - Centralized ACE constants in .h for easier tuning Verification: - Larson benchmark: Clean completion, no OOM - Throughput: 4,192,124 ops/s (baseline maintained) Root cause analysis by Task agent: Larson 50%+ cross-thread frees triggered accounting leak, preventing SuperSlab reclamation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -319,7 +319,7 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
|
||||
}
|
||||
// Optional env clamp for SuperSlab size
|
||||
static int env_parsed = 0;
|
||||
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
|
||||
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT; // Start with default (2MB)
|
||||
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
|
||||
if (!env_parsed) {
|
||||
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
|
||||
|
||||
@ -33,12 +33,12 @@ uint32_t tiny_remote_drain_threshold(void);
|
||||
#define SUPERSLAB_SIZE_MIN (1 * 1024 * 1024) // 1MB min size
|
||||
#define SUPERSLAB_LG_MAX 21 // lg(2MB)
|
||||
#define SUPERSLAB_LG_MIN 20 // lg(1MB)
|
||||
#define SUPERSLAB_LG_DEFAULT 20 // Default: 1MB (ACE will adapt)
|
||||
#define SUPERSLAB_LG_DEFAULT 21 // Default: 2MB (syscall reduction, ACE will adapt)
|
||||
|
||||
#define SLAB_SIZE (64 * 1024) // 64KB per slab (fixed)
|
||||
|
||||
// Legacy defines (kept for backward compatibility, use lg_size instead)
|
||||
#define SUPERSLAB_SIZE SUPERSLAB_SIZE_MIN // Default to 1MB
|
||||
#define SUPERSLAB_SIZE SUPERSLAB_SIZE_MAX // Default to 2MB (syscall reduction)
|
||||
#define SUPERSLAB_MASK (SUPERSLAB_SIZE - 1)
|
||||
// IMPORTANT: Support variable-size SuperSlab (1MB=16 slabs, 2MB=32 slabs)
|
||||
// Arrays below must be sized for the MAX to avoid OOB when lg_size=21 (2MB)
|
||||
@ -259,8 +259,9 @@ static inline uint64_t hak_now_ns(void) {
|
||||
|
||||
// Get next lg_size for new SuperSlab allocation (uses target_lg)
|
||||
static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) {
|
||||
return g_ss_ace[class_idx].target_lg ? g_ss_ace[class_idx].target_lg
|
||||
uint8_t lg = g_ss_ace[class_idx].target_lg ? g_ss_ace[class_idx].target_lg
|
||||
: g_ss_ace[class_idx].current_lg;
|
||||
return lg ? lg : SUPERSLAB_LG_DEFAULT; // Use default if uninitialized
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
@ -287,7 +288,7 @@ extern int g_debug_remote_guard;
|
||||
static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
static _Atomic int g_remote_push_count = 0;
|
||||
int count = atomic_fetch_add_explicit(&g_remote_push_count, 1, memory_order_relaxed);
|
||||
if (count < 5) {
|
||||
if (g_debug_remote_guard && count < 5) {
|
||||
fprintf(stderr, "[REMOTE_PUSH] ss=%p slab_idx=%d ptr=%p count=%d\n",
|
||||
(void*)ss, slab_idx, ptr, count);
|
||||
}
|
||||
@ -356,8 +357,10 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
((uintptr_t)slab_idx << 32) | (uint32_t)(old == 0));
|
||||
}
|
||||
atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1u, memory_order_relaxed);
|
||||
ss_active_dec_one(ss); // Fix: Decrement active blocks on cross-thread free
|
||||
atomic_store_explicit(&g_ss_remote_seen, 1, memory_order_relaxed);
|
||||
int transitioned = (old == 0);
|
||||
// (optional hint to Ready ring moved to mailbox/aggregator to avoid header coupling)
|
||||
if (transitioned) {
|
||||
// First remote observed for this slab: mark slab_listed and notify publisher paths
|
||||
unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
|
||||
|
||||
Reference in New Issue
Block a user