Feat(phase9): Make shared_pool SuperSlab acquisition deadlock-free (Task 1)
Refactored SuperSlab allocation within the shared pool to prevent deadlocks. shared_pool_allocate_superslab_unlocked() is replaced by sp_internal_allocate_superslab(), which is lock-agnostic: it only allocates and initializes a SuperSlab and no longer touches g_shared_pool's table. alloc_lock is temporarily released before calling sp_internal_allocate_superslab() and re-acquired afterwards, both in shared_pool_acquire_superslab() and in the Stage 3b allocation path. This eliminates the deadlock potential between the shared pool and registry locks. The OOMs previously observed were due to the shared pool's soft limits, not a code bug.
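The core discipline behind the fix: never call into superslab_allocate() (which takes the registry lock) while holding alloc_lock, and re-validate pool state only after re-acquiring the lock, since another thread may have changed it in the window. Below is a minimal standalone C sketch of that unlock -> allocate -> relock -> re-check pattern; pool_t, resource_t, allocate_resource, pool_acquire, and pool_ensure_capacity are hypothetical stand-ins for illustration, not the allocator's real API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct { int id; } resource_t;

typedef struct {
    pthread_mutex_t lock;      // plays the role of g_shared_pool.alloc_lock
    resource_t**    slots;     // plays the role of g_shared_pool.slabs[]
    size_t          count;     // plays the role of g_shared_pool.total_count
    size_t          capacity;
} pool_t;

// Stand-in for sp_internal_allocate_superslab(): lock-agnostic, touches no
// pool state; in the real allocator this path may take the registry lock.
static resource_t* allocate_resource(void) {
    return calloc(1, sizeof(resource_t));
}

// Stand-in for shared_pool_ensure_capacity_unlocked(): best-effort growth,
// leaves capacity unchanged on failure. Caller must hold the pool lock.
static void pool_ensure_capacity(pool_t* p, size_t need) {
    if (need <= p->capacity) return;
    size_t ncap = p->capacity ? p->capacity * 2 : 8;
    while (ncap < need) ncap *= 2;
    resource_t** ns = realloc(p->slots, ncap * sizeof(*ns));
    if (!ns) return;
    p->slots = ns;
    p->capacity = ncap;
}

static resource_t* pool_acquire(pool_t* p) {
    pthread_mutex_lock(&p->lock);

    // Drop the pool lock around the allocation so it is never held while
    // the allocator takes its own locks: no lock-order inversion possible.
    pthread_mutex_unlock(&p->lock);
    resource_t* r = allocate_resource();
    pthread_mutex_lock(&p->lock);

    if (!r) {
        pthread_mutex_unlock(&p->lock);
        return NULL;
    }

    // The pool may have changed while the lock was dropped, so the capacity
    // check happens only now, after re-acquiring the lock.
    if (p->count >= p->capacity) {
        pool_ensure_capacity(p, p->count + 1);
        if (p->count >= p->capacity) {
            // Table expansion failed; r stays alive but untracked by the pool.
            pthread_mutex_unlock(&p->lock);
            return NULL;
        }
    }
    p->slots[p->count++] = r;

    pthread_mutex_unlock(&p->lock);
    return r;
}

int main(void) {
    pool_t pool = { PTHREAD_MUTEX_INITIALIZER, NULL, 0, 0 };
    resource_t* r = pool_acquire(&pool);
    printf("acquired=%p count=%zu capacity=%zu\n",
           (void*)r, pool.count, pool.capacity);
    free(pool.slots);
    free(r);
    return 0;
}

One consequence of dropping the lock, visible in the patch itself: two racing threads may each allocate a fresh SuperSlab, and both simply get appended to the pool table rather than reintroducing a lock dependency to deduplicate them.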
@@ -544,7 +544,7 @@ static inline void sp_stage_stats_dump_if_enabled(void) {
     fprintf(stderr, "[SP_STAGE_STATS] total: stage0.5=%lu stage1=%lu stage2=%lu stage3=%lu\n",
             (unsigned long)s0, (unsigned long)s1, (unsigned long)s2, (unsigned long)s3);
 #else
-    (void)g_sp_stage0_hits; (void)g_sp_stage1_hits; (void)g_sp_stage2_hits; (void)g_sp_stage3_hits;
+    (void)g_sp_stage1_hits; (void)g_sp_stage2_hits; (void)g_sp_stage3_hits;
 #endif
 }
 
@@ -795,23 +795,11 @@ static int sp_freelist_pop_lockfree(int class_idx, SharedSSMeta** out_meta, int*
     return 1; // Success
 }
 
-/*
- * Internal: allocate and register a new SuperSlab for the shared pool.
- *
- * Phase 12 NOTE:
- * - We MUST use the real superslab_allocate() path so that:
- *   - backing memory is a full SuperSlab region (1–2MB),
- *   - header/layout are initialized correctly,
- *   - registry integration stays consistent.
- * - shared_pool is responsible only for:
- *   - tracking pointers,
- *   - marking per-slab class_idx as UNASSIGNED initially.
- *   It does NOT bypass registry/LRU.
- *
- * Caller must hold alloc_lock.
- */
+// Internal helper: Allocates a new SuperSlab from the OS and performs basic initialization.
+// Does NOT interact with g_shared_pool.slabs[] or g_shared_pool.total_count directly.
+// Caller is responsible for adding the SuperSlab to g_shared_pool's arrays and metadata.
 static SuperSlab*
-shared_pool_allocate_superslab_unlocked(void)
+sp_internal_allocate_superslab(void)
 {
     // Use size_class 0 as a neutral hint; Phase 12 per-slab class_idx is authoritative.
     extern SuperSlab* superslab_allocate(uint8_t size_class);
@@ -838,41 +826,42 @@ shared_pool_allocate_superslab_unlocked(void)
         // P1.1: Initialize class_map to UNASSIGNED as well
         ss->class_map[i] = 255;
     }
 
-    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
-        shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
-        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
-            // Pool table expansion failed; leave ss alive (registry-owned),
-            // but do not treat it as part of shared_pool.
-            return NULL;
-        }
-    }
-
-    g_shared_pool.slabs[g_shared_pool.total_count] = ss;
-    g_shared_pool.total_count++;
-    // Not counted as active until at least one slab is assigned.
     return ss;
 }
 
 SuperSlab*
 shared_pool_acquire_superslab(void)
 {
     // Phase 12 debug safety:
     // If shared backend is disabled at Box API level, this function SHOULD NOT be called.
     // But since bench currently SEGVs here even with legacy forced, treat this as a hard guard:
     // we early-return error instead of touching potentially-bad state.
     //
     // This isolates shared_pool from the current crash so we can validate legacy path first.
     // FIXED: Remove the return -1; that was preventing operation
 
     shared_pool_init();
 
     pthread_mutex_lock(&g_shared_pool.alloc_lock);
 
     // For now, always allocate a fresh SuperSlab and register it.
     // More advanced reuse/GC comes later.
-    SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
+    // Release lock to avoid deadlock with registry during superslab_allocate
+    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+    SuperSlab* ss = sp_internal_allocate_superslab(); // Call lock-free internal helper
+    pthread_mutex_lock(&g_shared_pool.alloc_lock);
 
     if (!ss) {
         pthread_mutex_unlock(&g_shared_pool.alloc_lock);
         return NULL;
     }
 
+    // Add newly allocated SuperSlab to the shared pool's internal array
+    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+        shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
+        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+            // Pool table expansion failed; leave ss alive (registry-owned),
+            // but do not treat it as part of shared_pool.
+            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+            return NULL;
+        }
+    }
+    g_shared_pool.slabs[g_shared_pool.total_count] = ss;
+    g_shared_pool.total_count++;
 
     // Not counted as active until at least one slab is assigned.
     pthread_mutex_unlock(&g_shared_pool.alloc_lock);
     return ss;
 }
@@ -1221,7 +1210,48 @@ stage2_fallback:
 
     // Stage 3b: If LRU miss, allocate new SuperSlab
     if (!new_ss) {
-        new_ss = shared_pool_allocate_superslab_unlocked();
+        // Release the alloc_lock to avoid deadlock with registry during superslab_allocate
+        if (g_lock_stats_enabled == 1) {
+            atomic_fetch_add(&g_lock_release_count, 1);
+        }
+        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+
+        SuperSlab* allocated_ss = sp_internal_allocate_superslab();
+
+        // Re-acquire the alloc_lock
+        if (g_lock_stats_enabled == 1) {
+            atomic_fetch_add(&g_lock_acquire_count, 1);
+            atomic_fetch_add(&g_lock_acquire_slab_count, 1); // This is part of acquisition path
+        }
+        pthread_mutex_lock(&g_shared_pool.alloc_lock);
+
+        if (!allocated_ss) {
+            // Allocation failed; return now.
+            if (g_lock_stats_enabled == 1) {
+                atomic_fetch_add(&g_lock_release_count, 1);
+            }
+            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+            return -1; // Out of memory
+        }
+
+        new_ss = allocated_ss;
+
+        // Add newly allocated SuperSlab to the shared pool's internal array
+        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+            shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
+            if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+                // Pool table expansion failed; leave ss alive (registry-owned),
+                // but do not treat it as part of shared_pool.
+                // This is a critical error, return early.
+                if (g_lock_stats_enabled == 1) {
+                    atomic_fetch_add(&g_lock_release_count, 1);
+                }
+                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+                return -1;
+            }
+        }
+        g_shared_pool.slabs[g_shared_pool.total_count] = new_ss;
+        g_shared_pool.total_count++;
     }
 
 #if !HAKMEM_BUILD_RELEASE