Feat(phase9): Make shared_pool SuperSlab acquisition deadlock-free (Task 1)
Refactored SuperSlab allocation within the shared pool to prevent deadlocks. shared_pool_allocate_superslab_unlocked() is replaced by sp_internal_allocate_superslab(), which is now lock-agnostic. alloc_lock is temporarily released before calling sp_internal_allocate_superslab() and re-acquired afterwards in shared_pool_acquire_superslab() and the Stage 3b allocation path. This eliminates the deadlock potential between the shared-pool and registry locks. The OOMs observed previously were caused by the shared pool's soft limits, not a code bug.
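For context, the shape of the fix is the standard lock-ordering repair: never hold the pool lock while a callee takes the registry lock. A minimal self-contained sketch of the pattern (the two mutexes stand in for g_shared_pool.alloc_lock and the registry lock taken inside superslab_allocate(); everything else is illustrative):

#include <pthread.h>

static pthread_mutex_t pool_lock     = PTHREAD_MUTEX_INITIALIZER; /* stands in for alloc_lock */
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for the registry lock */

/* Stand-in for superslab_allocate(): takes the registry lock internally. */
static void* allocate_via_registry(void)
{
    pthread_mutex_lock(&registry_lock);
    void* p = (void*)0; /* real code would allocate a SuperSlab here */
    pthread_mutex_unlock(&registry_lock);
    return p;
}

/* Old (deadlock-prone): lock(pool) -> lock(registry), while another path
 * does lock(registry) -> lock(pool).  New: drop pool_lock around the
 * registry call, re-acquire it, then re-validate pool state. */
void* acquire_fixed(void)
{
    pthread_mutex_lock(&pool_lock);
    /* ... fast-path lookups under pool_lock ... */
    pthread_mutex_unlock(&pool_lock);   /* release before the inner lock */

    void* p = allocate_via_registry(); /* registry lock taken here */

    pthread_mutex_lock(&pool_lock);    /* re-acquire */
    /* ... publish p into the pool; state may have changed while unlocked ... */
    pthread_mutex_unlock(&pool_lock);
    return p;
}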
@@ -544,7 +544,7 @@ static inline void sp_stage_stats_dump_if_enabled(void) {
     fprintf(stderr, "[SP_STAGE_STATS] total: stage0.5=%lu stage1=%lu stage2=%lu stage3=%lu\n",
         (unsigned long)s0, (unsigned long)s1, (unsigned long)s2, (unsigned long)s3);
 #else
-    (void)g_sp_stage0_hits; (void)g_sp_stage1_hits; (void)g_sp_stage2_hits; (void)g_sp_stage3_hits;
+    (void)g_sp_stage1_hits; (void)g_sp_stage2_hits; (void)g_sp_stage3_hits;
 #endif
 }
 
@@ -795,23 +795,11 @@ static int sp_freelist_pop_lockfree(int class_idx, SharedSSMeta** out_meta, int*
     return 1; // Success
 }
 
-/*
- * Internal: allocate and register a new SuperSlab for the shared pool.
- *
- * Phase 12 NOTE:
- * - We MUST use the real superslab_allocate() path so that:
- *   - backing memory is a full SuperSlab region (1–2MB),
- *   - header/layout are initialized correctly,
- *   - registry integration stays consistent.
- * - shared_pool is responsible only for:
- *   - tracking pointers,
- *   - marking per-slab class_idx as UNASSIGNED initially.
- * It does NOT bypass registry/LRU.
- *
- * Caller must hold alloc_lock.
- */
+// Internal helper: Allocates a new SuperSlab from the OS and performs basic initialization.
+// Does NOT interact with g_shared_pool.slabs[] or g_shared_pool.total_count directly.
+// Caller is responsible for adding the SuperSlab to g_shared_pool's arrays and metadata.
 static SuperSlab*
-shared_pool_allocate_superslab_unlocked(void)
+sp_internal_allocate_superslab(void)
 {
     // Use size_class 0 as a neutral hint; Phase 12 per-slab class_idx is authoritative.
     extern SuperSlab* superslab_allocate(uint8_t size_class);
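The new comment block pins down the helper's contract: it still goes through the real superslab_allocate() path, but it never touches g_shared_pool.slabs[] or total_count. A hedged caller-side sketch of that contract (sp_example_grow_pool is a hypothetical illustration, not part of this commit):

/* Illustrative caller: no pool locks held across the helper;
 * publication into the pool tables happens under alloc_lock. */
static SuperSlab* sp_example_grow_pool(void)
{
    SuperSlab* ss = sp_internal_allocate_superslab(); /* registry locks only */
    if (ss != NULL) {
        pthread_mutex_lock(&g_shared_pool.alloc_lock);
        /* ... append ss to g_shared_pool.slabs[] and bump total_count ... */
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    }
    return ss;
}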
@@ -838,41 +826,42 @@ shared_pool_allocate_superslab_unlocked(void)
         // P1.1: Initialize class_map to UNASSIGNED as well
         ss->class_map[i] = 255;
     }
 
-    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
-        shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
-        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
-            // Pool table expansion failed; leave ss alive (registry-owned),
-            // but do not treat it as part of shared_pool.
-            return NULL;
-        }
-    }
-
-    g_shared_pool.slabs[g_shared_pool.total_count] = ss;
-    g_shared_pool.total_count++;
-    // Not counted as active until at least one slab is assigned.
     return ss;
 }
 
 SuperSlab*
 shared_pool_acquire_superslab(void)
 {
-    // Phase 12 debug safety:
-    // If shared backend is disabled at Box API level, this function SHOULD NOT be called.
-    // But since bench currently SEGVs here even with legacy forced, treat this as a hard guard:
-    // we early-return error instead of touching potentially-bad state.
-    //
-    // This isolates shared_pool from the current crash so we can validate legacy path first.
-    // FIXED: Remove the return -1; that was preventing operation
-
     shared_pool_init();
 
     pthread_mutex_lock(&g_shared_pool.alloc_lock);
 
     // For now, always allocate a fresh SuperSlab and register it.
     // More advanced reuse/GC comes later.
-    SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
+    // Release lock to avoid deadlock with registry during superslab_allocate
+    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+    SuperSlab* ss = sp_internal_allocate_superslab(); // Call lock-free internal helper
+    pthread_mutex_lock(&g_shared_pool.alloc_lock);
+
+    if (!ss) {
+        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+        return NULL;
+    }
+
+    // Add newly allocated SuperSlab to the shared pool's internal array
+    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+        shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
+        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+            // Pool table expansion failed; leave ss alive (registry-owned),
+            // but do not treat it as part of shared_pool.
+            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+            return NULL;
+        }
+    }
+    g_shared_pool.slabs[g_shared_pool.total_count] = ss;
+    g_shared_pool.total_count++;
+
+    // Not counted as active until at least one slab is assigned.
     pthread_mutex_unlock(&g_shared_pool.alloc_lock);
     return ss;
 }
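Both shared_pool_acquire_superslab() above and the Stage 3b path below now repeat the same publish-under-lock sequence. A hedged sketch of how it could be factored (sp_publish_superslab_locked is a hypothetical name, not in this commit); it also shows why capacity must be re-checked after re-acquiring the lock: another thread may have advanced total_count during the unlocked window.

/* Hypothetical helper: publish a freshly allocated SuperSlab into the
 * pool tables.  Caller holds alloc_lock.  Capacity is (re-)checked here
 * because total_count may have grown while the lock was dropped.
 * Returns 0 on success; on failure ss stays alive (registry-owned). */
static int sp_publish_superslab_locked(SuperSlab* ss)
{
    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
        shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
        if (g_shared_pool.total_count >= g_shared_pool.capacity)
            return -1; /* pool table expansion failed */
    }
    g_shared_pool.slabs[g_shared_pool.total_count] = ss;
    g_shared_pool.total_count++;
    return 0;
}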
@@ -1221,7 +1210,48 @@ stage2_fallback:
 
     // Stage 3b: If LRU miss, allocate new SuperSlab
     if (!new_ss) {
-        new_ss = shared_pool_allocate_superslab_unlocked();
+        // Release the alloc_lock to avoid deadlock with registry during superslab_allocate
+        if (g_lock_stats_enabled == 1) {
+            atomic_fetch_add(&g_lock_release_count, 1);
+        }
+        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+
+        SuperSlab* allocated_ss = sp_internal_allocate_superslab();
+
+        // Re-acquire the alloc_lock
+        if (g_lock_stats_enabled == 1) {
+            atomic_fetch_add(&g_lock_acquire_count, 1);
+            atomic_fetch_add(&g_lock_acquire_slab_count, 1); // This is part of acquisition path
+        }
+        pthread_mutex_lock(&g_shared_pool.alloc_lock);
+
+        if (!allocated_ss) {
+            // Allocation failed; return now.
+            if (g_lock_stats_enabled == 1) {
+                atomic_fetch_add(&g_lock_release_count, 1);
+            }
+            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+            return -1; // Out of memory
+        }
+
+        new_ss = allocated_ss;
+
+        // Add newly allocated SuperSlab to the shared pool's internal array
+        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+            shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
+            if (g_shared_pool.total_count >= g_shared_pool.capacity) {
+                // Pool table expansion failed; leave ss alive (registry-owned),
+                // but do not treat it as part of shared_pool.
+                // This is a critical error, return early.
+                if (g_lock_stats_enabled == 1) {
+                    atomic_fetch_add(&g_lock_release_count, 1);
+                }
+                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
+                return -1;
+            }
+        }
+        g_shared_pool.slabs[g_shared_pool.total_count] = new_ss;
+        g_shared_pool.total_count++;
     }
 
 #if !HAKMEM_BUILD_RELEASE
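The Stage 3b path keeps the optional lock accounting balanced across the new unlock/relock pair: every release bumps g_lock_release_count and every acquire bumps g_lock_acquire_count, including on the early-error returns. A minimal sketch of that pattern, assuming the counters are C11 atomics as the atomic_fetch_add() calls suggest (the wrapper names are hypothetical):

#include <pthread.h>
#include <stdatomic.h>

extern int g_lock_stats_enabled;
extern atomic_ulong g_lock_acquire_count; /* assumed type */
extern atomic_ulong g_lock_release_count; /* assumed type */

/* Hypothetical wrappers: keep acquire/release counters balanced on
 * every path so the stats stay meaningful even with early returns. */
static void sp_lock_counted(pthread_mutex_t* m)
{
    if (g_lock_stats_enabled == 1)
        atomic_fetch_add(&g_lock_acquire_count, 1);
    pthread_mutex_lock(m);
}

static void sp_unlock_counted(pthread_mutex_t* m)
{
    if (g_lock_stats_enabled == 1)
        atomic_fetch_add(&g_lock_release_count, 1);
    pthread_mutex_unlock(m);
}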