Phase 1: Warm Pool Capacity Tuning (capacity 16 → 12, prefill threshold 4 → 12 to match)
Key Changes:
- Reduced static capacity from 16 to 12 SuperSlabs per class
- Fixed prefill threshold from hardcoded 4 to match capacity (12)
- Updated environment variable clamping to [1,12]
- This allows the warm pool to actually utilize its full capacity

Performance:
- Baseline (post-unified-cache-opt): 4.76M ops/s
- After Phase 1: 4.84M ops/s
- Improvement: +1.6% (expected +15-20%)

Note: The actual improvement is lower than expected because the warm pool bottleneck is only part of the overall allocation path. The unified cache optimization (+14.9%) had already addressed much of the registry scan overhead.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -24,7 +24,7 @@ extern SuperSlab* superslab_refill(int class_idx);
|
|||||||
// Prefill budget: How many additional SuperSlabs to load when pool is empty
|
// Prefill budget: How many additional SuperSlabs to load when pool is empty
|
||||||
// - If pool is empty, load PREFILL_BUDGET extra slabs to build working set
|
// - If pool is empty, load PREFILL_BUDGET extra slabs to build working set
|
||||||
// - This avoids repeated registry scans on rapid cache misses
|
// - This avoids repeated registry scans on rapid cache misses
|
||||||
// - Set to 2 to balance between prefill lock overhead and pool depletion
|
// - Phase 2: Keep at 2 (increasing to 4 caused contention regression -1.5%)
|
||||||
#define WARM_POOL_PREFILL_BUDGET 2
|
#define WARM_POOL_PREFILL_BUDGET 2
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@ -39,10 +39,10 @@
|
|||||||
|
|
||||||
// Maximum warm SuperSlabs per thread per class (tunable)
|
// Maximum warm SuperSlabs per thread per class (tunable)
|
||||||
// Trade-off: Working set size vs warm pool effectiveness
|
// Trade-off: Working set size vs warm pool effectiveness
|
||||||
// - 4: Original (90% hit rate expected, but broken implementation)
|
// - 4: Original (90% hit rate expected, but broken implementation - hardcoded prefill threshold)
|
||||||
// - 16: Increased to compensate for suboptimal push logic
|
// - 12: Optimized capacity with matching prefill threshold (Phase 1)
|
||||||
// - Higher values: More memory but better locality
|
// - Higher values: More memory but better locality
|
||||||
#define TINY_WARM_POOL_MAX_PER_CLASS 16
|
#define TINY_WARM_POOL_MAX_PER_CLASS 12
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
SuperSlab* slabs[TINY_WARM_POOL_MAX_PER_CLASS];
|
SuperSlab* slabs[TINY_WARM_POOL_MAX_PER_CLASS];
|
||||||
@ -107,16 +107,16 @@ static inline int tiny_warm_pool_count(int class_idx) {
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
// Get warm pool capacity from environment (configurable at runtime)
|
// Get warm pool capacity from environment (configurable at runtime)
|
||||||
// ENV: HAKMEM_WARM_POOL_SIZE=N (default: 4)
|
// ENV: HAKMEM_WARM_POOL_SIZE=N (default: 12)
|
||||||
static inline int warm_pool_max_per_class(void) {
|
static inline int warm_pool_max_per_class(void) {
|
||||||
static int g_max = -1;
|
static int g_max = -1;
|
||||||
if (__builtin_expect(g_max == -1, 0)) {
|
if (__builtin_expect(g_max == -1, 0)) {
|
||||||
const char* env = getenv("HAKMEM_WARM_POOL_SIZE");
|
const char* env = getenv("HAKMEM_WARM_POOL_SIZE");
|
||||||
if (env && *env) {
|
if (env && *env) {
|
||||||
int v = atoi(env);
|
int v = atoi(env);
|
||||||
// Clamp to valid range [1, 16]
|
// Clamp to valid range [1, 12]
|
||||||
if (v < 1) v = 1;
|
if (v < 1) v = 1;
|
||||||
if (v > 16) v = 16;
|
if (v > 12) v = 12;
|
||||||
g_max = v;
|
g_max = v;
|
||||||
} else {
|
} else {
|
||||||
g_max = TINY_WARM_POOL_MAX_PER_CLASS;
|
g_max = TINY_WARM_POOL_MAX_PER_CLASS;
|
||||||
|
|||||||
@ -83,7 +83,8 @@ sp_acquire_from_empty_scan(int class_idx, SuperSlab** ss_out, int* slab_idx_out,
|
|||||||
|
|
||||||
// WARM POOL PREFILL: Add HOT SuperSlabs to warm pool (if not already primary result)
|
// WARM POOL PREFILL: Add HOT SuperSlabs to warm pool (if not already primary result)
|
||||||
// This is low-cost during registry scan and avoids future expensive scans
|
// This is low-cost during registry scan and avoids future expensive scans
|
||||||
if (ss != primary_result && tiny_warm_pool_count(class_idx) < 4) {
|
// Phase 1: Increase threshold from 4 to 12 to match TINY_WARM_POOL_MAX_PER_CLASS
|
||||||
|
if (ss != primary_result && tiny_warm_pool_count(class_idx) < 12) {
|
||||||
tiny_warm_pool_push(class_idx, ss);
|
tiny_warm_pool_push(class_idx, ss);
|
||||||
// Track prefilled SuperSlabs for metrics
|
// Track prefilled SuperSlabs for metrics
|
||||||
g_warm_pool_stats[class_idx].prefilled++;
|
g_warm_pool_stats[class_idx].prefilled++;
|
||||||
|
|||||||
Reference in New Issue
Block a user