# Phase 2c: BigCache & L2.5 Pool Dynamic Expansion

- **Date**: 2025-11-08
- **Priority**: 🟡 MEDIUM - Memory efficiency
- **Estimated Effort**: 3-5 days
- **Status**: Ready for implementation
- **Depends on**: Phase 2a, 2b (not blocking; can run in parallel)

## Executive Summary

**Problem**: BigCache and the L2.5 Pool use fixed-size arrays, causing hash collisions and lock contention.

**Solution**: Implement a dynamic hash table (BigCache) and dynamic shard allocation (L2.5 Pool).

**Expected Result**: Better cache hit rate, less contention, more efficient memory use.
## Part 1: BigCache Dynamic Hash Table

### Current Architecture (INEFFICIENT)

**File**: `core/hakmem_bigcache.c`
```c
#define BIGCACHE_SIZE 256
#define BIGCACHE_WAYS 8

typedef struct BigCacheEntry {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    // ...
} BigCacheEntry;

// Fixed 2D array!
static BigCacheEntry g_cache[BIGCACHE_SIZE][BIGCACHE_WAYS];
```
**Problems**:
- Hash collisions: only 256 sets → high collision rate for large workloads
- Eviction overhead: when all 8 ways of a set are full, an entry must be evicted even if memory is available elsewhere
- Wasted capacity: some sets may sit empty while others overflow
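To make the eviction problem concrete, here is a minimal sketch of the insert path a fixed set-associative layout forces. This is illustrative only; the helper name and the eviction choice are assumptions, not the current code:

```c
// Hypothetical insert into the fixed 256x8 array: once the 8 ways of a
// set are occupied, something must be evicted, no matter how much room
// is free in other sets of the table.
static void fixed_cache_put(void* ptr, size_t size, uintptr_t site_id) {
    size_t set = (size ^ site_id) & (BIGCACHE_SIZE - 1);  // only 256 possible sets
    for (int way = 0; way < BIGCACHE_WAYS; way++) {
        if (g_cache[set][way].ptr == NULL) {              // free way: no eviction
            g_cache[set][way] = (BigCacheEntry){ .ptr = ptr, .size = size, .site_id = site_id };
            return;
        }
    }
    // Set is full: forced eviction (way 0 here, for illustration), even
    // though other sets may be completely empty.
    g_cache[set][0] = (BigCacheEntry){ .ptr = ptr, .size = size, .site_id = site_id };
}
```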
### Proposed Architecture (DYNAMIC)

Hash table with chaining:
```c
typedef struct BigCacheNode {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    struct BigCacheNode* next;  // ← Chain for collisions
    uint64_t timestamp;         // For LRU eviction
} BigCacheNode;

typedef struct BigCacheTable {
    BigCacheNode** buckets;     // Array of bucket heads
    size_t capacity;            // Current number of buckets
    size_t count;               // Total entries in cache
    pthread_rwlock_t lock;      // Protect resizing
} BigCacheTable;

static BigCacheTable g_bigcache;
```
### Implementation Tasks

#### Task 1: Redesign BigCache Structure (2-3 hours)

**File**: `core/hakmem_bigcache.c`
```c
// New hash table structure
typedef struct BigCacheNode {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    struct BigCacheNode* next;   // Collision chain
    uint64_t timestamp;          // LRU tracking
    uint64_t access_count;       // Hit count for stats
} BigCacheNode;

typedef struct BigCacheTable {
    BigCacheNode** buckets;      // Dynamic array of buckets
    size_t capacity;             // Number of buckets (power of 2)
    size_t count;                // Total cached entries
    size_t max_count;            // Maximum entries before resize
    pthread_rwlock_t lock;       // Protect table resizing
} BigCacheTable;

static BigCacheTable g_bigcache;

// Configuration
#define BIGCACHE_INITIAL_CAPACITY 256    // Start with 256 buckets
#define BIGCACHE_MAX_CAPACITY     65536  // Max 64K buckets
#define BIGCACHE_LOAD_FACTOR      0.75   // Resize at 75% load
```
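Back-of-envelope cost of this layout on LP64 (8-byte pointers): each `BigCacheNode` is six 8-byte fields, i.e. 48 bytes per cached entry plus allocator overhead, and the bucket array tops out at 65536 × 8 B = 512 KiB. With `BIGCACHE_LOAD_FACTOR` at 0.75, a resize triggers once `count` reaches three quarters of `capacity` (192 entries at the initial 256 buckets).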
#### Task 2: Implement Hash Table Operations (3-4 hours)
```c
// Defined in Task 3 below; declared here because hak_bigcache_put() calls it.
static void resize_bigcache(void);

// Initialize BigCache
void hak_bigcache_init(void) {
    g_bigcache.capacity = BIGCACHE_INITIAL_CAPACITY;
    g_bigcache.count = 0;
    g_bigcache.max_count = g_bigcache.capacity * BIGCACHE_LOAD_FACTOR;
    g_bigcache.buckets = calloc(g_bigcache.capacity, sizeof(BigCacheNode*));
    pthread_rwlock_init(&g_bigcache.lock, NULL);
}

// Hash function (simple but effective)
static inline size_t bigcache_hash(size_t size, uintptr_t site_id, size_t capacity) {
    uint64_t hash = size ^ site_id;
    hash ^= (hash >> 16);
    hash *= 0x85ebca6b;
    hash ^= (hash >> 13);
    return hash & (capacity - 1);  // Assumes capacity is power of 2
}

// Insert into BigCache
int hak_bigcache_put(void* ptr, size_t size, uintptr_t site_id) {
    // Write lock: we mutate the chain and the count.
    pthread_rwlock_wrlock(&g_bigcache.lock);

    // Check if resize needed (resize takes the write lock itself, so drop ours first)
    if (g_bigcache.count >= g_bigcache.max_count) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        resize_bigcache();
        pthread_rwlock_wrlock(&g_bigcache.lock);
    }

    // Hash to bucket
    size_t bucket_idx = bigcache_hash(size, site_id, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Create new node
    BigCacheNode* node = malloc(sizeof(BigCacheNode));
    if (!node) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return -1;  // Caller falls back to the normal free path
    }
    node->ptr = ptr;
    node->size = size;
    node->site_id = site_id;
    node->timestamp = get_timestamp_ns();
    node->access_count = 0;

    // Insert at head (most recent)
    node->next = *bucket;
    *bucket = node;
    g_bigcache.count++;

    pthread_rwlock_unlock(&g_bigcache.lock);
    return 0;
}

// Lookup in BigCache
int hak_bigcache_try_get(size_t size, uintptr_t site_id, void** out_ptr) {
    // Write lock: a hit unlinks the node, so a read lock is not enough.
    pthread_rwlock_wrlock(&g_bigcache.lock);

    size_t bucket_idx = bigcache_hash(size, site_id, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Search chain
    BigCacheNode** prev = bucket;
    BigCacheNode* node = *bucket;
    while (node) {
        if (node->size == size && node->site_id == site_id) {
            // Found match!
            *out_ptr = node->ptr;
            // Remove from cache
            *prev = node->next;
            free(node);
            g_bigcache.count--;
            pthread_rwlock_unlock(&g_bigcache.lock);
            return 1;  // Cache hit
        }
        prev = &node->next;
        node = node->next;
    }

    pthread_rwlock_unlock(&g_bigcache.lock);
    return 0;  // Cache miss
}
```
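A quick round-trip to sanity-check the API. This is a hypothetical driver, assuming the functions above are linked in and `get_timestamp_ns()` is available:

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void) {
    hak_bigcache_init();

    void* block = malloc(4096);
    uintptr_t site = (uintptr_t)&main;  // call-site key, as in the code above

    assert(hak_bigcache_put(block, 4096, site) == 0);

    void* reused = NULL;
    assert(hak_bigcache_try_get(4096, site, &reused) == 1);  // hit
    assert(reused == block);
    assert(hak_bigcache_try_get(4096, site, &reused) == 0);  // node was removed on hit

    free(block);
    return 0;
}
```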
#### Task 3: Implement Resize Logic (2-3 hours)
```c
// Resize BigCache hash table (2x capacity)
static void resize_bigcache(void) {
    pthread_rwlock_wrlock(&g_bigcache.lock);

    // Re-check under the write lock: another thread may have resized already.
    if (g_bigcache.count < g_bigcache.max_count) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;
    }

    size_t old_capacity = g_bigcache.capacity;
    size_t new_capacity = old_capacity * 2;
    if (new_capacity > BIGCACHE_MAX_CAPACITY) {
        new_capacity = BIGCACHE_MAX_CAPACITY;
    }
    if (new_capacity == old_capacity) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;  // Already at max
    }

    // Allocate new buckets
    BigCacheNode** new_buckets = calloc(new_capacity, sizeof(BigCacheNode*));
    if (!new_buckets) {
        fprintf(stderr, "[BIGCACHE] Failed to resize: calloc failed\n");
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;
    }

    // Rehash all entries
    for (size_t i = 0; i < old_capacity; i++) {
        BigCacheNode* node = g_bigcache.buckets[i];
        while (node) {
            BigCacheNode* next = node->next;
            // Rehash to new bucket
            size_t new_bucket_idx = bigcache_hash(node->size, node->site_id, new_capacity);
            node->next = new_buckets[new_bucket_idx];
            new_buckets[new_bucket_idx] = node;
            node = next;
        }
    }

    // Replace old buckets
    free(g_bigcache.buckets);
    g_bigcache.buckets = new_buckets;
    g_bigcache.capacity = new_capacity;
    g_bigcache.max_count = new_capacity * BIGCACHE_LOAD_FACTOR;

    fprintf(stderr, "[BIGCACHE] Resized: %zu → %zu buckets (%zu entries)\n",
            old_capacity, new_capacity, g_bigcache.count);

    pthread_rwlock_unlock(&g_bigcache.lock);
}
```
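Note the lock choreography: pthread rwlocks cannot be upgraded in place, so `hak_bigcache_put()` releases its lock before calling `resize_bigcache()`, which takes the write lock itself and re-checks the load condition so that two threads racing into a resize do not double the capacity twice.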
## Part 2: L2.5 Pool Dynamic Sharding

### Current Architecture (CONTENTION)

**File**: `core/hakmem_l25_pool.c`
```c
#define L25_NUM_SHARDS 64  // Fixed 64 shards

typedef struct L25Shard {
    void* freelist[MAX_SIZE_CLASSES];
    pthread_mutex_t lock;
} L25Shard;

static L25Shard g_l25_shards[L25_NUM_SHARDS];  // Fixed array
```
**Problems**:
- Fixed 64 shards: high contention once many threads map onto the same shard
- Load imbalance: some shards run hot while others sit cold
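For context, a minimal sketch of the usual shard-selection pattern that makes the contention concrete. The hashing scheme here is an assumption for illustration; the real selection logic lives in `core/hakmem_l25_pool.c`:

```c
#include <pthread.h>
#include <stdint.h>

// With a fixed shard count, the thread-to-shard mapping is frozen at
// compile time: 256 threads over 64 shards means ~4 threads contending
// per shard mutex on average, and a skewed mapping makes it worse.
static L25Shard* pick_shard_fixed(void) {
    uintptr_t tid = (uintptr_t)pthread_self();
    size_t idx = (tid >> 4) % L25_NUM_SHARDS;  // hypothetical mix of the thread id
    return &g_l25_shards[idx];
}
```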
### Proposed Architecture (DYNAMIC)
```c
typedef struct L25ShardRegistry {
    L25Shard** shards;      // Dynamic array of shards
    size_t num_shards;      // Current number of shards
    pthread_rwlock_t lock;  // Protect shard array expansion
} L25ShardRegistry;

static L25ShardRegistry g_l25_registry;
```
### Implementation Tasks

#### Task 1: Redesign L2.5 Shard Structure (1-2 hours)

**File**: `core/hakmem_l25_pool.c`
```c
typedef struct L25Shard {
    void* freelist[MAX_SIZE_CLASSES];
    pthread_mutex_t lock;
    size_t allocation_count;  // Track load
} L25Shard;

typedef struct L25ShardRegistry {
    L25Shard** shards;        // Dynamic array
    size_t num_shards;        // Current count
    size_t max_shards;        // Max shards (e.g., 1024)
    pthread_rwlock_t lock;    // Protect expansion
} L25ShardRegistry;

static L25ShardRegistry g_l25_registry;

#define L25_INITIAL_SHARDS 64    // Start with 64
#define L25_MAX_SHARDS     1024  // Max 1024 shards
```
#### Task 2: Implement Dynamic Shard Allocation (2-3 hours)
```c
// Defined below; declared here because hak_l25_pool_init() calls it.
static L25Shard* alloc_l25_shard(void);

// Initialize L2.5 Pool
void hak_l25_pool_init(void) {
    g_l25_registry.num_shards = L25_INITIAL_SHARDS;
    g_l25_registry.max_shards = L25_MAX_SHARDS;
    g_l25_registry.shards = calloc(L25_INITIAL_SHARDS, sizeof(L25Shard*));
    pthread_rwlock_init(&g_l25_registry.lock, NULL);

    // Allocate initial shards
    for (size_t i = 0; i < L25_INITIAL_SHARDS; i++) {
        g_l25_registry.shards[i] = alloc_l25_shard();
    }
}

// Allocate a new shard
static L25Shard* alloc_l25_shard(void) {
    L25Shard* shard = calloc(1, sizeof(L25Shard));
    if (!shard) return NULL;
    pthread_mutex_init(&shard->lock, NULL);
    shard->allocation_count = 0;
    for (int i = 0; i < MAX_SIZE_CLASSES; i++) {
        shard->freelist[i] = NULL;
    }
    return shard;
}

// Expand shard array (2x)
static int expand_l25_shards(void) {
    pthread_rwlock_wrlock(&g_l25_registry.lock);

    size_t old_num = g_l25_registry.num_shards;
    size_t new_num = old_num * 2;
    if (new_num > g_l25_registry.max_shards) {
        new_num = g_l25_registry.max_shards;
    }
    if (new_num == old_num) {
        pthread_rwlock_unlock(&g_l25_registry.lock);
        return -1;  // Already at max
    }

    // Reallocate shard array
    L25Shard** new_shards = realloc(g_l25_registry.shards, new_num * sizeof(L25Shard*));
    if (!new_shards) {
        pthread_rwlock_unlock(&g_l25_registry.lock);
        return -1;
    }

    // Allocate new shards
    for (size_t i = old_num; i < new_num; i++) {
        new_shards[i] = alloc_l25_shard();
    }

    g_l25_registry.shards = new_shards;
    g_l25_registry.num_shards = new_num;

    fprintf(stderr, "[L2.5_POOL] Expanded shards: %zu → %zu\n", old_num, new_num);

    pthread_rwlock_unlock(&g_l25_registry.lock);
    return 0;
}
```
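Callers need the registry read lock while mapping a request to a shard, because expansion may `realloc` the shard array underneath them. A sketch of that lookup (the helper name and the thread-id hashing are assumptions):

```c
// Pick a shard under the registry read lock. Individual shards are
// heap-allocated and never freed by expansion, so the returned pointer
// stays valid after the registry lock is dropped; only the array of
// pointers moves. The per-shard mutex is then taken outside the registry
// lock, so expansion is only blocked during the array indexing.
static L25Shard* l25_pick_shard(void) {
    pthread_rwlock_rdlock(&g_l25_registry.lock);
    uintptr_t tid = (uintptr_t)pthread_self();
    size_t idx = (tid >> 4) % g_l25_registry.num_shards;  // hypothetical mapping
    L25Shard* shard = g_l25_registry.shards[idx];
    pthread_rwlock_unlock(&g_l25_registry.lock);
    return shard;
}
```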
#### Task 3: Contention-Based Expansion (2-3 hours)
```c
// Detect high contention and expand shards
static void check_l25_contention(void) {
    static uint64_t last_check_time = 0;
    uint64_t now = get_timestamp_ns();

    // Check every 5 seconds
    if (now - last_check_time < 5000000000ULL) {
        return;
    }
    last_check_time = now;

    // Calculate average load per shard. Registry read lock: expansion may be
    // reallocating the shard array; the counters themselves are advisory.
    pthread_rwlock_rdlock(&g_l25_registry.lock);
    size_t total_load = 0;
    size_t num_shards = g_l25_registry.num_shards;
    for (size_t i = 0; i < num_shards; i++) {
        total_load += g_l25_registry.shards[i]->allocation_count;
    }
    pthread_rwlock_unlock(&g_l25_registry.lock);
    size_t avg_load = total_load / num_shards;

    // If average load is high, expand (expand takes the write lock itself)
    if (avg_load > 1000) {  // Threshold: 1000 allocations per shard
        fprintf(stderr, "[L2.5_POOL] High load detected (avg=%zu), expanding shards\n", avg_load);
        expand_l25_shards();

        // Reset counters (approximate; exactness is not needed for a load signal)
        pthread_rwlock_rdlock(&g_l25_registry.lock);
        for (size_t i = 0; i < g_l25_registry.num_shards; i++) {
            g_l25_registry.shards[i]->allocation_count = 0;
        }
        pthread_rwlock_unlock(&g_l25_registry.lock);
    }
}
```
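Where this hooks in is up to the implementer. One option is to bump the shard's counter inside the per-shard lock on the allocation path and poll the contention check from there; the function name, the freelist layout (next pointer stored in the block's first word), and the size-class indexing below are assumptions for illustration:

```c
// Hypothetical L2.5 allocation path showing where the bookkeeping lands.
void* hak_l25_alloc(size_t size_class) {
    L25Shard* shard = l25_pick_shard();  // see the lookup sketch above

    pthread_mutex_lock(&shard->lock);
    void* block = shard->freelist[size_class];
    if (block) {
        shard->freelist[size_class] = *(void**)block;  // pop freelist head
    }
    shard->allocation_count++;  // load signal consumed by check_l25_contention()
    pthread_mutex_unlock(&shard->lock);

    check_l25_contention();  // cheap: returns early on most calls
    return block;
}
```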
## Testing Strategy

### Test 1: BigCache Resize Verification
```bash
# Enable debug logging
HAKMEM_LOG=1 ./larson_hakmem 10 8 128 1024 1 12345 4 2>&1 | grep "BIGCACHE"

# Should see:
# [BIGCACHE] Resized: 256 → 512 buckets (450 entries)
# [BIGCACHE] Resized: 512 → 1024 buckets (900 entries)
```
### Test 2: L2.5 Shard Expansion
```bash
HAKMEM_LOG=1 ./larson_hakmem 10 8 128 1024 1 12345 4 2>&1 | grep "L2.5_POOL"

# Should see:
# [L2.5_POOL] Expanded shards: 64 → 128
```
### Test 3: Cache Hit Rate Improvement
```bash
# Before (fixed table):   BigCache hit rate ~60%
# After (dynamic table):  BigCache hit rate ~75% (fewer forced evictions)
```
## Success Criteria

- ✅ BigCache resizes: logs show 256 → 512 → 1024 buckets
- ✅ L2.5 expands: logs show 64 → 128 → 256 shards
- ✅ Cache hit rate: +10-20% improvement
- ✅ No memory leaks: Valgrind clean
- ✅ Thread safety: no data races (TSan clean)
## Deliverable

**Report file**: `/mnt/workdisk/public_share/hakmem/PHASE2C_IMPLEMENTATION_REPORT.md`

Required sections:
- BigCache resize behavior (logs, hit rate improvement)
- L2.5 shard expansion (logs, contention reduction)
- Performance comparison (before/after)
- Memory usage (overhead analysis)
- Production readiness (YES/NO verdict)
Let's make BigCache and L2.5 dynamic! 📈