# Phase 2c: BigCache & L2.5 Pool Dynamic Expansion

**Date**: 2025-11-08
**Priority**: 🟡 MEDIUM - Memory efficiency
**Estimated Effort**: 3-5 days
**Status**: Ready for implementation
**Depends on**: Phase 2a, 2b (not blocking, can run in parallel)

---

## Executive Summary

**Problem**: BigCache and the L2.5 Pool use fixed-size arrays → hash collisions and lock contention
**Solution**: Implement a dynamic hash table (BigCache) and dynamic shard allocation (L2.5 Pool)
**Expected Result**: Better cache hit rate, less contention, better memory efficiency

---

## Part 1: BigCache Dynamic Hash Table

### Current Architecture (INEFFICIENT)

**File**: `core/hakmem_bigcache.c`

```c
#define BIGCACHE_SIZE 256
#define BIGCACHE_WAYS 8

typedef struct BigCacheEntry {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    // ...
} BigCacheEntry;

// Fixed 2D array!
static BigCacheEntry g_cache[BIGCACHE_SIZE][BIGCACHE_WAYS];
```

**Problems**:
1. **Hash collisions**: 256 sets → high collision rate for large workloads
2. **Eviction overhead**: when a set's 8 ways are full, an entry must be evicted even if memory is available
3. **Wasted capacity**: some sets may sit empty while others overflow

### Proposed Architecture (DYNAMIC)

**Hash table with chaining**:

```c
typedef struct BigCacheNode {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    struct BigCacheNode* next;   // ← Chain for collisions
    uint64_t timestamp;          // For LRU eviction
} BigCacheNode;

typedef struct BigCacheTable {
    BigCacheNode** buckets;      // Array of bucket heads
    size_t capacity;             // Current number of buckets
    size_t count;                // Total entries in cache
    pthread_rwlock_t lock;       // Protects table contents and resizing
} BigCacheTable;

static BigCacheTable g_bigcache;
```

### Implementation Tasks

#### Task 1: Redesign BigCache Structure (2-3 hours)

**File**: `core/hakmem_bigcache.c`

```c
// New hash table structure
typedef struct BigCacheNode {
    void* ptr;
    size_t size;
    uintptr_t site_id;
    struct BigCacheNode* next;   // Collision chain
    uint64_t timestamp;          // LRU tracking
    uint64_t access_count;       // Hit count for stats
} BigCacheNode;

typedef struct BigCacheTable {
    BigCacheNode** buckets;      // Dynamic array of buckets
    size_t capacity;             // Number of buckets (power of 2)
    size_t count;                // Total cached entries
    size_t max_count;            // Maximum entries before resize
    pthread_rwlock_t lock;       // Protects table contents and resizing
} BigCacheTable;

static BigCacheTable g_bigcache;

// Configuration
#define BIGCACHE_INITIAL_CAPACITY 256    // Start with 256 buckets
#define BIGCACHE_MAX_CAPACITY     65536  // Max 64K buckets
#define BIGCACHE_LOAD_FACTOR      0.75   // Resize at 75% load
```

#### Task 2: Implement Hash Table Operations (3-4 hours)

```c
// Forward declaration (implemented in Task 3)
static void resize_bigcache(void);

// Initialize BigCache
void hak_bigcache_init(void) {
    g_bigcache.capacity = BIGCACHE_INITIAL_CAPACITY;
    g_bigcache.count = 0;
    g_bigcache.max_count = (size_t)(g_bigcache.capacity * BIGCACHE_LOAD_FACTOR);
    g_bigcache.buckets = calloc(g_bigcache.capacity, sizeof(BigCacheNode*));
    pthread_rwlock_init(&g_bigcache.lock, NULL);
}

// Hash function (simple but effective; the multiplier is the 32-bit
// mixing constant from the MurmurHash3 finalizer)
static inline size_t bigcache_hash(size_t size, uintptr_t site_id,
                                   size_t capacity) {
    uint64_t hash = size ^ site_id;
    hash ^= (hash >> 16);
    hash *= 0x85ebca6b;
    hash ^= (hash >> 13);
    return hash & (capacity - 1);   // Assumes capacity is a power of 2
}
```
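Both `hak_bigcache_put` below and the L2.5 contention check in Part 2 call `get_timestamp_ns()`, which this plan never defines; it presumably already exists in the hakmem tree. To keep the sketches self-contained, a monotonic-clock version could look like the following (the name and implementation are assumptions, not confirmed project code):

```c
#include <stdint.h>
#include <time.h>

// Hypothetical helper (assumed, not verified against the hakmem sources):
// monotonic nanosecond timestamp used for LRU bookkeeping and rate limiting.
static uint64_t get_timestamp_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
```

CLOCK_MONOTONIC matters here: a wall-clock source could jump backwards and confuse both the LRU timestamps and the 5-second expansion check.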
```c
// Insert into BigCache.
// NOTE: insertion mutates the bucket chain and the entry count, so it takes
// the write lock; a read lock here would race with concurrent inserts.
int hak_bigcache_put(void* ptr, size_t size, uintptr_t site_id) {
    pthread_rwlock_wrlock(&g_bigcache.lock);

    // Check if a resize is needed (resize_bigcache takes the lock itself)
    if (g_bigcache.count >= g_bigcache.max_count) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        resize_bigcache();
        pthread_rwlock_wrlock(&g_bigcache.lock);
    }

    // Hash to bucket
    size_t bucket_idx = bigcache_hash(size, site_id, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Create new node
    BigCacheNode* node = malloc(sizeof(BigCacheNode));
    if (!node) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return -1;   // Allocation failed; caller keeps ownership of ptr
    }
    node->ptr = ptr;
    node->size = size;
    node->site_id = site_id;
    node->timestamp = get_timestamp_ns();
    node->access_count = 0;

    // Insert at head (most recent)
    node->next = *bucket;
    *bucket = node;
    g_bigcache.count++;

    pthread_rwlock_unlock(&g_bigcache.lock);
    return 0;
}

// Lookup in BigCache.
// NOTE: a hit unlinks the node, so this also needs the write lock.
int hak_bigcache_try_get(size_t size, uintptr_t site_id, void** out_ptr) {
    pthread_rwlock_wrlock(&g_bigcache.lock);

    size_t bucket_idx = bigcache_hash(size, site_id, g_bigcache.capacity);
    BigCacheNode** bucket = &g_bigcache.buckets[bucket_idx];

    // Search chain
    BigCacheNode** prev = bucket;
    BigCacheNode* node = *bucket;
    while (node) {
        if (node->size == size && node->site_id == site_id) {
            // Found match!
            *out_ptr = node->ptr;

            // Remove from cache
            *prev = node->next;
            free(node);
            g_bigcache.count--;

            pthread_rwlock_unlock(&g_bigcache.lock);
            return 1;   // Cache hit
        }
        prev = &node->next;
        node = node->next;
    }

    pthread_rwlock_unlock(&g_bigcache.lock);
    return 0;   // Cache miss
}
```

#### Task 3: Implement Resize Logic (2-3 hours)

```c
// Resize BigCache hash table (2x capacity)
static void resize_bigcache(void) {
    pthread_rwlock_wrlock(&g_bigcache.lock);

    // Re-check under the lock: another thread may have resized already
    if (g_bigcache.count < g_bigcache.max_count) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;
    }

    size_t old_capacity = g_bigcache.capacity;
    size_t new_capacity = old_capacity * 2;
    if (new_capacity > BIGCACHE_MAX_CAPACITY) {
        new_capacity = BIGCACHE_MAX_CAPACITY;
    }
    if (new_capacity == old_capacity) {
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;   // Already at max
    }

    // Allocate new buckets
    BigCacheNode** new_buckets = calloc(new_capacity, sizeof(BigCacheNode*));
    if (!new_buckets) {
        fprintf(stderr, "[BIGCACHE] Failed to resize: calloc failed\n");
        pthread_rwlock_unlock(&g_bigcache.lock);
        return;
    }

    // Rehash all entries
    for (size_t i = 0; i < old_capacity; i++) {
        BigCacheNode* node = g_bigcache.buckets[i];
        while (node) {
            BigCacheNode* next = node->next;
            // Rehash to new bucket
            size_t new_bucket_idx = bigcache_hash(node->size, node->site_id,
                                                  new_capacity);
            node->next = new_buckets[new_bucket_idx];
            new_buckets[new_bucket_idx] = node;
            node = next;
        }
    }

    // Replace old buckets
    free(g_bigcache.buckets);
    g_bigcache.buckets = new_buckets;
    g_bigcache.capacity = new_capacity;
    g_bigcache.max_count = (size_t)(new_capacity * BIGCACHE_LOAD_FACTOR);

    fprintf(stderr, "[BIGCACHE] Resized: %zu → %zu buckets (%zu entries)\n",
            old_capacity, new_capacity, g_bigcache.count);

    pthread_rwlock_unlock(&g_bigcache.lock);
}
```
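The node's `timestamp` field is reserved "for LRU eviction", but no eviction routine appears in this plan: entries only leave the table on a hit, so between resizes the cache can only grow. A minimal age-based sweep, assuming the structures above, might look like this (the function name, the age parameter, and the caller-supplied `release` callback that returns blocks to the underlying allocator are all assumptions):

```c
// Hypothetical eviction pass (sketch, not part of the plan's task list):
// walk every chain and unlink entries older than max_age_ns, handing each
// cached block to a caller-supplied release function (e.g., the munmap path).
static void bigcache_evict_older_than(uint64_t max_age_ns,
                                      void (*release)(void* ptr, size_t size)) {
    uint64_t now = get_timestamp_ns();
    pthread_rwlock_wrlock(&g_bigcache.lock);

    for (size_t i = 0; i < g_bigcache.capacity; i++) {
        BigCacheNode** prev = &g_bigcache.buckets[i];
        BigCacheNode* node = *prev;
        while (node) {
            if (now - node->timestamp > max_age_ns) {
                BigCacheNode* victim = node;
                node = node->next;
                *prev = node;                          // Unlink victim
                release(victim->ptr, victim->size);    // Return block to allocator
                free(victim);
                g_bigcache.count--;
            } else {
                prev = &node->next;
                node = node->next;
            }
        }
    }

    pthread_rwlock_unlock(&g_bigcache.lock);
}
```

An age-based sweep keeps the hot path untouched; a strict LRU would additionally need a recency list and is probably not worth the bookkeeping here.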
---

## Part 2: L2.5 Pool Dynamic Sharding

### Current Architecture (CONTENTION)

**File**: `core/hakmem_l25_pool.c`

```c
#define L25_NUM_SHARDS 64   // Fixed 64 shards

typedef struct L25Shard {
    void* freelist[MAX_SIZE_CLASSES];
    pthread_mutex_t lock;
} L25Shard;

static L25Shard g_l25_shards[L25_NUM_SHARDS];   // Fixed array
```

**Problems**:
1. **Fixed 64 shards**: High contention in multi-threaded workloads
2. **Load imbalance**: Some shards may be hot, others cold

### Proposed Architecture (DYNAMIC)

```c
typedef struct L25ShardRegistry {
    L25Shard** shards;       // Dynamic array of shards
    size_t num_shards;       // Current number of shards
    pthread_rwlock_t lock;   // Protects shard array expansion
} L25ShardRegistry;

static L25ShardRegistry g_l25_registry;
```

### Implementation Tasks

#### Task 1: Redesign L2.5 Shard Structure (1-2 hours)

**File**: `core/hakmem_l25_pool.c`

```c
typedef struct L25Shard {
    void* freelist[MAX_SIZE_CLASSES];
    pthread_mutex_t lock;
    size_t allocation_count;   // Track load
} L25Shard;

typedef struct L25ShardRegistry {
    L25Shard** shards;       // Dynamic array
    size_t num_shards;       // Current count
    size_t max_shards;       // Max shards (e.g., 1024)
    pthread_rwlock_t lock;   // Protects expansion
} L25ShardRegistry;

static L25ShardRegistry g_l25_registry;

#define L25_INITIAL_SHARDS 64     // Start with 64
#define L25_MAX_SHARDS     1024   // Max 1024 shards
```

#### Task 2: Implement Dynamic Shard Allocation (2-3 hours)

```c
// Forward declaration
static L25Shard* alloc_l25_shard(void);

// Initialize L2.5 Pool
void hak_l25_pool_init(void) {
    g_l25_registry.num_shards = L25_INITIAL_SHARDS;
    g_l25_registry.max_shards = L25_MAX_SHARDS;
    g_l25_registry.shards = calloc(L25_INITIAL_SHARDS, sizeof(L25Shard*));
    pthread_rwlock_init(&g_l25_registry.lock, NULL);

    // Allocate initial shards
    for (size_t i = 0; i < L25_INITIAL_SHARDS; i++) {
        g_l25_registry.shards[i] = alloc_l25_shard();
    }
}

// Allocate a new shard (calloc already zeroes the freelist and the counter)
static L25Shard* alloc_l25_shard(void) {
    L25Shard* shard = calloc(1, sizeof(L25Shard));
    if (!shard) {
        return NULL;
    }
    pthread_mutex_init(&shard->lock, NULL);
    return shard;
}

// Expand shard array (2x)
static int expand_l25_shards(void) {
    pthread_rwlock_wrlock(&g_l25_registry.lock);

    size_t old_num = g_l25_registry.num_shards;
    size_t new_num = old_num * 2;
    if (new_num > g_l25_registry.max_shards) {
        new_num = g_l25_registry.max_shards;
    }
    if (new_num == old_num) {
        pthread_rwlock_unlock(&g_l25_registry.lock);
        return -1;   // Already at max
    }

    // Reallocate shard array
    L25Shard** new_shards = realloc(g_l25_registry.shards,
                                    new_num * sizeof(L25Shard*));
    if (!new_shards) {
        pthread_rwlock_unlock(&g_l25_registry.lock);
        return -1;
    }

    // Allocate new shards
    for (size_t i = old_num; i < new_num; i++) {
        new_shards[i] = alloc_l25_shard();
    }

    g_l25_registry.shards = new_shards;
    g_l25_registry.num_shards = new_num;

    fprintf(stderr, "[L2.5_POOL] Expanded shards: %zu → %zu\n",
            old_num, new_num);

    pthread_rwlock_unlock(&g_l25_registry.lock);
    return 0;
}
```

#### Task 3: Contention-Based Expansion (2-3 hours)

```c
// Detect high load (a proxy for contention) and expand shards
static void check_l25_contention(void) {
    static uint64_t last_check_time = 0;   // Unsynchronized; acceptable for a heuristic
    uint64_t now = get_timestamp_ns();

    // Check every 5 seconds
    if (now - last_check_time < 5000000000ULL) {
        return;
    }
    last_check_time = now;

    // Calculate average load per shard. Hold the read lock so that
    // expand_l25_shards() cannot realloc the array out from under us.
    pthread_rwlock_rdlock(&g_l25_registry.lock);
    size_t total_load = 0;
    size_t num_shards = g_l25_registry.num_shards;
    for (size_t i = 0; i < num_shards; i++) {
        total_load += g_l25_registry.shards[i]->allocation_count;
    }
    size_t avg_load = total_load / num_shards;
    pthread_rwlock_unlock(&g_l25_registry.lock);   // Drop before taking the write lock

    // If average load is high, expand
    if (avg_load > 1000) {   // Threshold: 1000 allocations per shard
        fprintf(stderr, "[L2.5_POOL] High load detected (avg=%zu), expanding shards\n",
                avg_load);
        expand_l25_shards();

        // Reset counters
        pthread_rwlock_rdlock(&g_l25_registry.lock);
        for (size_t i = 0; i < g_l25_registry.num_shards; i++) {
            g_l25_registry.shards[i]->allocation_count = 0;
        }
        pthread_rwlock_unlock(&g_l25_registry.lock);
    }
}
```
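One piece the plan leaves out is how an allocation chooses its shard. A minimal sketch, assuming selection hashes the calling thread's ID (the function name `l25_pick_shard` and the use of `pthread_self()` are assumptions; the real code may key on CPU or allocation site instead):

```c
// Hypothetical shard selection (sketch): hash the calling thread's ID onto
// the current shard array. The registry read lock guards against
// expand_l25_shards() reallocating the array mid-lookup; the shard pointer
// itself stays valid after unlock because shards are allocated individually
// and never freed.
static L25Shard* l25_pick_shard(void) {
    uintptr_t tid = (uintptr_t)pthread_self();
    tid ^= tid >> 16;   // Cheap mixing so nearby thread IDs spread out

    pthread_rwlock_rdlock(&g_l25_registry.lock);
    L25Shard* shard = g_l25_registry.shards[tid % g_l25_registry.num_shards];
    pthread_rwlock_unlock(&g_l25_registry.lock);

    return shard;
}
```

The allocation path would then lock the chosen shard, bump `allocation_count`, and call `check_l25_contention()` on some sampled fraction of calls; after an expansion, the modulo naturally redistributes threads across the larger shard set.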
---

## Testing Strategy

### Test 1: BigCache Resize Verification

```bash
# Enable debug logging
HAKMEM_LOG=1 ./larson_hakmem 10 8 128 1024 1 12345 4 2>&1 | grep "BIGCACHE"

# Should see:
# [BIGCACHE] Resized: 256 → 512 buckets (450 entries)
# [BIGCACHE] Resized: 512 → 1024 buckets (900 entries)
```

### Test 2: L2.5 Shard Expansion

```bash
HAKMEM_LOG=1 ./larson_hakmem 10 8 128 1024 1 12345 4 2>&1 | grep "L2.5_POOL"

# Should see:
# [L2.5_POOL] Expanded shards: 64 → 128
```

### Test 3: Cache Hit Rate Improvement

```bash
# Before (fixed):
#   BigCache hit rate: ~60%
# After (dynamic):
#   BigCache hit rate: ~75% (fewer evictions)
```

---

## Success Criteria

- ✅ **BigCache resizes**: Logs show 256 → 512 → 1024 buckets
- ✅ **L2.5 expands**: Logs show 64 → 128 → 256 shards
- ✅ **Cache hit rate**: +10-20% improvement
- ✅ **No memory leaks**: Valgrind clean
- ✅ **Thread safety**: No data races (TSan clean)

---

## Deliverable

**Report file**: `/mnt/workdisk/public_share/hakmem/PHASE2C_IMPLEMENTATION_REPORT.md`

**Required sections**:
1. **BigCache resize behavior** (logs, hit rate improvement)
2. **L2.5 shard expansion** (logs, contention reduction)
3. **Performance comparison** (before/after)
4. **Memory usage** (overhead analysis)
5. **Production readiness** (YES/NO verdict)

---

**Let's make BigCache and L2.5 dynamic! 📈**