// superslab_cache.c - Cache management for SuperSlab allocator // Purpose: LRU cache and old cache (prewarm) for SuperSlabs // License: MIT // Date: 2025-11-28 #include "hakmem_tiny_superslab_internal.h" // ============================================================================ // Cache System - Global Variables // ============================================================================ SuperslabCacheEntry* g_ss_cache_head[8] = {0}; size_t g_ss_cache_count[8] = {0}; size_t g_ss_cache_cap[8] = {0}; size_t g_ss_precharge_target[8] = {0}; _Atomic int g_ss_precharge_done[8] = {0}; int g_ss_cache_enabled = 0; pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT; pthread_mutex_t g_ss_cache_lock[8]; uint64_t g_ss_cache_hits[8] = {0}; uint64_t g_ss_cache_misses[8] = {0}; uint64_t g_ss_cache_puts[8] = {0}; uint64_t g_ss_cache_drops[8] = {0}; uint64_t g_ss_cache_precharged[8] = {0}; uint64_t g_superslabs_reused = 0; uint64_t g_superslabs_cached = 0; // ============================================================================ // Cache Initialization // ============================================================================ void ss_cache_global_init(void) { for (int i = 0; i < 8; i++) { pthread_mutex_init(&g_ss_cache_lock[i], NULL); } } void ss_cache_ensure_init(void) { pthread_once(&g_ss_cache_once, ss_cache_global_init); } // ============================================================================ // OS Acquisition (mmap with alignment) // ============================================================================ void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) { void* ptr = NULL; static int log_count = 0; #ifdef MAP_ALIGNED_SUPER int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER; #ifdef MAP_POPULATE if (populate) { map_flags |= MAP_POPULATE; } #endif ptr = mmap(NULL, ss_size, PROT_READ | PROT_WRITE, map_flags, -1, 0); if (ptr != MAP_FAILED) { atomic_fetch_add(&g_ss_mmap_count, 1); if (((uintptr_t)ptr & ss_mask) == 0) { ss_stats_os_alloc(size_class, ss_size); return ptr; } munmap(ptr, ss_size); ptr = NULL; } else { log_superslab_oom_once(ss_size, ss_size, errno); } #endif size_t alloc_size = ss_size * 2; int flags = MAP_PRIVATE | MAP_ANONYMOUS; #ifdef MAP_POPULATE if (populate) { flags |= MAP_POPULATE; } #endif void* raw = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE, flags, -1, 0); if (raw != MAP_FAILED) { uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1; #if !HAKMEM_BUILD_RELEASE if (log_count < 10) { fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n", (unsigned long)count, size_class, ss_size); log_count++; } #endif } if (raw == MAP_FAILED) { log_superslab_oom_once(ss_size, alloc_size, errno); return NULL; } uintptr_t raw_addr = (uintptr_t)raw; uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask; ptr = (void*)aligned_addr; size_t prefix_size = aligned_addr - raw_addr; if (prefix_size > 0) { munmap(raw, prefix_size); } size_t suffix_size = alloc_size - prefix_size - ss_size; if (suffix_size > 0) { if (populate) { #ifdef MADV_DONTNEED madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED); #endif } else { munmap((char*)ptr + ss_size, suffix_size); } } ss_stats_os_alloc(size_class, ss_size); return ptr; } // ============================================================================ // Cache Precharge (prewarm) // ============================================================================ void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) { if (!g_ss_cache_enabled) return; if (size_class >= 8) return; if (g_ss_precharge_target[size_class] == 0) return; if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return; ss_cache_ensure_init(); pthread_mutex_lock(&g_ss_cache_lock[size_class]); size_t target = g_ss_precharge_target[size_class]; size_t cap = g_ss_cache_cap[size_class]; size_t desired = target; if (cap != 0 && desired > cap) { desired = cap; } while (g_ss_cache_count[size_class] < desired) { void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1); if (!raw) { break; } SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw; entry->next = g_ss_cache_head[size_class]; g_ss_cache_head[size_class] = entry; g_ss_cache_count[size_class]++; g_ss_cache_precharged[size_class]++; } atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release); pthread_mutex_unlock(&g_ss_cache_lock[size_class]); } // ============================================================================ // Cache Pop/Push Operations // ============================================================================ SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) { if (!g_ss_cache_enabled) return NULL; if (size_class >= 8) return NULL; ss_cache_ensure_init(); pthread_mutex_lock(&g_ss_cache_lock[size_class]); SuperslabCacheEntry* entry = g_ss_cache_head[size_class]; if (entry) { g_ss_cache_head[size_class] = entry->next; if (g_ss_cache_count[size_class] > 0) { g_ss_cache_count[size_class]--; } entry->next = NULL; g_ss_cache_hits[size_class]++; } else { g_ss_cache_misses[size_class]++; } pthread_mutex_unlock(&g_ss_cache_lock[size_class]); return entry; } int ss_cache_push(uint8_t size_class, SuperSlab* ss) { if (!g_ss_cache_enabled) return 0; if (size_class >= 8) return 0; ss_cache_ensure_init(); pthread_mutex_lock(&g_ss_cache_lock[size_class]); size_t cap = g_ss_cache_cap[size_class]; if (cap != 0 && g_ss_cache_count[size_class] >= cap) { g_ss_cache_drops[size_class]++; pthread_mutex_unlock(&g_ss_cache_lock[size_class]); return 0; } SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss; entry->next = g_ss_cache_head[size_class]; g_ss_cache_head[size_class] = entry; g_ss_cache_count[size_class]++; g_ss_cache_puts[size_class]++; pthread_mutex_unlock(&g_ss_cache_lock[size_class]); return 1; } // ============================================================================ // Precharge Configuration API // ============================================================================ void tiny_ss_precharge_set_class_target(int class_idx, size_t target) { if (class_idx < 0 || class_idx >= 8) { return; } ss_cache_ensure_init(); pthread_mutex_lock(&g_ss_cache_lock[class_idx]); g_ss_precharge_target[class_idx] = target; if (target > 0) { g_ss_cache_enabled = 1; atomic_store_explicit(&g_ss_precharge_done[class_idx], 0, memory_order_relaxed); } pthread_mutex_unlock(&g_ss_cache_lock[class_idx]); } void tiny_ss_cache_set_class_cap(int class_idx, size_t new_cap) { if (class_idx < 0 || class_idx >= 8) { return; } ss_cache_ensure_init(); pthread_mutex_lock(&g_ss_cache_lock[class_idx]); size_t old_cap = g_ss_cache_cap[class_idx]; g_ss_cache_cap[class_idx] = new_cap; // If shrinking cap, drop extra cached superslabs (oldest from head) and munmap them. if (new_cap == 0 || new_cap < old_cap) { while (g_ss_cache_count[class_idx] > new_cap) { SuperslabCacheEntry* entry = g_ss_cache_head[class_idx]; if (!entry) { g_ss_cache_count[class_idx] = 0; break; } g_ss_cache_head[class_idx] = entry->next; g_ss_cache_count[class_idx]--; g_ss_cache_drops[class_idx]++; // Convert cache entry back to SuperSlab* and release it to OS. SuperSlab* ss = (SuperSlab*)entry; size_t ss_size = (size_t)1 << ss->lg_size; munmap((void*)ss, ss_size); // Update global stats to keep accounting consistent. extern pthread_mutex_t g_superslab_lock; // From ss_stats_box.c pthread_mutex_lock(&g_superslab_lock); g_superslabs_freed++; if (g_bytes_allocated >= ss_size) { g_bytes_allocated -= ss_size; } else { g_bytes_allocated = 0; } pthread_mutex_unlock(&g_superslab_lock); } } pthread_mutex_unlock(&g_ss_cache_lock[class_idx]); // Recompute cache enabled flag (8 classes, so O(8) is cheap) int enabled = 0; for (int i = 0; i < 8; i++) { if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) { enabled = 1; break; } } g_ss_cache_enabled = enabled; }