// superslab_cache.c - Cache management for the SuperSlab allocator
// Purpose: per-class reuse cache (LIFO free list) and precharge (prewarm) cache for SuperSlabs
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// Cache System - Global Variables
// ============================================================================
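// All per-class state below is indexed by tiny size class (0..7). Each class
// keeps a singly linked LIFO list of cached SuperSlabs (head + count),
// guarded by its own mutex; the uint64_t arrays are statistics counters
// (hits, misses, puts, drops, precharged).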
SuperslabCacheEntry* g_ss_cache_head[8] = {0};
size_t g_ss_cache_count[8] = {0};
size_t g_ss_cache_cap[8] = {0};
size_t g_ss_precharge_target[8] = {0};
_Atomic int g_ss_precharge_done[8] = {0};
int g_ss_cache_enabled = 0;
pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT;
pthread_mutex_t g_ss_cache_lock[8];
uint64_t g_ss_cache_hits[8] = {0};
uint64_t g_ss_cache_misses[8] = {0};
uint64_t g_ss_cache_puts[8] = {0};
uint64_t g_ss_cache_drops[8] = {0};
uint64_t g_ss_cache_precharged[8] = {0};
uint64_t g_superslabs_reused = 0;
uint64_t g_superslabs_cached = 0;
// ============================================================================
// Cache Initialization
// ============================================================================
void ss_cache_global_init(void) {
    for (int i = 0; i < 8; i++) {
        pthread_mutex_init(&g_ss_cache_lock[i], NULL);
    }
}
void ss_cache_ensure_init(void) {
    pthread_once(&g_ss_cache_once, ss_cache_global_init);
}
// ============================================================================
// OS Acquisition (mmap with alignment)
// ============================================================================
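// Alignment strategy (what the code below does):
//   1. Where MAP_ALIGNED_SUPER is available, request an aligned mapping from
//      the kernel directly and verify the result against ss_mask.
//   2. Otherwise (or if the aligned attempt fails), over-allocate 2 * ss_size,
//      round the base up to the next ss_size boundary, and munmap the unused
//      prefix. The suffix is munmapped, or only madvise(MADV_DONTNEED)d when
//      the mapping was populated (the address range stays reserved but its
//      pages are released).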
void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) {
    void* ptr = NULL;
    static int log_count = 0;
#ifdef MAP_ALIGNED_SUPER
    int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#ifdef MAP_POPULATE
    if (populate) {
        map_flags |= MAP_POPULATE;
    }
#endif
    ptr = mmap(NULL, ss_size,
               PROT_READ | PROT_WRITE,
               map_flags,
               -1, 0);
    if (ptr != MAP_FAILED) {
        atomic_fetch_add(&g_ss_mmap_count, 1);
        if (((uintptr_t)ptr & ss_mask) == 0) {
            ss_stats_os_alloc(size_class, ss_size);
            return ptr;
        }
        munmap(ptr, ss_size);
        ptr = NULL;
    } else {
        log_superslab_oom_once(ss_size, ss_size, errno);
    }
#endif
    size_t alloc_size = ss_size * 2;
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef MAP_POPULATE
    if (populate) {
        flags |= MAP_POPULATE;
    }
#endif
    void* raw = mmap(NULL, alloc_size,
                     PROT_READ | PROT_WRITE,
                     flags,
                     -1, 0);
    if (raw != MAP_FAILED) {
        uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1;
#if !HAKMEM_BUILD_RELEASE
        if (log_count < 10) {
            fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n",
                    (unsigned long)count, size_class, ss_size);
            log_count++;
        }
#endif
    }
    if (raw == MAP_FAILED) {
        log_superslab_oom_once(ss_size, alloc_size, errno);
        return NULL;
    }
    uintptr_t raw_addr = (uintptr_t)raw;
    uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask;
    ptr = (void*)aligned_addr;
    size_t prefix_size = aligned_addr - raw_addr;
    if (prefix_size > 0) {
        munmap(raw, prefix_size);
    }
    size_t suffix_size = alloc_size - prefix_size - ss_size;
    if (suffix_size > 0) {
        if (populate) {
#ifdef MADV_DONTNEED
            madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED);
#endif
        } else {
            munmap((char*)ptr + ss_size, suffix_size);
        }
    }
    ss_stats_os_alloc(size_class, ss_size);
    return ptr;
}
// ============================================================================
// Cache Precharge (prewarm)
// ============================================================================
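// Precharge runs at most once per size class (g_ss_precharge_done): under the
// class lock it maps populated SuperSlabs until the cache holds
// min(target, cap) entries (cap == 0 means "no cap"), pushing each mapping
// onto the LIFO list. An OOM from ss_os_acquire stops the loop early but the
// class is still marked as precharged.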
void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) {
    if (!g_ss_cache_enabled) return;
    if (size_class >= 8) return;
    if (g_ss_precharge_target[size_class] == 0) return;
    if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return;
    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t target = g_ss_precharge_target[size_class];
    size_t cap = g_ss_cache_cap[size_class];
    size_t desired = target;
    if (cap != 0 && desired > cap) {
        desired = cap;
    }
    while (g_ss_cache_count[size_class] < desired) {
        void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1);
        if (!raw) {
            break;
        }
        SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw;
        entry->next = g_ss_cache_head[size_class];
        g_ss_cache_head[size_class] = entry;
        g_ss_cache_count[size_class]++;
        g_ss_cache_precharged[size_class]++;
    }
    atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release);
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
}
// ============================================================================
// Cache Pop/Push Operations
// ============================================================================
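// ss_cache_pop:  pops the most recently cached SuperSlab for the class (LIFO)
//                and counts a hit or a miss.
// ss_cache_push: pushes a SuperSlab back into the cache; returns 0 without
//                caching (and counts a drop) when the class cap is reached,
//                so the caller can fall back to releasing the slab itself.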
SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) {
    if (!g_ss_cache_enabled) return NULL;
    if (size_class >= 8) return NULL;
    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    SuperslabCacheEntry* entry = g_ss_cache_head[size_class];
    if (entry) {
        g_ss_cache_head[size_class] = entry->next;
        if (g_ss_cache_count[size_class] > 0) {
            g_ss_cache_count[size_class]--;
        }
        entry->next = NULL;
        g_ss_cache_hits[size_class]++;
    } else {
        g_ss_cache_misses[size_class]++;
    }
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return entry;
}
int ss_cache_push(uint8_t size_class, SuperSlab* ss) {
    if (!g_ss_cache_enabled) return 0;
    if (size_class >= 8) return 0;
    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t cap = g_ss_cache_cap[size_class];
    if (cap != 0 && g_ss_cache_count[size_class] >= cap) {
        g_ss_cache_drops[size_class]++;
        pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
        return 0;
    }
    SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss;
    entry->next = g_ss_cache_head[size_class];
    g_ss_cache_head[size_class] = entry;
    g_ss_cache_count[size_class]++;
    g_ss_cache_puts[size_class]++;
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return 1;
}
// ============================================================================
// Precharge Configuration API
// ============================================================================
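// tiny_ss_precharge_set_class_target: sets the per-class prewarm target; a
//   non-zero target enables the cache and re-arms the precharge flag.
// tiny_ss_cache_set_class_cap: sets the per-class cap; shrinking the cap
//   immediately unmaps surplus cached SuperSlabs and then refreshes the
//   global enabled flag from all caps and targets.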
void tiny_ss_precharge_set_class_target(int class_idx, size_t target) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[class_idx]);
    g_ss_precharge_target[class_idx] = target;
    if (target > 0) {
        g_ss_cache_enabled = 1;
        atomic_store_explicit(&g_ss_precharge_done[class_idx], 0, memory_order_relaxed);
    }
    pthread_mutex_unlock(&g_ss_cache_lock[class_idx]);
}
void tiny_ss_cache_set_class_cap(int class_idx, size_t new_cap) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[class_idx]);
    size_t old_cap = g_ss_cache_cap[class_idx];
    g_ss_cache_cap[class_idx] = new_cap;
    // If shrinking the cap, drop surplus cached superslabs (popped from the head) and munmap them.
    if (new_cap == 0 || new_cap < old_cap) {
        while (g_ss_cache_count[class_idx] > new_cap) {
            SuperslabCacheEntry* entry = g_ss_cache_head[class_idx];
            if (!entry) {
                g_ss_cache_count[class_idx] = 0;
                break;
            }
            g_ss_cache_head[class_idx] = entry->next;
            g_ss_cache_count[class_idx]--;
            g_ss_cache_drops[class_idx]++;
            // Convert the cache entry back to a SuperSlab* and release it to the OS.
            SuperSlab* ss = (SuperSlab*)entry;
            size_t ss_size = (size_t)1 << ss->lg_size;
            munmap((void*)ss, ss_size);
            // Update global stats to keep accounting consistent.
            extern pthread_mutex_t g_superslab_lock; // From ss_stats_box.c
            pthread_mutex_lock(&g_superslab_lock);
            g_superslabs_freed++;
            if (g_bytes_allocated >= ss_size) {
                g_bytes_allocated -= ss_size;
            } else {
                g_bytes_allocated = 0;
            }
            pthread_mutex_unlock(&g_superslab_lock);
        }
    }
    pthread_mutex_unlock(&g_ss_cache_lock[class_idx]);
    // Recompute the cache-enabled flag (8 classes, so O(8) is cheap).
    int enabled = 0;
    for (int i = 0; i < 8; i++) {
        if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) {
            enabled = 1;
            break;
        }
    }
    g_ss_cache_enabled = enabled;
}
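// ============================================================================
// Usage sketch (illustrative only, not part of the allocator)
// ============================================================================
// The guard and the 2 MiB SuperSlab size (lg_size = 21) below are assumptions
// made purely for this example; the real size and mask are supplied by the
// caller. Compiled out unless SUPERSLAB_CACHE_USAGE_SKETCH is defined.
#ifdef SUPERSLAB_CACHE_USAGE_SKETCH
static SuperSlab* example_acquire_superslab(uint8_t size_class) {
    const size_t    ss_size = (size_t)1 << 21;        // assumed SuperSlab size
    const uintptr_t ss_mask = (uintptr_t)ss_size - 1; // alignment mask

    // One-time configuration: enable the cache and prewarm a few slabs.
    tiny_ss_precharge_set_class_target(size_class, 4);
    ss_cache_precharge(size_class, ss_size, ss_mask);

    // Fast path: reuse a cached mapping (the caller re-initializes the header).
    SuperslabCacheEntry* entry = ss_cache_pop(size_class);
    if (entry) {
        return (SuperSlab*)entry;
    }
    // Slow path: map a fresh, aligned SuperSlab from the OS.
    return (SuperSlab*)ss_os_acquire(size_class, ss_size, ss_mask, /*populate=*/0);
}
#endif // SUPERSLAB_CACHE_USAGE_SKETCH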