// superslab_cache.c - Cache management for SuperSlab allocator
// Purpose: LRU cache and precharge (prewarm) pool for SuperSlabs
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "hakmem_env_cache.h"
#include "box/ss_os_acquire_box.h"

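// Typical flow (illustrative sketch only; the class index `c` and the
// `ss_size`/`ss_mask`/`ss` values below are placeholders, not real call sites):
//
//   tiny_ss_cache_set_class_cap(c, 4);           // keep at most 4 cached SuperSlabs for class c
//   tiny_ss_precharge_set_class_target(c, 2);    // prewarm 2 SuperSlabs and enable the cache
//   ss_cache_precharge(c, ss_size, ss_mask);     // one-time prewarm up to the target
//
//   SuperslabCacheEntry* e = ss_cache_pop(c);    // reuse a cached SuperSlab, or NULL on miss
//   if (!e) { /* fall back to ss_os_acquire(c, ss_size, ss_mask, 1) */ }
//   ...
//   if (!ss_cache_push(c, ss)) { /* cache full or disabled: release to the OS instead */ }
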
// ============================================================================
// Cache System - Global Variables
// ============================================================================

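// Per-size-class state (8 tiny classes); the per-class lists and counters are
// guarded by g_ss_cache_lock[class]:
//   g_ss_cache_head/count/cap       - LIFO list of cached SuperSlabs, its length, and its limit
//   g_ss_precharge_target/done      - prewarm target and a once-per-class completion flag
//   g_ss_cache_enabled              - fast-path gate; set when any cap or target is non-zero
//   g_ss_cache_hits/misses/puts/drops/precharged - per-class counters
//   g_superslabs_reused/cached      - global counters (defined here, not updated in this file)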
SuperslabCacheEntry* g_ss_cache_head[8] = {0};
size_t g_ss_cache_count[8] = {0};
size_t g_ss_cache_cap[8] = {0};
size_t g_ss_precharge_target[8] = {0};
_Atomic int g_ss_precharge_done[8] = {0};
int g_ss_cache_enabled = 0;

pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT;
pthread_mutex_t g_ss_cache_lock[8];

uint64_t g_ss_cache_hits[8] = {0};
uint64_t g_ss_cache_misses[8] = {0};
uint64_t g_ss_cache_puts[8] = {0};
uint64_t g_ss_cache_drops[8] = {0};
uint64_t g_ss_cache_precharged[8] = {0};

uint64_t g_superslabs_reused = 0;
uint64_t g_superslabs_cached = 0;

// ============================================================================
// Cache Initialization
// ============================================================================

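// One-shot initializer (run via pthread_once): creates the per-class cache mutexes.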
void ss_cache_global_init(void) {
    for (int i = 0; i < 8; i++) {
        pthread_mutex_init(&g_ss_cache_lock[i], NULL);
    }
}

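// Idempotent entry point; every cache operation below calls this before taking a lock.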
void ss_cache_ensure_init(void) {
    pthread_once(&g_ss_cache_once, ss_cache_global_init);
}

// ============================================================================
// OS Acquisition (mmap with alignment)
// ============================================================================

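// Map one SuperSlab of ss_size bytes such that (addr & ss_mask) == 0.
// Fast path: MAP_ALIGNED_SUPER (+ MAP_POPULATE) where the platform provides it.
// Fallback:  over-allocate 2x, trim to alignment, then pre-fault the pages
//            (MADV_POPULATE_WRITE, or memset on kernels without it).
// Returns NULL on failure; `populate` is currently unused.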
void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) {
    void* ptr = NULL;
    static int log_count = 0;
    (void)populate;
#if HAKMEM_BUILD_RELEASE
    (void)log_count;
#endif

#ifdef MAP_ALIGNED_SUPER
    // MAP_POPULATE: Pre-fault pages to eliminate runtime page faults (60% of CPU overhead)
    // Critical optimization: pre-fault during mmap (one-time cost) vs. runtime faults (every alloc)
    int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_POPULATE;
    ptr = mmap(NULL, ss_size,
               PROT_READ | PROT_WRITE,
               map_flags,
               -1, 0);
    if (ptr != MAP_FAILED) {
        atomic_fetch_add(&g_ss_mmap_count, 1);
        ss_os_stats_record_alloc();
        if (((uintptr_t)ptr & ss_mask) == 0) {
            ss_stats_os_alloc(size_class, ss_size);
            return ptr;
        }
        ss_os_stats_record_free();
        munmap(ptr, ss_size);
        ptr = NULL;
    } else {
        log_superslab_oom_once(ss_size, ss_size, errno);
    }
#endif

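    // Portable fallback: over-allocate twice the SuperSlab size, then trim the
    // misaligned prefix and unused suffix so exactly one aligned mapping of
    // ss_size bytes remains.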
    size_t alloc_size = ss_size * 2;
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    void* raw = mmap(NULL, alloc_size,
                     PROT_READ | PROT_WRITE,
                     flags,
                     -1, 0);
    if (raw != MAP_FAILED) {
        uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1;
        ss_os_stats_record_alloc();
#if !HAKMEM_BUILD_RELEASE
        if (log_count < 10) {
            fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n",
                    (unsigned long)count, size_class, ss_size);
            log_count++;
        }
#endif
#if HAKMEM_BUILD_RELEASE
        (void)count;
#endif
    }
    if (raw == MAP_FAILED) {
        log_superslab_oom_once(ss_size, alloc_size, errno);
        return NULL;
    }

    uintptr_t raw_addr = (uintptr_t)raw;
    uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask;
    ptr = (void*)aligned_addr;

    size_t prefix_size = aligned_addr - raw_addr;
    if (prefix_size > 0) {
        ss_os_stats_record_free();
        munmap(raw, prefix_size);
    }
    size_t suffix_size = alloc_size - prefix_size - ss_size;
    if (suffix_size > 0) {
        // Always munmap the surplus region, keeping only the SuperSlab size actually in use.
        ss_os_stats_record_free();
        munmap((char*)ptr + ss_size, suffix_size);
    }

    // Pre-fault pages in fallback path (only after trim to actual SuperSlab size)
    // This is critical: we MUST touch the pages after munmap() to establish valid mappings
    // CRITICAL FIX (2025-12-05): Use MADV_POPULATE_WRITE for efficiency
#ifdef MADV_POPULATE_WRITE
    int ret = ss_os_madvise_guarded(ptr, ss_size, MADV_POPULATE_WRITE, "ss_cache_populate");
    if (ret != 0) {
        if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
            fprintf(stderr, "[SS_CACHE] madvise(MADV_POPULATE_WRITE) EINVAL (strict). Aborting.\n");
            abort();
        }
        // Fallback: explicit memset
        memset(ptr, 0, ss_size);
    }
#else
    // Fallback for kernels < 5.14
    memset(ptr, 0, ss_size);
    ss_os_stats_record_madvise();
#endif

    ss_stats_os_alloc(size_class, ss_size);
    return ptr;
}

// ============================================================================
// Cache Precharge (prewarm)
// ============================================================================

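// Prewarm the cache for `size_class` up to g_ss_precharge_target (clamped to the
// cap), acquiring fresh SuperSlabs from the OS. Runs at most once per class and
// is a no-op while the cache is disabled.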
void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) {
    if (!g_ss_cache_enabled) return;
    if (size_class >= 8) return;
    if (g_ss_precharge_target[size_class] == 0) return;
    if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return;

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t target = g_ss_precharge_target[size_class];
    size_t cap = g_ss_cache_cap[size_class];
    size_t desired = target;
    if (cap != 0 && desired > cap) {
        desired = cap;
    }
    while (g_ss_cache_count[size_class] < desired) {
        void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1);
        if (!raw) {
            break;
        }
        SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw;
        entry->next = g_ss_cache_head[size_class];
        g_ss_cache_head[size_class] = entry;
        g_ss_cache_count[size_class]++;
        g_ss_cache_precharged[size_class]++;
    }
    atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release);
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
}

// ============================================================================
// Cache Pop/Push Operations
// ============================================================================

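// Pop one cached SuperSlab for `size_class`; returns NULL on miss or when the
// cache is disabled. Hit/miss counters are updated under the per-class lock.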
SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) {
    if (!g_ss_cache_enabled) return NULL;
    if (size_class >= 8) return NULL;

    ss_cache_ensure_init();

    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    SuperslabCacheEntry* entry = g_ss_cache_head[size_class];
    if (entry) {
        g_ss_cache_head[size_class] = entry->next;
        if (g_ss_cache_count[size_class] > 0) {
            g_ss_cache_count[size_class]--;
        }
        entry->next = NULL;
        g_ss_cache_hits[size_class]++;
    } else {
        g_ss_cache_misses[size_class]++;
    }
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return entry;
}

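// Try to stash a SuperSlab in the per-class cache instead of unmapping it.
// Returns 1 if cached; 0 if the cache is disabled or at capacity, in which case
// the caller still owns the memory and must release it.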
int ss_cache_push(uint8_t size_class, SuperSlab* ss) {
    if (!g_ss_cache_enabled) return 0;
    if (size_class >= 8) return 0;

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t cap = g_ss_cache_cap[size_class];
    if (cap != 0 && g_ss_cache_count[size_class] >= cap) {
        g_ss_cache_drops[size_class]++;
        pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
        return 0;
    }
    SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss;
    entry->next = g_ss_cache_head[size_class];
    g_ss_cache_head[size_class] = entry;
    g_ss_cache_count[size_class]++;
    g_ss_cache_puts[size_class]++;
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return 1;
}

// ============================================================================
// Precharge Configuration API
// ============================================================================

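// Set the prewarm target for one class. A non-zero target enables the cache and
// re-arms the once-per-class precharge flag.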
void tiny_ss_precharge_set_class_target(int class_idx, size_t target) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[class_idx]);

    g_ss_precharge_target[class_idx] = target;
    if (target > 0) {
        g_ss_cache_enabled = 1;
        atomic_store_explicit(&g_ss_precharge_done[class_idx], 0, memory_order_relaxed);
    }

    pthread_mutex_unlock(&g_ss_cache_lock[class_idx]);
}

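// Set the cache capacity for one class. When shrinking, surplus cached
// SuperSlabs are unmapped immediately and the global byte accounting is
// adjusted; the global enable flag is then recomputed from all caps/targets.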
void tiny_ss_cache_set_class_cap(int class_idx, size_t new_cap) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[class_idx]);

    size_t old_cap = g_ss_cache_cap[class_idx];
    g_ss_cache_cap[class_idx] = new_cap;

    // If shrinking the cap, drop extra cached superslabs (from the head of the list) and munmap them.
    if (new_cap == 0 || new_cap < old_cap) {
        while (g_ss_cache_count[class_idx] > new_cap) {
            SuperslabCacheEntry* entry = g_ss_cache_head[class_idx];
            if (!entry) {
                g_ss_cache_count[class_idx] = 0;
                break;
            }
            g_ss_cache_head[class_idx] = entry->next;
            g_ss_cache_count[class_idx]--;
            g_ss_cache_drops[class_idx]++;

            // Convert cache entry back to SuperSlab* and release it to OS.
            SuperSlab* ss = (SuperSlab*)entry;
            size_t ss_size = (size_t)1 << ss->lg_size;
            munmap((void*)ss, ss_size);

            // Update global stats to keep accounting consistent.
            extern pthread_mutex_t g_superslab_lock;  // From ss_stats_box.c
            pthread_mutex_lock(&g_superslab_lock);
            g_superslabs_freed++;
            if (g_bytes_allocated >= ss_size) {
                g_bytes_allocated -= ss_size;
            } else {
                g_bytes_allocated = 0;
            }
            pthread_mutex_unlock(&g_superslab_lock);
        }
    }

    pthread_mutex_unlock(&g_ss_cache_lock[class_idx]);

    // Recompute cache enabled flag (8 classes, so O(8) is cheap)
    int enabled = 0;
    for (int i = 0; i < 8; i++) {
        if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) {
            enabled = 1;
            break;
        }
    }
    g_ss_cache_enabled = enabled;
}