Files
hakmem/core/hakmem_bigcache.c

296 lines
10 KiB
C
Raw Normal View History

// hakmem_bigcache.c - Big-Block Cache Implementation
// Purpose: Per-site ring cache for large allocations
//
// License: MIT
// Date: 2025-10-21
#include "hakmem_bigcache.h"
#include "hakmem_internal.h" // Phase 6.15 P0.1: For HAKMEM_LOG macro
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <pthread.h>
// ============================================================================
// Data Structures (Box theory: the internal structure of the box)
// ============================================================================
// One cache entry: a single parked big block for one (site row, size class) cell.
// The aligned(64) attribute aligns and pads every slot to a multiple of 64 bytes.
typedef struct __attribute__((aligned(64))) {
void* ptr; // Cached pointer (user pointer, not raw)
size_t actual_bytes; // Real size of the cached block; try_get compares it against the requested size
size_t class_bytes; // Nominal byte size of the block's size class (written by put; not read in the code visible here)
uintptr_t site; // Allocation-site key the block was cached under
int valid; // 1 while the slot holds a cached block, 0 when it is empty
uint16_t freq; // Phase 6.11 P0-BigCache-2: LFU frequency counter (0-65535), halved by decay_frequencies()
} BigCacheSlot;
// Phase 6.4 P2: O(1) direct table, indexed as g_cache[site_idx][class_idx].
// Memory use: BIGCACHE_MAX_SITES x BIGCACHE_NUM_CLASSES slots; each slot is
// padded to a multiple of 64 bytes by the aligned(64) attribute on BigCacheSlot.
static BigCacheSlot g_cache[BIGCACHE_MAX_SITES][BIGCACHE_NUM_CLASSES];
// One mutex per site row; try_get/put lock the row selected by hash_site().
static pthread_mutex_t g_cache_locks[BIGCACHE_MAX_SITES];
// Statistics (for debugging/paper)
// NOTE(review): these counters are incremented while holding only one per-site
// lock (and `rejects` with no lock at all), so updates coming from different
// sites are not synchronized with each other.
static struct {
uint64_t hits;
uint64_t misses;
uint64_t puts;
uint64_t evictions;
uint64_t rejects;
} g_stats;
// Set to 1 by hak_bigcache_init() and back to 0 by hak_bigcache_shutdown().
static int g_initialized = 0;
// Phase 6.11 P0-BigCache-2: LFU Hybrid - Decay tracking
static uint64_t g_put_count = 0; // Total successful puts (drives the decay trigger)
#define LFU_DECAY_INTERVAL 1024 // Decay every 1024 puts (prevents overflow + adapts to workload changes)
// ============================================================================
// Helper Functions (Box-internal implementation)
// ============================================================================
// Phase 6.11 P0-BigCache-3: map an allocation-site key to a row of the table.
// Runs 32-bit FNV-1a over the bytes of `site` (in memory order) and reduces the
// result modulo BIGCACHE_MAX_SITES, so the return value is a valid row index.
static inline int hash_site(uintptr_t site) {
    const uint8_t* cursor = (const uint8_t*)&site;
    const uint8_t* const stop = cursor + sizeof site;
    uint32_t h = 2166136261u;  // FNV offset basis

    while (cursor != stop) {
        // FNV-1a step: fold the byte in with XOR first, then multiply by the FNV prime.
        h = (h ^ *cursor) * 16777619u;
        cursor++;
    }

    return (int)(h % BIGCACHE_MAX_SITES);
}
// Cacheability predicate: 1 when `size` reaches BIGCACHE_MIN_SIZE, 0 otherwise.
static inline int is_cacheable(size_t size) {
    if (size < BIGCACHE_MIN_SIZE) {
        return 0;
    }
    return 1;
}
// Phase 6.11: finer-grained size-class selection (8 classes).
// Returns the class index 0-7 used as the column of the direct table.
// Class i holds sizes below the i-th upper bound listed here; anything at or
// above BIGCACHE_CLASS_16MB lands in the last class (7).
static inline int get_class_index(size_t size) {
    // Exclusive upper bound of classes 0..6, in ascending order.
    static const size_t upper_bound[] = {
        BIGCACHE_CLASS_1MB,   // class 0: 512KB-1MB
        BIGCACHE_CLASS_2MB,   // class 1: 1MB-2MB
        BIGCACHE_CLASS_3MB,   // class 2: 2MB-3MB
        BIGCACHE_CLASS_4MB,   // class 3: 3MB-4MB
        BIGCACHE_CLASS_6MB,   // class 4: 4MB-6MB
        BIGCACHE_CLASS_8MB,   // class 5: 6MB-8MB
        BIGCACHE_CLASS_16MB   // class 6: 8MB-16MB
    };
    const int bounded_classes = (int)(sizeof upper_bound / sizeof upper_bound[0]);

    for (int idx = 0; idx < bounded_classes; idx++) {
        if (size < upper_bound[idx]) {
            return idx;
        }
    }
    return bounded_classes;  // class 7: 16MB and larger
}
// Translate a class index (as produced by get_class_index) into the nominal
// byte size of that class. The caller must pass an index inside the table;
// no range check is performed here.
static inline size_t class_index_to_bytes(int class_idx) {
    static const size_t bytes_by_class[BIGCACHE_NUM_CLASSES] = {
        [0] = BIGCACHE_CLASS_512KB,
        [1] = BIGCACHE_CLASS_1MB,
        [2] = BIGCACHE_CLASS_2MB,
        [3] = BIGCACHE_CLASS_3MB,
        [4] = BIGCACHE_CLASS_4MB,
        [5] = BIGCACHE_CLASS_6MB,
        [6] = BIGCACHE_CLASS_8MB,
        [7] = BIGCACHE_CLASS_16MB
    };
    const size_t nominal = bytes_by_class[class_idx];
    return nominal;
}
// Callback used by evict_slot() to release an evicted block; it receives the
// cached pointer and the block's actual size. Installed through
// hak_bigcache_set_free_callback(); while it is NULL, eviction only clears the slot.
static void (*g_free_callback)(void* ptr, size_t size) = NULL;
// Drop the block held in `slot`, if any.
// An empty slot is left untouched. For an occupied slot the registered free
// callback (when one is set) receives the cached pointer together with the
// block's actual size - not the class size - after which the slot is marked
// empty, its LFU counter is cleared and the eviction statistic is bumped.
static inline void evict_slot(BigCacheSlot* slot) {
    if (slot->valid) {
        void (*release)(void*, size_t) = g_free_callback;
        if (release != NULL) {
            release(slot->ptr, slot->actual_bytes);
        }
        slot->freq = 0;
        slot->valid = 0;
        g_stats.evictions++;
    }
}
// Phase 6.11 P0-BigCache-2: LFU hybrid - age every frequency counter.
// Walks the whole table and halves the counter of each occupied slot, which
// keeps counters from saturating and lets old popularity fade over time.
// This function takes no locks itself.
static inline void decay_frequencies(void) {
    for (int row = 0; row < BIGCACHE_MAX_SITES; row++) {
        BigCacheSlot* cells = g_cache[row];
        for (int col = 0; col < BIGCACHE_NUM_CLASSES; col++) {
            if (!cells[col].valid) {
                continue;
            }
            cells[col].freq >>= 1;  // halve
        }
    }
}
// ============================================================================
// Public API (Box Interface)
// ============================================================================
// Initialize the cache: clear the slot table and statistics and create one
// mutex per site row. Calling it again while initialized is a no-op.
//
// try_get/put call this lazily on first use, possibly from several threads at
// once, so the setup is serialized with a function-local mutex: only the first
// caller performs it, and nobody can re-run pthread_mutex_init() or the memset
// over a table that another thread has already started using.
void hak_bigcache_init(void) {
    static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

    pthread_mutex_lock(&init_lock);
    if (g_initialized) {
        pthread_mutex_unlock(&init_lock);
        return;
    }
    memset(g_cache, 0, sizeof(g_cache));
    memset(&g_stats, 0, sizeof(g_stats));
    for (int i = 0; i < BIGCACHE_MAX_SITES; i++) {
        pthread_mutex_init(&g_cache_locks[i], NULL);
    }
    g_initialized = 1;
    pthread_mutex_unlock(&init_lock);

    // Logged outside the init lock; g_initialized is already set at this point.
    HAKMEM_LOG("[BigCache] Initialized (P2: O(1) direct table, sites=%d, classes=%d)\n",
               BIGCACHE_MAX_SITES, BIGCACHE_NUM_CLASSES);
    // Lists the eight classes that get_class_index()/class_index_to_bytes() implement.
    HAKMEM_LOG("[BigCache] Size classes: 512KB, 1MB, 2MB, 3MB, 4MB, 6MB, 8MB, 16MB\n");
}
void hak_bigcache_shutdown(void) {
if (!g_initialized) return;
// Free all cached blocks (O(sites × classes) = 64 × 4 = 256 slots)
for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) {
for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) {
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
if (slot->valid) {
evict_slot(slot);
}
}
}
hak_bigcache_print_stats();
g_initialized = 0;
}
// Phase 6.4 P2: O(1) get - direct table lookup.
// Tries to hand out a cached block for a request of `size` bytes made from
// allocation site `site`.
//
// Returns 1 and stores the block in *out_ptr on a hit; returns 0 (leaving
// *out_ptr untouched) when the size is not cacheable or the lookup misses.
// A hit needs an occupied slot whose site matches exactly and whose block is
// at least `size` bytes; the slot is emptied when its block is handed out.
// The cache is initialized lazily on first use.
int hak_bigcache_try_get(size_t size, uintptr_t site, void** out_ptr) {
    if (!g_initialized) {
        hak_bigcache_init();
    }
    if (!is_cacheable(size)) {
        return 0;
    }

    // Row from the site hash, column from the size class.
    const int row = hash_site(site);
    const int col = get_class_index(size);
    pthread_mutex_t* row_lock = &g_cache_locks[row];
    int found = 0;

    pthread_mutex_lock(row_lock);
    BigCacheSlot* entry = &g_cache[row][col];
    if (!entry->valid || entry->site != site || entry->actual_bytes < size) {
        // Miss: empty slot, different site, or cached block too small.
        g_stats.misses++;
    } else {
        *out_ptr = entry->ptr;
        entry->valid = 0;
        // Phase 6.11 P0-BigCache-2: count the hit, saturating at 65535.
        if (entry->freq < 65535) {
            entry->freq++;
        }
        g_stats.hits++;
        found = 1;
    }
    pthread_mutex_unlock(row_lock);

    return found;
}
// Phase 6.4 P2: O(1) put - Direct table insertion
int hak_bigcache_put(void* ptr, size_t actual_bytes, uintptr_t site) {
if (!g_initialized) hak_bigcache_init();
if (!is_cacheable(actual_bytes)) {
g_stats.rejects++;
return 0;
}
// O(1) calculation: site_idx, class_idx
int site_idx = hash_site(site);
int class_idx = get_class_index(actual_bytes); // P3: branchless
// O(1) lookup: table[site_idx][class_idx]
pthread_mutex_t* lock = &g_cache_locks[site_idx];
pthread_mutex_lock(lock);
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
// Phase 6.11 P0-BigCache-2: LFU Hybrid Eviction
// Instead of evicting target slot directly, find coldest slot in same site
if (slot->valid) {
BigCacheSlot* coldest = slot;
uint16_t min_freq = slot->freq;
// Scan all class slots in same site (8 slots max)
for (int c = 0; c < BIGCACHE_NUM_CLASSES; c++) {
BigCacheSlot* candidate = &g_cache[site_idx][c];
if (!candidate->valid) {
// Invalid slot = coldest (freq=0, prefer reusing empty slots)
coldest = candidate;
break;
}
if (candidate->freq < min_freq) {
min_freq = candidate->freq;
coldest = candidate;
}
}
// Evict coldest slot (might be target slot, might be different)
evict_slot(coldest);
// If we evicted a different slot, use it instead of target slot
if (coldest != slot) {
slot = coldest;
class_idx = get_class_index(actual_bytes); // Recalculate class for new slot
}
}
// Store in cache (O(1) direct write)
slot->ptr = ptr;
slot->actual_bytes = actual_bytes; // Store actual size (Segfault fix!)
slot->class_bytes = class_index_to_bytes(class_idx); // For stats/debugging
slot->site = site;
slot->valid = 1;
slot->freq = 0; // Phase 6.11: Initialize frequency to 0 (will increment on first hit)
g_stats.puts++;
g_put_count++;
// Phase 6.11 P0-BigCache-2: Periodic decay (every 1024 puts)
if (g_put_count % LFU_DECAY_INTERVAL == 0) {
decay_frequencies();
}
pthread_mutex_unlock(lock);
return 1;
}
// Print the cache counters (hits, misses, puts, evictions, rejects) to stdout,
// followed by the hit rate in percent when at least one lookup was recorded.
// Prints nothing while the cache is not initialized.
//
// The counters are uint64_t; they are printed through unsigned long long /
// %llu so the values are not truncated on platforms where long is 32 bits.
void hak_bigcache_print_stats(void) {
    if (!g_initialized) return;

    printf("\n========================================\n");
    printf("BigCache Statistics\n");
    printf("========================================\n");
    printf("Hits: %llu\n", (unsigned long long)g_stats.hits);
    printf("Misses: %llu\n", (unsigned long long)g_stats.misses);
    printf("Puts: %llu\n", (unsigned long long)g_stats.puts);
    printf("Evictions: %llu\n", (unsigned long long)g_stats.evictions);
    printf("Rejects: %llu\n", (unsigned long long)g_stats.rejects);

    // Guard against dividing by zero when no lookup has happened yet.
    if (g_stats.hits + g_stats.misses > 0) {
        double hit_rate = (double)g_stats.hits / (g_stats.hits + g_stats.misses) * 100.0;
        printf("Hit Rate: %.1f%%\n", hit_rate);
    }
    printf("========================================\n");
}
// Register the function that evict_slot() calls to release an evicted block.
// It is invoked with the cached pointer and the block's actual size. Passing
// NULL means eviction only marks the slot empty without freeing anything.
// NOTE(review): the assignment is a plain store with no locking - presumably
// meant to be called once during setup; confirm against the caller.
void hak_bigcache_set_free_callback(hak_bigcache_free_fn_t fn) {
g_free_callback = fn;
}