296 lines
10 KiB
C
296 lines
10 KiB
C
|
|
// hakmem_bigcache.c - Big-Block Cache Implementation
|
|||
|
|
// Purpose: Per-site ring cache for large allocations
|
|||
|
|
//
|
|||
|
|
// License: MIT
|
|||
|
|
// Date: 2025-10-21
|
|||
|
|
|
|||
|
|
#include "hakmem_bigcache.h"
|
|||
|
|
#include "hakmem_internal.h" // Phase 6.15 P0.1: For HAKMEM_LOG macro
|
|||
|
|
#include <stdlib.h>
|
|||
|
|
#include <string.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
#include <pthread.h>
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Data Structures (Box theory: internal structure of the box)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
/*
 * One cell of the direct-mapped cache table: at most one cached large block.
 * The type carries a 64-byte alignment attribute, so its size is a multiple
 * of 64 bytes.
 */
typedef struct __attribute__((aligned(64))) {
    void*     ptr;           // Cached block (user pointer, not the raw allocation)
    size_t    actual_bytes;  // Real size of the cached block; lookups compare the request against it
    size_t    class_bytes;   // Nominal size of the class recorded at insertion (stats/debugging)
    uintptr_t site;          // Allocation-site key recorded at insertion
    int       valid;         // Non-zero while the slot holds a block
    uint16_t  freq;          // Phase 6.11 P0-BigCache-2: LFU hit counter (0-65535)
} BigCacheSlot;
|
|||
|
|
|
|||
|
|
// Phase 6.4 P2: O(1) Direct Table [site][class]
|
|||
|
|
// Memory usage: BIGCACHE_MAX_SITES × BIGCACHE_NUM_CLASSES slots; each slot is padded to a multiple of 64 bytes by the aligned(64) attribute
|
|||
|
|
// Cache table, addressed as g_cache[site_idx][class_idx].
static BigCacheSlot g_cache[BIGCACHE_MAX_SITES][BIGCACHE_NUM_CLASSES];

// One mutex per site row; get/put lock the row they are about to touch.
static pthread_mutex_t g_cache_locks[BIGCACHE_MAX_SITES];

// Event counters (for debugging/paper).
// NOTE(review): these are updated while holding different per-site locks
// (and `rejects` with no lock at all), so concurrent updates can race.
static struct {
    uint64_t hits, misses, puts, evictions, rejects;
} g_stats;

// Non-zero once hak_bigcache_init() has run (cleared again by shutdown).
static int g_initialized = 0;

// Phase 6.11 P0-BigCache-2: LFU hybrid - decay bookkeeping.
static uint64_t g_put_count = 0;  // Number of successful puts (drives the decay trigger)
#define LFU_DECAY_INTERVAL 1024   // Frequencies are halved every 1024 puts
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Helper Functions (Box internal implementation)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// Phase 6.11 P0-BigCache-3: FNV-1a hash function (better distribution than modulo)
|
|||
|
|
// FNV-1a (Fowler-Noll-Vo) hash: fast, simple, excellent distribution
|
|||
|
|
static inline int hash_site(uintptr_t site) {
|
|||
|
|
uint32_t hash = 2166136261u; // FNV offset basis
|
|||
|
|
uint8_t* bytes = (uint8_t*)&site;
|
|||
|
|
|
|||
|
|
// FNV-1a: XOR then multiply (better avalanche than FNV-1)
|
|||
|
|
for (int i = 0; i < sizeof(uintptr_t); i++) {
|
|||
|
|
hash ^= bytes[i];
|
|||
|
|
hash *= 16777619u; // FNV prime
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Modulo to fit into BIGCACHE_MAX_SITES (256 sites)
|
|||
|
|
return (int)(hash % BIGCACHE_MAX_SITES);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Check if size is cacheable
|
|||
|
|
static inline int is_cacheable(size_t size) {
|
|||
|
|
return size >= BIGCACHE_MIN_SIZE;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Phase 6.11: Finer-grained size-class selection (8 classes)
|
|||
|
|
// Returns: 0-7 (class index) for O(1) table lookup
|
|||
|
|
// Classes: 512KB, 1MB, 2MB, 3MB, 4MB, 6MB, 8MB, 16MB
|
|||
|
|
static inline int get_class_index(size_t size) {
|
|||
|
|
// Simple conditional approach (easier to maintain with non-power-of-2 classes)
|
|||
|
|
if (size < BIGCACHE_CLASS_1MB) return 0; // 512KB-1MB
|
|||
|
|
if (size < BIGCACHE_CLASS_2MB) return 1; // 1MB-2MB
|
|||
|
|
if (size < BIGCACHE_CLASS_3MB) return 2; // 2MB-3MB (NEW: reduces fragmentation)
|
|||
|
|
if (size < BIGCACHE_CLASS_4MB) return 3; // 3MB-4MB (NEW)
|
|||
|
|
if (size < BIGCACHE_CLASS_6MB) return 4; // 4MB-6MB
|
|||
|
|
if (size < BIGCACHE_CLASS_8MB) return 5; // 6MB-8MB (NEW)
|
|||
|
|
if (size < BIGCACHE_CLASS_16MB) return 6; // 8MB-16MB
|
|||
|
|
return 7; // 16MB+ (NEW: very large allocations)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Get size class bytes from index
|
|||
|
|
static inline size_t class_index_to_bytes(int class_idx) {
|
|||
|
|
static const size_t class_sizes[BIGCACHE_NUM_CLASSES] = {
|
|||
|
|
BIGCACHE_CLASS_512KB, // Phase 6.11: NEW class for 512KB-1MB
|
|||
|
|
BIGCACHE_CLASS_1MB,
|
|||
|
|
BIGCACHE_CLASS_2MB,
|
|||
|
|
BIGCACHE_CLASS_3MB, // Phase 6.11: NEW class to reduce fragmentation (e.g., 2.1MB → 3MB instead of 4MB)
|
|||
|
|
BIGCACHE_CLASS_4MB,
|
|||
|
|
BIGCACHE_CLASS_6MB, // Phase 6.11: NEW class
|
|||
|
|
BIGCACHE_CLASS_8MB,
|
|||
|
|
BIGCACHE_CLASS_16MB // Phase 6.11: NEW class for very large allocations
|
|||
|
|
};
|
|||
|
|
return class_sizes[class_idx];
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Callback for actual freeing (set by hakmem.c)
|
|||
|
|
static void (*g_free_callback)(void* ptr, size_t size) = NULL;
|
|||
|
|
|
|||
|
|
// Free a cached block (when evicting)
|
|||
|
|
static inline void evict_slot(BigCacheSlot* slot) {
|
|||
|
|
if (!slot->valid) return;
|
|||
|
|
|
|||
|
|
// Use callback if available, otherwise just mark invalid
|
|||
|
|
if (g_free_callback) {
|
|||
|
|
// Pass actual allocated size, not class_bytes!
|
|||
|
|
g_free_callback(slot->ptr, slot->actual_bytes);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
slot->valid = 0;
|
|||
|
|
slot->freq = 0; // Phase 6.11: Reset frequency on eviction
|
|||
|
|
g_stats.evictions++;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Phase 6.11 P0-BigCache-2: LFU Hybrid - Decay all frequencies
|
|||
|
|
// Purpose: Prevent overflow + adapt to changing workload patterns
|
|||
|
|
static inline void decay_frequencies(void) {
|
|||
|
|
for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) {
|
|||
|
|
for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) {
|
|||
|
|
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
|
|||
|
|
if (slot->valid) {
|
|||
|
|
slot->freq = slot->freq >> 1; // Halve frequency (shift right by 1)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Public API (Box Interface)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
void hak_bigcache_init(void) {
|
|||
|
|
if (g_initialized) return;
|
|||
|
|
|
|||
|
|
memset(g_cache, 0, sizeof(g_cache));
|
|||
|
|
memset(&g_stats, 0, sizeof(g_stats));
|
|||
|
|
|
|||
|
|
for (int i = 0; i < BIGCACHE_MAX_SITES; i++) {
|
|||
|
|
pthread_mutex_init(&g_cache_locks[i], NULL);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
g_initialized = 1;
|
|||
|
|
|
|||
|
|
HAKMEM_LOG("[BigCache] Initialized (P2: O(1) direct table, sites=%d, classes=%d)\n",
|
|||
|
|
BIGCACHE_MAX_SITES, BIGCACHE_NUM_CLASSES);
|
|||
|
|
HAKMEM_LOG("[BigCache] Size classes: 1MB, 2MB, 4MB, 8MB (P3: branchless)\n");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void hak_bigcache_shutdown(void) {
|
|||
|
|
if (!g_initialized) return;
|
|||
|
|
|
|||
|
|
// Free all cached blocks (O(sites × classes) = 64 × 4 = 256 slots)
|
|||
|
|
for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) {
|
|||
|
|
for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) {
|
|||
|
|
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
|
|||
|
|
if (slot->valid) {
|
|||
|
|
evict_slot(slot);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
hak_bigcache_print_stats();
|
|||
|
|
|
|||
|
|
g_initialized = 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Phase 6.4 P2: O(1) get - Direct table lookup
|
|||
|
|
int hak_bigcache_try_get(size_t size, uintptr_t site, void** out_ptr) {
|
|||
|
|
if (!g_initialized) hak_bigcache_init();
|
|||
|
|
if (!is_cacheable(size)) return 0;
|
|||
|
|
|
|||
|
|
// O(1) calculation: site_idx, class_idx
|
|||
|
|
int site_idx = hash_site(site);
|
|||
|
|
int class_idx = get_class_index(size); // P3: branchless
|
|||
|
|
|
|||
|
|
// O(1) lookup: table[site_idx][class_idx]
|
|||
|
|
pthread_mutex_t* lock = &g_cache_locks[site_idx];
|
|||
|
|
pthread_mutex_lock(lock);
|
|||
|
|
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
|
|||
|
|
|
|||
|
|
// Check: valid, matching site, AND sufficient size (Segfault fix!)
|
|||
|
|
if (slot->valid && slot->site == site && slot->actual_bytes >= size) {
|
|||
|
|
// Hit! Return and invalidate slot
|
|||
|
|
*out_ptr = slot->ptr;
|
|||
|
|
slot->valid = 0;
|
|||
|
|
|
|||
|
|
// Phase 6.11 P0-BigCache-2: LFU - increment frequency on hit (saturating at 65535)
|
|||
|
|
if (slot->freq < 65535) slot->freq++;
|
|||
|
|
|
|||
|
|
g_stats.hits++;
|
|||
|
|
pthread_mutex_unlock(lock);
|
|||
|
|
return 1;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Miss (invalid, wrong site, or undersized)
|
|||
|
|
g_stats.misses++;
|
|||
|
|
pthread_mutex_unlock(lock);
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Phase 6.4 P2: O(1) put - Direct table insertion
|
|||
|
|
int hak_bigcache_put(void* ptr, size_t actual_bytes, uintptr_t site) {
|
|||
|
|
if (!g_initialized) hak_bigcache_init();
|
|||
|
|
if (!is_cacheable(actual_bytes)) {
|
|||
|
|
g_stats.rejects++;
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// O(1) calculation: site_idx, class_idx
|
|||
|
|
int site_idx = hash_site(site);
|
|||
|
|
int class_idx = get_class_index(actual_bytes); // P3: branchless
|
|||
|
|
|
|||
|
|
// O(1) lookup: table[site_idx][class_idx]
|
|||
|
|
pthread_mutex_t* lock = &g_cache_locks[site_idx];
|
|||
|
|
pthread_mutex_lock(lock);
|
|||
|
|
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
|
|||
|
|
|
|||
|
|
// Phase 6.11 P0-BigCache-2: LFU Hybrid Eviction
|
|||
|
|
// Instead of evicting target slot directly, find coldest slot in same site
|
|||
|
|
if (slot->valid) {
|
|||
|
|
BigCacheSlot* coldest = slot;
|
|||
|
|
uint16_t min_freq = slot->freq;
|
|||
|
|
|
|||
|
|
// Scan all class slots in same site (8 slots max)
|
|||
|
|
for (int c = 0; c < BIGCACHE_NUM_CLASSES; c++) {
|
|||
|
|
BigCacheSlot* candidate = &g_cache[site_idx][c];
|
|||
|
|
if (!candidate->valid) {
|
|||
|
|
// Invalid slot = coldest (freq=0, prefer reusing empty slots)
|
|||
|
|
coldest = candidate;
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
if (candidate->freq < min_freq) {
|
|||
|
|
min_freq = candidate->freq;
|
|||
|
|
coldest = candidate;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Evict coldest slot (might be target slot, might be different)
|
|||
|
|
evict_slot(coldest);
|
|||
|
|
|
|||
|
|
// If we evicted a different slot, use it instead of target slot
|
|||
|
|
if (coldest != slot) {
|
|||
|
|
slot = coldest;
|
|||
|
|
class_idx = get_class_index(actual_bytes); // Recalculate class for new slot
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Store in cache (O(1) direct write)
|
|||
|
|
slot->ptr = ptr;
|
|||
|
|
slot->actual_bytes = actual_bytes; // Store actual size (Segfault fix!)
|
|||
|
|
slot->class_bytes = class_index_to_bytes(class_idx); // For stats/debugging
|
|||
|
|
slot->site = site;
|
|||
|
|
slot->valid = 1;
|
|||
|
|
slot->freq = 0; // Phase 6.11: Initialize frequency to 0 (will increment on first hit)
|
|||
|
|
|
|||
|
|
g_stats.puts++;
|
|||
|
|
g_put_count++;
|
|||
|
|
|
|||
|
|
// Phase 6.11 P0-BigCache-2: Periodic decay (every 1024 puts)
|
|||
|
|
if (g_put_count % LFU_DECAY_INTERVAL == 0) {
|
|||
|
|
decay_frequencies();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
pthread_mutex_unlock(lock);
|
|||
|
|
return 1;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
void hak_bigcache_print_stats(void) {
|
|||
|
|
if (!g_initialized) return;
|
|||
|
|
|
|||
|
|
printf("\n========================================\n");
|
|||
|
|
printf("BigCache Statistics\n");
|
|||
|
|
printf("========================================\n");
|
|||
|
|
printf("Hits: %lu\n", (unsigned long)g_stats.hits);
|
|||
|
|
printf("Misses: %lu\n", (unsigned long)g_stats.misses);
|
|||
|
|
printf("Puts: %lu\n", (unsigned long)g_stats.puts);
|
|||
|
|
printf("Evictions: %lu\n", (unsigned long)g_stats.evictions);
|
|||
|
|
printf("Rejects: %lu\n", (unsigned long)g_stats.rejects);
|
|||
|
|
|
|||
|
|
if (g_stats.hits + g_stats.misses > 0) {
|
|||
|
|
double hit_rate = (double)g_stats.hits / (g_stats.hits + g_stats.misses) * 100.0;
|
|||
|
|
printf("Hit Rate: %.1f%%\n", hit_rate);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
printf("========================================\n");
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/*
 * Install the hook that releases a block when it is evicted from the cache.
 *
 * fn: hook to store in g_free_callback; it replaces any previously
 *     installed hook. While the stored hook is NULL, evicted blocks are
 *     dropped from the table without being released.
 */
void hak_bigcache_set_free_callback(hak_bigcache_free_fn_t fn) {
    g_free_callback = fn;
}
|