Files
hakmem/core/hakmem_bigcache.c
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

296 lines
10 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_bigcache.c - Big-Block Cache Implementation
// Purpose: Per-site ring cache for large allocations
//
// License: MIT
// Date: 2025-10-21
#include "hakmem_bigcache.h"
#include "hakmem_internal.h" // Phase 6.15 P0.1: For HAKMEM_LOG macro
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <pthread.h>
// ============================================================================
// Data Structures (Box theory: internal structure of the box)
// ============================================================================
// One cache entry: a single cached large block plus the metadata used to
// validate a lookup against it. The type is declared with 64-byte alignment.
typedef struct __attribute__((aligned(64))) {
void* ptr; // Cached pointer returned to the caller on a hit (user pointer, not raw)
size_t actual_bytes; // Actual allocated size; a lookup only hits when this is >= the requested size
size_t class_bytes; // Nominal size of the block's size class (kept for stats/debugging)
uintptr_t site; // Allocation site identifier; must match exactly for a hit
int valid; // 1 while the slot holds a cached block, 0 when it is empty
uint16_t freq; // Phase 6.11 P0-BigCache-2: LFU frequency counter (0-65535), incremented on hit, halved by decay
} BigCacheSlot;
// Phase 6.4 P2: O(1) direct table indexed as [site hash][size class].
// Memory usage: BIGCACHE_MAX_SITES x BIGCACHE_NUM_CLASSES x sizeof(BigCacheSlot).
static BigCacheSlot g_cache[BIGCACHE_MAX_SITES][BIGCACHE_NUM_CLASSES];
// One mutex per site row; get/put lock the row selected by hash_site().
static pthread_mutex_t g_cache_locks[BIGCACHE_MAX_SITES];
// Statistics (for debugging/paper)
// NOTE(review): these counters are updated while holding at most one per-site
// lock, so updates coming from different site rows are not serialized.
static struct {
uint64_t hits; // Lookups that returned a cached block
uint64_t misses; // Cacheable lookups that found no usable block
uint64_t puts; // Blocks stored into the cache
uint64_t evictions; // Valid slots released through evict_slot()
uint64_t rejects; // Puts refused because the size was not cacheable
} g_stats;
static int g_initialized = 0; // Set to 1 by hak_bigcache_init(), back to 0 by hak_bigcache_shutdown()
// Phase 6.11 P0-BigCache-2: LFU Hybrid - Decay tracking
static uint64_t g_put_count = 0; // Total puts (for decay trigger)
#define LFU_DECAY_INTERVAL 1024 // Decay every 1024 puts (prevents overflow + adapts to workload changes)
// ============================================================================
// Helper Functions (Box-internal implementation)
// ============================================================================
// Phase 6.11 P0-BigCache-3: map an allocation-site id to a row of the cache table.
// Runs 32-bit FNV-1a (XOR each byte, then multiply by the FNV prime) over the
// in-memory bytes of `site`, then reduces the hash modulo BIGCACHE_MAX_SITES.
//
// site:    allocation-site identifier to hash.
// returns: row index in [0, BIGCACHE_MAX_SITES).
static inline int hash_site(uintptr_t site) {
    uint32_t hash = 2166136261u; // FNV-1a 32-bit offset basis
    const uint8_t* bytes = (const uint8_t*)&site;

    // size_t index: sizeof yields an unsigned value, so an int counter would
    // turn the loop bound into a signed/unsigned comparison.
    for (size_t i = 0; i < sizeof site; i++) {
        hash ^= bytes[i];
        hash *= 16777619u; // FNV-1a 32-bit prime
    }

    return (int)(hash % BIGCACHE_MAX_SITES);
}
// Tell whether a block of `size` bytes is large enough for the big-block cache.
// returns: 1 when size >= BIGCACHE_MIN_SIZE, otherwise 0.
static inline int is_cacheable(size_t size) {
    if (size < BIGCACHE_MIN_SIZE) {
        return 0;
    }
    return 1;
}
// Phase 6.11: finer-grained size-class selection (8 classes).
// Returns the column (0-7) of the cache table that covers `size`:
//   0: below 1MB   1: 1MB-2MB   2: 2MB-3MB   3: 3MB-4MB
//   4: 4MB-6MB     5: 6MB-8MB   6: 8MB-16MB  7: 16MB and above
// Each class is selected by the first upper bound that `size` is strictly below.
static inline int get_class_index(size_t size) {
    // Exclusive upper bound of classes 0..6, in ascending class order.
    static const size_t upper_bounds[] = {
        BIGCACHE_CLASS_1MB,
        BIGCACHE_CLASS_2MB,
        BIGCACHE_CLASS_3MB,
        BIGCACHE_CLASS_4MB,
        BIGCACHE_CLASS_6MB,
        BIGCACHE_CLASS_8MB,
        BIGCACHE_CLASS_16MB
    };
    const int bounded_classes = (int)(sizeof upper_bounds / sizeof upper_bounds[0]);

    for (int idx = 0; idx < bounded_classes; idx++) {
        if (size < upper_bounds[idx]) {
            return idx;
        }
    }
    // Not below any bound: the open-ended top class.
    return bounded_classes;
}
// Translate a size-class index into that class's nominal byte size.
// class_idx: index as produced by get_class_index(); it is used directly as an
//            array subscript with no range check.
// returns:   the BIGCACHE_CLASS_* constant registered for that index.
static inline size_t class_index_to_bytes(int class_idx) {
    static const size_t bytes_by_class[BIGCACHE_NUM_CLASSES] = {
        [0] = BIGCACHE_CLASS_512KB,
        [1] = BIGCACHE_CLASS_1MB,
        [2] = BIGCACHE_CLASS_2MB,
        [3] = BIGCACHE_CLASS_3MB,
        [4] = BIGCACHE_CLASS_4MB,
        [5] = BIGCACHE_CLASS_6MB,
        [6] = BIGCACHE_CLASS_8MB,
        [7] = BIGCACHE_CLASS_16MB
    };
    const size_t nominal = bytes_by_class[class_idx];
    return nominal;
}
// Callback used to actually free an evicted block (per the original note, set by hakmem.c).
// Installed through hak_bigcache_set_free_callback(); while it is NULL,
// evict_slot() only marks the slot invalid and does not release the block.
static void (*g_free_callback)(void* ptr, size_t size) = NULL;
// Drop the block held by `slot`, if any.
// An empty slot is left untouched. For an occupied slot the registered free
// callback (when one is set) receives the cached pointer together with its
// actual allocated size (not the class size); the slot is then emptied, its
// frequency reset, and the eviction counter bumped.
static inline void evict_slot(BigCacheSlot* slot) {
    if (slot->valid) {
        void (*release)(void*, size_t) = g_free_callback;
        if (release != NULL) {
            release(slot->ptr, slot->actual_bytes);
        }
        slot->freq = 0;
        slot->valid = 0;
        g_stats.evictions++;
    }
}
// Phase 6.11 P0-BigCache-2: LFU hybrid decay.
// Walks the whole table and halves the frequency counter of every occupied
// slot, so counters cannot saturate permanently and old popularity fades.
// This function takes no locks itself.
static inline void decay_frequencies(void) {
    for (int s = 0; s < BIGCACHE_MAX_SITES; s++) {
        BigCacheSlot* row = g_cache[s];
        for (int c = 0; c < BIGCACHE_NUM_CLASSES; c++) {
            if (!row[c].valid) {
                continue;
            }
            row[c].freq = (uint16_t)(row[c].freq / 2);
        }
    }
}
// ============================================================================
// Public API (Box Interface)
// ============================================================================
// Initialize the big-block cache: clear the slot table and the statistics,
// create one mutex per site row, and mark the cache as initialized.
// Calling it again while already initialized is a no-op.
// NOTE(review): the g_initialized check is a plain read/write with no
// synchronization, so first-time initialization should happen before the
// cache is used from multiple threads.
void hak_bigcache_init(void) {
    if (g_initialized) return;

    memset(g_cache, 0, sizeof(g_cache));
    memset(&g_stats, 0, sizeof(g_stats));
    for (int i = 0; i < BIGCACHE_MAX_SITES; i++) {
        pthread_mutex_init(&g_cache_locks[i], NULL);
    }
    g_initialized = 1;

    HAKMEM_LOG("[BigCache] Initialized (P2: O(1) direct table, sites=%d, classes=%d)\n",
               BIGCACHE_MAX_SITES, BIGCACHE_NUM_CLASSES);
    // Report the eight classes this file actually defines (see get_class_index).
    HAKMEM_LOG("[BigCache] Size classes: 512KB, 1MB, 2MB, 3MB, 4MB, 6MB, 8MB, 16MB\n");
}
void hak_bigcache_shutdown(void) {
if (!g_initialized) return;
// Free all cached blocks (O(sites × classes) = 64 × 4 = 256 slots)
for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) {
for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) {
BigCacheSlot* slot = &g_cache[site_idx][class_idx];
if (slot->valid) {
evict_slot(slot);
}
}
}
hak_bigcache_print_stats();
g_initialized = 0;
}
// Phase 6.4 P2: O(1) lookup by direct table access.
// Tries to reuse a cached block for an allocation of `size` bytes made at `site`.
//
// size:    requested size; sizes that are not cacheable return 0 immediately
//          (and are not counted as misses).
// site:    allocation-site identifier; must equal the site stored in the slot.
// out_ptr: receives the cached pointer on a hit; untouched on a miss.
// returns: 1 on a hit (the slot is emptied), 0 otherwise.
//
// Initializes the cache lazily on first use. The slot probed is
// [hash_site(site)][get_class_index(size)], under that site row's mutex.
int hak_bigcache_try_get(size_t size, uintptr_t site, void** out_ptr) {
    if (!g_initialized) {
        hak_bigcache_init();
    }
    if (!is_cacheable(size)) {
        return 0;
    }

    const int row = hash_site(site);
    const int col = get_class_index(size);
    pthread_mutex_t* row_lock = &g_cache_locks[row];
    int found = 0;

    pthread_mutex_lock(row_lock);
    BigCacheSlot* entry = &g_cache[row][col];
    if (!entry->valid || entry->site != site || entry->actual_bytes < size) {
        // Miss: empty slot, different site, or cached block too small.
        g_stats.misses++;
    } else {
        // Hit: hand the block out and empty the slot.
        *out_ptr = entry->ptr;
        entry->valid = 0;
        // Phase 6.11 P0-BigCache-2: LFU hit counter, saturating at 65535.
        if (entry->freq < 65535) {
            entry->freq++;
        }
        g_stats.hits++;
        found = 1;
    }
    pthread_mutex_unlock(row_lock);

    return found;
}
// Phase 6.4 P2: O(1) insertion by direct table access.
// Offers a freed large block to the cache so a later hak_bigcache_try_get()
// from the same site and size class can reuse it.
//
// ptr:          block to cache (stored as-is and returned verbatim on a hit).
// actual_bytes: real allocated size of the block; sizes that are not
//               cacheable are rejected and counted in g_stats.rejects.
// site:         allocation-site identifier recorded with the block.
// returns:      1 if the block was stored, 0 if it was rejected (not stored).
//
// Initializes the cache lazily on first use. The slot written is always
// [hash_site(site)][get_class_index(actual_bytes)], under that row's mutex.
int hak_bigcache_put(void* ptr, size_t actual_bytes, uintptr_t site) {
    if (!g_initialized) hak_bigcache_init();

    if (!is_cacheable(actual_bytes)) {
        g_stats.rejects++;
        return 0;
    }

    // O(1) calculation: site_idx, class_idx
    int site_idx = hash_site(site);
    int class_idx = get_class_index(actual_bytes);

    pthread_mutex_t* lock = &g_cache_locks[site_idx];
    pthread_mutex_lock(lock);

    BigCacheSlot* slot = &g_cache[site_idx][class_idx];

    // The block must live in its own class slot: try_get() only ever probes
    // [site][class(size)], so a block parked in another class's slot could not
    // be found by same-sized requests and would also occupy the home slot of
    // that other class. If the home slot is occupied, its current block is
    // evicted to make room.
    if (slot->valid) {
        evict_slot(slot);
    }

    // Store in cache (O(1) direct write)
    slot->ptr = ptr;
    slot->actual_bytes = actual_bytes;                   // Checked against the request size on lookup
    slot->class_bytes = class_index_to_bytes(class_idx); // For stats/debugging
    slot->site = site;
    slot->valid = 1;
    slot->freq = 0; // Phase 6.11: frequency starts at 0 and is incremented on a hit

    g_stats.puts++;
    g_put_count++;

    // Phase 6.11 P0-BigCache-2: periodic decay (every LFU_DECAY_INTERVAL puts).
    // NOTE(review): decay_frequencies() walks every site row while only this
    // row's lock is held, and g_put_count is shared across rows; confirm
    // whether that is acceptable or needs its own synchronization.
    if (g_put_count % LFU_DECAY_INTERVAL == 0) {
        decay_frequencies();
    }

    pthread_mutex_unlock(lock);
    return 1;
}
// Print the cache counters (hits, misses, puts, evictions, rejects) to stdout,
// followed by the hit rate when at least one hit or miss has been recorded.
// Prints nothing when the cache is not initialized.
void hak_bigcache_print_stats(void) {
    if (!g_initialized) return;

    printf("\n========================================\n");
    printf("BigCache Statistics\n");
    printf("========================================\n");
    // The counters are uint64_t; unsigned long long is at least 64 bits wide,
    // so the full value is printed even where unsigned long is only 32 bits.
    printf("Hits: %llu\n", (unsigned long long)g_stats.hits);
    printf("Misses: %llu\n", (unsigned long long)g_stats.misses);
    printf("Puts: %llu\n", (unsigned long long)g_stats.puts);
    printf("Evictions: %llu\n", (unsigned long long)g_stats.evictions);
    printf("Rejects: %llu\n", (unsigned long long)g_stats.rejects);

    if (g_stats.hits + g_stats.misses > 0) {
        double hit_rate = (double)g_stats.hits / (g_stats.hits + g_stats.misses) * 100.0;
        printf("Hit Rate: %.1f%%\n", hit_rate);
    }
    printf("========================================\n");
}
// Install the function that evict_slot() calls to release an evicted block.
// fn: callback that receives the cached pointer and its recorded actual size;
//     passing NULL makes evictions only mark the slot invalid.
// The assignment is a plain store performed without taking any lock.
void hak_bigcache_set_free_callback(hak_bigcache_free_fn_t fn) {
g_free_callback = fn;
}