// hakmem_bigcache.c - Big-Block Cache Implementation // Purpose: Per-site ring cache for large allocations // // License: MIT // Date: 2025-10-21 #include "hakmem_bigcache.h" #include "hakmem_internal.h" // Phase 6.15 P0.1: For HAKMEM_LOG macro #include #include #include #include // ============================================================================ // Data Structures (Box理論: 箱の内部構造) // ============================================================================ typedef struct __attribute__((aligned(64))) { void* ptr; // Cached pointer (user pointer, not raw) size_t actual_bytes; // Actual allocated size (for safety check) size_t class_bytes; // Size class (1MB, 2MB, 4MB, 8MB) for indexing uintptr_t site; // Allocation site int valid; // 1 if slot is valid uint16_t freq; // Phase 6.11 P0-BigCache-2: LFU frequency counter (0-65535) } BigCacheSlot; // Phase 6.4 P2: O(1) Direct Table [site][class] // メモリ使用量: 64 sites × 4 classes × 32 bytes = 8 KB (cache-friendly!) static BigCacheSlot g_cache[BIGCACHE_MAX_SITES][BIGCACHE_NUM_CLASSES]; static pthread_mutex_t g_cache_locks[BIGCACHE_MAX_SITES]; // Statistics (for debugging/paper) static struct { uint64_t hits; uint64_t misses; uint64_t puts; uint64_t evictions; uint64_t rejects; } g_stats; static int g_initialized = 0; // Phase 6.11 P0-BigCache-2: LFU Hybrid - Decay tracking static uint64_t g_put_count = 0; // Total puts (for decay trigger) #define LFU_DECAY_INTERVAL 1024 // Decay every 1024 puts (prevents overflow + adapts to workload changes) // ============================================================================ // Helper Functions (Box内部実装) // ============================================================================ // Phase 6.11 P0-BigCache-3: FNV-1a hash function (better distribution than modulo) // FNV-1a (Fowler-Noll-Vo) hash: fast, simple, excellent distribution static inline int hash_site(uintptr_t site) { uint32_t hash = 2166136261u; // FNV offset basis uint8_t* bytes = (uint8_t*)&site; // FNV-1a: XOR then multiply (better avalanche than FNV-1) for (int i = 0; i < sizeof(uintptr_t); i++) { hash ^= bytes[i]; hash *= 16777619u; // FNV prime } // Modulo to fit into BIGCACHE_MAX_SITES (256 sites) return (int)(hash % BIGCACHE_MAX_SITES); } // Check if size is cacheable static inline int is_cacheable(size_t size) { return size >= BIGCACHE_MIN_SIZE; } // Phase 6.11: Finer-grained size-class決定 (8 classes) // Returns: 0-7 (class index) for O(1) table lookup // Classes: 512KB, 1MB, 2MB, 3MB, 4MB, 6MB, 8MB, 16MB static inline int get_class_index(size_t size) { // Simple conditional approach (easier to maintain with non-power-of-2 classes) if (size < BIGCACHE_CLASS_1MB) return 0; // 512KB-1MB if (size < BIGCACHE_CLASS_2MB) return 1; // 1MB-2MB if (size < BIGCACHE_CLASS_3MB) return 2; // 2MB-3MB (NEW: reduces fragmentation) if (size < BIGCACHE_CLASS_4MB) return 3; // 3MB-4MB (NEW) if (size < BIGCACHE_CLASS_6MB) return 4; // 4MB-6MB if (size < BIGCACHE_CLASS_8MB) return 5; // 6MB-8MB (NEW) if (size < BIGCACHE_CLASS_16MB) return 6; // 8MB-16MB return 7; // 16MB+ (NEW: very large allocations) } // Get size class bytes from index static inline size_t class_index_to_bytes(int class_idx) { static const size_t class_sizes[BIGCACHE_NUM_CLASSES] = { BIGCACHE_CLASS_512KB, // Phase 6.11: NEW class for 512KB-1MB BIGCACHE_CLASS_1MB, BIGCACHE_CLASS_2MB, BIGCACHE_CLASS_3MB, // Phase 6.11: NEW class to reduce fragmentation (e.g., 2.1MB → 3MB instead of 4MB) BIGCACHE_CLASS_4MB, BIGCACHE_CLASS_6MB, // Phase 6.11: NEW class BIGCACHE_CLASS_8MB, BIGCACHE_CLASS_16MB // Phase 6.11: NEW class for very large allocations }; return class_sizes[class_idx]; } // Callback for actual freeing (set by hakmem.c) static void (*g_free_callback)(void* ptr, size_t size) = NULL; // Free a cached block (when evicting) static inline void evict_slot(BigCacheSlot* slot) { if (!slot->valid) return; // Use callback if available, otherwise just mark invalid if (g_free_callback) { // Pass actual allocated size, not class_bytes! g_free_callback(slot->ptr, slot->actual_bytes); } slot->valid = 0; slot->freq = 0; // Phase 6.11: Reset frequency on eviction g_stats.evictions++; } // Phase 6.11 P0-BigCache-2: LFU Hybrid - Decay all frequencies // Purpose: Prevent overflow + adapt to changing workload patterns static inline void decay_frequencies(void) { for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) { for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) { BigCacheSlot* slot = &g_cache[site_idx][class_idx]; if (slot->valid) { slot->freq = slot->freq >> 1; // Halve frequency (shift right by 1) } } } } // ============================================================================ // Public API (Box Interface) // ============================================================================ void hak_bigcache_init(void) { if (g_initialized) return; memset(g_cache, 0, sizeof(g_cache)); memset(&g_stats, 0, sizeof(g_stats)); for (int i = 0; i < BIGCACHE_MAX_SITES; i++) { pthread_mutex_init(&g_cache_locks[i], NULL); } g_initialized = 1; HAKMEM_LOG("[BigCache] Initialized (P2: O(1) direct table, sites=%d, classes=%d)\n", BIGCACHE_MAX_SITES, BIGCACHE_NUM_CLASSES); HAKMEM_LOG("[BigCache] Size classes: 1MB, 2MB, 4MB, 8MB (P3: branchless)\n"); } void hak_bigcache_shutdown(void) { if (!g_initialized) return; // Free all cached blocks (O(sites × classes) = 64 × 4 = 256 slots) for (int site_idx = 0; site_idx < BIGCACHE_MAX_SITES; site_idx++) { for (int class_idx = 0; class_idx < BIGCACHE_NUM_CLASSES; class_idx++) { BigCacheSlot* slot = &g_cache[site_idx][class_idx]; if (slot->valid) { evict_slot(slot); } } } hak_bigcache_print_stats(); g_initialized = 0; } // Phase 6.4 P2: O(1) get - Direct table lookup int hak_bigcache_try_get(size_t size, uintptr_t site, void** out_ptr) { if (!g_initialized) hak_bigcache_init(); if (!is_cacheable(size)) return 0; // O(1) calculation: site_idx, class_idx int site_idx = hash_site(site); int class_idx = get_class_index(size); // P3: branchless // O(1) lookup: table[site_idx][class_idx] pthread_mutex_t* lock = &g_cache_locks[site_idx]; pthread_mutex_lock(lock); BigCacheSlot* slot = &g_cache[site_idx][class_idx]; // Check: valid, matching site, AND sufficient size (Segfault fix!) if (slot->valid && slot->site == site && slot->actual_bytes >= size) { // Hit! Return and invalidate slot *out_ptr = slot->ptr; slot->valid = 0; // Phase 6.11 P0-BigCache-2: LFU - increment frequency on hit (saturating at 65535) if (slot->freq < 65535) slot->freq++; g_stats.hits++; pthread_mutex_unlock(lock); return 1; } // Miss (invalid, wrong site, or undersized) g_stats.misses++; pthread_mutex_unlock(lock); return 0; } // Phase 6.4 P2: O(1) put - Direct table insertion int hak_bigcache_put(void* ptr, size_t actual_bytes, uintptr_t site) { if (!g_initialized) hak_bigcache_init(); if (!is_cacheable(actual_bytes)) { g_stats.rejects++; return 0; } // O(1) calculation: site_idx, class_idx int site_idx = hash_site(site); int class_idx = get_class_index(actual_bytes); // P3: branchless // O(1) lookup: table[site_idx][class_idx] pthread_mutex_t* lock = &g_cache_locks[site_idx]; pthread_mutex_lock(lock); BigCacheSlot* slot = &g_cache[site_idx][class_idx]; // Phase 6.11 P0-BigCache-2: LFU Hybrid Eviction // Instead of evicting target slot directly, find coldest slot in same site if (slot->valid) { BigCacheSlot* coldest = slot; uint16_t min_freq = slot->freq; // Scan all class slots in same site (8 slots max) for (int c = 0; c < BIGCACHE_NUM_CLASSES; c++) { BigCacheSlot* candidate = &g_cache[site_idx][c]; if (!candidate->valid) { // Invalid slot = coldest (freq=0, prefer reusing empty slots) coldest = candidate; break; } if (candidate->freq < min_freq) { min_freq = candidate->freq; coldest = candidate; } } // Evict coldest slot (might be target slot, might be different) evict_slot(coldest); // If we evicted a different slot, use it instead of target slot if (coldest != slot) { slot = coldest; class_idx = get_class_index(actual_bytes); // Recalculate class for new slot } } // Store in cache (O(1) direct write) slot->ptr = ptr; slot->actual_bytes = actual_bytes; // Store actual size (Segfault fix!) slot->class_bytes = class_index_to_bytes(class_idx); // For stats/debugging slot->site = site; slot->valid = 1; slot->freq = 0; // Phase 6.11: Initialize frequency to 0 (will increment on first hit) g_stats.puts++; g_put_count++; // Phase 6.11 P0-BigCache-2: Periodic decay (every 1024 puts) if (g_put_count % LFU_DECAY_INTERVAL == 0) { decay_frequencies(); } pthread_mutex_unlock(lock); return 1; } void hak_bigcache_print_stats(void) { if (!g_initialized) return; printf("\n========================================\n"); printf("BigCache Statistics\n"); printf("========================================\n"); printf("Hits: %lu\n", (unsigned long)g_stats.hits); printf("Misses: %lu\n", (unsigned long)g_stats.misses); printf("Puts: %lu\n", (unsigned long)g_stats.puts); printf("Evictions: %lu\n", (unsigned long)g_stats.evictions); printf("Rejects: %lu\n", (unsigned long)g_stats.rejects); if (g_stats.hits + g_stats.misses > 0) { double hit_rate = (double)g_stats.hits / (g_stats.hits + g_stats.misses) * 100.0; printf("Hit Rate: %.1f%%\n", hit_rate); } printf("========================================\n"); } void hak_bigcache_set_free_callback(hak_bigcache_free_fn_t fn) { g_free_callback = fn; }