// tiny_unified_cache.c - Phase 23: Unified Frontend Cache Implementation
#include "tiny_unified_cache.h"
#include "../box/unified_batch_box.h"        // Phase 23-D: Box U2 batch alloc (deprecated in 23-E)
#include "../tiny_tls.h"                     // Phase 23-E: TinyTLSSlab, TinySlabMeta
#include "../tiny_box_geometry.h"            // Phase 23-E: tiny_stride_for_class, tiny_slab_base_for_geometry
#include "../box/tiny_next_ptr_box.h"        // Phase 23-E: tiny_next_read (freelist traversal)
#include "../hakmem_tiny_superslab.h"        // Phase 23-E: SuperSlab, superslab_refill()
#include "../superslab/superslab_inline.h"   // Phase 23-E: ss_active_add, slab_index_for, ss_slabs_capacity
#include "../hakmem_super_registry.h"        // For hak_super_lookup (pointer→SuperSlab)
#include "../box/pagefault_telemetry_box.h"  // Phase 24: Box PageFaultTelemetry (Tiny page touch stats)
#include "../hakmem_env_cache.h"             // Priority-2: ENV cache (eliminate syscalls)
#include <stdio.h>
#include <stdlib.h>

// Phase 23-E: Forward declarations
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];  // From hakmem_tiny_superslab.c

// ============================================================================
// TLS Variables (defined here, extern in header)
// ============================================================================
__thread TinyUnifiedCache g_unified_cache[TINY_NUM_CLASSES];

// ============================================================================
// Metrics (Phase 23, optional for debugging)
// ============================================================================
#if !HAKMEM_BUILD_RELEASE
__thread uint64_t g_unified_cache_hit[TINY_NUM_CLASSES]  = {0};
__thread uint64_t g_unified_cache_miss[TINY_NUM_CLASSES] = {0};
__thread uint64_t g_unified_cache_push[TINY_NUM_CLASSES] = {0};
__thread uint64_t g_unified_cache_full[TINY_NUM_CLASSES] = {0};
#endif

// ============================================================================
// Phase 8-Step1-Fix: unified_cache_enabled() implementation (non-static)
// ============================================================================
// Enable flag (default: ON, disable with HAKMEM_TINY_UNIFIED_CACHE=0)
int unified_cache_enabled(void) {
    // Priority-2: Use cached ENV (eliminate lazy-init static overhead)
    static int g_enable = -1;
    if (__builtin_expect(g_enable == -1, 0)) {
        g_enable = HAK_ENV_TINY_UNIFIED_CACHE();
#if !HAKMEM_BUILD_RELEASE
        if (g_enable) {
            fprintf(stderr, "[Unified-INIT] unified_cache_enabled() = %d\n", g_enable);
            fflush(stderr);
        }
#endif
    }
    return g_enable;
}

// ============================================================================
// Init (called at thread start or lazy on first access)
// ============================================================================
void unified_cache_init(void) {
    if (!unified_cache_enabled()) return;

    // Layer 2 Defensive Fix: Use __libc_calloc for infrastructure allocations
    // Rationale: Cache arrays are infrastructure (not workload), bypass HAKMEM entirely
    // This prevents interaction with BenchFast mode and ensures clean separation
    extern void* __libc_calloc(size_t, size_t);

    // Initialize all classes (C0-C7)
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        if (g_unified_cache[cls].slots != NULL) continue;  // Already initialized

        size_t cap = unified_capacity(cls);
        g_unified_cache[cls].slots = (void**)__libc_calloc(cap, sizeof(void*));
        if (!g_unified_cache[cls].slots) {
#if !HAKMEM_BUILD_RELEASE
            fprintf(stderr, "[Unified-INIT] Failed to allocate C%d cache (%zu slots)\n", cls, cap);
            fflush(stderr);
#endif
            continue;  // Skip this class, try others
        }
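
        // Note: the mask-based wrap used throughout this file assumes
        // unified_capacity() returns a power of two; (cap - 1) only forms a
        // valid index mask in that case (e.g. cap = 256 → mask = 0xFF).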
        g_unified_cache[cls].capacity = (uint16_t)cap;
        g_unified_cache[cls].mask     = (uint16_t)(cap - 1);
        g_unified_cache[cls].head     = 0;
        g_unified_cache[cls].tail     = 0;

#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[Unified-INIT] C%d: %zu slots (%zu bytes)\n",
                cls, cap, cap * sizeof(void*));
        fflush(stderr);
#endif
    }
}

// ============================================================================
// Shutdown (called at thread exit, optional)
// ============================================================================
void unified_cache_shutdown(void) {
    if (!unified_cache_enabled()) return;

    // TODO: Drain caches to SuperSlab before shutdown (prevent leak)

    // Layer 2 Defensive Fix: Use __libc_free (symmetric with __libc_calloc in init)
    extern void __libc_free(void*);

    // Free cache buffers
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        if (g_unified_cache[cls].slots) {
            __libc_free(g_unified_cache[cls].slots);
            g_unified_cache[cls].slots = NULL;
        }
    }

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[Unified-SHUTDOWN] All caches freed\n");
    fflush(stderr);
#endif
}

// ============================================================================
// Stats (Phase 23 metrics)
// ============================================================================
void unified_cache_print_stats(void) {
    if (!unified_cache_enabled()) return;

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "\n[Unified-STATS] Unified Cache Metrics:\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t total_allocs = g_unified_cache_hit[cls] + g_unified_cache_miss[cls];
        uint64_t total_frees  = g_unified_cache_push[cls] + g_unified_cache_full[cls];
        if (total_allocs == 0 && total_frees == 0) continue;  // Skip unused classes

        double hit_rate  = (total_allocs > 0) ? (100.0 * g_unified_cache_hit[cls] / total_allocs) : 0.0;
        double full_rate = (total_frees > 0) ? (100.0 * g_unified_cache_full[cls] / total_frees) : 0.0;

        // Current occupancy
        uint16_t count = (g_unified_cache[cls].tail >= g_unified_cache[cls].head)
                             ? (g_unified_cache[cls].tail - g_unified_cache[cls].head)
                             : (g_unified_cache[cls].capacity - g_unified_cache[cls].head + g_unified_cache[cls].tail);

        fprintf(stderr,
                " C%d: %u/%u slots occupied, hit=%llu miss=%llu (%.1f%% hit), push=%llu full=%llu (%.1f%% full)\n",
                cls, count, g_unified_cache[cls].capacity,
                (unsigned long long)g_unified_cache_hit[cls],
                (unsigned long long)g_unified_cache_miss[cls], hit_rate,
                (unsigned long long)g_unified_cache_push[cls],
                (unsigned long long)g_unified_cache_full[cls], full_rate);
    }
    fflush(stderr);
#endif
}

// ============================================================================
// Phase 23-E: Direct SuperSlab Carve (TLS SLL Bypass)
// ============================================================================
// Fail-fast helper: verify that a candidate BASE pointer belongs to a valid
// Tiny slab within a SuperSlab. This is intentionally defensive and only
// compiled in debug builds to avoid hot-path overhead in release.
static inline int unified_refill_validate_base(int class_idx, TinyTLSSlab* tls,
                                               TinySlabMeta* meta, void* base,
                                               const char* stage) {
#if HAKMEM_BUILD_RELEASE
    (void)class_idx; (void)tls; (void)meta; (void)base; (void)stage;
    return 1;
#else
    if (!base) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=NULL tls_ss=%p meta=%p\n",
                stage ? stage : "unified_refill", class_idx,
                (void*)(tls ? tls->ss : NULL), (void*)meta);
        abort();
    }

    SuperSlab* tls_ss = tls ? tls->ss : NULL;
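
    // Validation order: TLS SuperSlab magic → registry cross-check → slab index
    // range → meta identity → address range → stride alignment. Each failure
    // aborts with enough context to reconstruct the corrupt state from the log.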
    if (!tls_ss || tls_ss->magic != SUPERSLAB_MAGIC) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p tls_ss=%p meta=%p (invalid TLS ss)\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)tls_ss, (void*)meta);
        abort();
    }

    // Cross-check registry lookup for additional safety.
    SuperSlab* ss_lookup = hak_super_lookup(base);
    if (!ss_lookup || ss_lookup->magic != SUPERSLAB_MAGIC) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p tls_ss=%p lookup_ss=%p meta=%p\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)tls_ss, (void*)ss_lookup, (void*)meta);
        abort();
    }
    if (ss_lookup != tls_ss) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p tls_ss=%p lookup_ss=%p (mismatch)\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)tls_ss, (void*)ss_lookup);
        abort();
    }

    int slab_idx = tls ? (int)tls->slab_idx : -1;
    int cap = ss_slabs_capacity(tls_ss);
    if (slab_idx < 0 || slab_idx >= cap) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p tls_ss=%p slab_idx=%d cap=%d meta_cap=%u meta_used=%u meta_carved=%u\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)tls_ss, slab_idx, cap,
                meta ? meta->capacity : 0u,
                meta ? (unsigned)meta->used : 0u,
                meta ? (unsigned)meta->carved : 0u);
        abort();
    }

    // Ensure meta matches TLS view for this slab.
    TinySlabMeta* expected_meta = &tls_ss->slabs[slab_idx];
    if (meta && meta != expected_meta) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p tls_ss=%p slab_idx=%d meta=%p expected_meta=%p\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)tls_ss, slab_idx,
                (void*)meta, (void*)expected_meta);
        abort();
    }

    uint8_t* slab_base = tiny_slab_base_for_geometry(tls_ss, slab_idx);
    size_t stride = tiny_stride_for_class(class_idx);
    size_t usable = tiny_usable_bytes_for_slab(slab_idx);
    uint8_t* slab_end = slab_base + usable;
    if ((uint8_t*)base < slab_base || (uint8_t*)base >= slab_end) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p range=[%p,%p) stride=%zu meta_cap=%u meta_used=%u meta_carved=%u\n",
                stage ? stage : "unified_refill", class_idx, base, (void*)slab_base, (void*)slab_end, stride,
                meta ? meta->capacity : 0u,
                meta ? (unsigned)meta->used : 0u,
                meta ? (unsigned)meta->carved : 0u);
        abort();
    }

    ptrdiff_t offset = (uint8_t*)base - slab_base;
    if (offset % (ptrdiff_t)stride != 0) {
        fprintf(stderr,
                "[UNIFIED_REFILL_CORRUPT] stage=%s cls=%d base=%p offset=%td stride=%zu (misaligned) meta_cap=%u meta_used=%u meta_carved=%u\n",
                stage ? stage : "unified_refill", class_idx, base, offset, stride,
                meta ? meta->capacity : 0u,
                meta ? (unsigned)meta->used : 0u,
                meta ? (unsigned)meta->carved : 0u);
        abort();
    }
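
    // Example: with a 32-byte stride, valid BASE offsets within the slab are
    // 0, 32, 64, ...; an offset such as 40 means the pointer does not sit on a
    // block boundary and trips the misalignment abort above.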
    return 1;
#endif
}

// Batch refill from SuperSlab (called on cache miss)
// Returns: BASE pointer (first block), or NULL if failed
// Design: Direct carve from SuperSlab to array (no TLS SLL intermediate layer)
void* unified_cache_refill(int class_idx) {
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Step 1: Ensure SuperSlab available
    if (!tls->ss) {
        if (!superslab_refill(class_idx)) return NULL;
        tls = &g_tls_slabs[class_idx];  // Reload after refill
    }

    TinyUnifiedCache* cache = &g_unified_cache[class_idx];

    // ✅ Phase 11+: Ensure cache is initialized (lazy init for cold path)
    if (!cache->slots) {
        unified_cache_init();
        // Re-check after init (may fail due to alloc failure)
        if (!cache->slots) {
            return NULL;
        }
    }

    // Step 2: Calculate available room in unified cache
    int room = (int)cache->capacity - 1;  // Leave 1 slot for full detection
    if (cache->head > cache->tail) {
        room = cache->head - cache->tail - 1;
    } else if (cache->head < cache->tail) {
        room = cache->capacity - (cache->tail - cache->head) - 1;
    }
    if (room <= 0) return NULL;
    if (room > 128) room = 128;  // Batch size limit

    // Step 3: Direct carve from SuperSlab into local array (bypass TLS SLL!)
    void* out[128];
    int produced = 0;

    TinySlabMeta* m = tls->meta;
    size_t bs = tiny_stride_for_class(class_idx);
    uint8_t* base = tls->slab_base ? tls->slab_base
                                   : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);

    while (produced < room) {
        if (m->freelist) {
            // Freelist pop
            void* p = m->freelist;
            void* next_node = tiny_next_read(class_idx, p);
            // ROOT CAUSE FIX: Write header BEFORE exposing block (but AFTER reading next)
            // For Class 0 (offset 0), next overlaps header, so we must read next first.
#if HAKMEM_TINY_HEADER_CLASSIDX
            *(uint8_t*)p = (uint8_t)(0xa0 | (class_idx & 0x0f));
            // Prevent compiler from reordering header write after out[] assignment
            __atomic_thread_fence(__ATOMIC_RELEASE);
#endif
            m->freelist = next_node;
            unified_refill_validate_base(class_idx, tls, m, p, "unified_refill_freelist");
            // PageFaultTelemetry: record page touch for this BASE
            pagefault_telemetry_touch(class_idx, p);
            m->used++;
            out[produced++] = p;
        } else if (m->carved < m->capacity) {
            // Linear carve (fresh block, no freelist link)
            void* p = (void*)(base + ((size_t)m->carved * bs));
            unified_refill_validate_base(class_idx, tls, m, p, "unified_refill_carve");
            // PageFaultTelemetry: record page touch for this BASE
            pagefault_telemetry_touch(class_idx, p);
            // ✅ CRITICAL: Write header (new block)
#if HAKMEM_TINY_HEADER_CLASSIDX
            *(uint8_t*)p = (uint8_t)(0xa0 | (class_idx & 0x0f));
#endif
            m->carved++;
            m->used++;
            out[produced++] = p;
        } else {
            // SuperSlab exhausted → refill and retry
            if (!superslab_refill(class_idx)) break;
            // ✅ CRITICAL: Reload TLS pointers after refill (avoid stale pointer bug)
            tls = &g_tls_slabs[class_idx];
            m = tls->meta;
            base = tls->slab_base ? tls->slab_base
                                  : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
        }
    }

    if (produced == 0) return NULL;

    // Step 4: Update active counter
    // Guard: tls->ss can be NULL if all SuperSlab refills failed
    if (tls->ss) {
        ss_active_add(tls->ss, (uint32_t)produced);
    }

    // Step 5: Store blocks into unified cache (skip first, return it)
    void* first = out[0];
    for (int i = 1; i < produced; i++) {
        cache->slots[cache->tail] = out[i];
        cache->tail = (cache->tail + 1) & cache->mask;
    }

#if !HAKMEM_BUILD_RELEASE
    g_unified_cache_miss[class_idx]++;
#endif

    return first;  // Return first block (BASE pointer)
}
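
// ----------------------------------------------------------------------------
// Minimal sketch of how a caller's allocation fast path might consume this
// cache, assuming the pop side mirrors the push side above: empty when
// head == tail, power-of-two mask wrap, slots[] holding BASE pointers with
// headers already written, and the hit counter only kept in debug builds.
// The function name unified_cache_pop_or_refill is hypothetical.
// ----------------------------------------------------------------------------
#if 0  /* documentation-only sketch */
static inline void* unified_cache_pop_or_refill(int class_idx) {
    TinyUnifiedCache* c = &g_unified_cache[class_idx];
    if (c->slots && c->head != c->tail) {   // Fast path: ring non-empty
        void* p = c->slots[c->head];
        c->head = (c->head + 1) & c->mask;  // Power-of-two wrap
#if !HAKMEM_BUILD_RELEASE
        g_unified_cache_hit[class_idx]++;
#endif
        return p;                           // BASE pointer
    }
    // Miss: batch-carve from the SuperSlab; the first block is returned
    // directly, the rest land in slots[] for subsequent hits.
    return unified_cache_refill(class_idx);
}
#endif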