Refactor: Split hakmem_tiny_superslab.c + unified backend exit point
Major refactoring to improve maintainability and debugging:

1. Split hakmem_tiny_superslab.c (1521 lines) into 7 focused files:
   - superslab_allocate.c: SuperSlab allocation/deallocation
   - superslab_backend.c: Backend allocation paths (legacy, shared)
   - superslab_ace.c: ACE (Adaptive Cache Engine) logic
   - superslab_slab.c: Slab initialization and bitmap management
   - superslab_cache.c: LRU cache and prewarm cache management
   - superslab_head.c: SuperSlabHead management and expansion
   - superslab_stats.c: Statistics tracking and debugging

2. Created hakmem_tiny_superslab_internal.h for shared declarations

3. Added superslab_return_block() as the single exit point for header writing (see the sketch below):
   - All backend allocations now go through this helper
   - Prevents bugs where headers are forgotten in some paths
   - Makes future debugging easier

4. Updated Makefile for the new file structure

5. Added header writing to ss_legacy_backend_box.c and ss_unified_backend_box.c (though not currently linked)

Note: The header corruption bug in the Larson benchmark still exists. Class 1-6 allocations go through the TLS refill/carve paths, not the backend. Further investigation is needed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
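The single exit point named in item 3 is defined outside the file shown in this commit, so only its intent can be sketched here. A minimal illustration, with an assumed header layout and assumed field names (the real definitions live in superslab_backend.c and hakmem_tiny_superslab_internal.h):

#include <stdint.h>

// Illustrative only: this header layout and these field names are assumptions
// for the sketch, not the actual hakmem block header.
typedef struct SuperSlab SuperSlab;          // opaque here
typedef struct {
    uint8_t    size_class;
    SuperSlab* owner;
} BlockHeader;

// Single exit point: every backend allocation path returns through this
// helper, so the header is written in exactly one place and cannot be
// forgotten on some paths.
static inline void* superslab_return_block(SuperSlab* ss, void* block,
                                           uint8_t size_class) {
    BlockHeader* hdr = (BlockHeader*)block;
    hdr->size_class = size_class;
    hdr->owner      = ss;
    return (void*)(hdr + 1);                 // user pointer begins after the header
}

Call sites would then end with `return superslab_return_block(ss, block, size_class);` instead of writing headers ad hoc.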
core/superslab_cache.c (new file, 204 lines)

@@ -0,0 +1,204 @@
// superslab_cache.c - Cache management for SuperSlab allocator
// Purpose: LRU cache and old cache (prewarm) for SuperSlabs
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"

// ============================================================================
// Cache System - Global Variables
// ============================================================================

SuperslabCacheEntry* g_ss_cache_head[8] = {0};
size_t g_ss_cache_count[8] = {0};
size_t g_ss_cache_cap[8] = {0};
size_t g_ss_precharge_target[8] = {0};
_Atomic int g_ss_precharge_done[8] = {0};
int g_ss_cache_enabled = 0;

pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT;
pthread_mutex_t g_ss_cache_lock[8];

uint64_t g_ss_cache_hits[8] = {0};
uint64_t g_ss_cache_misses[8] = {0};
uint64_t g_ss_cache_puts[8] = {0};
uint64_t g_ss_cache_drops[8] = {0};
uint64_t g_ss_cache_precharged[8] = {0};

uint64_t g_superslabs_reused = 0;
uint64_t g_superslabs_cached = 0;

// ============================================================================
// Cache Initialization
// ============================================================================

void ss_cache_global_init(void) {
    for (int i = 0; i < 8; i++) {
        pthread_mutex_init(&g_ss_cache_lock[i], NULL);
    }
}

void ss_cache_ensure_init(void) {
    pthread_once(&g_ss_cache_once, ss_cache_global_init);
}

// ============================================================================
// OS Acquisition (mmap with alignment)
// ============================================================================

void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) {
    void* ptr = NULL;
    static int log_count = 0;

    // Preferred path: ask for an aligned mapping directly where the platform
    // provides MAP_ALIGNED_SUPER; verify the alignment and fall through to the
    // generic path if the kernel did not honor it.
#ifdef MAP_ALIGNED_SUPER
    int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#ifdef MAP_POPULATE
    if (populate) {
        map_flags |= MAP_POPULATE;
    }
#endif
    ptr = mmap(NULL, ss_size,
               PROT_READ | PROT_WRITE,
               map_flags,
               -1, 0);
    if (ptr != MAP_FAILED) {
        atomic_fetch_add(&g_ss_mmap_count, 1);
        if (((uintptr_t)ptr & ss_mask) == 0) {
            ss_stats_os_alloc(size_class, ss_size);
            return ptr;
        }
        munmap(ptr, ss_size);
        ptr = NULL;
    } else {
        log_superslab_oom_once(ss_size, ss_size, errno);
    }
#endif

    // Fallback: over-allocate twice the SuperSlab size, align the start up to
    // the requested boundary, then give back the unaligned prefix and the
    // unused suffix.
    size_t alloc_size = ss_size * 2;
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef MAP_POPULATE
    if (populate) {
        flags |= MAP_POPULATE;
    }
#endif
    void* raw = mmap(NULL, alloc_size,
                     PROT_READ | PROT_WRITE,
                     flags,
                     -1, 0);
    if (raw != MAP_FAILED) {
        uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1;
#if !HAKMEM_BUILD_RELEASE
        if (log_count < 10) {
            fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n",
                    (unsigned long)count, size_class, ss_size);
            log_count++;
        }
#endif
    }
    if (raw == MAP_FAILED) {
        log_superslab_oom_once(ss_size, alloc_size, errno);
        return NULL;
    }

    uintptr_t raw_addr = (uintptr_t)raw;
    uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask;
    ptr = (void*)aligned_addr;

    size_t prefix_size = aligned_addr - raw_addr;
    if (prefix_size > 0) {
        munmap(raw, prefix_size);
    }
    size_t suffix_size = alloc_size - prefix_size - ss_size;
    if (suffix_size > 0) {
        if (populate) {
#ifdef MADV_DONTNEED
            madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED);
#endif
        } else {
            munmap((char*)ptr + ss_size, suffix_size);
        }
    }

    ss_stats_os_alloc(size_class, ss_size);
    return ptr;
}

// ============================================================================
// Cache Precharge (prewarm)
// ============================================================================

void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) {
    if (!g_ss_cache_enabled) return;
    if (size_class >= 8) return;
    if (g_ss_precharge_target[size_class] == 0) return;
    if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return;

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t target = g_ss_precharge_target[size_class];
    size_t cap = g_ss_cache_cap[size_class];
    size_t desired = target;
    if (cap != 0 && desired > cap) {
        desired = cap;
    }
    while (g_ss_cache_count[size_class] < desired) {
        void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1);
        if (!raw) {
            break;
        }
        SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw;
        entry->next = g_ss_cache_head[size_class];
        g_ss_cache_head[size_class] = entry;
        g_ss_cache_count[size_class]++;
        g_ss_cache_precharged[size_class]++;
    }
    atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release);
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
}

// ============================================================================
// Cache Pop/Push Operations
// ============================================================================

SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) {
    if (!g_ss_cache_enabled) return NULL;
    if (size_class >= 8) return NULL;

    ss_cache_ensure_init();

    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    SuperslabCacheEntry* entry = g_ss_cache_head[size_class];
    if (entry) {
        g_ss_cache_head[size_class] = entry->next;
        if (g_ss_cache_count[size_class] > 0) {
            g_ss_cache_count[size_class]--;
        }
        entry->next = NULL;
        g_ss_cache_hits[size_class]++;
    } else {
        g_ss_cache_misses[size_class]++;
    }
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return entry;
}

int ss_cache_push(uint8_t size_class, SuperSlab* ss) {
    if (!g_ss_cache_enabled) return 0;
    if (size_class >= 8) return 0;

    ss_cache_ensure_init();
    pthread_mutex_lock(&g_ss_cache_lock[size_class]);
    size_t cap = g_ss_cache_cap[size_class];
    if (cap != 0 && g_ss_cache_count[size_class] >= cap) {
        g_ss_cache_drops[size_class]++;
        pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
        return 0;
    }
    SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss;
    entry->next = g_ss_cache_head[size_class];
    g_ss_cache_head[size_class] = entry;
    g_ss_cache_count[size_class]++;
    g_ss_cache_puts[size_class]++;
    pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
    return 1;
}
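Taken together, these entry points support a pop-or-acquire / push-on-retire pattern. A sketch of a hypothetical consumer (the actual call sites live in the other split files and are not shown in this diff; the caller name and parameter values below are illustrative):

// Hypothetical caller, for illustration only. ss_size and ss_mask are passed
// through exactly as the real callers would supply them.
static SuperSlab* acquire_superslab(uint8_t size_class,
                                    size_t ss_size, uintptr_t ss_mask) {
    // 1) Reuse a retired SuperSlab from the per-class cache if one is available.
    SuperslabCacheEntry* cached = ss_cache_pop(size_class);
    if (cached) {
        return (SuperSlab*)cached;
    }
    // 2) Otherwise map a fresh, aligned region from the OS.
    return (SuperSlab*)ss_os_acquire(size_class, ss_size, ss_mask, /*populate=*/0);
}

// On retirement, ss_cache_push(size_class, ss) keeps the SuperSlab for reuse;
// a return value of 0 means the cache is full (or disabled) and the caller is
// expected to release the SuperSlab some other way.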