diff --git a/Makefile b/Makefile index 1ec983e1..5cd45dba 100644 --- a/Makefile +++ b/Makefile @@ -190,7 +190,7 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o OBJS = $(OBJS_BASE) # Shared library @@ -222,7 +222,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -399,7 +399,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_ring_cache.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/hakmem_l25_pool.c b/core/hakmem_l25_pool.c index f0f65cce..e2d524ea 100644 --- a/core/hakmem_l25_pool.c +++ b/core/hakmem_l25_pool.c @@ -51,6 +51,8 @@ #include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC #include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD) #include "box/pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_L25) +#include "page_arena.h" // Phase 24: PageArena integration for L25 +#include "page_arena.h" // Phase 24: PageArena integration #include #include #include @@ -335,7 +337,18 @@ static inline int l25_alloc_new_run(int class_idx) { int blocks = l25_blocks_per_run(class_idx); size_t stride = l25_stride_bytes(class_idx); size_t run_bytes = (size_t)blocks * stride; - void* raw = mmap(NULL, run_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + // Phase 24: Try PageArena first, fallback to mmap + if (page_arena_enabled() && g_page_arena.hot.pages == NULL) { + page_arena_init(&g_page_arena); + } + void* raw = page_arena_alloc_aligned(&g_page_arena, run_bytes, L25_PAGE_SIZE); + if (!raw) { + // PageArena cache miss → fallback to mmap + raw = mmap(NULL, run_bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } + if (raw == MAP_FAILED || raw == NULL) return 0; L25ActiveRun* ar = &g_l25_active[class_idx]; ar->base = (char*)raw; @@ -641,9 +654,14 @@ static int refill_freelist(int class_idx, int shard_idx) { int ok_any = 0; for (int b = 0; b < bundles; b++) { - // Allocate bundle via mmap to avoid malloc contention and allow THP policy later - void* raw = mmap(NULL, bundle_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + // Phase 24: Try PageArena first, fallback to mmap + void* raw = page_arena_alloc_aligned(&g_page_arena, bundle_size, L25_PAGE_SIZE); + if (!raw) { + // PageArena cache miss → fallback to mmap + raw = mmap(NULL, bundle_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } + if (!raw) { if (ok_any) break; else return 0; } diff --git a/core/page_arena.c b/core/page_arena.c new file mode 100644 index 00000000..16c45ab8 --- /dev/null +++ b/core/page_arena.c @@ -0,0 +1,448 @@ +// page_arena.c - Phase 24: PageArena/HotSpanBox Implementation +#include "page_arena.h" +#include +#include +#include +#include +#include + +// ============================================================================ +// TLS Variables +// ============================================================================ + +__thread PageArena g_page_arena = {0}; + +// ============================================================================ +// Box PA1: Hot Page Cache (4KB pages) +// ============================================================================ + +void hot_page_cache_init(HotPageCache* cache, int capacity) { + if (!cache) return; + + cache->pages = (void**)calloc(capacity, sizeof(void*)); + if (!cache->pages) { + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[HotPageCache-INIT] Failed to allocate cache (%d slots)\n", capacity); + fflush(stderr); + #endif + cache->capacity = 0; + cache->count = 0; + return; + } + + cache->capacity = capacity; + cache->count = 0; + pthread_mutex_init(&cache->lock, NULL); + + #if !HAKMEM_BUILD_RELEASE + cache->hits = 0; + cache->misses = 0; + cache->frees = 0; + cache->evictions = 0; + fprintf(stderr, "[HotPageCache-INIT] Initialized with %d slots (%zu KB)\n", + capacity, (size_t)capacity * 4); + fflush(stderr); + #endif +} + +void hot_page_cache_shutdown(HotPageCache* cache) { + if (!cache || !cache->pages) return; + + pthread_mutex_lock(&cache->lock); + + // Unmap all cached pages + for (int i = 0; i < cache->count; i++) { + if (cache->pages[i]) { + munmap(cache->pages[i], 4096); + } + } + + free(cache->pages); + cache->pages = NULL; + cache->capacity = 0; + cache->count = 0; + + pthread_mutex_unlock(&cache->lock); + pthread_mutex_destroy(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[HotPageCache-SHUTDOWN] Unmapped %d pages\n", cache->count); + fflush(stderr); + #endif +} + +void* hot_page_alloc(HotPageCache* cache) { + if (!cache || !cache->pages) return NULL; + + pthread_mutex_lock(&cache->lock); + + if (cache->count > 0) { + // Pop from stack (LIFO) + void* page = cache->pages[--cache->count]; + pthread_mutex_unlock(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->hits, 1); + #endif + + return page; + } + + pthread_mutex_unlock(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->misses, 1); + #endif + + return NULL; // Cache miss +} + +void hot_page_free(HotPageCache* cache, void* page) { + if (!cache || !cache->pages || !page) return; + + pthread_mutex_lock(&cache->lock); + + if (cache->count < cache->capacity) { + // Push to stack (LIFO) + cache->pages[cache->count++] = page; + pthread_mutex_unlock(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->frees, 1); + #endif + + return; + } + + pthread_mutex_unlock(&cache->lock); + + // Cache full, evict (munmap) + munmap(page, 4096); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->evictions, 1); + #endif +} + +// ============================================================================ +// Box PA2: Warm Span Cache (64KB-2MB spans) +// ============================================================================ + +void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m) { + if (!cache) return; + + // Allocate 64KB span cache + cache->spans_64k = (void**)calloc(cap_64k, sizeof(void*)); + cache->capacity_64k = cache->spans_64k ? cap_64k : 0; + cache->count_64k = 0; + + // Allocate 128KB span cache + cache->spans_128k = (void**)calloc(cap_128k, sizeof(void*)); + cache->capacity_128k = cache->spans_128k ? cap_128k : 0; + cache->count_128k = 0; + + // Allocate 2MB span cache + cache->spans_2m = (void**)calloc(cap_2m, sizeof(void*)); + cache->capacity_2m = cache->spans_2m ? cap_2m : 0; + cache->count_2m = 0; + + pthread_mutex_init(&cache->lock, NULL); + + #if !HAKMEM_BUILD_RELEASE + cache->hits_64k = 0; + cache->hits_128k = 0; + cache->hits_2m = 0; + cache->misses = 0; + cache->frees_64k = 0; + cache->frees_128k = 0; + cache->frees_2m = 0; + cache->evictions = 0; + fprintf(stderr, "[WarmSpanCache-INIT] Initialized: 64K=%d, 128K=%d, 2M=%d\n", + cap_64k, cap_128k, cap_2m); + fflush(stderr); + #endif +} + +void warm_span_cache_shutdown(WarmSpanCache* cache) { + if (!cache) return; + + pthread_mutex_lock(&cache->lock); + + // Unmap 64KB spans + for (int i = 0; i < cache->count_64k; i++) { + if (cache->spans_64k[i]) { + munmap(cache->spans_64k[i], 65536); + } + } + free(cache->spans_64k); + cache->spans_64k = NULL; + + // Unmap 128KB spans + for (int i = 0; i < cache->count_128k; i++) { + if (cache->spans_128k[i]) { + munmap(cache->spans_128k[i], 131072); + } + } + free(cache->spans_128k); + cache->spans_128k = NULL; + + // Unmap 2MB spans + for (int i = 0; i < cache->count_2m; i++) { + if (cache->spans_2m[i]) { + munmap(cache->spans_2m[i], 2097152); + } + } + free(cache->spans_2m); + cache->spans_2m = NULL; + + pthread_mutex_unlock(&cache->lock); + pthread_mutex_destroy(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[WarmSpanCache-SHUTDOWN] Complete\n"); + fflush(stderr); + #endif +} + +void* warm_span_alloc(WarmSpanCache* cache, size_t size) { + if (!cache) return NULL; + + pthread_mutex_lock(&cache->lock); + + // Try 64KB cache + if (size <= 65536 && cache->count_64k > 0) { + void* span = cache->spans_64k[--cache->count_64k]; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->hits_64k, 1); + #endif + return span; + } + + // Try 128KB cache + if (size <= 131072 && cache->count_128k > 0) { + void* span = cache->spans_128k[--cache->count_128k]; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->hits_128k, 1); + #endif + return span; + } + + // Try 2MB cache + if (size <= 2097152 && cache->count_2m > 0) { + void* span = cache->spans_2m[--cache->count_2m]; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->hits_2m, 1); + #endif + return span; + } + + pthread_mutex_unlock(&cache->lock); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->misses, 1); + #endif + + return NULL; // Cache miss +} + +void warm_span_free(WarmSpanCache* cache, void* span, size_t size) { + if (!cache || !span) return; + + pthread_mutex_lock(&cache->lock); + + // Try 64KB cache + if (size <= 65536 && cache->count_64k < cache->capacity_64k) { + cache->spans_64k[cache->count_64k++] = span; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->frees_64k, 1); + #endif + return; + } + + // Try 128KB cache + if (size <= 131072 && cache->count_128k < cache->capacity_128k) { + cache->spans_128k[cache->count_128k++] = span; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->frees_128k, 1); + #endif + return; + } + + // Try 2MB cache + if (size <= 2097152 && cache->count_2m < cache->capacity_2m) { + cache->spans_2m[cache->count_2m++] = span; + pthread_mutex_unlock(&cache->lock); + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->frees_2m, 1); + #endif + return; + } + + pthread_mutex_unlock(&cache->lock); + + // Cache full, evict (munmap) + munmap(span, size); + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&cache->evictions, 1); + #endif +} + +// ============================================================================ +// Box PA3: Unified PageArena +// ============================================================================ + +void page_arena_init(PageArena* arena) { + if (!arena) return; + + // Initialize hot page cache + int hot_size = page_arena_hot_size(); + hot_page_cache_init(&arena->hot, hot_size); + + // Initialize warm span cache + int warm_64k = page_arena_warm_64k_size(); + int warm_128k = page_arena_warm_128k_size(); + int warm_2m = page_arena_warm_2m_size(); + warm_span_cache_init(&arena->warm, warm_64k, warm_128k, warm_2m); + + #if !HAKMEM_BUILD_RELEASE + arena->total_allocs = 0; + arena->total_frees = 0; + arena->mmap_calls = 0; + fprintf(stderr, "[PageArena-INIT] Initialized (hot=%d, warm_64k=%d, warm_128k=%d, warm_2m=%d)\n", + hot_size, warm_64k, warm_128k, warm_2m); + fflush(stderr); + #endif +} + +void page_arena_shutdown(PageArena* arena) { + if (!arena) return; + + hot_page_cache_shutdown(&arena->hot); + warm_span_cache_shutdown(&arena->warm); + + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[PageArena-SHUTDOWN] Complete (allocs=%llu, frees=%llu, mmap=%llu)\n", + (unsigned long long)arena->total_allocs, + (unsigned long long)arena->total_frees, + (unsigned long long)arena->mmap_calls); + fflush(stderr); + #endif +} + +void* page_arena_alloc_pages(PageArena* arena, size_t size) { + if (!arena) return NULL; + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&arena->total_allocs, 1); + #endif + + // Fast path: Disabled + if (!page_arena_enabled()) { + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&arena->mmap_calls, 1); + #endif + return mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } + + // Try hot page cache (4KB pages) + if (size == 4096) { + void* page = hot_page_alloc(&arena->hot); + if (page) return page; + } + + // Try warm span cache (64KB-2MB spans) + if (size >= 65536 && size <= 2097152) { + void* span = warm_span_alloc(&arena->warm, size); + if (span) return span; + } + + // Cold path: mmap fallback + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&arena->mmap_calls, 1); + #endif + + void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + return (ptr == MAP_FAILED) ? NULL : ptr; +} + +void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment) { + // For now, use page_arena_alloc_pages (mmap is always page-aligned) + (void)alignment; // Unused for now + return page_arena_alloc_pages(arena, size); +} + +void page_arena_free_pages(PageArena* arena, void* ptr, size_t size) { + if (!arena || !ptr) return; + + #if !HAKMEM_BUILD_RELEASE + __sync_fetch_and_add(&arena->total_frees, 1); + #endif + + // Fast path: Disabled + if (!page_arena_enabled()) { + munmap(ptr, size); + return; + } + + // Try hot page cache (4KB pages) + if (size == 4096) { + hot_page_free(&arena->hot, ptr); + return; + } + + // Try warm span cache (64KB-2MB spans) + if (size >= 65536 && size <= 2097152) { + warm_span_free(&arena->warm, ptr, size); + return; + } + + // Cold path: munmap + munmap(ptr, size); +} + +void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size) { + // For now, use page_arena_free_pages + page_arena_free_pages(arena, ptr, size); +} + +void page_arena_print_stats(PageArena* arena) { + if (!arena) return; + + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "\n[PageArena-STATS] Performance Metrics:\n"); + fprintf(stderr, " Total allocs: %llu\n", (unsigned long long)arena->total_allocs); + fprintf(stderr, " Total frees: %llu\n", (unsigned long long)arena->total_frees); + fprintf(stderr, " mmap calls: %llu\n", (unsigned long long)arena->mmap_calls); + + fprintf(stderr, "\n[HotPageCache-STATS]:\n"); + fprintf(stderr, " Hits: %llu\n", (unsigned long long)arena->hot.hits); + fprintf(stderr, " Misses: %llu\n", (unsigned long long)arena->hot.misses); + fprintf(stderr, " Frees: %llu\n", (unsigned long long)arena->hot.frees); + fprintf(stderr, " Evictions: %llu\n", (unsigned long long)arena->hot.evictions); + fprintf(stderr, " Occupancy: %d/%d pages\n", arena->hot.count, arena->hot.capacity); + + fprintf(stderr, "\n[WarmSpanCache-STATS]:\n"); + fprintf(stderr, " Hits (64K): %llu\n", (unsigned long long)arena->warm.hits_64k); + fprintf(stderr, " Hits (128K): %llu\n", (unsigned long long)arena->warm.hits_128k); + fprintf(stderr, " Hits (2M): %llu\n", (unsigned long long)arena->warm.hits_2m); + fprintf(stderr, " Misses: %llu\n", (unsigned long long)arena->warm.misses); + fprintf(stderr, " Frees (64K): %llu\n", (unsigned long long)arena->warm.frees_64k); + fprintf(stderr, " Frees (128K): %llu\n", (unsigned long long)arena->warm.frees_128k); + fprintf(stderr, " Frees (2M): %llu\n", (unsigned long long)arena->warm.frees_2m); + fprintf(stderr, " Evictions: %llu\n", (unsigned long long)arena->warm.evictions); + fprintf(stderr, " Occupancy: 64K=%d/%d, 128K=%d/%d, 2M=%d/%d\n", + arena->warm.count_64k, arena->warm.capacity_64k, + arena->warm.count_128k, arena->warm.capacity_128k, + arena->warm.count_2m, arena->warm.capacity_2m); + + fflush(stderr); + #endif +} diff --git a/core/page_arena.d b/core/page_arena.d new file mode 100644 index 00000000..46667f04 --- /dev/null +++ b/core/page_arena.d @@ -0,0 +1,4 @@ +core/page_arena.o: core/page_arena.c core/page_arena.h \ + core/hakmem_build_flags.h +core/page_arena.h: +core/hakmem_build_flags.h: diff --git a/core/page_arena.h b/core/page_arena.h new file mode 100644 index 00000000..678ad920 --- /dev/null +++ b/core/page_arena.h @@ -0,0 +1,245 @@ +// page_arena.h - Phase 24: PageArena/HotSpanBox (Mid-Large page-fault optimization) +// +// Goal: Reduce Mid/VM page-faults by 50-66% (80-100K → 30-40K) +// Target: +30-50% performance for Mid-Large (8-52KB) / L25 (64KB-2MB) +// +// Design: +// Box PA1: Hot Page Cache (4KB pages, LIFO stack, 1024 slots) +// Box PA2: Warm Span Cache (64KB-2MB spans, size-bucketed, 448 slots) +// Box PA3: Cold Path (mmap fallback when cache misses) +// +// Integration: +// - Pool TLS: chunk_ensure() → page_arena_alloc_pages() +// - L25: l25_alloc_new_run() → page_arena_alloc_aligned() +// - L25: refill_freelist() → page_arena_alloc_aligned() +// +// ENV Variables: +// HAKMEM_PAGE_ARENA_ENABLE=1 # Enable PageArena (default: 0, OFF) +// HAKMEM_PAGE_ARENA_HOT_SIZE=1024 # Hot page cache size (default: 1024) +// HAKMEM_PAGE_ARENA_WARM_64K=256 # Warm 64KB span cache size (default: 256) +// HAKMEM_PAGE_ARENA_WARM_128K=128 # Warm 128KB span cache size (default: 128) +// HAKMEM_PAGE_ARENA_WARM_2M=64 # Warm 2MB span cache size (default: 64) + +#ifndef HAK_PAGE_ARENA_H +#define HAK_PAGE_ARENA_H + +#include +#include +#include +#include "hakmem_build_flags.h" + +// ============================================================================ +// Box PA1: Hot Page Cache (4KB pages) +// ============================================================================ + +#define PA_HOT_PAGE_DEFAULT_SIZE 1024 // 1024 slots = 4MB cache + +typedef struct { + void** pages; // Dynamic array of 4KB pages + int capacity; // Max slots (power of 2) + int count; // Current occupancy + pthread_mutex_t lock; // Lock for MT safety + + // Metrics (debug only) + #if !HAKMEM_BUILD_RELEASE + uint64_t hits; // Alloc hits + uint64_t misses; // Alloc misses (fallback to mmap) + uint64_t frees; // Free pushes + uint64_t evictions; // Free evictions (cache full) + #endif +} HotPageCache; + +// Initialize hot page cache (called at startup or lazy init) +void hot_page_cache_init(HotPageCache* cache, int capacity); + +// Shutdown hot page cache (called at cleanup) +void hot_page_cache_shutdown(HotPageCache* cache); + +// Allocate 4KB page from hot cache (returns NULL if cache miss) +void* hot_page_alloc(HotPageCache* cache); + +// Free 4KB page to hot cache (may evict if cache full) +void hot_page_free(HotPageCache* cache, void* page); + +// ============================================================================ +// Box PA2: Warm Span Cache (64KB-2MB spans) +// ============================================================================ + +#define PA_WARM_64K_DEFAULT_SIZE 256 // 256 slots = 16MB cache +#define PA_WARM_128K_DEFAULT_SIZE 128 // 128 slots = 16MB cache +#define PA_WARM_2M_DEFAULT_SIZE 64 // 64 slots = 128MB cache + +typedef struct { + // 64KB spans + void** spans_64k; + int capacity_64k; + int count_64k; + + // 128KB spans + void** spans_128k; + int capacity_128k; + int count_128k; + + // 2MB spans + void** spans_2m; + int capacity_2m; + int count_2m; + + pthread_mutex_t lock; // Lock for MT safety + + // Metrics (debug only) + #if !HAKMEM_BUILD_RELEASE + uint64_t hits_64k; + uint64_t hits_128k; + uint64_t hits_2m; + uint64_t misses; + uint64_t frees_64k; + uint64_t frees_128k; + uint64_t frees_2m; + uint64_t evictions; + #endif +} WarmSpanCache; + +// Initialize warm span cache (called at startup or lazy init) +void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m); + +// Shutdown warm span cache (called at cleanup) +void warm_span_cache_shutdown(WarmSpanCache* cache); + +// Allocate aligned span from warm cache (returns NULL if cache miss) +void* warm_span_alloc(WarmSpanCache* cache, size_t size); + +// Free aligned span to warm cache (may evict if cache full) +void warm_span_free(WarmSpanCache* cache, void* span, size_t size); + +// ============================================================================ +// Box PA3: Unified PageArena (combines PA1 + PA2 + Cold Path) +// ============================================================================ + +typedef struct { + HotPageCache hot; // Box PA1: 4KB pages + WarmSpanCache warm; // Box PA2: 64KB-2MB spans + + // Metrics (debug only) + #if !HAKMEM_BUILD_RELEASE + uint64_t total_allocs; + uint64_t total_frees; + uint64_t mmap_calls; // Cold path fallback count + #endif +} PageArena; + +// Global page arena (TLS per thread, or single global instance) +extern __thread PageArena g_page_arena; + +// ============================================================================ +// ENV Control (cached, lazy init) +// ============================================================================ + +// Enable flag (default: 0, OFF) +static inline int page_arena_enabled(void) { + static int g_enable = -1; + if (__builtin_expect(g_enable == -1, 0)) { + const char* e = getenv("HAKMEM_PAGE_ARENA_ENABLE"); + g_enable = (e && *e && *e != '0') ? 1 : 0; + #if !HAKMEM_BUILD_RELEASE + if (g_enable) { + fprintf(stderr, "[PageArena-INIT] page_arena_enabled() = %d\n", g_enable); + fflush(stderr); + } + #endif + } + return g_enable; +} + +// Hot page cache size (default: 1024) +static inline int page_arena_hot_size(void) { + static int g_size = -1; + if (__builtin_expect(g_size == -1, 0)) { + const char* e = getenv("HAKMEM_PAGE_ARENA_HOT_SIZE"); + g_size = (e && *e) ? atoi(e) : PA_HOT_PAGE_DEFAULT_SIZE; + if (g_size < 64) g_size = 64; + if (g_size > 4096) g_size = 4096; + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[PageArena-INIT] hot_size = %d\n", g_size); + fflush(stderr); + #endif + } + return g_size; +} + +// Warm 64KB span cache size (default: 256) +static inline int page_arena_warm_64k_size(void) { + static int g_size = -1; + if (__builtin_expect(g_size == -1, 0)) { + const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_64K"); + g_size = (e && *e) ? atoi(e) : PA_WARM_64K_DEFAULT_SIZE; + if (g_size < 16) g_size = 16; + if (g_size > 1024) g_size = 1024; + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[PageArena-INIT] warm_64k_size = %d\n", g_size); + fflush(stderr); + #endif + } + return g_size; +} + +// Warm 128KB span cache size (default: 128) +static inline int page_arena_warm_128k_size(void) { + static int g_size = -1; + if (__builtin_expect(g_size == -1, 0)) { + const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_128K"); + g_size = (e && *e) ? atoi(e) : PA_WARM_128K_DEFAULT_SIZE; + if (g_size < 8) g_size = 8; + if (g_size > 512) g_size = 512; + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[PageArena-INIT] warm_128k_size = %d\n", g_size); + fflush(stderr); + #endif + } + return g_size; +} + +// Warm 2MB span cache size (default: 64) +static inline int page_arena_warm_2m_size(void) { + static int g_size = -1; + if (__builtin_expect(g_size == -1, 0)) { + const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_2M"); + g_size = (e && *e) ? atoi(e) : PA_WARM_2M_DEFAULT_SIZE; + if (g_size < 4) g_size = 4; + if (g_size > 256) g_size = 256; + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[PageArena-INIT] warm_2m_size = %d\n", g_size); + fflush(stderr); + #endif + } + return g_size; +} + +// ============================================================================ +// Public API (Box PA3: Unified PageArena) +// ============================================================================ + +// Initialize PageArena (called at thread start or lazy on first access) +void page_arena_init(PageArena* arena); + +// Shutdown PageArena (called at thread exit) +void page_arena_shutdown(PageArena* arena); + +// Allocate pages (any size, uses hot/warm/cold based on size) +// Returns: Pointer to allocated pages, or NULL if failed +void* page_arena_alloc_pages(PageArena* arena, size_t size); + +// Allocate aligned span (64KB/128KB/2MB aligned) +// Returns: Pointer to allocated span, or NULL if failed +void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment); + +// Free pages (any size, uses hot/warm based on size) +void page_arena_free_pages(PageArena* arena, void* ptr, size_t size); + +// Free aligned span (64KB/128KB/2MB aligned) +void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size); + +// Print stats (debug only) +void page_arena_print_stats(PageArena* arena); + +#endif // HAK_PAGE_ARENA_H diff --git a/core/pool_tls_arena.c b/core/pool_tls_arena.c index 588789fc..b4e09f38 100644 --- a/core/pool_tls_arena.c +++ b/core/pool_tls_arena.c @@ -1,5 +1,6 @@ #include "pool_tls_arena.h" #include "pool_tls.h" // For POOL_HEADER_SIZE, POOL_USE_HEADERS +#include "page_arena.h" // Phase 24: PageArena integration #include #include #include @@ -50,6 +51,11 @@ static int chunk_ensure(PoolChunk* chunk, size_t needed) { return 0; // Space available } + // Phase 24: Ensure PageArena is initialized before first use + if (page_arena_enabled() && g_page_arena.hot.pages == NULL) { + page_arena_init(&g_page_arena); + } + // Need new chunk - calculate size with exponential growth pthread_once(&g_arena_cfg_once, arena_read_env); size_t new_size; @@ -78,15 +84,20 @@ static int chunk_ensure(PoolChunk* chunk, size_t needed) { } #endif - // Allocate new chunk - void* new_base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (new_base == MAP_FAILED) { - // DEBUG: Log mmap failure details - static _Atomic int mmap_fail_count = 0; - int fail_num = atomic_fetch_add(&mmap_fail_count, 1); + // Phase 24: Try PageArena first, fallback to mmap + void* new_base = page_arena_alloc_pages(&g_page_arena, new_size); + if (!new_base) { + // PageArena cache miss → fallback to mmap + new_base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } + + if (new_base == MAP_FAILED || new_base == NULL) { + // DEBUG: Log allocation failure details + static _Atomic int alloc_fail_count = 0; + int fail_num = atomic_fetch_add(&alloc_fail_count, 1); if (fail_num < 10) { - fprintf(stderr, "[POOL_ARENA] mmap FAILED: new_size=%zu MB, growth_level=%d, errno=%d\n", + fprintf(stderr, "[POOL_ARENA] alloc FAILED: new_size=%zu MB, growth_level=%d, errno=%d\n", new_size / (1024*1024), chunk->growth_level, errno); } return -1; // OOM @@ -171,7 +182,14 @@ void arena_cleanup_thread(void) { if (chunk->chunk_base) { pid_t tid = (pid_t)syscall(SYS_gettid); pool_reg_unregister(chunk->chunk_base, chunk->chunk_size, tid); - munmap(chunk->chunk_base, chunk->chunk_size); + + // Phase 24: Return to PageArena if enabled + if (page_arena_enabled()) { + page_arena_free_pages(&g_page_arena, chunk->chunk_base, chunk->chunk_size); + } else { + munmap(chunk->chunk_base, chunk->chunk_size); + } + chunk->chunk_base = NULL; } } diff --git a/hakmem_l25_pool.d b/hakmem_l25_pool.d index 500e9d44..e3ff1551 100644 --- a/hakmem_l25_pool.d +++ b/hakmem_l25_pool.d @@ -2,7 +2,7 @@ hakmem_l25_pool.o: core/hakmem_l25_pool.c core/hakmem_l25_pool.h \ core/hakmem_config.h core/hakmem_features.h core/hakmem_internal.h \ core/hakmem.h core/hakmem_build_flags.h core/hakmem_sys.h \ core/hakmem_whale.h core/hakmem_syscall.h \ - core/box/pagefault_telemetry_box.h core/hakmem_prof.h \ + core/box/pagefault_telemetry_box.h core/page_arena.h core/hakmem_prof.h \ core/hakmem_debug.h core/hakmem_policy.h core/hakmem_l25_pool.h: core/hakmem_config.h: @@ -14,6 +14,7 @@ core/hakmem_sys.h: core/hakmem_whale.h: core/hakmem_syscall.h: core/box/pagefault_telemetry_box.h: +core/page_arena.h: core/hakmem_prof.h: core/hakmem_debug.h: core/hakmem_policy.h: