hakmem/core/page_arena.c
Phase 24 PageArena/HotSpanBox: Mid/VM page reuse cache (structural limit identified)

Summary:
- Implemented PageArena (Box PA1-PA3) for Mid-Large (8-52KB) / L25 (64KB-2MB)
- Integration: Pool TLS Arena + L25 alloc/refill paths
- Result: Minimal impact (+4.7% Mid, 0% VM page-fault reduction)
- Conclusion: Structural limit - existing Arena/Pool/L25 already optimized

Implementation:
1. Box PA1: Hot Page Cache (4KB pages, LIFO stack, 1024 slots)
   - core/page_arena.c: hot_page_alloc/free with mutex protection
   - TLS cache for 4KB pages
2. Box PA2: Warm Span Cache (64KB-2MB spans, size-bucketed)
   - 64KB/128KB/2MB span caches (256/128/64 slots)
   - Size-class based allocation
3. Box PA3: Cold Path (mmap fallback)
   - page_arena_alloc_pages/aligned with fallback to direct mmap

Integration Points:
4. Pool TLS Arena (core/pool_tls_arena.c)
   - chunk_ensure(): Lazy init + page_arena_alloc_pages() hook
   - arena_cleanup_thread(): Return chunks to PageArena if enabled
   - Exponential growth preserved (1MB → 8MB)
5. L25 Pool (core/hakmem_l25_pool.c)
   - l25_alloc_new_run(): Lazy init + page_arena_alloc_aligned() hook
   - refill_freelist(): PageArena allocation for bundles
   - 2MB run carving preserved

ENV Variables (a sketch of how these might be read follows below):
- HAKMEM_PAGE_ARENA_ENABLE=1 (default: 0, OFF)
- HAKMEM_PAGE_ARENA_HOT_SIZE=1024 (default: 1024)
- HAKMEM_PAGE_ARENA_WARM_64K=256 (default: 256)
- HAKMEM_PAGE_ARENA_WARM_128K=128 (default: 128)
- HAKMEM_PAGE_ARENA_WARM_2M=64 (default: 64)

Benchmark Results:
- Mid-Large MT (4T, 40K iter, 2KB):
  - OFF: 84,535 page-faults, 726K ops/s
  - ON:  84,534 page-faults, 760K ops/s (+4.7% ops, -0.001% faults)
- VM Mixed (200K iter):
  - OFF: 102,134 page-faults, 257K ops/s
  - ON:  102,134 page-faults, 255K ops/s (0% change)

Root Cause Analysis:
- Hypothesis: 50-66% page-fault reduction (80-100K → 30-40K)
- Actual: <1% page-fault reduction, minimal performance impact
- Reason: Structural limit - existing Arena/Pool/L25 already highly optimized
  - 1MB chunk sizes with high-density linear carving
  - TLS ring + exponential growth minimize mmap calls
  - PageArena becomes a double-buffering layer with no benefit
  - Remaining page-faults come from kernel zero-clear + app access patterns

Lessons Learned:
1. Mid/Large allocators are already page-optimal via the Arena/Pool design
2. Middle-layer caching is ineffective when the base layer is already optimized
3. Page-fault reduction requires app-level access pattern changes
4. Tiny layer (Phase 23) remains the best target for frontend optimization

Next Steps:
- Defer PageArena (low ROI, structural limit reached)
- Focus on upper layers (allocation pattern analysis, size distribution)
- Consider app-side access pattern optimization

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 03:22:27 +09:00
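The configuration accessors this file calls (page_arena_enabled(), page_arena_hot_size(), page_arena_warm_64k_size(), page_arena_warm_128k_size(), page_arena_warm_2m_size()) are defined elsewhere, presumably in page_arena.h. As a sketch only, they might read the ENV variables above roughly like this; env_int_or is a hypothetical helper, and the fallbacks are the documented defaults:

// Sketch only - the real accessors live outside this file. A production
// version would likely cache the parsed values instead of calling getenv()
// on every allocation.
#include <stdlib.h>

static int env_int_or(const char* name, int fallback) {  // hypothetical helper
    const char* v = getenv(name);
    return (v && *v) ? atoi(v) : fallback;
}
static int page_arena_enabled(void)        { return env_int_or("HAKMEM_PAGE_ARENA_ENABLE", 0) != 0; }
static int page_arena_hot_size(void)       { return env_int_or("HAKMEM_PAGE_ARENA_HOT_SIZE", 1024); }
static int page_arena_warm_64k_size(void)  { return env_int_or("HAKMEM_PAGE_ARENA_WARM_64K", 256); }
static int page_arena_warm_128k_size(void) { return env_int_or("HAKMEM_PAGE_ARENA_WARM_128K", 128); }
static int page_arena_warm_2m_size(void)   { return env_int_or("HAKMEM_PAGE_ARENA_WARM_2M", 64); }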
// page_arena.c - Phase 24: PageArena/HotSpanBox Implementation
#include "page_arena.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <pthread.h>   // pthread_mutex_* is used throughout this file
#include <sys/mman.h>
#include <unistd.h>
// ============================================================================
// TLS Variables
// ============================================================================
__thread PageArena g_page_arena = {0};
// ============================================================================
// Box PA1: Hot Page Cache (4KB pages)
// ============================================================================
void hot_page_cache_init(HotPageCache* cache, int capacity) {
if (!cache) return;
cache->pages = (void**)calloc(capacity, sizeof(void*));
if (!cache->pages) {
cache->capacity = 0;
cache->count = 0;
return;
}
cache->capacity = capacity;
cache->count = 0;
pthread_mutex_init(&cache->lock, NULL);
#if !HAKMEM_BUILD_RELEASE
cache->hits = 0;
cache->misses = 0;
cache->frees = 0;
cache->evictions = 0;
fprintf(stderr, "[HotPageCache-INIT] Initialized with %d slots (%zu KB)\n",
capacity, (size_t)capacity * 4);
fflush(stderr);
#endif
}
void hot_page_cache_shutdown(HotPageCache* cache) {
    if (!cache || !cache->pages) return;
    pthread_mutex_lock(&cache->lock);
    int unmapped = cache->count;  // capture before reset so the debug log is accurate
    // Unmap all cached pages
    for (int i = 0; i < cache->count; i++) {
        if (cache->pages[i]) {
            munmap(cache->pages[i], 4096);
        }
    }
    free(cache->pages);
    cache->pages = NULL;
    cache->capacity = 0;
    cache->count = 0;
    pthread_mutex_unlock(&cache->lock);
    pthread_mutex_destroy(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[HotPageCache-SHUTDOWN] Unmapped %d pages\n", unmapped);
    fflush(stderr);
#else
    (void)unmapped;
#endif
}
void* hot_page_alloc(HotPageCache* cache) {
if (!cache || !cache->pages) return NULL;
pthread_mutex_lock(&cache->lock);
if (cache->count > 0) {
// Pop from stack (LIFO)
void* page = cache->pages[--cache->count];
pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
__sync_fetch_and_add(&cache->hits, 1);
#endif
return page;
}
pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
__sync_fetch_and_add(&cache->misses, 1);
#endif
return NULL; // Cache miss
}
void hot_page_free(HotPageCache* cache, void* page) {
if (!cache || !cache->pages || !page) return;
pthread_mutex_lock(&cache->lock);
if (cache->count < cache->capacity) {
// Push to stack (LIFO)
cache->pages[cache->count++] = page;
pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
__sync_fetch_and_add(&cache->frees, 1);
#endif
return;
}
pthread_mutex_unlock(&cache->lock);
// Cache full, evict (munmap)
munmap(page, 4096);
#if !HAKMEM_BUILD_RELEASE
__sync_fetch_and_add(&cache->evictions, 1);
#endif
}
// ============================================================================
// Box PA2: Warm Span Cache (64KB-2MB spans)
// ============================================================================
void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m) {
if (!cache) return;
// Allocate 64KB span cache
cache->spans_64k = (void**)calloc(cap_64k, sizeof(void*));
cache->capacity_64k = cache->spans_64k ? cap_64k : 0;
cache->count_64k = 0;
// Allocate 128KB span cache
cache->spans_128k = (void**)calloc(cap_128k, sizeof(void*));
cache->capacity_128k = cache->spans_128k ? cap_128k : 0;
cache->count_128k = 0;
// Allocate 2MB span cache
cache->spans_2m = (void**)calloc(cap_2m, sizeof(void*));
cache->capacity_2m = cache->spans_2m ? cap_2m : 0;
cache->count_2m = 0;
pthread_mutex_init(&cache->lock, NULL);
#if !HAKMEM_BUILD_RELEASE
cache->hits_64k = 0;
cache->hits_128k = 0;
cache->hits_2m = 0;
cache->misses = 0;
cache->frees_64k = 0;
cache->frees_128k = 0;
cache->frees_2m = 0;
cache->evictions = 0;
fprintf(stderr, "[WarmSpanCache-INIT] Initialized: 64K=%d, 128K=%d, 2M=%d\n",
cap_64k, cap_128k, cap_2m);
fflush(stderr);
#endif
}
void warm_span_cache_shutdown(WarmSpanCache* cache) {
    if (!cache) return;
    pthread_mutex_lock(&cache->lock);
    // Unmap 64KB spans
    for (int i = 0; i < cache->count_64k; i++) {
        if (cache->spans_64k[i]) {
            munmap(cache->spans_64k[i], 65536);
        }
    }
    free(cache->spans_64k);
    cache->spans_64k = NULL;
    cache->count_64k = 0;
    cache->capacity_64k = 0;
    // Unmap 128KB spans
    for (int i = 0; i < cache->count_128k; i++) {
        if (cache->spans_128k[i]) {
            munmap(cache->spans_128k[i], 131072);
        }
    }
    free(cache->spans_128k);
    cache->spans_128k = NULL;
    cache->count_128k = 0;
    cache->capacity_128k = 0;
    // Unmap 2MB spans
    for (int i = 0; i < cache->count_2m; i++) {
        if (cache->spans_2m[i]) {
            munmap(cache->spans_2m[i], 2097152);
        }
    }
    free(cache->spans_2m);
    cache->spans_2m = NULL;
    cache->count_2m = 0;
    cache->capacity_2m = 0;
    pthread_mutex_unlock(&cache->lock);
    pthread_mutex_destroy(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[WarmSpanCache-SHUTDOWN] Complete\n");
    fflush(stderr);
#endif
}
void* warm_span_alloc(WarmSpanCache* cache, size_t size) {
    if (!cache) return NULL;
    pthread_mutex_lock(&cache->lock);
    // Exact-size bucket match only: handing out a larger span for a smaller
    // request would desynchronize the length later passed to munmap on
    // eviction/shutdown.
    // 64KB bucket
    if (size == 65536 && cache->count_64k > 0) {
        void* span = cache->spans_64k[--cache->count_64k];
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->hits_64k, 1);
#endif
        return span;
    }
    // 128KB bucket
    if (size == 131072 && cache->count_128k > 0) {
        void* span = cache->spans_128k[--cache->count_128k];
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->hits_128k, 1);
#endif
        return span;
    }
    // 2MB bucket
    if (size == 2097152 && cache->count_2m > 0) {
        void* span = cache->spans_2m[--cache->count_2m];
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->hits_2m, 1);
#endif
        return span;
    }
    pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
    __sync_fetch_and_add(&cache->misses, 1);
#endif
    return NULL; // Cache miss
}
void warm_span_free(WarmSpanCache* cache, void* span, size_t size) {
    if (!cache || !span) return;
    pthread_mutex_lock(&cache->lock);
    // Exact-size bucket match only: caching, e.g., a 100KB span in the 128KB
    // bucket would hand a short mapping to a later 128KB request.
    // 64KB bucket
    if (size == 65536 && cache->count_64k < cache->capacity_64k) {
        cache->spans_64k[cache->count_64k++] = span;
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->frees_64k, 1);
#endif
        return;
    }
    // 128KB bucket
    if (size == 131072 && cache->count_128k < cache->capacity_128k) {
        cache->spans_128k[cache->count_128k++] = span;
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->frees_128k, 1);
#endif
        return;
    }
    // 2MB bucket
    if (size == 2097152 && cache->count_2m < cache->capacity_2m) {
        cache->spans_2m[cache->count_2m++] = span;
        pthread_mutex_unlock(&cache->lock);
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&cache->frees_2m, 1);
#endif
        return;
    }
    pthread_mutex_unlock(&cache->lock);
    // Bucket full or non-bucket size: evict via munmap with the true length
    munmap(span, size);
#if !HAKMEM_BUILD_RELEASE
    __sync_fetch_and_add(&cache->evictions, 1);
#endif
}
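// ----------------------------------------------------------------------------
// Hypothetical helper (sketch only, not used above): the three near-identical
// bucket branches could be collapsed with a size -> bucket-index map, e.g.:
// ----------------------------------------------------------------------------
__attribute__((unused))
static int warm_bucket_index(size_t size) {
    switch (size) {
        case 65536:   return 0;  // 64KB bucket
        case 131072:  return 1;  // 128KB bucket
        case 2097152: return 2;  // 2MB bucket
        default:      return -1; // not a cacheable size class
    }
}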
// ============================================================================
// Box PA3: Unified PageArena
// ============================================================================
void page_arena_init(PageArena* arena) {
if (!arena) return;
// Initialize hot page cache
int hot_size = page_arena_hot_size();
hot_page_cache_init(&arena->hot, hot_size);
// Initialize warm span cache
int warm_64k = page_arena_warm_64k_size();
int warm_128k = page_arena_warm_128k_size();
int warm_2m = page_arena_warm_2m_size();
warm_span_cache_init(&arena->warm, warm_64k, warm_128k, warm_2m);
#if !HAKMEM_BUILD_RELEASE
arena->total_allocs = 0;
arena->total_frees = 0;
arena->mmap_calls = 0;
fprintf(stderr, "[PageArena-INIT] Initialized (hot=%d, warm_64k=%d, warm_128k=%d, warm_2m=%d)\n",
hot_size, warm_64k, warm_128k, warm_2m);
fflush(stderr);
#endif
}
void page_arena_shutdown(PageArena* arena) {
if (!arena) return;
hot_page_cache_shutdown(&arena->hot);
warm_span_cache_shutdown(&arena->warm);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[PageArena-SHUTDOWN] Complete (allocs=%llu, frees=%llu, mmap=%llu)\n",
(unsigned long long)arena->total_allocs,
(unsigned long long)arena->total_frees,
(unsigned long long)arena->mmap_calls);
fflush(stderr);
#endif
}
void* page_arena_alloc_pages(PageArena* arena, size_t size) {
    if (!arena) return NULL;
#if !HAKMEM_BUILD_RELEASE
    __sync_fetch_and_add(&arena->total_allocs, 1);
#endif
    // Fast path: PageArena disabled, go straight to mmap
    if (!page_arena_enabled()) {
#if !HAKMEM_BUILD_RELEASE
        __sync_fetch_and_add(&arena->mmap_calls, 1);
#endif
        void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        return (ptr == MAP_FAILED) ? NULL : ptr;  // normalize failure to NULL
    }
    // Try hot page cache (single 4KB pages)
    if (size == 4096) {
        void* page = hot_page_alloc(&arena->hot);
        if (page) return page;
    }
    // Try warm span cache (exact 64KB/128KB/2MB bucket sizes only)
    if (size == 65536 || size == 131072 || size == 2097152) {
        void* span = warm_span_alloc(&arena->warm, size);
        if (span) return span;
    }
    // Cold path: mmap fallback
#if !HAKMEM_BUILD_RELEASE
    __sync_fetch_and_add(&arena->mmap_calls, 1);
#endif
    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return (ptr == MAP_FAILED) ? NULL : ptr;
}
void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment) {
    // mmap results are always page-aligned, so 4KB alignment comes for free.
    // Alignments larger than a page are not honored here yet; one possible
    // over-map-and-trim approach is sketched below.
    (void)alignment;
    return page_arena_alloc_pages(arena, size);
}
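// ----------------------------------------------------------------------------
// Illustrative sketch (hypothetical, not wired into any caller): one common
// way to honor alignments larger than a page is to over-map by the alignment
// and trim the excess with munmap. Assumes alignment is a power of two and a
// multiple of the page size, and size is a multiple of the page size.
// ----------------------------------------------------------------------------
#include <stdint.h> // uintptr_t, needed only by this sketch
__attribute__((unused))
static void* page_arena_mmap_aligned_sketch(size_t size, size_t alignment) {
    size_t span = size + alignment; // over-map so an aligned start must exist
    void* raw = mmap(NULL, span, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (raw == MAP_FAILED) return NULL;
    uintptr_t base = (uintptr_t)raw;
    uintptr_t aligned = (base + alignment - 1) & ~((uintptr_t)alignment - 1);
    size_t head = (size_t)(aligned - base);
    size_t tail = span - head - size;
    if (head) munmap(raw, head);                     // trim unaligned front
    if (tail) munmap((void*)(aligned + size), tail); // trim excess tail
    return (void*)aligned;
}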
void page_arena_free_pages(PageArena* arena, void* ptr, size_t size) {
    if (!arena || !ptr) return;
#if !HAKMEM_BUILD_RELEASE
    __sync_fetch_and_add(&arena->total_frees, 1);
#endif
    // Fast path: PageArena disabled, unmap directly
    if (!page_arena_enabled()) {
        munmap(ptr, size);
        return;
    }
    // Try hot page cache (single 4KB pages)
    if (size == 4096) {
        hot_page_free(&arena->hot, ptr);
        return;
    }
    // Try warm span cache (exact bucket sizes only, so a cached span's
    // length always matches the munmap length at eviction/shutdown)
    if (size == 65536 || size == 131072 || size == 2097152) {
        warm_span_free(&arena->warm, ptr, size);
        return;
    }
    // Cold path: munmap
    munmap(ptr, size);
}
void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size) {
// For now, use page_arena_free_pages
page_arena_free_pages(arena, ptr, size);
}
void page_arena_print_stats(PageArena* arena) {
if (!arena) return;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "\n[PageArena-STATS] Performance Metrics:\n");
fprintf(stderr, " Total allocs: %llu\n", (unsigned long long)arena->total_allocs);
fprintf(stderr, " Total frees: %llu\n", (unsigned long long)arena->total_frees);
fprintf(stderr, " mmap calls: %llu\n", (unsigned long long)arena->mmap_calls);
fprintf(stderr, "\n[HotPageCache-STATS]:\n");
fprintf(stderr, " Hits: %llu\n", (unsigned long long)arena->hot.hits);
fprintf(stderr, " Misses: %llu\n", (unsigned long long)arena->hot.misses);
fprintf(stderr, " Frees: %llu\n", (unsigned long long)arena->hot.frees);
fprintf(stderr, " Evictions: %llu\n", (unsigned long long)arena->hot.evictions);
fprintf(stderr, " Occupancy: %d/%d pages\n", arena->hot.count, arena->hot.capacity);
fprintf(stderr, "\n[WarmSpanCache-STATS]:\n");
fprintf(stderr, " Hits (64K): %llu\n", (unsigned long long)arena->warm.hits_64k);
fprintf(stderr, " Hits (128K): %llu\n", (unsigned long long)arena->warm.hits_128k);
fprintf(stderr, " Hits (2M): %llu\n", (unsigned long long)arena->warm.hits_2m);
fprintf(stderr, " Misses: %llu\n", (unsigned long long)arena->warm.misses);
fprintf(stderr, " Frees (64K): %llu\n", (unsigned long long)arena->warm.frees_64k);
fprintf(stderr, " Frees (128K): %llu\n", (unsigned long long)arena->warm.frees_128k);
fprintf(stderr, " Frees (2M): %llu\n", (unsigned long long)arena->warm.frees_2m);
fprintf(stderr, " Evictions: %llu\n", (unsigned long long)arena->warm.evictions);
fprintf(stderr, " Occupancy: 64K=%d/%d, 128K=%d/%d, 2M=%d/%d\n",
arena->warm.count_64k, arena->warm.capacity_64k,
arena->warm.count_128k, arena->warm.capacity_128k,
arena->warm.count_2m, arena->warm.capacity_2m);
fflush(stderr);
#endif
}
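For reference, a minimal standalone usage sketch of the API exercised above (hypothetical test harness, not part of the source; assumes page_arena.h declares these functions and exposes g_page_arena):

// sketch_main.c - hypothetical round trip through the PageArena API
#include "page_arena.h"

int main(void) {
    // Run with HAKMEM_PAGE_ARENA_ENABLE=1 to exercise the caches;
    // otherwise every call falls through to mmap/munmap.
    page_arena_init(&g_page_arena);

    void* page = page_arena_alloc_pages(&g_page_arena, 4096);    // hot size
    void* span = page_arena_alloc_pages(&g_page_arena, 2097152); // warm 2MB
    page_arena_free_pages(&g_page_arena, page, 4096); // cached, not unmapped
    page_arena_free_pages(&g_page_arena, span, 2097152);

    // Same sizes again: these should now be cache hits.
    void* page2 = page_arena_alloc_pages(&g_page_arena, 4096);
    page_arena_free_pages(&g_page_arena, page2, 4096);

    page_arena_print_stats(&g_page_arena); // debug builds only
    page_arena_shutdown(&g_page_arena);
    return 0;
}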