Files
hakmem/core/page_arena.h
Moe Charm (CI) 9b0d746407 Phase 3d-B: TLS Cache Merge - Unified g_tls_sll[] structure (+12-18% expected)
Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified
TinyTLSSLL struct to improve L1D cache locality. Expected performance gain:
+12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build:  PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 07:32:30 +09:00

247 lines
8.4 KiB
C

// page_arena.h - Phase 24: PageArena/HotSpanBox (Mid-Large page-fault optimization)
//
// Goal: Reduce Mid/VM page-faults by 50-66% (80-100K → 30-40K)
// Target: +30-50% performance for Mid-Large (8-52KB) / L25 (64KB-2MB)
//
// Design:
// Box PA1: Hot Page Cache (4KB pages, LIFO stack, 1024 slots)
// Box PA2: Warm Span Cache (64KB-2MB spans, size-bucketed, 448 slots)
// Box PA3: Cold Path (mmap fallback when cache misses)
//
// Integration:
// - Pool TLS: chunk_ensure() → page_arena_alloc_pages()
// - L25: l25_alloc_new_run() → page_arena_alloc_aligned()
// - L25: refill_freelist() → page_arena_alloc_aligned()
//
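// ENV Variables (each is read once and cached on first use):
// HAKMEM_PAGE_ARENA_ENABLE=1 # Enable PageArena (default: 0, OFF; any non-empty value not starting with '0' enables it)
// HAKMEM_PAGE_ARENA_HOT_SIZE=1024 # Hot page cache size in slots (default: 1024, clamped to 64-4096)
// HAKMEM_PAGE_ARENA_WARM_64K=256 # Warm 64KB span cache size in slots (default: 256, clamped to 16-1024)
// HAKMEM_PAGE_ARENA_WARM_128K=128 # Warm 128KB span cache size in slots (default: 128, clamped to 8-512)
// HAKMEM_PAGE_ARENA_WARM_2M=64 # Warm 2MB span cache size in slots (default: 64, clamped to 4-256)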
#ifndef HAK_PAGE_ARENA_H
#define HAK_PAGE_ARENA_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include "hakmem_build_flags.h"
// ============================================================================
// Box PA1: Hot Page Cache (4KB pages)
// ============================================================================
#define PA_HOT_PAGE_DEFAULT_SIZE 1024  // Default slot count: 1024 x 4KB pages = 4MB cached

// HotPageCache - bookkeeping for the Box PA1 cache of 4KB pages.
//
// The struct only stores the slot array, its bounds and a mutex; the
// operations on it are declared separately (hot_page_cache_init() etc.).
//
// Layout note: the metric counters exist only when HAKMEM_BUILD_RELEASE is 0,
// so sizeof(HotPageCache) differs between release and non-release builds.
// Every translation unit that shares this type must agree on that flag.
typedef struct {
    void** pages;          // Dynamically allocated array of cached 4KB page pointers
    int capacity;          // Maximum number of slots (documented as a power of 2)
    int count;             // Number of slots currently occupied
    pthread_mutex_t lock;  // Mutex for multi-thread safety

#if !HAKMEM_BUILD_RELEASE
    // Metrics (non-release builds only)
    uint64_t hits;         // Allocations served from the cache
    uint64_t misses;       // Allocations that missed (fallback to mmap)
    uint64_t frees;        // Pages pushed back into the cache
    uint64_t evictions;    // Frees that found the cache full
#endif
} HotPageCache;
// Hot page cache operations. Only the declarations live in this header; the
// notes below restate the documented contract, not a visible implementation.

// Initialize hot page cache (called at startup or lazy init).
//   cache    - cache object to initialize
//   capacity - number of slots
//              NOTE(review): presumably the value of page_arena_hot_size();
//              confirm against the caller.
void hot_page_cache_init(HotPageCache* cache, int capacity);
// Shutdown hot page cache (called at cleanup).
// NOTE(review): whether cached pages are unmapped here is not visible from
// this header - confirm in the implementation.
void hot_page_cache_shutdown(HotPageCache* cache);
// Allocate one 4KB page from the hot cache.
// Returns the page pointer, or NULL on a cache miss (the caller is expected
// to fall back to another allocation path).
void* hot_page_alloc(HotPageCache* cache);
// Return a 4KB page to the hot cache.
// Documented as "may evict if cache full"; what happens to an evicted page is
// decided by the implementation, which is not shown here.
void hot_page_free(HotPageCache* cache, void* page);
// ============================================================================
// Box PA2: Warm Span Cache (64KB-2MB spans)
// ============================================================================
#define PA_WARM_64K_DEFAULT_SIZE 256   // Default: 256 slots x 64KB  = 16MB cached
#define PA_WARM_128K_DEFAULT_SIZE 128  // Default: 128 slots x 128KB = 16MB cached
#define PA_WARM_2M_DEFAULT_SIZE 64     // Default: 64 slots  x 2MB   = 128MB cached

// WarmSpanCache - bookkeeping for the Box PA2 cache of large spans.
//
// Spans are kept in three independent buckets (64KB, 128KB, 2MB). Each bucket
// is a pointer array with its own capacity and occupancy; a single mutex
// covers all three.
//
// Layout note: the metric counters exist only when HAKMEM_BUILD_RELEASE is 0,
// so sizeof(WarmSpanCache) differs between release and non-release builds.
typedef struct {
    // Bucket: 64KB spans
    void** spans_64k;      // Slot array
    int capacity_64k;      // Maximum slots
    int count_64k;         // Occupied slots

    // Bucket: 128KB spans
    void** spans_128k;     // Slot array
    int capacity_128k;     // Maximum slots
    int count_128k;        // Occupied slots

    // Bucket: 2MB spans
    void** spans_2m;       // Slot array
    int capacity_2m;       // Maximum slots
    int count_2m;          // Occupied slots

    pthread_mutex_t lock;  // Mutex for multi-thread safety (shared by all buckets)

#if !HAKMEM_BUILD_RELEASE
    // Metrics (non-release builds only)
    uint64_t hits_64k;     // Allocation hits, 64KB bucket
    uint64_t hits_128k;    // Allocation hits, 128KB bucket
    uint64_t hits_2m;      // Allocation hits, 2MB bucket
    uint64_t misses;       // Allocation misses (one counter for all buckets)
    uint64_t frees_64k;    // Frees pushed to the 64KB bucket
    uint64_t frees_128k;   // Frees pushed to the 128KB bucket
    uint64_t frees_2m;     // Frees pushed to the 2MB bucket
    uint64_t evictions;    // Evictions (one counter for all buckets)
#endif
} WarmSpanCache;
// Warm span cache operations. Only the declarations live in this header; the
// notes below restate the documented contract, not a visible implementation.

// Initialize warm span cache (called at startup or lazy init).
//   cap_64k / cap_128k / cap_2m - slot counts for the 64KB, 128KB and 2MB buckets
//   NOTE(review): presumably fed from page_arena_warm_*_size(); confirm
//   against the caller.
void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m);
// Shutdown warm span cache (called at cleanup).
void warm_span_cache_shutdown(WarmSpanCache* cache);
// Allocate an aligned span of `size` bytes from the warm cache.
// Returns the span pointer, or NULL on a cache miss.
// NOTE(review): how `size` maps onto the 64KB/128KB/2MB buckets (exact match
// vs. round-up) is not visible from this header - confirm in the implementation.
void* warm_span_alloc(WarmSpanCache* cache, size_t size);
// Return an aligned span of `size` bytes to the warm cache.
// Documented as "may evict if cache full"; the eviction policy is decided by
// the implementation, which is not shown here.
void warm_span_free(WarmSpanCache* cache, void* span, size_t size);
// ============================================================================
// Box PA3: Unified PageArena (combines PA1 + PA2 + Cold Path)
// ============================================================================
// PageArena - aggregate of the two cache tiers plus arena-wide counters.
//
// Both caches are embedded by value, so a PageArena owns its HotPageCache and
// WarmSpanCache storage directly. The counters exist only when
// HAKMEM_BUILD_RELEASE is 0, which changes sizeof(PageArena) between release
// and non-release builds.
typedef struct {
    HotPageCache hot;       // Box PA1: 4KB page cache
    WarmSpanCache warm;     // Box PA2: 64KB-2MB span cache

#if !HAKMEM_BUILD_RELEASE
    // Metrics (non-release builds only)
    uint64_t total_allocs;  // Allocation requests
    uint64_t total_frees;   // Free requests
    uint64_t mmap_calls;    // Cold path fallback count
#endif
} PageArena;
// Page arena instance. The declaration is thread-local (`__thread`), so each
// thread refers to its own PageArena; the definition lives outside this header.
// NOTE(review): the embedded caches still carry a pthread_mutex_t although the
// arena is per-thread - confirm whether an arena is ever touched by another thread.
extern __thread PageArena g_page_arena;
// ============================================================================
// ENV Control (cached, lazy init)
// ============================================================================
// Report whether PageArena is switched on (default: 0, OFF).
//
// HAKMEM_PAGE_ARENA_ENABLE is read on the first call and the answer is cached
// in a function-local static. The feature counts as enabled when the variable
// is set, non-empty, and its first character is not '0'.
// Non-release builds print one line to stderr when the result is "enabled".
//
// Returns 1 if enabled, 0 otherwise.
static inline int page_arena_enabled(void) {
    static int cached = -1;  // -1 = environment not consulted yet

    // Fast path: already resolved.
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }

    const char* env = getenv("HAKMEM_PAGE_ARENA_ENABLE");
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        cached = 1;
    } else {
        cached = 0;
    }

#if !HAKMEM_BUILD_RELEASE
    if (cached) {
        fprintf(stderr, "[PageArena-INIT] page_arena_enabled() = %d\n", cached);
        fflush(stderr);
    }
#endif
    return cached;
}
// Hot page cache size in slots (default: PA_HOT_PAGE_DEFAULT_SIZE).
//
// HAKMEM_PAGE_ARENA_HOT_SIZE is read on the first call and the result is
// cached in a function-local static. An unset or empty variable selects the
// default. Any parsed value is clamped to [64, 4096]; text with no leading
// number parses as 0 and therefore yields 64.
// Non-release builds print the chosen size to stderr once.
//
// Returns the slot count, always within [64, 4096].
static inline int page_arena_hot_size(void) {
    static int g_size = -1;  // -1 = not resolved yet (never a valid result: minimum is 64)
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_HOT_SIZE");
        // strtol() rather than atoi(): atoi() has undefined behavior when the
        // number does not fit in int, and this string comes from the
        // environment. strtol() saturates at LONG_MIN/LONG_MAX instead, which
        // the clamp below folds into the valid range.
        long v = (e && *e) ? strtol(e, NULL, 10) : (long)PA_HOT_PAGE_DEFAULT_SIZE;
        if (v < 64) v = 64;
        if (v > 4096) v = 4096;
        // Store only the final clamped value so g_size never holds an
        // out-of-range intermediate.
        g_size = (int)v;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] hot_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 64KB span cache size in slots (default: PA_WARM_64K_DEFAULT_SIZE).
//
// HAKMEM_PAGE_ARENA_WARM_64K is read on the first call and the result is
// cached in a function-local static. An unset or empty variable selects the
// default. Any parsed value is clamped to [16, 1024]; text with no leading
// number parses as 0 and therefore yields 16.
// Non-release builds print the chosen size to stderr once.
//
// Returns the slot count, always within [16, 1024].
static inline int page_arena_warm_64k_size(void) {
    static int g_size = -1;  // -1 = not resolved yet (never a valid result: minimum is 16)
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_64K");
        // strtol() rather than atoi(): atoi() has undefined behavior when the
        // number does not fit in int; strtol() saturates at LONG_MIN/LONG_MAX,
        // which the clamp below folds into the valid range.
        long v = (e && *e) ? strtol(e, NULL, 10) : (long)PA_WARM_64K_DEFAULT_SIZE;
        if (v < 16) v = 16;
        if (v > 1024) v = 1024;
        // Store only the final clamped value so g_size never holds an
        // out-of-range intermediate.
        g_size = (int)v;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_64k_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 128KB span cache size in slots (default: PA_WARM_128K_DEFAULT_SIZE).
//
// HAKMEM_PAGE_ARENA_WARM_128K is read on the first call and the result is
// cached in a function-local static. An unset or empty variable selects the
// default. Any parsed value is clamped to [8, 512]; text with no leading
// number parses as 0 and therefore yields 8.
// Non-release builds print the chosen size to stderr once.
//
// Returns the slot count, always within [8, 512].
static inline int page_arena_warm_128k_size(void) {
    static int g_size = -1;  // -1 = not resolved yet (never a valid result: minimum is 8)
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_128K");
        // strtol() rather than atoi(): atoi() has undefined behavior when the
        // number does not fit in int; strtol() saturates at LONG_MIN/LONG_MAX,
        // which the clamp below folds into the valid range.
        long v = (e && *e) ? strtol(e, NULL, 10) : (long)PA_WARM_128K_DEFAULT_SIZE;
        if (v < 8) v = 8;
        if (v > 512) v = 512;
        // Store only the final clamped value so g_size never holds an
        // out-of-range intermediate.
        g_size = (int)v;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_128k_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 2MB span cache size in slots (default: PA_WARM_2M_DEFAULT_SIZE).
//
// HAKMEM_PAGE_ARENA_WARM_2M is read on the first call and the result is
// cached in a function-local static. An unset or empty variable selects the
// default. Any parsed value is clamped to [4, 256]; text with no leading
// number parses as 0 and therefore yields 4.
// Non-release builds print the chosen size to stderr once.
//
// Returns the slot count, always within [4, 256].
static inline int page_arena_warm_2m_size(void) {
    static int g_size = -1;  // -1 = not resolved yet (never a valid result: minimum is 4)
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_2M");
        // strtol() rather than atoi(): atoi() has undefined behavior when the
        // number does not fit in int; strtol() saturates at LONG_MIN/LONG_MAX,
        // which the clamp below folds into the valid range.
        long v = (e && *e) ? strtol(e, NULL, 10) : (long)PA_WARM_2M_DEFAULT_SIZE;
        if (v < 4) v = 4;
        if (v > 256) v = 256;
        // Store only the final clamped value so g_size never holds an
        // out-of-range intermediate.
        g_size = (int)v;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_2m_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// ============================================================================
// Public API (Box PA3: Unified PageArena)
// ============================================================================
// Unified PageArena operations. Only the declarations live in this header; the
// notes below restate the documented contract, not a visible implementation.

// Initialize PageArena (called at thread start or lazy on first access).
void page_arena_init(PageArena* arena);
// Shutdown PageArena (called at thread exit).
void page_arena_shutdown(PageArena* arena);
// Allocate pages (any size, uses hot/warm/cold based on size).
//   size - requested size in bytes
//          NOTE(review): rounding and the size thresholds between the tiers
//          are not visible from this header - confirm in the implementation.
// Returns: Pointer to allocated pages, or NULL if failed
void* page_arena_alloc_pages(PageArena* arena, size_t size);
// Allocate aligned span (64KB/128KB/2MB aligned).
//   size      - requested span size in bytes
//   alignment - required alignment in bytes
// Returns: Pointer to allocated span, or NULL if failed
void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment);
// Free pages (any size, uses hot/warm based on size).
// NOTE(review): `size` presumably has to match the size passed to
// page_arena_alloc_pages() for the same pointer - confirm.
void page_arena_free_pages(PageArena* arena, void* ptr, size_t size);
// Free aligned span (64KB/128KB/2MB aligned).
// NOTE(review): `size` presumably has to match the size passed to
// page_arena_alloc_aligned() for the same pointer - confirm.
void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size);
// Print stats (debug only).
// NOTE(review): the metric fields it would report exist only when
// HAKMEM_BUILD_RELEASE is 0; behavior in release builds is not visible here.
void page_arena_print_stats(PageArena* arena);
#endif // HAK_PAGE_ARENA_H