Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified TinyTLSSLL struct to improve L1D cache locality. Expected performance gain: +12-18% from reducing cache line splits (2 loads → 1 load per operation). Changes: - core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad) - core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8] - core/box/tls_sll_box.h: Update Box API (13 sites) for unified access - Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head - Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count - core/hakmem_tiny_integrity.h: Unified canary guards - core/box/integrity_box.c: Simplified canary validation - Makefile: Added core/box/tiny_sizeclass_hist_box.o to link Build: ✅ PASS (10K ops sanity test) Warnings: Only pre-existing LTO type mismatches (unrelated) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
247 lines
8.4 KiB
C
247 lines
8.4 KiB
C
// page_arena.h - Phase 24: PageArena/HotSpanBox (Mid-Large page-fault optimization)
|
|
//
|
|
// Goal: Reduce Mid/VM page-faults by 50-66% (80-100K → 30-40K)
|
|
// Target: +30-50% performance for Mid-Large (8-52KB) / L25 (64KB-2MB)
|
|
//
|
|
// Design:
|
|
// Box PA1: Hot Page Cache (4KB pages, LIFO stack, 1024 slots)
|
|
// Box PA2: Warm Span Cache (64KB-2MB spans, size-bucketed, 448 slots)
|
|
// Box PA3: Cold Path (mmap fallback when cache misses)
|
|
//
|
|
// Integration:
|
|
// - Pool TLS: chunk_ensure() → page_arena_alloc_pages()
|
|
// - L25: l25_alloc_new_run() → page_arena_alloc_aligned()
|
|
// - L25: refill_freelist() → page_arena_alloc_aligned()
|
|
//
|
|
// ENV Variables:
|
|
// HAKMEM_PAGE_ARENA_ENABLE=1 # Enable PageArena (default: 0, OFF)
|
|
// HAKMEM_PAGE_ARENA_HOT_SIZE=1024 # Hot page cache size (default: 1024)
|
|
// HAKMEM_PAGE_ARENA_WARM_64K=256 # Warm 64KB span cache size (default: 256)
|
|
// HAKMEM_PAGE_ARENA_WARM_128K=128 # Warm 128KB span cache size (default: 128)
|
|
// HAKMEM_PAGE_ARENA_WARM_2M=64 # Warm 2MB span cache size (default: 64)
|
|
|
|
#ifndef HAK_PAGE_ARENA_H
|
|
#define HAK_PAGE_ARENA_H
|
|
|
|
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "hakmem_build_flags.h"
|
|
|
|
// ============================================================================
|
|
// Box PA1: Hot Page Cache (4KB pages)
|
|
// ============================================================================
|
|
|
|
#define PA_HOT_PAGE_DEFAULT_SIZE 1024 // 1024 slots = 4MB cache
|
|
|
|
// Hot page cache state (Box PA1): a slot array of cached 4KB pages plus
// bookkeeping. Field order is part of the layout and must not change.
typedef struct {
    void **pages;           // Dynamically allocated slot array; each slot holds one 4KB page
    int capacity;           // Maximum number of slots. Documented as a power of 2.
                            // NOTE(review): page_arena_hot_size() only clamps to
                            // [64, 4096] and does not round - confirm the .c file copes.
    int count;              // Number of slots currently occupied
    pthread_mutex_t lock;   // Mutex for multi-threaded access

    // Counters below exist only in non-release builds.
#if !HAKMEM_BUILD_RELEASE
    uint64_t hits;          // Allocations served from the cache
    uint64_t misses;        // Allocations that missed (fallback to mmap)
    uint64_t frees;         // Pages pushed back into the cache
    uint64_t evictions;     // Frees that found the cache full
#endif
} HotPageCache;
|
|
|
|
// Initialize hot page cache (called at startup or lazy init)
|
|
void hot_page_cache_init(HotPageCache* cache, int capacity);
|
|
|
|
// Shutdown hot page cache (called at cleanup)
|
|
void hot_page_cache_shutdown(HotPageCache* cache);
|
|
|
|
// Allocate 4KB page from hot cache (returns NULL if cache miss)
|
|
void* hot_page_alloc(HotPageCache* cache);
|
|
|
|
// Free 4KB page to hot cache (may evict if cache full)
|
|
void hot_page_free(HotPageCache* cache, void* page);
|
|
|
|
// ============================================================================
|
|
// Box PA2: Warm Span Cache (64KB-2MB spans)
|
|
// ============================================================================
|
|
|
|
#define PA_WARM_64K_DEFAULT_SIZE 256 // 256 slots = 16MB cache
|
|
#define PA_WARM_128K_DEFAULT_SIZE 128 // 128 slots = 16MB cache
|
|
#define PA_WARM_2M_DEFAULT_SIZE 64 // 64 slots = 128MB cache
|
|
|
|
// Warm span cache state (Box PA2): three size buckets (64KB, 128KB, 2MB),
// each with its own slot array, capacity and occupancy counter.
// Field order is part of the layout and must not change.
typedef struct {
    // 64KB bucket
    void **spans_64k;       // Slot array of cached 64KB spans
    int capacity_64k;       // Maximum slots in the 64KB bucket
    int count_64k;          // Occupied slots in the 64KB bucket

    // 128KB bucket
    void **spans_128k;      // Slot array of cached 128KB spans
    int capacity_128k;      // Maximum slots in the 128KB bucket
    int count_128k;         // Occupied slots in the 128KB bucket

    // 2MB bucket
    void **spans_2m;        // Slot array of cached 2MB spans
    int capacity_2m;        // Maximum slots in the 2MB bucket
    int count_2m;           // Occupied slots in the 2MB bucket

    pthread_mutex_t lock;   // Mutex for multi-threaded access (one lock field for the struct)

    // Counters below exist only in non-release builds.
#if !HAKMEM_BUILD_RELEASE
    uint64_t hits_64k;      // Cache hits, 64KB bucket
    uint64_t hits_128k;     // Cache hits, 128KB bucket
    uint64_t hits_2m;       // Cache hits, 2MB bucket
    uint64_t misses;        // Cache misses (not split per bucket)
    uint64_t frees_64k;     // Frees into the 64KB bucket
    uint64_t frees_128k;    // Frees into the 128KB bucket
    uint64_t frees_2m;      // Frees into the 2MB bucket
    uint64_t evictions;     // Evictions (not split per bucket)
#endif
} WarmSpanCache;
|
|
|
|
// Initialize warm span cache (called at startup or lazy init)
|
|
void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m);
|
|
|
|
// Shutdown warm span cache (called at cleanup)
|
|
void warm_span_cache_shutdown(WarmSpanCache* cache);
|
|
|
|
// Allocate aligned span from warm cache (returns NULL if cache miss)
|
|
void* warm_span_alloc(WarmSpanCache* cache, size_t size);
|
|
|
|
// Free aligned span to warm cache (may evict if cache full)
|
|
void warm_span_free(WarmSpanCache* cache, void* span, size_t size);
|
|
|
|
// ============================================================================
|
|
// Box PA3: Unified PageArena (combines PA1 + PA2 + Cold Path)
|
|
// ============================================================================
|
|
|
|
typedef struct {
|
|
HotPageCache hot; // Box PA1: 4KB pages
|
|
WarmSpanCache warm; // Box PA2: 64KB-2MB spans
|
|
|
|
// Metrics (debug only)
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
uint64_t total_allocs;
|
|
uint64_t total_frees;
|
|
uint64_t mmap_calls; // Cold path fallback count
|
|
#endif
|
|
} PageArena;
|
|
|
|
// Per-thread page arena (declared __thread, so each thread has its own instance)
|
|
extern __thread PageArena g_page_arena;
|
|
|
|
// ============================================================================
|
|
// ENV Control (cached, lazy init)
|
|
// ============================================================================
|
|
|
|
// Enable flag (default: 0, OFF).
//
// Reads HAKMEM_PAGE_ARENA_ENABLE on the first call and caches the answer in a
// function-local static; later calls return the cached value without touching
// the environment. The arena is enabled when the variable is set, non-empty,
// and its first character is not '0'.
//
// Returns: 1 if enabled, 0 otherwise.
//
// Because this is a static inline function in a header, every translation unit
// that includes the header carries its own cached copy.
static inline int page_arena_enabled(void) {
    static int cached_flag = -1;   // -1 = environment not consulted yet

    // Fast path: already resolved.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char *env_value = getenv("HAKMEM_PAGE_ARENA_ENABLE");
    if (env_value == NULL || env_value[0] == '\0' || env_value[0] == '0') {
        cached_flag = 0;
    } else {
        cached_flag = 1;
    }

#if !HAKMEM_BUILD_RELEASE
    // Non-release builds announce the decision once, and only when enabled.
    if (cached_flag) {
        fprintf(stderr, "[PageArena-INIT] page_arena_enabled() = %d\n", cached_flag);
        fflush(stderr);
    }
#endif

    return cached_flag;
}
|
|
|
|
// Hot page cache size (default: 1024)
|
|
static inline int page_arena_hot_size(void) {
|
|
static int g_size = -1;
|
|
if (__builtin_expect(g_size == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_PAGE_ARENA_HOT_SIZE");
|
|
g_size = (e && *e) ? atoi(e) : PA_HOT_PAGE_DEFAULT_SIZE;
|
|
if (g_size < 64) g_size = 64;
|
|
if (g_size > 4096) g_size = 4096;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
fprintf(stderr, "[PageArena-INIT] hot_size = %d\n", g_size);
|
|
fflush(stderr);
|
|
#endif
|
|
}
|
|
return g_size;
|
|
}
|
|
|
|
// Warm 64KB span cache size (default: 256)
|
|
static inline int page_arena_warm_64k_size(void) {
|
|
static int g_size = -1;
|
|
if (__builtin_expect(g_size == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_64K");
|
|
g_size = (e && *e) ? atoi(e) : PA_WARM_64K_DEFAULT_SIZE;
|
|
if (g_size < 16) g_size = 16;
|
|
if (g_size > 1024) g_size = 1024;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
fprintf(stderr, "[PageArena-INIT] warm_64k_size = %d\n", g_size);
|
|
fflush(stderr);
|
|
#endif
|
|
}
|
|
return g_size;
|
|
}
|
|
|
|
// Warm 128KB span cache size (default: 128)
|
|
static inline int page_arena_warm_128k_size(void) {
|
|
static int g_size = -1;
|
|
if (__builtin_expect(g_size == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_128K");
|
|
g_size = (e && *e) ? atoi(e) : PA_WARM_128K_DEFAULT_SIZE;
|
|
if (g_size < 8) g_size = 8;
|
|
if (g_size > 512) g_size = 512;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
fprintf(stderr, "[PageArena-INIT] warm_128k_size = %d\n", g_size);
|
|
fflush(stderr);
|
|
#endif
|
|
}
|
|
return g_size;
|
|
}
|
|
|
|
// Warm 2MB span cache size (default: 64)
|
|
static inline int page_arena_warm_2m_size(void) {
|
|
static int g_size = -1;
|
|
if (__builtin_expect(g_size == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_2M");
|
|
g_size = (e && *e) ? atoi(e) : PA_WARM_2M_DEFAULT_SIZE;
|
|
if (g_size < 4) g_size = 4;
|
|
if (g_size > 256) g_size = 256;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
fprintf(stderr, "[PageArena-INIT] warm_2m_size = %d\n", g_size);
|
|
fflush(stderr);
|
|
#endif
|
|
}
|
|
return g_size;
|
|
}
|
|
|
|
// ============================================================================
|
|
// Public API (Box PA3: Unified PageArena)
|
|
// ============================================================================
|
|
|
|
// Initialize PageArena (called at thread start or lazy on first access)
|
|
void page_arena_init(PageArena* arena);
|
|
|
|
// Shutdown PageArena (called at thread exit)
|
|
void page_arena_shutdown(PageArena* arena);
|
|
|
|
// Allocate pages (any size, uses hot/warm/cold based on size)
|
|
// Returns: Pointer to allocated pages, or NULL if failed
|
|
void* page_arena_alloc_pages(PageArena* arena, size_t size);
|
|
|
|
// Allocate aligned span (64KB/128KB/2MB aligned)
|
|
// Returns: Pointer to allocated span, or NULL if failed
|
|
void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment);
|
|
|
|
// Free pages (any size, uses hot/warm based on size)
|
|
void page_arena_free_pages(PageArena* arena, void* ptr, size_t size);
|
|
|
|
// Free aligned span (64KB/128KB/2MB aligned)
|
|
void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size);
|
|
|
|
// Print stats (debug only)
|
|
void page_arena_print_stats(PageArena* arena);
|
|
|
|
#endif // HAK_PAGE_ARENA_H
|