// page_arena.h - Phase 24: PageArena/HotSpanBox (Mid-Large page-fault optimization)
//
// Goal:   Reduce Mid/VM page-faults by 50-66% (80-100K → 30-40K)
// Target: +30-50% performance for Mid-Large (8-52KB) / L25 (64KB-2MB)
// Status: Deferred after Phase 24 benchmarks showed <1% page-fault reduction;
//         the existing Arena/Pool/L25 layers are already page-optimal, so
//         PageArena stays OFF by default
//
// Design:
//   Box PA1: Hot Page Cache  (4KB pages, LIFO stack, 1024 slots)
//   Box PA2: Warm Span Cache (64KB-2MB spans, size-bucketed, 448 slots)
//   Box PA3: Cold Path       (mmap fallback when the caches miss)
//
// Integration:
//   - Pool TLS: chunk_ensure()       → page_arena_alloc_pages()
//   - L25:      l25_alloc_new_run()  → page_arena_alloc_aligned()
//   - L25:      refill_freelist()    → page_arena_alloc_aligned()
//
// ENV Variables:
//   HAKMEM_PAGE_ARENA_ENABLE=1        # Enable PageArena (default: 0, OFF)
//   HAKMEM_PAGE_ARENA_HOT_SIZE=1024   # Hot page cache size (default: 1024)
//   HAKMEM_PAGE_ARENA_WARM_64K=256    # Warm 64KB span cache size (default: 256)
//   HAKMEM_PAGE_ARENA_WARM_128K=128   # Warm 128KB span cache size (default: 128)
//   HAKMEM_PAGE_ARENA_WARM_2M=64     # Warm 2MB span cache size (default: 64)
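//
// Example invocation (illustrative only; "./bench_mid_mt" is a hypothetical
// benchmark binary, not something this header provides):
//   HAKMEM_PAGE_ARENA_ENABLE=1 HAKMEM_PAGE_ARENA_HOT_SIZE=2048 ./bench_mid_mt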
#ifndef HAK_PAGE_ARENA_H
#define HAK_PAGE_ARENA_H
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include "hakmem_build_flags.h"
#include <stdio.h> // debug logging
// ============================================================================
// Box PA1: Hot Page Cache (4KB pages)
// ============================================================================
#define PA_HOT_PAGE_DEFAULT_SIZE 1024 // 1024 slots = 4MB cache
typedef struct {
    void**          pages;     // Dynamic array of 4KB pages
    int             capacity;  // Max slots (power of 2)
    int             count;     // Current occupancy
    pthread_mutex_t lock;      // Lock for MT safety
    // Metrics (debug only)
#if !HAKMEM_BUILD_RELEASE
    uint64_t hits;       // Alloc hits
    uint64_t misses;     // Alloc misses (fallback to mmap)
    uint64_t frees;      // Free pushes
    uint64_t evictions;  // Free evictions (cache full)
#endif
} HotPageCache;

// Initialize hot page cache (called at startup or lazy init)
void hot_page_cache_init(HotPageCache* cache, int capacity);

// Shutdown hot page cache (called at cleanup)
void hot_page_cache_shutdown(HotPageCache* cache);

// Allocate 4KB page from hot cache (returns NULL if cache miss)
void* hot_page_alloc(HotPageCache* cache);

// Free 4KB page to hot cache (may evict if cache full)
void hot_page_free(HotPageCache* cache, void* page);
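//
// Call-pattern sketch (illustrative only; the real call sites are the Pool TLS
// and L25 integration paths, and the mmap fallback shown here stands in for
// the Box PA3 cold path; assumes <sys/mman.h>):
//
//   void* pg = hot_page_alloc(&arena->hot);        // LIFO pop; NULL on miss
//   if (pg == NULL) {
//       pg = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
//                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);  // cold path
//   }
//   /* ... use the page ... */
//   hot_page_free(&arena->hot, pg);                // push back; evicts if full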
// ============================================================================
// Box PA2: Warm Span Cache (64KB-2MB spans)
// ============================================================================
#define PA_WARM_64K_DEFAULT_SIZE 256 // 256 slots = 16MB cache
#define PA_WARM_128K_DEFAULT_SIZE 128 // 128 slots = 16MB cache
#define PA_WARM_2M_DEFAULT_SIZE 64 // 64 slots = 128MB cache
typedef struct {
    // 64KB spans
    void** spans_64k;
    int    capacity_64k;
    int    count_64k;
    // 128KB spans
    void** spans_128k;
    int    capacity_128k;
    int    count_128k;
    // 2MB spans
    void** spans_2m;
    int    capacity_2m;
    int    count_2m;
    pthread_mutex_t lock;  // Lock for MT safety
    // Metrics (debug only)
#if !HAKMEM_BUILD_RELEASE
    uint64_t hits_64k;
    uint64_t hits_128k;
    uint64_t hits_2m;
    uint64_t misses;
    uint64_t frees_64k;
    uint64_t frees_128k;
    uint64_t frees_2m;
    uint64_t evictions;
#endif
} WarmSpanCache;

// Initialize warm span cache (called at startup or lazy init)
void warm_span_cache_init(WarmSpanCache* cache, int cap_64k, int cap_128k, int cap_2m);

// Shutdown warm span cache (called at cleanup)
void warm_span_cache_shutdown(WarmSpanCache* cache);

// Allocate aligned span from warm cache (returns NULL if cache miss)
void* warm_span_alloc(WarmSpanCache* cache, size_t size);

// Free aligned span to warm cache (may evict if cache full)
void warm_span_free(WarmSpanCache* cache, void* span, size_t size);
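//
// Bucketing sketch (illustrative only; only the three fixed sizes hit the
// cache, and anything else is expected to fall through to the cold path):
//
//   void* run = warm_span_alloc(&arena->warm, 2u << 20);  // 2MB bucket
//   if (run == NULL) { /* miss: caller maps a fresh span (Box PA3) */ }
//   /* ... carve the run ... */
//   warm_span_free(&arena->warm, run, 2u << 20);          // may evict if full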
// ============================================================================
// Box PA3: Unified PageArena (combines PA1 + PA2 + Cold Path)
// ============================================================================
typedef struct {
    HotPageCache  hot;   // Box PA1: 4KB pages
    WarmSpanCache warm;  // Box PA2: 64KB-2MB spans
    // Metrics (debug only)
#if !HAKMEM_BUILD_RELEASE
    uint64_t total_allocs;
    uint64_t total_frees;
    uint64_t mmap_calls;  // Cold path fallback count
#endif
} PageArena;
// Global page arena (one instance per thread via TLS)
extern __thread PageArena g_page_arena;
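//
// Gated-access sketch (illustrative; mirrors the pool_tls_arena.c and L25
// hooks described above; whether page_arena_init() is safe to call repeatedly
// is an assumption here, so callers may need to track their own init flag):
//
//   if (page_arena_enabled()) {
//       page_arena_init(&g_page_arena);                      // lazy init
//       void* p = page_arena_alloc_pages(&g_page_arena, n_bytes);
//       /* ... */
//   }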
// ============================================================================
// ENV Control (cached, lazy init)
// ============================================================================
// Enable flag (default: 0, OFF)
static inline int page_arena_enabled(void) {
    static int g_enable = -1;
    if (__builtin_expect(g_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_ENABLE");
        g_enable = (e && *e && *e != '0') ? 1 : 0;
#if !HAKMEM_BUILD_RELEASE
        if (g_enable) {
            fprintf(stderr, "[PageArena-INIT] page_arena_enabled() = %d\n", g_enable);
            fflush(stderr);
        }
#endif
    }
    return g_enable;
}
// Hot page cache size (default: 1024)
static inline int page_arena_hot_size(void) {
    static int g_size = -1;
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_HOT_SIZE");
        g_size = (e && *e) ? atoi(e) : PA_HOT_PAGE_DEFAULT_SIZE;
        if (g_size < 64) g_size = 64;
        if (g_size > 4096) g_size = 4096;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] hot_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 64KB span cache size (default: 256)
static inline int page_arena_warm_64k_size(void) {
    static int g_size = -1;
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_64K");
        g_size = (e && *e) ? atoi(e) : PA_WARM_64K_DEFAULT_SIZE;
        if (g_size < 16) g_size = 16;
        if (g_size > 1024) g_size = 1024;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_64k_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 128KB span cache size (default: 128)
static inline int page_arena_warm_128k_size(void) {
    static int g_size = -1;
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_128K");
        g_size = (e && *e) ? atoi(e) : PA_WARM_128K_DEFAULT_SIZE;
        if (g_size < 8) g_size = 8;
        if (g_size > 512) g_size = 512;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_128k_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// Warm 2MB span cache size (default: 64)
static inline int page_arena_warm_2m_size(void) {
    static int g_size = -1;
    if (__builtin_expect(g_size == -1, 0)) {
        const char* e = getenv("HAKMEM_PAGE_ARENA_WARM_2M");
        g_size = (e && *e) ? atoi(e) : PA_WARM_2M_DEFAULT_SIZE;
        if (g_size < 4) g_size = 4;
        if (g_size > 256) g_size = 256;
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[PageArena-INIT] warm_2m_size = %d\n", g_size);
        fflush(stderr);
#endif
    }
    return g_size;
}
// ============================================================================
// Public API (Box PA3: Unified PageArena)
// ============================================================================
// Initialize PageArena (called at thread start, or lazily on first access)
void page_arena_init(PageArena* arena);

// Shutdown PageArena (called at thread exit)
void page_arena_shutdown(PageArena* arena);

// Allocate pages (any size; routes to hot/warm/cold path based on size)
// Returns: pointer to allocated pages, or NULL on failure
void* page_arena_alloc_pages(PageArena* arena, size_t size);

// Allocate aligned span (64KB/128KB/2MB aligned)
// Returns: pointer to allocated span, or NULL on failure
void* page_arena_alloc_aligned(PageArena* arena, size_t size, size_t alignment);

// Free pages (any size; routes to hot/warm cache based on size)
void page_arena_free_pages(PageArena* arena, void* ptr, size_t size);

// Free aligned span (64KB/128KB/2MB aligned)
void page_arena_free_aligned(PageArena* arena, void* ptr, size_t size);

// Print stats (debug only)
void page_arena_print_stats(PageArena* arena);
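//
// End-to-end lifecycle sketch (illustrative; the sizes are arbitrary, and
// whether print_stats compiles to a no-op in release builds is an assumption):
//
//   PageArena* pa = &g_page_arena;
//   page_arena_init(pa);
//   void* pages = page_arena_alloc_pages(pa, 16 * 4096);            // hot/cold
//   void* run   = page_arena_alloc_aligned(pa, 2u << 20, 2u << 20); // warm/cold
//   page_arena_free_pages(pa, pages, 16 * 4096);
//   page_arena_free_aligned(pa, run, 2u << 20);
//   page_arena_print_stats(pa);
//   page_arena_shutdown(pa);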
#endif // HAK_PAGE_ARENA_H