// ============================================================================
// hakmem_pool.c - L2 Hybrid Pool Implementation (Mid-Size: 2-32KiB)
// ============================================================================
//
// Size class definitions:
// ┌──────────┬──────────┬──────────────┬──────────────┐
// │ Class    │ Size     │ Initial CAP  │ Page layout  │
// ├──────────┼──────────┼──────────────┼──────────────┤
// │ Class 0  │ 2 KiB    │ 64 pages     │ 32 blocks/p  │
// │ Class 1  │ 4 KiB    │ 64 pages     │ 16 blocks/p  │
// │ Class 2  │ 8 KiB    │ 64 pages     │ 8 blocks/p   │
// │ Class 3  │ 16 KiB   │ 32 pages     │ 4 blocks/p   │
// │ Class 4  │ 32 KiB   │ 16 pages     │ 2 blocks/p   │
// │ DYN1     │ 6 KiB*   │ 0 (disabled) │ variable     │
// │ DYN2     │ (unused) │ 0 (disabled) │ variable     │
// └──────────┴──────────┴──────────────┴──────────────┘
// * DYN1 is a dynamic class intended to fill the gap (8-16KB)
//
// W_MAX (round-up tolerance factor):
// - Meaning: how many times the requested size a class may be and still be used
// - Default: 1.40 (tolerates rounding up by up to 40%)
// - Example: 3 KiB request → 4 KiB class is OK (1.33x < 1.40)
// - Env var: override with HAKMEM_WMAX_MID=1.6
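// - Counter-example (illustrative arithmetic): a 5 KiB request rounds up to
//   the 8 KiB class, a ratio of 1.60x > 1.40, so it is rejected at the
//   default; HAKMEM_WMAX_MID=1.6 would admit it.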
//
// CAP (inventory):
// - Meaning: maximum number of pages retained per class
// - Initial values: {64,64,64,32,16} - conservative (footprint first)
// - Recommended: {256,256,256,128,64} - performance first
// - Env var: set with HAKMEM_CAP_MID=256,256,256,128,64
// - Learning mode: HAKMEM_LEARN=1 enables automatic tuning
//
// TLSリング構造:
// - POOL_L2_RING_CAP: リングバッファ容量デフォルト16
// - ActivePage A/B: bump-run方式ロックフリー
// - LIFO overflow: リングから溢れた分
//
// Performance tuning:
// 1. Quadruple the initial CAP: HAKMEM_CAP_MID=256,256,256,128,64
// 2. Relax W_MAX: HAKMEM_WMAX_MID=1.6
// 3. Enable DYN1: HAKMEM_MID_DYN1=6144 HAKMEM_CAP_MID_DYN1=64
// 4. Learning mode: HAKMEM_LEARN=1
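//
// Example invocation (illustrative; combines the four knobs above with the
// recommended values; "./app" stands for any binary using this allocator):
//   HAKMEM_CAP_MID=256,256,256,128,64 HAKMEM_WMAX_MID=1.6 \
//   HAKMEM_MID_DYN1=6144 HAKMEM_CAP_MID_DYN1=64 HAKMEM_LEARN=1 ./app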
//
// License: MIT
// Last Updated: 2025-10-26 (Code Cleanup complete)
#include "hakmem_pool.h"
#include "hakmem_config.h"
#include "hakmem_build_flags.h" // Phase 29: HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
#include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC
#include "box/pool_hotbox_v2_header_box.h"
#include "hakmem_syscall.h" // Box 3 syscall layer (bypasses LD_PRELOAD)
#include "box/pool_hotbox_v2_box.h"
#include "box/pool_zero_mode_box.h" // Zeroing policy (env cached)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/mman.h>
#include <pthread.h>
#include <stdatomic.h>
#include "hakmem_prof.h"
#include "hakmem_policy.h" // FrozenPolicy caps (Soft CAP gating)
#include "hakmem_debug.h"
#define POOL_HOTBOX_V2_HEADER_BYTES ((size_t)sizeof(void*))
// Use an over-sized mapping to guarantee POOL_PAGE_SIZE alignment for the
// v2 page base. This keeps page_of() O(1) without relying on mmap alignment.
#define POOL_HOTBOX_V2_MAP_LEN (POOL_PAGE_SIZE * 2)
// False sharing mitigation: padded mutex type (64B)
typedef struct { pthread_mutex_t m; char _pad[64 - (sizeof(pthread_mutex_t) % 64)]; } PaddedMutex;
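// Sanity check (added sketch): the pad arithmetic above always rounds the
// struct up to a whole number of 64B cache lines, so adjacent mutexes in an
// array never share a line.
_Static_assert(sizeof(PaddedMutex) % 64 == 0, "PaddedMutex must fill whole cache lines");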
// ===========================================================================
// Internal Data Structures
// ===========================================================================
#include "box/pool_tls_types.inc.h"
// Mid page descriptor registry (64KiB pages → {class_idx, owner_tid})
#include "box/pool_mid_desc.inc.h"
// ---------------- Transfer Cache (per-thread per-class inbox) --------------
#include "box/pool_mid_tc.inc.h"
#include "box/pool_mf2_types.inc.h"
// --- MF2 Initialization Functions ---
// Thread-safe initialization using pthread_once
static pthread_once_t mf2_page_registry_init_control = PTHREAD_ONCE_INIT;
static void mf2_page_registry_init_impl(void) {
// Initialize all page slots to NULL
memset(&g_mf2_page_registry, 0, sizeof(g_mf2_page_registry));
// Initialize 256 coarse-grained locks for registry updates
for (int i = 0; i < 256; i++) {
pthread_mutex_init(&g_mf2_page_registry.locks[i], NULL);
}
// Initialize counters
atomic_store(&g_mf2_page_registry.total_pages, 0);
atomic_store(&g_mf2_page_registry.active_pages, 0);
}
static void mf2_page_registry_init(void) {
pthread_once(&mf2_page_registry_init_control, mf2_page_registry_init_impl);
}
// Strategy A: ThreadPages destructor (cleanup on thread exit)
static void mf2_thread_pages_destructor(void* arg) {
MF2_ThreadPages* tp = (MF2_ThreadPages*)arg;
if (!tp) return;
// SAFETY: Don't remove from global registry or free memory
// Reason: Causes "malloc(): unsorted double linked list corrupted" crashes
// Tradeoff: Small memory leak (one ThreadPages struct per thread lifetime)
// TODO: Investigate safe cleanup mechanism
// Remove from global registry (DISABLED for safety)
// for (int i = 0; i < atomic_load_explicit(&g_num_thread_pages, memory_order_acquire); i++) {
// if (atomic_load_explicit((atomic_uintptr_t*)&g_all_thread_pages[i], memory_order_acquire) == (uintptr_t)tp) {
// atomic_store_explicit((atomic_uintptr_t*)&g_all_thread_pages[i], 0, memory_order_release);
// break;
// }
// }
// Free all pages owned by this thread (DISABLED for safety)
// hkm_libc_free(tp);
(void)tp; // Suppress unused warning
}
// Strategy A: Initialize pthread_key (once only)
static void mf2_init_tls_key(void) {
pthread_key_create(&g_mf2_tls_key, mf2_thread_pages_destructor);
}
// Helper: rdtsc() - Read CPU timestamp counter (for Route P idle detection)
static inline uint64_t mf2_rdtsc(void) {
#if defined(__x86_64__) || defined(__i386__)
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)hi << 32) | lo;
#else
// Fallback for non-x86 architectures (use clock_gettime approximation)
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
}
static MF2_ThreadPages* mf2_thread_pages_get(void) {
if (t_mf2_pages) return t_mf2_pages;
// Initialize pthread_key (once only)
pthread_once(&g_mf2_key_once, mf2_init_tls_key);
// Allocate thread-local page lists
MF2_ThreadPages* tp = (MF2_ThreadPages*)hkm_libc_calloc(1, sizeof(MF2_ThreadPages));
if (!tp) return NULL;
// Initialize with current thread ID
tp->my_tid = pthread_self();
// All page lists start empty (NULL)
for (int c = 0; c < POOL_NUM_CLASSES; c++) {
tp->active_page[c] = NULL;
tp->full_pages[c] = NULL;
atomic_store_explicit(&tp->pages_remote_pending[c], 0, memory_order_relaxed);
atomic_flag_clear_explicit(&tp->pending_claim[c], memory_order_relaxed);
tp->page_count[c] = 0;
}
// Route P: Initialize activity tracking
atomic_store_explicit(&tp->last_alloc_tsc, mf2_rdtsc(), memory_order_relaxed);
// Strategy A: Register in global array for round-robin drain
int idx = atomic_fetch_add_explicit(&g_num_thread_pages, 1, memory_order_acq_rel);
if (idx < MF2_MAX_THREADS) {
atomic_store_explicit((atomic_uintptr_t*)&g_all_thread_pages[idx], (uintptr_t)tp, memory_order_release);
// DEBUG: Log first 10 thread registrations - Disabled for performance
// static _Atomic int reg_samples = 0;
// int rs = atomic_fetch_add_explicit(&reg_samples, 1, memory_order_relaxed);
// if (rs < 10) {
// fprintf(stderr, "[TLS_REGISTER %d] tid=%lu, tp=%p, idx=%d\n",
// rs, (unsigned long)tp->my_tid, tp, idx);
// }
} else {
MF2_ERROR_LOG("Too many threads! MAX=%d", MF2_MAX_THREADS);
}
// Set pthread-specific data for destructor
pthread_setspecific(g_mf2_tls_key, tp);
t_mf2_pages = tp;
return tp;
}
// --- MF2 Page Allocation & Lookup ---
// O(1) page lookup from block address (mimalloc's secret sauce!)
static inline MidPage* mf2_addr_to_page(void* addr) {
// Step 1: Get page base address (64KB aligned)
// 0xFFFF = 65535, ~0xFFFF clears bottom 16 bits
void* page_base = (void*)((uintptr_t)addr & ~0xFFFFULL);
// Step 2: Index into registry (direct-mapped, 64K entries)
// (addr >> 16) extracts page index, & 0xFFFF wraps to registry size
size_t idx = ((uintptr_t)page_base >> 16) & (MF2_PAGE_REGISTRY_SIZE - 1);
// Step 3: Direct lookup (no hash collision handling needed with 64K entries)
MidPage* page = g_mf2_page_registry.pages[idx];
// Validation: Ensure page base matches (handles potential collisions)
if (page && page->base == page_base) {
return page;
}
// Collision or not registered (shouldn't happen in normal operation)
return NULL;
}
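// Worked example (illustrative address): addr = 0x7f3a12345678
//   page_base = addr & ~0xFFFF             → 0x7f3a12340000
//   idx       = (page_base >> 16) & 0xFFFF → bits 16..31 of the address
// With 64K direct-mapped entries, two live pages collide only when those 16
// bits match; the base-pointer check inside the function catches that case
// and returns NULL.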
// Register a page in the global registry (called once per page allocation)
static void mf2_register_page(MidPage* page) {
if (!page) return;
// Calculate registry index from page base
size_t idx = ((uintptr_t)page->base >> 16) & (MF2_PAGE_REGISTRY_SIZE - 1);
// ALIGNMENT VERIFICATION (Step 2) - DEBUG: Disabled for performance
// static int register_count = 0;
// if (register_count < 10) {
// fprintf(stderr, "[REGISTER %d] Page %p → idx %zu (aligned=%s)\n",
// register_count, page->base, idx,
// (((uintptr_t)page->base & 0xFFFF) == 0) ? "YES" : "NO");
// register_count++;
// }
// Coarse-grained lock (256 locks for 64K entries = 256 entries/lock)
int lock_idx = idx % 256;
pthread_mutex_lock(&g_mf2_page_registry.locks[lock_idx]);
// Check for collision (should be rare with 64K entries)
if (g_mf2_page_registry.pages[idx] != NULL) {
// Collision detected - this is a problem!
// For MVP, we'll just log and overwrite (TODO: handle collisions properly)
HAKMEM_LOG("[MF2] WARNING: Page registry collision at index %zu\n", idx);
}
// Register the page
g_mf2_page_registry.pages[idx] = page;
// Update counters
atomic_fetch_add_explicit(&g_mf2_page_registry.total_pages, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_mf2_page_registry.active_pages, 1, memory_order_relaxed);
pthread_mutex_unlock(&g_mf2_page_registry.locks[lock_idx]);
}
// Unregister a page from the global registry (called when returning page to OS)
__attribute__((unused)) static void mf2_unregister_page(MidPage* page) {
if (!page) return;
size_t idx = ((uintptr_t)page->base >> 16) & (MF2_PAGE_REGISTRY_SIZE - 1);
int lock_idx = idx % 256;
pthread_mutex_lock(&g_mf2_page_registry.locks[lock_idx]);
if (g_mf2_page_registry.pages[idx] == page) {
g_mf2_page_registry.pages[idx] = NULL;
atomic_fetch_sub_explicit(&g_mf2_page_registry.active_pages, 1, memory_order_relaxed);
}
pthread_mutex_unlock(&g_mf2_page_registry.locks[lock_idx]);
}
// Allocate and initialize a new 64KB page for given size class
static MidPage* mf2_alloc_new_page(int class_idx) {
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return NULL;
// Get user size class (2KB, 4KB, 8KB, 16KB, 32KB)
size_t user_size = g_class_sizes[class_idx];
if (user_size == 0) return NULL; // Dynamic class disabled
// CRITICAL FIX: Each block needs HEADER_SIZE + user_size
// The header stores metadata (AllocHeader), user_size is the usable space
size_t block_size = HEADER_SIZE + user_size;
// Step 1: Allocate 64KB page (aligned to 64KB boundary)
// CRITICAL FIX #4: Must ensure 64KB alignment!
// mmap() only guarantees 4KB alignment, breaking addr_to_page() lookup.
// This caused 97% of frees to fail silently (fatal bug!)
//
// CRITICAL FIX: Use mmap() + alignment adjustment to avoid recursion!
// Using wrapped posix_memalign with WRAP_L2=1 causes infinite recursion.
// Allocate 2x size to allow alignment adjustment
size_t alloc_size = POOL_PAGE_SIZE * 2; // 128KB
void* raw = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (raw == MAP_FAILED) {
if (g_hakem_config.ace_trace) {
fprintf(stderr, "[ACE-FAIL] MapFail: class=%d size=%zu (MidPool)\n", class_idx, alloc_size);
}
return NULL; // OOM
}
// Find 64KB aligned address within allocation
uintptr_t addr = (uintptr_t)raw;
uintptr_t aligned = (addr + 0xFFFF) & ~0xFFFFULL; // Round up to 64KB boundary
void* page_base = (void*)aligned;
// Free unused prefix (if any)
size_t prefix_size = aligned - addr;
if (prefix_size > 0) {
munmap(raw, prefix_size);
}
// Free unused suffix
size_t suffix_offset = prefix_size + POOL_PAGE_SIZE;
if (suffix_offset < alloc_size) {
munmap((char*)raw + suffix_offset, alloc_size - suffix_offset);
}
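// Illustrative trim arithmetic (hypothetical addresses): raw = 0x7f0000017000
// (mmap guarantees only 4 KiB alignment) → aligned = 0x7f0000020000 (next
// 64 KiB boundary), prefix = 0x9000 (36 KiB, munmap'd above), suffix =
// 128 KiB - 36 KiB - 64 KiB = 28 KiB (munmap'd here).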
// DEBUG: Log first few allocations
static _Atomic int mmap_count = 0;
int mc = atomic_fetch_add_explicit(&mmap_count, 1, memory_order_relaxed);
if (mc < 5) {
MF2_DEBUG_LOG("MMAP_ALLOC %d: raw=%p, aligned=%p, prefix=%zu, suffix=%zu",
mc, raw, page_base, prefix_size, alloc_size - suffix_offset);
}
// ALIGNMENT VERIFICATION (Step 1)
if (((uintptr_t)page_base & 0xFFFF) != 0) {
MF2_ERROR_LOG("ALIGNMENT BUG: Page %p not 64KB aligned! (offset=%zu)",
page_base, ((uintptr_t)page_base & 0xFFFF));
}
PoolZeroMode zero_mode = hak_pool_zero_mode();
// Zero-fill (default) or relax based on ENV gate (POOL_ZERO_MODE_HEADER/OFF).
// mmap() already returns zeroed pages; this gate controls additional zeroing overhead.
if (zero_mode == POOL_ZERO_MODE_FULL) {
memset(page_base, 0, POOL_PAGE_SIZE);
}
// Step 2: Allocate MidPage descriptor
MidPage* page = (MidPage*)hkm_libc_calloc(1, sizeof(MidPage));
if (!page) {
// CRITICAL FIX: Use munmap for mmap-allocated memory
munmap(page_base, POOL_PAGE_SIZE);
return NULL;
}
// Step 3: Initialize page descriptor
page->base = page_base;
page->class_idx = (uint8_t)class_idx;
page->flags = 0;
page->owner_tid = pthread_self();
page->owner_tp = mf2_thread_pages_get(); // Store owner's ThreadPages for pending queue
page->last_transfer_time = 0; // No transfer yet (lease mechanism)
// Step 4: Build freelist chain (walk through page and link blocks)
// Calculate how many blocks fit in 64KB page (including header overhead)
size_t usable_size = POOL_PAGE_SIZE;
size_t num_blocks = usable_size / block_size;
page->capacity = (uint16_t)num_blocks;
page->free_count = (uint16_t)num_blocks;
// Build linked list of free blocks
PoolBlock* freelist_head = NULL;
PoolBlock* freelist_tail = NULL;
for (size_t i = 0; i < num_blocks; i++) {
char* block_addr = (char*)page_base + (i * block_size);
PoolBlock* block = (PoolBlock*)block_addr;
if (zero_mode == POOL_ZERO_MODE_HEADER) {
memset(block, 0, HEADER_SIZE);
}
block->next = NULL;
if (freelist_head == NULL) {
freelist_head = block;
freelist_tail = block;
} else {
freelist_tail->next = block;
freelist_tail = block;
}
}
page->freelist = freelist_head;
// Step 5: Initialize remote stack (for cross-thread frees)
atomic_store(&page->remote_head, (uintptr_t)0);
atomic_store(&page->remote_count, 0);
// Step 6: Initialize lifecycle counters
atomic_store(&page->in_use, 0); // No blocks allocated yet
atomic_store(&page->pending_dn, 0);
// Step 7: Initialize linkage
page->next_page = NULL;
page->prev_page = NULL;
// Initialize pending queue fields
atomic_store_explicit(&page->in_remote_pending, false, memory_order_relaxed);
page->next_pending = NULL;
// Step 8: Register page in global registry
mf2_register_page(page);
return page;
}
// --- MF2 Allocation & Free Operations ---
// Forward declarations
static void mf2_enqueue_pending(MF2_ThreadPages* owner_tp, MidPage* page);
// Drain remote frees (cross-thread) into page's local freelist
// Called by owner thread when local freelist is empty
static int mf2_drain_remote_frees(MidPage* page) {
if (!page) return 0;
atomic_fetch_add(&g_mf2_drain_attempts, 1);
// Check if there are any remote frees (FIX #6: use seq_cst to ensure total ordering - DEBUG)
unsigned int remote_count = atomic_load_explicit(&page->remote_count, memory_order_seq_cst);
if (remote_count == 0) {
return 0; // Nothing to drain
}
// Atomically swap remote stack head with NULL (lock-free pop all)
uintptr_t head = atomic_exchange_explicit(&page->remote_head, (uintptr_t)0,
memory_order_acq_rel);
if (!head) {
atomic_store_explicit(&page->remote_count, 0, memory_order_release);
return 0; // Race: someone else drained it
}
// Reset remote count (FIX #6: use release for future drain checks to see)
atomic_store_explicit(&page->remote_count, 0, memory_order_release);
// Walk the remote stack and count blocks
int drained = 0;
PoolBlock* cur = (PoolBlock*)head;
PoolBlock* tail = NULL;
while (cur) {
drained++;
tail = cur;
cur = cur->next;
}
// Append remote stack to local freelist (splice in front for simplicity)
if (tail) {
tail->next = page->freelist;
page->freelist = (PoolBlock*)head;
page->free_count += drained;
}
atomic_fetch_add(&g_mf2_drain_count, 1);
atomic_fetch_add(&g_mf2_drain_blocks, drained);
// CRITICAL FIX: Check if new remotes arrived DURING drain
// If so, re-enqueue to owner's pending queue (avoid losing remotes!)
unsigned int post_drain_count = atomic_load_explicit(&page->remote_count, memory_order_acquire);
if (post_drain_count >= 1 && page->owner_tp) { // Use same threshold as initial enqueue
// New remotes arrived during drain, re-enqueue for next round
// Note: This is safe because flag was cleared earlier
mf2_enqueue_pending(page->owner_tp, page);
}
return drained;
}
// ===========================================================================
// Pending Queue Operations (MPSC Lock-Free Stack)
// ===========================================================================
// Enqueue page to owner's pending queue (called by remote threads)
// MPSC: Multiple producers (remote free threads), single consumer (owner)
static void mf2_enqueue_pending(MF2_ThreadPages* owner_tp, MidPage* page) {
if (!owner_tp || !page) return;
// Already in pending? Skip (avoid duplicate enqueue)
_Bool was_pending = atomic_exchange_explicit(&page->in_remote_pending, true, memory_order_acq_rel);
if (was_pending) {
return; // Already enqueued, nothing to do
}
atomic_fetch_add(&g_mf2_pending_enqueued, 1);
// Push to owner's pending stack (Treiber stack algorithm)
uintptr_t old_head;
do {
old_head = atomic_load_explicit(&owner_tp->pages_remote_pending[page->class_idx], memory_order_relaxed);
page->next_pending = (MidPage*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&owner_tp->pages_remote_pending[page->class_idx],
&old_head, (uintptr_t)page,
memory_order_release, // Publish page
memory_order_relaxed));
// 0→1 detection: Increment adoptable count for this class
// This enables O(1) early return in try_adopt (if count==0, no scan needed)
if (old_head == 0) {
atomic_fetch_add_explicit(&g_adoptable_count[page->class_idx], 1, memory_order_relaxed);
}
}
// Dequeue one page from pending queue (called by owner thread or adopter)
// Uses CAS for correctness (multi-consumer in adoption path)
static MidPage* mf2_dequeue_pending(MF2_ThreadPages* tp, int class_idx) {
if (!tp) return NULL;
uintptr_t old_head;
do {
old_head = atomic_load_explicit(&tp->pages_remote_pending[class_idx], memory_order_acquire);
if (old_head == 0) {
return NULL; // Queue empty
}
MidPage* page = (MidPage*)old_head;
// CAS to pop head
if (atomic_compare_exchange_weak_explicit(
&tp->pages_remote_pending[class_idx],
&old_head, (uintptr_t)page->next_pending,
memory_order_acq_rel, memory_order_relaxed)) {
// Successfully dequeued
MidPage* next = page->next_pending;
page->next_pending = NULL; // Clear link
// If queue became empty (next==NULL), decrement adoptable count
// This enables O(1) early return in try_adopt when all queues empty
if (next == NULL) {
atomic_fetch_sub_explicit(&g_adoptable_count[class_idx], 1, memory_order_relaxed);
}
return page;
}
} while (1);
}
// ===========================================================================
// End of Pending Queue Operations
// ===========================================================================
#include "box/pool_mf2_helpers.inc.h"
#include "box/pool_mf2_adoption.inc.h"
// Fast allocation path (owner thread, NO LOCK!)
static inline void* mf2_alloc_fast(int class_idx, size_t size, uintptr_t site_id) {
// Get thread-local page lists
MF2_ThreadPages* tp = mf2_thread_pages_get();
if (!tp) return NULL;
// Get active page for this class
MidPage* page = tp->active_page[class_idx];
if (!page) {
// No active page, go to slow path
return mf2_alloc_slow(class_idx, size, site_id);
}
// FAST PATH: Pop from page-local freelist (NO LOCK!)
if (page->freelist) {
atomic_fetch_add(&g_mf2_alloc_fast_hit, 1);
// Route P: Update activity tracking for idle detection
atomic_store_explicit(&tp->last_alloc_tsc, mf2_rdtsc(), memory_order_relaxed);
PoolBlock* block = page->freelist;
page->freelist = block->next;
page->free_count--;
// Increment in-use count (atomic for cross-thread visibility)
atomic_fetch_add_explicit(&page->in_use, 1, memory_order_relaxed);
// Return user pointer (skip header)
return (char*)block + HEADER_SIZE;
}
// Local freelist empty, go to slow path
return mf2_alloc_slow(class_idx, size, site_id);
}
// Slow allocation path (drain remote or allocate new page)
static void* mf2_alloc_slow(int class_idx, size_t size, uintptr_t site_id) {
(void)site_id; // Unused for now
atomic_fetch_add(&g_mf2_alloc_slow_hit, 1);
// Get thread-local page lists
MF2_ThreadPages* tp = mf2_thread_pages_get();
if (!tp) return NULL;
// ===========================================================================
// Allocation Strategy (Must-Reuse Order)
// ===========================================================================
// 1. MUST-REUSE GATE (Part 1): Drain own pending queue
// - Process up to 4 pages to avoid blocking
// - Direct handoff: activate first successful drain immediately
if (mf2_try_reuse_own_pending(tp, class_idx)) {
return mf2_alloc_fast(class_idx, size, site_id);
}
// 2. MUST-REUSE GATE (Part 2): Drain active page remotes
// - Check if current active page has remote frees
// - Drain and retry allocation if successful
if (mf2_try_drain_active_remotes(tp, class_idx)) {
return mf2_alloc_fast(class_idx, size, site_id);
}
// HISTORICAL NOTE: full_pages scan removed
// Old approach: Scan full_pages looking for pages with remotes
// Problem: Drained pages consumed before owner can scan them
// New approach: Direct Handoff immediately activates drained pages
// Result: full_pages scan always finds 0 pages (100% waste)
//
// Benchmark evidence (before removal):
// - Full scan checked: 1,879,484 pages
// - Full scan found: 0 pages (0% success rate!)
// 3. Consumer-Driven Adoption (Route P with idle detection)
// - Only adopt from idle owners (haven't allocated in >150µs)
// - Prevents "adoption stealing" from active owners
if (mf2_try_adopt_pending(tp, class_idx)) {
return mf2_alloc_fast(class_idx, size, site_id);
}
// 4. MUST-REUSE GATE (Final): Allocate new page (last resort)
// - Only reached after exhausting all reuse opportunities
// - Order: pending queue → active drain → adoption → NEW
MidPage* page = mf2_alloc_and_activate_new_page(tp, class_idx);
if (!page) {
return NULL; // OOM
}
// Retry allocation from new page
return mf2_alloc_fast(class_idx, size, site_id);
}
// Forward declaration of slow free path
static void mf2_free_slow(MidPage* page, void* ptr);
// Strategy A: Global Round-Robin Drain (Cross-Thread Pending Queue)
// Fast free path (owner thread, NO LOCK!)
static inline void mf2_free_fast(MidPage* page, void* ptr) {
if (!page || !ptr) return;
atomic_fetch_add(&g_mf2_free_owner_count, 1);
// Get block pointer (rewind to header)
PoolBlock* block = (PoolBlock*)((char*)ptr - HEADER_SIZE);
// FAST PATH: Push to page-local freelist (NO LOCK!)
block->next = page->freelist;
page->freelist = block;
page->free_count++;
// Decrement in-use count (atomic for cross-thread visibility)
int old_in_use = atomic_fetch_sub_explicit(&page->in_use, 1, memory_order_release);
// Check if page is now empty (all blocks free)
if (old_in_use == 1 && page->free_count == page->capacity) {
// Memory efficiency: Return empty pages to OS via MADV_DONTNEED
// Keeps VA mapped (no munmap), but releases physical memory
hak_batch_add_page(page->base, POOL_PAGE_SIZE);
}
}
// Slow free path (cross-thread free to remote stack)
static void mf2_free_slow(MidPage* page, void* ptr) {
if (!page || !ptr) return;
atomic_fetch_add(&g_mf2_free_remote_count, 1);
// Get block pointer
PoolBlock* block = (PoolBlock*)((char*)ptr - HEADER_SIZE);
// Push to page's remote stack (lock-free MPSC)
uintptr_t old_head;
do {
old_head = atomic_load_explicit(&page->remote_head, memory_order_acquire);
block->next = (PoolBlock*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&page->remote_head, &old_head, (uintptr_t)block,
memory_order_release, memory_order_relaxed));
// Increment remote count and detect threshold for enqueueing
unsigned int old_count = atomic_fetch_add_explicit(&page->remote_count, 1, memory_order_seq_cst);
// CRITICAL FIX: Use threshold-based enqueueing instead of 0→1 edge
// Problem: 0→1 causes ping-pong (drain 1 block → next free triggers 0→1 again)
// Solution: Only enqueue when remotes accumulate to threshold (better batching)
//
// Threshold values (intended to be configurable via HAKMEM_MF2_ENQUEUE_THRESHOLD):
// 1 = immediate (0→1 edge, causes ping-pong)
// 4 = balanced (batch 4 blocks before notifying owner)
// 8 = aggressive batching (higher latency, but better efficiency)
//
// We enqueue on transitions TO the threshold (old_count == threshold-1).
// NOTE: the env override is not wired up here; the threshold is currently
// hardcoded to 1 (immediate, 0→1 edge). See the sketch after this function.
static int g_enqueue_threshold = 1; // currently hardcoded: 1=immediate (0→1 edge)
if (old_count + 1 == (unsigned int)g_enqueue_threshold) {
// Remote count just reached threshold, notify owner
if (page->owner_tp) {
mf2_enqueue_pending(page->owner_tp, page);
}
}
// DEBUG: Sample first 10 remote frees - Disabled for performance
// static _Atomic int remote_free_samples = 0;
// int sample = atomic_fetch_add_explicit(&remote_free_samples, 1, memory_order_relaxed);
// if (sample < 10) {
// fprintf(stderr, "[REMOTE_FREE %d] ptr=%p → page=%p (base=%p), remote_count=%u (was %u), EDGE=%s\n",
// sample, ptr, page, page->base, old_count + 1, old_count, (old_count == 0) ? "YES" : "NO");
// }
// Decrement in-use count
int old_in_use = atomic_fetch_sub_explicit(&page->in_use, 1, memory_order_release);
// Check if page is now empty (FIX #6: acquire to see all remote frees)
if (old_in_use == 1 && page->free_count + atomic_load_explicit(&page->remote_count, memory_order_acquire) >= page->capacity) {
// Memory efficiency: Return empty pages to OS via MADV_DONTNEED
// Keeps VA mapped (no munmap), but releases physical memory
hak_batch_add_page(page->base, POOL_PAGE_SIZE);
}
}
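// Sketch (hypothetical helper, not wired into mf2_free_slow above): a lazy
// env read for the enqueue threshold, following the getenv-once pattern used
// elsewhere in this file, clamped to the documented 1..8 range.
static inline int mf2_enqueue_threshold_from_env(void) {
    static int cached = -1;
    if (__builtin_expect(cached == -1, 0)) {
        const char* e = getenv("HAKMEM_MF2_ENQUEUE_THRESHOLD");
        int v = (e && *e) ? atoi(e) : 1; // default: immediate (0->1 edge)
        if (v < 1) v = 1;
        if (v > 8) v = 8;
        cached = v;
    }
    return cached;
}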
// Top-level free dispatcher
static void mf2_free(void* ptr) {
if (!ptr) return;
// O(1) page lookup (mimalloc's magic!)
MidPage* page = mf2_addr_to_page(ptr);
if (!page) {
// Not a MF2 page (shouldn't happen if MF2 is enabled properly)
return;
}
// Check if we're the owner (fast path)
MF2_ThreadPages* tp = mf2_thread_pages_get();
if (tp && page->owner_tid == tp->my_tid) {
// Fast: Owner thread, push to local freelist (NO LOCK!)
mf2_free_fast(page, ptr);
} else {
// Slow: Cross-thread free, push to remote stack (lock-free)
mf2_free_slow(page, ptr);
}
}
// ===========================================================================
// Global pool state (simplified: single-threaded for MVP)
static struct {
PoolBlock* freelist[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
// Locks: per (class, shard) freelist to allow concurrent operations
PaddedMutex freelist_locks[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
// Non-empty bitmap (O(1) empty class skip)
// Bit i = 1 if freelist[class][shard] is non-empty
// Use atomic to avoid class-wide locks
atomic_uint_fast64_t nonempty_mask[POOL_NUM_CLASSES]; // 1 bit per shard
// Remote-free MPSC stacks per (class, shard): lock-free producers, drained under lock on alloc
atomic_uintptr_t remote_head[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
atomic_uint remote_count[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
// Statistics
uint64_t hits[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t misses[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t refills[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t frees[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t total_bytes_allocated __attribute__((aligned(64)));
uint64_t total_pages_allocated __attribute__((aligned(64)));
// Per-class page accounting (for Soft CAP guidance)
uint64_t pages_by_class[POOL_NUM_CLASSES] __attribute__((aligned(64)));
// ACE: per-class bundle factor for refill (1..4) + last snapshot
int bundle_factor[POOL_NUM_CLASSES];
uint64_t last_hits[POOL_NUM_CLASSES];
uint64_t last_misses[POOL_NUM_CLASSES];
int initialized;
int tls_free_enabled; // env: HAKMEM_POOL_TLS_FREE (default: 1)
// Extra metrics (for learner logging): all relaxed atomics
atomic_uint_fast64_t trylock_attempts __attribute__((aligned(64)));
atomic_uint_fast64_t trylock_success __attribute__((aligned(64)));
atomic_uint_fast64_t ring_underflow __attribute__((aligned(64)));
} g_pool;
static int g_wrap_l2_enabled = 1; // env: HAKMEM_WRAP_L2=0 to disable in wrappers
static int g_shard_mix_enabled = 0; // env: HAKMEM_SHARD_MIX=1 to enable stronger hashing
static int g_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING=1 to enable TLS ring
static int g_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES (1..8)
static int g_ring_return_div = 2; // env: HAKMEM_RING_RETURN_DIV (2=half, 3=third)
static int g_tls_lo_max = 256; // env: HAKMEM_TLS_LO_MAX (LIFO size cap)
int g_hdr_light_enabled = 0; // env: HAKMEM_HDR_LIGHT=1 (minimize extra fields), =2 (no header writes/validation)
static int g_pool_min_bundle = 2; // env: HAKMEM_POOL_MIN_BUNDLE (default 2)
// Sampled counter updates to reduce hot-path stores: 1/2^k
static int g_count_sample_exp = 10; // env: HAKMEM_POOL_COUNT_SAMPLE (0..16)
static __thread uint32_t t_pool_rng = 0x243f6a88u; // per-thread RNG for sampling
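// Sketch (hypothetical helper; the real update sites may inline this): decide
// whether to perform a sampled counter update, true ~1/2^k of calls, using
// the per-thread xorshift32 RNG above. k=0 samples every call.
static inline int pool_count_sampled(void) {
    uint32_t x = t_pool_rng;
    x ^= x << 13; x ^= x >> 17; x ^= x << 5;            // xorshift32 step
    t_pool_rng = x;
    return (x & ((1u << g_count_sample_exp) - 1)) == 0; // mask of k low bits
}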
// ---------------------------------------------------------------------------
// PoolHotBox v2 scaffolding (research-only; defaults to v1)
// ---------------------------------------------------------------------------
PoolHotBoxV2Stats g_pool_hotbox_v2_stats[POOL_NUM_CLASSES];
static __thread pool_ctx_v2* g_pool_ctx_v2 = NULL;
// Forward decls for helpers used in HotBox v2.
static inline uint32_t pool_hotbox_v2_block_size(int ci);
static inline uint32_t pool_block_size_for_class(int ci);
static inline void mid_set_header(AllocHeader* hdr, size_t class_sz, uintptr_t site_id);
static inline void mid_page_inuse_inc(void* raw);
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref);
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base);
static int pool_hotbox_v2_global_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
static unsigned pool_hotbox_v2_class_mask(void) {
static int parsed = 0;
static unsigned mask = 0;
if (__builtin_expect(!parsed, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_CLASSES");
if (e && *e) {
mask = (unsigned)strtoul(e, NULL, 0);
} else {
mask = 0; // default: all OFF (opt-in only)
}
parsed = 1;
}
return mask;
}
int pool_hotbox_v2_class_enabled(int class_idx) {
if (!pool_hotbox_v2_global_enabled()) return 0;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
unsigned mask = pool_hotbox_v2_class_mask();
static int logged = 0;
if (__builtin_expect(!logged && pool_hotbox_v2_stats_enabled(), 0)) {
fprintf(stderr, "[POOL_V2_MASK] enabled=0x%x\n", mask);
logged = 1;
}
return (mask & (1u << class_idx)) != 0;
}
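// Example (illustrative): HAKMEM_POOL_V2_ENABLED=1 HAKMEM_POOL_V2_CLASSES=0x6
// enables HotBox v2 for classes 1 and 2 (4 KiB and 8 KiB) only. The mask is
// parsed with strtoul(..., 0), so hex (0x6), octal, or decimal all work.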
int pool_hotbox_v2_stats_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_STATS");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
pool_ctx_v2* pool_v2_tls_get(void) {
pool_ctx_v2* ctx = g_pool_ctx_v2;
if (__builtin_expect(ctx == NULL, 0)) {
ctx = (pool_ctx_v2*)calloc(1, sizeof(pool_ctx_v2));
if (!ctx) abort();
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
uint32_t user_sz = pool_block_size_for_class(i);
ctx->cls[i].block_size = user_sz ? (user_sz + HEADER_SIZE) : 0;
ctx->cls[i].max_partial_pages = 2;
}
g_pool_ctx_v2 = ctx;
}
return ctx;
}
static inline uint32_t pool_hotbox_v2_block_size(int ci) {
switch (ci) {
case 0: return POOL_CLASS_2KB;
case 1: return POOL_CLASS_4KB;
case 2: return POOL_CLASS_8KB;
case 3: return POOL_CLASS_16KB;
case 4: return POOL_CLASS_32KB;
case 5: return POOL_CLASS_40KB;
case 6: return POOL_CLASS_52KB;
default: return 0;
}
}
static inline uint32_t pool_block_size_for_class(int ci) {
return pool_hotbox_v2_block_size(ci);
}
static inline void pool_hotbox_v2_record_alloc(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_calls, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
static inline void pool_hotbox_v2_record_alloc_refill(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
static inline void pool_hotbox_v2_record_alloc_refill_fail(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill_fail, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
void pool_hotbox_v2_record_alloc_fallback(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fallback_v1, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
static inline void pool_hotbox_v2_record_free(uint32_t ci) {
    if (ci >= (uint32_t)POOL_NUM_CLASSES) return; // unsigned compare: an (int) cast would let ci >= 2^31 slip past the guard
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_calls, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
void pool_hotbox_v2_record_free_call(uint32_t ci) {
pool_hotbox_v2_record_free(ci);
}
void pool_hotbox_v2_record_free_fallback(uint32_t ci) {
    if (ci >= (uint32_t)POOL_NUM_CLASSES) return; // unsigned compare (see note in pool_hotbox_v2_record_free)
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fallback_v1, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
enum pool_v2_pageof_fail {
POOL_V2_PAGEOF_NONE = 0,
POOL_V2_PAGEOF_OUT_OF_RANGE = 1,
POOL_V2_PAGEOF_MISALIGNED = 2,
POOL_V2_PAGEOF_HEADER_MISSING = 3,
POOL_V2_PAGEOF_UNKNOWN = 4,
};
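// Reason taxonomy for pool_hotbox_v2_page_of() below (descriptive summary):
//   HEADER_MISSING - the reverse-header slot for the computed page base is empty
//   UNKNOWN        - a header exists but its class index or base does not match
//   OUT_OF_RANGE   - the pointer falls outside the page's block span
//   MISALIGNED     - the pointer is inside the span but not on a block boundary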
static inline void pool_hotbox_v2_record_pageof_fail(uint32_t ci, int reason) {
    if (ci >= (uint32_t)POOL_NUM_CLASSES) return; // unsigned compare (see note in pool_hotbox_v2_record_free)
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
switch (reason) {
case POOL_V2_PAGEOF_HEADER_MISSING:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_header_missing, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_OUT_OF_RANGE:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_out_of_range, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_MISALIGNED:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_misaligned, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_UNKNOWN:
default:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_unknown, 1, memory_order_relaxed);
break;
}
#else
(void)reason;
#endif
}
static pool_page_v2* pool_hotbox_v2_page_acquire(void) {
pool_page_v2* p = (pool_page_v2*)calloc(1, sizeof(pool_page_v2));
return p;
}
static void pool_hotbox_v2_page_release(pool_page_v2* p) {
free(p);
}
static void* pool_hotbox_v2_build_freelist(pool_page_v2* p) {
if (!p || !p->base || p->block_size == 0 || p->capacity == 0) return NULL;
uint8_t* base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
void* head = NULL;
for (uint32_t i = 0; i < p->capacity; i++) {
void* blk = base + ((size_t)i * p->block_size);
*(void**)blk = head;
head = blk;
}
return head;
}
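// Freelist shape (illustrative): with capacity == 4 the loop above pushes
// blocks 0..3 head-first, so the returned head is block 3 and the chain reads
// 3 -> 2 -> 1 -> 0 -> NULL. Allocation therefore pops the highest-addressed
// block first; the order is irrelevant to correctness.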
static PoolColdIface pool_cold_iface_v1(void);
static pool_page_v2* pool_hotbox_v2_page_of(pool_ctx_v2* ctx, uint32_t ci, void* ptr, int* out_reason) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
if (!ctx || ci >= POOL_NUM_CLASSES || !ptr) return NULL;
// Compute page base by mask (POOL_PAGE_SIZE is a power of two).
void* page_base = pool_hotbox_v2_page_base(ptr, POOL_PAGE_SIZE);
pool_page_v2* p = (pool_page_v2*)pool_hotbox_v2_header_load(page_base);
if (!p) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_HEADER_MISSING;
return NULL;
}
if (p->class_idx != ci || !p->base) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
return NULL;
}
uint8_t* data_base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
size_t span = (size_t)p->block_size * (size_t)p->capacity;
uintptr_t off = (uintptr_t)((uint8_t*)ptr - data_base);
if (off >= span) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_OUT_OF_RANGE;
return NULL;
}
if (off % p->block_size != 0) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_MISALIGNED;
return NULL;
}
if (out_reason) *out_reason = POOL_V2_PAGEOF_NONE;
return p;
}
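// Worked example (assuming POOL_PAGE_SIZE == 64 KiB == 0x10000 and that
// p->base equals the masked page base):
//   ptr       = 0x7f3200012345                    (illustrative address)
//   page_base = ptr & ~(uintptr_t)0xFFFF = 0x7f3200010000
//   off       = ptr - (page_base + POOL_HOTBOX_V2_HEADER_BYTES)
// off must land inside block_size * capacity and on a block_size boundary,
// otherwise OUT_OF_RANGE / MISALIGNED is reported via out_reason.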
static void pool_hotbox_v2_page_retire_slow(pool_ctx_v2* ctx, uint32_t ci, pool_page_v2* p) {
(void)ctx;
if (!p) return;
// Clear reverse header to avoid stale page_of hits.
pool_hotbox_v2_header_clear(p->base);
PoolColdIface cold = pool_cold_iface_v1();
if (cold.retire_page) {
void* cold_ctx = NULL;
cold.retire_page(cold_ctx, ci, p->slab_ref, p->base);
}
pool_hotbox_v2_page_release(p);
}
static void pool_hotbox_v2_push_partial(pool_class_v2* hc, pool_page_v2* p) {
if (!hc || !p) return;
p->next = hc->partial;
hc->partial = p;
if (hc->partial_count < UINT16_MAX) hc->partial_count++;
}
static __attribute__((unused)) pool_page_v2* pool_hotbox_v2_pop_partial(pool_class_v2* hc) {
if (!hc || !hc->partial) return NULL;
pool_page_v2* p = hc->partial;
hc->partial = p->next;
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return p;
}
static pool_page_v2* pool_hotbox_v2_take_usable_partial(pool_class_v2* hc) {
if (!hc) return NULL;
pool_page_v2* prev = NULL;
pool_page_v2* p = hc->partial;
while (p) {
if (p->freelist && p->used < p->capacity) {
if (prev) {
prev->next = p->next;
} else {
hc->partial = p->next;
}
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return p;
}
prev = p;
p = p->next;
}
return NULL;
}
static int pool_hotbox_v2_unlink_partial(pool_class_v2* hc, pool_page_v2* target) {
if (!hc || !target) return 0;
pool_page_v2* prev = NULL;
pool_page_v2* p = hc->partial;
while (p) {
if (p == target) {
if (prev) {
prev->next = p->next;
} else {
hc->partial = p->next;
}
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return 1;
}
prev = p;
p = p->next;
}
return 0;
}
static void pool_hotbox_v2_record_alloc_fast(uint32_t ci) {
    if (ci >= (uint32_t)POOL_NUM_CLASSES) return; // unsigned compare (see note in pool_hotbox_v2_record_free)
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fast, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
static void pool_hotbox_v2_record_free_fast(uint32_t ci) {
    if (ci >= (uint32_t)POOL_NUM_CLASSES) return; // unsigned compare (see note in pool_hotbox_v2_record_free)
#if HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fast, 1, memory_order_relaxed);
#else
(void)0;
#endif
}
static inline void* pool_hotbox_v2_alloc_fast(pool_ctx_v2* ctx, uint32_t ci, uintptr_t site_id) {
pool_class_v2* hc = &ctx->cls[ci];
pool_page_v2* p = hc->current;
if (p && p->freelist && p->used < p->capacity) {
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
pool_hotbox_v2_record_alloc_fast(ci);
AllocHeader* hdr = (AllocHeader*)blk;
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
mid_set_header(hdr, class_sz, site_id);
mid_page_inuse_inc(blk);
return (char*)blk + HEADER_SIZE;
}
if (p) {
// Keep exhausted current reachable for free()
pool_hotbox_v2_push_partial(hc, p);
hc->current = NULL;
}
p = pool_hotbox_v2_take_usable_partial(hc);
if (p) {
hc->current = p;
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
pool_hotbox_v2_record_alloc_fast(ci);
AllocHeader* hdr = (AllocHeader*)blk;
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
mid_set_header(hdr, class_sz, site_id);
mid_page_inuse_inc(blk);
return (char*)blk + HEADER_SIZE;
}
return NULL;
}
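// Fast-path shape: serve from hc->current; when it drains, park it on the
// partial list (so its blocks stay reachable for free) and promote the first
// partial page that still has free blocks. Only when both tiers miss does the
// caller fall through to the Cold-IF refill in pool_hotbox_v2_alloc().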
static void pool_hotbox_v2_page_init(pool_page_v2* p, uint32_t ci, void* base, uint32_t block_size, uint32_t capacity, void* slab_ref) {
if (!p) return;
// Adjust capacity if caller did not account for header reservation.
size_t avail = (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) ? (POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) : 0;
if (block_size > 0) {
uint32_t max_cap = (uint32_t)(avail / (size_t)block_size);
if (capacity == 0 || capacity > max_cap) capacity = max_cap;
}
p->freelist = NULL;
p->used = 0;
p->capacity = capacity;
p->block_size = block_size;
p->class_idx = ci;
p->base = base;
p->slab_ref = slab_ref;
p->next = NULL;
pool_hotbox_v2_header_store(p->base, p);
}
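// Capacity clamp sketch (illustrative, assuming POOL_PAGE_SIZE == 64 KiB):
// for the 2 KiB class, block_size == 2048 + HEADER_SIZE, so max_cap is
// (65536 - POOL_HOTBOX_V2_HEADER_BYTES) / block_size - slightly under the
// nominal 32 blocks/page once the header reservations are subtracted.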
static PoolColdIface pool_cold_iface_v1(void) {
PoolColdIface iface = {pool_cold_refill_page_v1, pool_cold_retire_page_v1};
return iface;
}
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref) {
(void)cold_ctx;
uint32_t user_sz = pool_hotbox_v2_block_size((int)ci);
if (user_sz == 0) return NULL;
uint32_t bs = user_sz + HEADER_SIZE;
if (bs == 0) return NULL;
uint32_t cap = 0;
if (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) {
cap = (uint32_t)((POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) / bs);
}
if (cap == 0) return NULL;
// Over-allocate so we can align to POOL_PAGE_SIZE (64KiB) for O(1) page_of.
void* raw = mmap(NULL, POOL_HOTBOX_V2_MAP_LEN, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (raw == MAP_FAILED || !raw) {
return NULL;
}
uintptr_t aligned = ((uintptr_t)raw + (POOL_PAGE_SIZE - 1)) & ~((uintptr_t)POOL_PAGE_SIZE - 1);
void* base = (void*)aligned;
// Register page ownership for same-thread fast free consistency.
mid_desc_register(base, (int)ci, (uint64_t)(uintptr_t)pthread_self());
g_pool.refills[ci]++;
g_pool.total_pages_allocated++;
g_pool.pages_by_class[ci]++;
g_pool.total_bytes_allocated += POOL_HOTBOX_V2_MAP_LEN;
if (out_block_size) *out_block_size = bs;
if (out_capacity) *out_capacity = cap;
// slab_ref keeps the raw mapping pointer for unmap.
if (out_slab_ref) *out_slab_ref = raw;
return base;
}
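// Alignment sketch (illustrative): mmap returns 4 KiB-aligned memory, so
// rounding raw up with
//   aligned = ((uintptr_t)raw + 0xFFFF) & ~(uintptr_t)0xFFFF
// skips at most POOL_PAGE_SIZE - 4096 bytes; POOL_HOTBOX_V2_MAP_LEN is assumed
// to include that slack so one full 64 KiB span always fits. slab_ref keeps
// the original raw pointer so retire can munmap the whole mapping.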
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base) {
    (void)cold_ctx;
void* addr = slab_ref ? slab_ref : base;
if (!addr) return;
if (ci < POOL_NUM_CLASSES) {
if (g_pool.pages_by_class[ci] > 0) g_pool.pages_by_class[ci]--;
}
if (g_pool.total_pages_allocated > 0) g_pool.total_pages_allocated--;
if (g_pool.total_bytes_allocated >= POOL_HOTBOX_V2_MAP_LEN) g_pool.total_bytes_allocated -= POOL_HOTBOX_V2_MAP_LEN;
munmap(addr, POOL_HOTBOX_V2_MAP_LEN);
}
void* pool_hotbox_v2_alloc(uint32_t class_idx, size_t size, uintptr_t site_id) {
    (void)size; // class_idx already encodes the block size; site_id is forwarded to the fast path
    if (class_idx >= (uint32_t)POOL_NUM_CLASSES) return NULL;
pool_hotbox_v2_record_alloc(class_idx);
pool_ctx_v2* ctx = pool_v2_tls_get();
void* blk = pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
if (blk) return blk;
// slow: refill via Cold IF
PoolColdIface cold = pool_cold_iface_v1();
uint32_t bs = 0, cap = 0;
void* slab_ref = NULL;
void* base = cold.refill_page ? cold.refill_page(NULL, class_idx, &bs, &cap, &slab_ref) : NULL;
if (!base || !bs || !cap) {
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
return NULL;
}
pool_class_v2* hc = &ctx->cls[class_idx];
pool_page_v2* page = pool_hotbox_v2_page_acquire();
if (!page) {
if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
return NULL;
}
pool_hotbox_v2_page_init(page, class_idx, base, bs, cap, slab_ref);
page->freelist = pool_hotbox_v2_build_freelist(page);
    if (!page->freelist) {
        pool_hotbox_v2_record_alloc_refill_fail(class_idx);
        // page_init already stored the reverse header; clear it so no stale
        // page_of lookup can hit the retired mapping.
        pool_hotbox_v2_header_clear(base);
        if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
        pool_hotbox_v2_page_release(page);
        return NULL;
    }
hc->current = page;
pool_hotbox_v2_record_alloc_refill(class_idx);
return pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
}
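// Call-flow sketch (hypothetical caller; the real dispatch site lives in the
// boxed public API):
//
//   int ci = hak_pool_get_class_index(rounded_size);
//   void* p = (ci >= 0) ? pool_hotbox_v2_alloc((uint32_t)ci, rounded_size, site)
//                       : NULL;
//   if (!p) { /* fall back to the v1 pool path */ }
//
// The slow path performs exactly one Cold-IF refill and then re-runs the fast
// path, which must succeed on a freshly built freelist; refill failures are
// counted and surfaced as NULL so the caller can fall back.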
int pool_hotbox_v2_free(uint32_t class_idx, void* raw_block) {
    if (!raw_block || class_idx >= (uint32_t)POOL_NUM_CLASSES) return 0; // unsigned compare covers the old (int) < 0 check
pool_hotbox_v2_record_free(class_idx);
pool_ctx_v2* ctx = pool_v2_tls_get();
int pageof_reason = POOL_V2_PAGEOF_UNKNOWN;
pool_page_v2* p = pool_hotbox_v2_page_of(ctx, class_idx, raw_block, &pageof_reason);
if (!p) {
pool_hotbox_v2_record_pageof_fail(class_idx, pageof_reason);
if (pool_hotbox_v2_stats_enabled()) {
static _Atomic uint32_t dbg = 0;
uint32_t n = atomic_fetch_add_explicit(&dbg, 1, memory_order_relaxed);
if (n < 4) {
pool_class_v2* hc = &ctx->cls[class_idx];
fprintf(stderr,
"[POOL_V2 page_of_fail] cls=%u ptr=%p reason=%d cur=%p cur_base=%p cur_cap=%u cur_bs=%u partial=%p\n",
class_idx, raw_block, pageof_reason,
(void*)hc->current,
hc->current ? hc->current->base : NULL,
hc->current ? hc->current->capacity : 0u,
hc->current ? hc->current->block_size : 0u,
(void*)hc->partial);
}
}
return 0; // let caller fall back to v1
}
*(void**)raw_block = p->freelist;
p->freelist = raw_block;
if (p->used > 0) p->used--;
pool_hotbox_v2_record_free_fast(class_idx);
pool_class_v2* hc = &ctx->cls[class_idx];
if (p->used == 0) {
pool_hotbox_v2_unlink_partial(hc, p);
if (hc->current == p) hc->current = NULL;
if (hc->partial_count < hc->max_partial_pages) {
pool_hotbox_v2_push_partial(hc, p);
} else {
pool_hotbox_v2_page_retire_slow(ctx, class_idx, p);
}
} else {
if (!hc->current) hc->current = p;
}
return 1;
}
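// Return-value contract (sketch of a hypothetical caller): 1 means the block
// was owned and recycled by v2; 0 means "not mine", and the caller is expected
// to route the pointer to the v1 free path instead:
//
//   if (!pool_hotbox_v2_free(ci, raw)) { /* v1 free fallback */ }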
__attribute__((destructor)) static void pool_hotbox_v2_dump_stats(void) {
if (!pool_hotbox_v2_stats_enabled()) return;
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
uint64_t ac = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_calls, memory_order_relaxed);
uint64_t ar = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill, memory_order_relaxed);
uint64_t arf = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill_fail, memory_order_relaxed);
uint64_t afb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fallback_v1, memory_order_relaxed);
uint64_t fc = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_calls, memory_order_relaxed);
uint64_t ffb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fallback_v1, memory_order_relaxed);
uint64_t af = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fast, memory_order_relaxed);
uint64_t ff = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fast, memory_order_relaxed);
uint64_t pf_hdr = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_header_missing, memory_order_relaxed);
uint64_t pf_range = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_out_of_range, memory_order_relaxed);
uint64_t pf_mis = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_misaligned, memory_order_relaxed);
uint64_t pf_unknown = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_unknown, memory_order_relaxed);
if (ac || afb || fc || ffb || ar || arf || af || ff || pf_hdr || pf_range || pf_mis || pf_unknown) {
fprintf(stderr, "[POOL_V2_STATS] cls=%d alloc_calls=%llu alloc_fast=%llu alloc_refill=%llu alloc_refill_fail=%llu alloc_fb_v1=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu pageof_hdr=%llu pageof_range=%llu pageof_misaligned=%llu pageof_unknown=%llu\n",
i, (unsigned long long)ac, (unsigned long long)af, (unsigned long long)ar,
(unsigned long long)arf, (unsigned long long)afb,
(unsigned long long)fc, (unsigned long long)ff, (unsigned long long)ffb,
(unsigned long long)pf_hdr, (unsigned long long)pf_range, (unsigned long long)pf_mis, (unsigned long long)pf_unknown);
}
}
}
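// Note: with HAKMEM_POOL_HOTBOX_V2_STATS_COMPILED == 0 no counter is ever
// incremented, so the per-class guard above suppresses all output and this
// destructor is effectively free.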
// Size class table for O(1) lookup: 7 classes; indices 5/6 are Bridge classes
// (40KB, 52KB) that fill the 32-64KB gap.
static size_t g_class_sizes[POOL_NUM_CLASSES] = {
POOL_CLASS_2KB, // 2 KB
POOL_CLASS_4KB, // 4 KB
POOL_CLASS_8KB, // 8 KB
POOL_CLASS_16KB, // 16 KB
POOL_CLASS_32KB, // 32 KB
POOL_CLASS_40KB, // 40 KB (Bridge class 0)
POOL_CLASS_52KB // 52 KB (Bridge class 1)
};
// Blocks per page (for each class)
__attribute__((unused)) static const int g_blocks_per_page[POOL_NUM_CLASSES] = {
POOL_PAGE_SIZE / POOL_CLASS_2KB, // 32 blocks (2KiB)
POOL_PAGE_SIZE / POOL_CLASS_4KB, // 16 blocks (4KiB)
POOL_PAGE_SIZE / POOL_CLASS_8KB, // 8 blocks (8KiB)
POOL_PAGE_SIZE / POOL_CLASS_16KB, // 4 blocks (16KiB)
POOL_PAGE_SIZE / POOL_CLASS_32KB, // 2 blocks (32KiB)
POOL_PAGE_SIZE / POOL_CLASS_40KB, // 1 block (40KiB Bridge)
POOL_PAGE_SIZE / POOL_CLASS_52KB // 1 block (52KiB Bridge)
};
// ===========================================================================
// Helper Functions
// ===========================================================================
// Write minimal header for Mid allocation (fast-return friendly)
static inline void mid_set_header(AllocHeader* hdr, size_t class_sz, uintptr_t site_id) {
// For Mid, prefer headerless operation when HDR_LIGHT>=1.
// Debug or non-Mid callers can still write full headers elsewhere.
if (g_hdr_light_enabled >= 1) return; // skip header on alloc hot path
hdr->magic = HAKMEM_MAGIC;
hdr->method = ALLOC_METHOD_POOL;
hdr->size = class_sz;
    if (!g_hdr_light_enabled) { // always true here: the >= 1 case returned above
        hdr->alloc_site = site_id;
        hdr->class_bytes = 0;
        hdr->owner_tid = (uintptr_t)pthread_self(); // single cast suffices
    }
}
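// Mode summary (derived from the branches above): HDR_LIGHT >= 1 writes no
// header at all on the Mid hot path (headerless operation); HDR_LIGHT == 0
// writes magic/method/size plus full provenance (alloc_site, owner_tid).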
// Branchless LUT (Lookup Table) for O(1) class determination
// Expanded to 53 entries for Bridge classes (40KB, 52KB)
static const uint8_t SIZE_TO_CLASS[53] = {
0,0,0, // 0-2KB → Class 0
1,1, // 3-4KB → Class 1
2,2,2,2, // 5-8KB → Class 2
3,3,3,3,3,3,3,3, // 9-16KB → Class 3
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, // 17-32KB → Class 4
5,5,5,5,5,5,5,5, // 33-40KB → Class 5 (Bridge class 0)
6,6,6,6,6,6,6,6,6,6,6,6 // 41-52KB → Class 6 (Bridge class 1)
};
// Get size class index from size (0-6, or -1 if out of range)
// Updated range check for Bridge classes (0-52KB)
static inline int hak_pool_get_class_index(size_t size) {
// Fast path: exact match against configured class sizes (covers Bridge classes)
// Note: size passed here should already be a rounded class size from ACE.
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
size_t cs = g_class_sizes[i];
if (cs != 0 && size == cs) return i;
}
// Fallback: map arbitrary size to nearest fixed class range via LUT (legacy behavior)
    uint32_t kb = (uint32_t)((size + 1023) >> 10); // Round up to whole KB units
    return (kb < 53) ? SIZE_TO_CLASS[kb] : -1;     // 53 LUT entries cover 0-52KB (incl. Bridge classes)
}
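// Worked examples (illustrative): size == POOL_CLASS_8KB hits the exact-match
// loop and returns 2. An arbitrary 6 KiB request takes the LUT path:
// kb = (6144 + 1023) >> 10 = 6, SIZE_TO_CLASS[6] = 2 (the 8 KB class).
// Anything rounding above 52 KB returns -1 and is handled outside the pool.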
// Get shard index from site_id (0-63)
int hak_pool_get_shard_index(uintptr_t site_id) {
if (!g_shard_mix_enabled) {
// Legacy: Shift by 4 to reduce collision (instruction alignment)
return (int)((site_id >> 4) & (POOL_NUM_SHARDS - 1));
}
// SplitMix64-like mixer with thread id salt for better dispersion
uint64_t x = (uint64_t)site_id;
uint64_t tid = (uint64_t)(uintptr_t)pthread_self();
x ^= (tid << 1);
x += 0x9e3779b97f4a7c15ULL;
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
x = (x ^ (x >> 31));
return (int)((uint32_t)x & (POOL_NUM_SHARDS - 1));
}
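// Legacy-path example (illustrative): site_id = 0x401230 yields
// (0x401230 >> 4) & 63 = 0x40123 & 63 = 35, so call sites that differ only in
// their low 4 bits (instruction alignment) share a shard. The SplitMix64-style
// mixer instead folds in the thread id and disperses across all 64 shards.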
// TLS helpers (non-inline helpers for shard bookkeeping)
#include "box/pool_tls_core.inc.h"
// Refill/ACE (boxed)
#include "box/pool_refill.inc.h"
// Init/Shutdown + MF2 debug (boxed)
#include "box/pool_init_api.inc.h"
// Pool statistics (boxed)
#include "box/pool_stats.inc.h"
// Public API (boxed): alloc/free/lookup/free_fast
#include "box/pool_api.inc.h"