// pool_init_api.inc.h — Box: L2 Pool init/shutdown + MF2 debug #ifndef POOL_INIT_API_INC_H #define POOL_INIT_API_INC_H // Thread-safe initialization using pthread_once static pthread_once_t hak_pool_init_once_control = PTHREAD_ONCE_INIT; static void hak_pool_init_impl(void) { HAKMEM_LOG("[Pool] hak_pool_init_impl() EXECUTING - Bridge class fix applied\n"); const FrozenPolicy* pol = hkm_policy_get(); // Phase 6.21 CRITICAL FIX: Bridge classes are hardcoded in g_class_sizes, // NOT from Policy. DO NOT overwrite them with 0! // The code below was disabling Bridge classes by setting them to 0 // because Policy returns mid_dyn1_bytes=0 and mid_dyn2_bytes=0. /* if (pol && pol->mid_dyn1_bytes >= POOL_MIN_SIZE && pol->mid_dyn1_bytes <= POOL_MAX_SIZE) { g_class_sizes[5] = pol->mid_dyn1_bytes; } else { g_class_sizes[5] = 0; } if (pol && pol->mid_dyn2_bytes >= POOL_MIN_SIZE && pol->mid_dyn2_bytes <= POOL_MAX_SIZE) { g_class_sizes[6] = pol->mid_dyn2_bytes; } else { g_class_sizes[6] = 0; } */ // Bridge classes remain as initialized: 40KB and 52KB for (int c = 0; c < POOL_NUM_CLASSES; c++) { for (int s = 0; s < POOL_NUM_SHARDS; s++) { g_pool.freelist[c][s] = NULL; } atomic_store(&g_pool.nonempty_mask[c], 0); for (int s = 0; s < POOL_NUM_SHARDS; s++) { pthread_mutex_init(&g_pool.freelist_locks[c][s].m, NULL); atomic_store(&g_pool.remote_head[c][s], (uintptr_t)0); atomic_store(&g_pool.remote_count[c][s], 0); } g_pool.hits[c] = 0; g_pool.misses[c] = 0; g_pool.refills[c] = 0; g_pool.frees[c] = 0; g_pool.pages_by_class[c] = 0; g_pool.bundle_factor[c] = 1; g_pool.last_hits[c] = 0; g_pool.last_misses[c] = 0; } g_pool.total_bytes_allocated = 0; g_pool.total_pages_allocated = 0; atomic_store(&g_pool.trylock_attempts, 0); atomic_store(&g_pool.trylock_success, 0); atomic_store(&g_pool.ring_underflow, 0); const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE"); g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0); const char* e_wrap = getenv("HAKMEM_WRAP_L2"); if (e_wrap) { g_wrap_l2_enabled = (atoi(e_wrap) != 0); } const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE"); if (e_minb) { int v = atoi(e_minb); if (v >= 1 && v <= 8) g_pool_min_bundle = v; } const char* e_mix = getenv("HAKMEM_SHARD_MIX"); g_shard_mix_enabled = (e_mix && atoi(e_mix) != 0) ? 1 : 0; const char* e_ring = getenv("HAKMEM_POOL_TLS_RING"); if (e_ring) g_tls_ring_enabled = (atoi(e_ring) != 0); const char* e_hdr = getenv("HAKMEM_HDR_LIGHT"); if (e_hdr) g_hdr_light_enabled = atoi(e_hdr); const char* e_probe = getenv("HAKMEM_TRYLOCK_PROBES"); if (e_probe) { int v = atoi(e_probe); if (v>=1 && v<=8) g_trylock_probes = v; } const char* e_div = getenv("HAKMEM_RING_RETURN_DIV"); if (e_div) { int v = atoi(e_div); if (v>=2 && v<=4) g_ring_return_div = v; } const char* e_lo = getenv("HAKMEM_TLS_LO_MAX"); if (e_lo) { int v = atoi(e_lo); if (v>=32 && v<=16384) g_tls_lo_max = v; } const char* e_cs = getenv("HAKMEM_POOL_COUNT_SAMPLE"); if (e_cs) { int v = atoi(e_cs); if (v>=0 && v<=16) g_count_sample_exp = v; } const char* e_tc = getenv("HAKMEM_TC_ENABLE"); if (e_tc) g_tc_enabled = (atoi(e_tc) != 0); const char* e_tcu = getenv("HAKMEM_TC_UNBOUNDED"); if (e_tcu) g_tc_drain_unbounded = (atoi(e_tcu) != 0); const char* e_tcm = getenv("HAKMEM_TC_DRAIN_MAX"); if (e_tcm) { int v = atoi(e_tcm); if (v>=0 && v<=65536) g_tc_drain_max = v; } const char* e_tct = getenv("HAKMEM_TC_DRAIN_TRIGGER"); if (e_tct) { int v = atoi(e_tct); if (v>=0 && v<=POOL_L2_RING_CAP) g_tc_drain_trigger = v; } const char* e_mf2 = getenv("HAKMEM_MF2_ENABLE"); if (e_mf2 && atoi(e_mf2) != 0) { g_mf2_enabled = 1; mf2_page_registry_init(); const char* e_maxq = getenv("HAKMEM_MF2_MAX_QUEUES"); if (e_maxq) { int v = atoi(e_maxq); if (v>=1 && v<=256) g_mf2_max_queues = v; } const char* e_lease = getenv("HAKMEM_MF2_LEASE_MS"); if (e_lease) { int v = atoi(e_lease); if (v>=0 && v<=1000) g_mf2_lease_ms = v; } const char* e_idle = getenv("HAKMEM_MF2_IDLE_THRESHOLD_US"); if (e_idle) { int v = atoi(e_idle); if (v>=0 && v<=10000) g_mf2_idle_threshold_us = v; } HAKMEM_LOG("[Pool] MF2 Per-Page Sharding enabled\n"); HAKMEM_LOG("[MF2] max_queues=%d, lease_ms=%d, idle_threshold_us=%d\n", g_mf2_max_queues, g_mf2_lease_ms, g_mf2_idle_threshold_us); } g_pool.initialized = 1; HAKMEM_LOG("[Pool] Initialized (L2 Hybrid Pool) - Bridge classes SHOULD be enabled\n"); HAKMEM_LOG("[Pool] Class 5 (40KB): %zu\n", g_class_sizes[5]); HAKMEM_LOG("[Pool] Class 6 (52KB): %zu\n", g_class_sizes[6]); HAKMEM_LOG("[Pool] Initialized (L2 Hybrid Pool)\n"); #ifdef HAKMEM_DEBUG_VERBOSE // Debug: Show actual class sizes after initialization HAKMEM_LOG("[Pool] Class configuration:\n"); for (int i = 0; i < POOL_NUM_CLASSES; i++) { if (g_class_sizes[i] != 0) { HAKMEM_LOG(" Class %d: %zu KB (ENABLED)\n", i, g_class_sizes[i]/1024); } else { HAKMEM_LOG(" Class %d: DISABLED\n", i); } } #endif HAKMEM_LOG("[Pool] Page size: %d KB\n", POOL_PAGE_SIZE / 1024); HAKMEM_LOG("[Pool] Shards: %d (site-based)\n", POOL_NUM_SHARDS); // ACE Performance Fix: Pre-allocate pages for Bridge classes to avoid cold start // This ensures ACE can serve Mid-Large allocations (33KB) immediately without mmap fallback extern int refill_freelist(int class_idx, int shard_idx); int prewarm_pages = 4; // Pre-allocate 4 pages per shard for hot classes // Pre-warm Bridge class 5 (40KB) - Critical for 33KB allocations if (g_class_sizes[5] != 0) { int allocated = 0; for (int s = 0; s < prewarm_pages && s < POOL_NUM_SHARDS; s++) { if (refill_freelist(5, s) != 0) { // FIX: Check for SUCCESS (1), not FAILURE (0) allocated++; } } HAKMEM_LOG("[Pool] Pre-allocated %d pages for Bridge class 5 (%zu KB) - Critical for 33KB allocs\n", allocated, g_class_sizes[5]/1024); } else { HAKMEM_LOG("[Pool] WARNING: Bridge class 5 (40KB) is DISABLED - 33KB allocations will fail!\n"); } // Pre-warm Bridge class 6 (52KB) if (g_class_sizes[6] != 0) { int allocated = 0; for (int s = 0; s < prewarm_pages && s < POOL_NUM_SHARDS; s++) { if (refill_freelist(6, s) != 0) { // FIX: Check for SUCCESS (1), not FAILURE (0) allocated++; } } HAKMEM_LOG("[Pool] Pre-allocated %d pages for Bridge class 6 (%zu KB)\n", allocated, g_class_sizes[6]/1024); } } void hak_pool_init(void) { // Debug-only trace // static int called = 0; if (called++ == 0) { HAKMEM_LOG("[Pool] hak_pool_init() called for the first time\n"); } pthread_once(&hak_pool_init_once_control, hak_pool_init_impl); } static void mf2_print_debug_stats(void) { if (!g_mf2_enabled) return; fprintf(stderr, "\n[MF2 DEBUG STATS]\n"); fprintf(stderr, "Alloc fast hits: %12lu\n", (unsigned long)atomic_load(&g_mf2_alloc_fast_hit)); fprintf(stderr, "Alloc slow hits: %12lu\n", (unsigned long)atomic_load(&g_mf2_alloc_slow_hit)); fprintf(stderr, "Page reuses: %12lu\n", (unsigned long)atomic_load(&g_mf2_page_reuse_count)); fprintf(stderr, "New pages: %12lu\n", (unsigned long)atomic_load(&g_mf2_new_page_count)); fprintf(stderr, "Owner frees: %12lu\n", (unsigned long)atomic_load(&g_mf2_free_owner_count)); fprintf(stderr, "Remote frees: %12lu\n", (unsigned long)atomic_load(&g_mf2_free_remote_count)); fprintf(stderr, "Slow checked: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_checked_drain)); fprintf(stderr, "Slow found rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_found_remote)); fprintf(stderr, "Full scan chk: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_checked)); fprintf(stderr, "Full scan rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_found_remote)); fprintf(stderr, "Eager scan: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_scanned)); fprintf(stderr, "Eager found: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_found)); fprintf(stderr, "Drain attempts: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_attempts)); fprintf(stderr, "Drain successes: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_success)); fprintf(stderr, "Remote drains: %12lu (blocks: %lu)\n", (unsigned long)atomic_load(&g_mf2_drain_count), (unsigned long)atomic_load(&g_mf2_drain_blocks)); fprintf(stderr, "\n[PENDING QUEUE]\n"); fprintf(stderr, "Pending enqueued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_enqueued)); fprintf(stderr, "Pending drained: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_drained)); fprintf(stderr, "Pending requeued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_requeued)); uint64_t total_allocs = atomic_load(&g_mf2_alloc_fast_hit) + atomic_load(&g_mf2_alloc_slow_hit); uint64_t total_frees = atomic_load(&g_mf2_free_owner_count) + atomic_load(&g_mf2_free_remote_count); if (total_allocs > 0) fprintf(stderr, "\nFast path hit rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_alloc_fast_hit) / total_allocs); if (total_frees > 0) fprintf(stderr, "Owner free rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_free_owner_count) / total_frees); fflush(stderr); } __attribute__((destructor)) static void mf2_destructor(void) { mf2_print_debug_stats(); } void hak_pool_shutdown(void) { if (!g_pool.initialized) return; hak_pool_print_stats(); mf2_print_debug_stats(); g_pool.initialized = 0; } #endif // POOL_INIT_API_INC_H