// pool_api.inc.h — Box: L2 Pool public API (alloc/free/lookup)
#ifndef POOL_API_INC_H
#define POOL_API_INC_H

#include "pagefault_telemetry_box.h"   // Box PageFaultTelemetry (PF_BUCKET_MID)
#include "box/pool_hotbox_v2_box.h"
#include "box/tiny_heap_env_box.h"     // TinyHeap profile (flatten is disabled under C7_SAFE)
#include "box/pool_zero_mode_box.h"    // Pool zeroing policy (env cached)
#include <stdlib.h>                    // getenv()

// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_POOL_V2_ENABLED");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}

// Fine-grained switches (only used when v2 is enabled). Default ON when unset.
static inline int hak_pool_v2_block_to_user_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_POOL_V2_BLOCK_TO_USER");
        g = (e && *e) ? ((*e != '0') ? 1 : 0) : 1;
    }
    return g;
}

static inline int hak_pool_v2_tls_fast_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_POOL_V2_TLS_FAST_PATH");
        g = (e && *e) ? ((*e != '0') ? 1 : 0) : 1;
    }
    return g;
}

// Pool v1 flatten (hot path only) is experimental and opt-in.
static inline int hak_pool_v1_flatten_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        // Forced OFF (fail-safe) under the C7_SAFE / C7_ULTRA_BENCH profiles.
        int mode = tiny_heap_profile_mode();
        if (mode == TINY_HEAP_PROFILE_C7_SAFE || mode == TINY_HEAP_PROFILE_C7_ULTRA_BENCH) {
            g = 0;
            return g;
        }
        const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}

static inline int hak_pool_v1_flatten_stats_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_STATS");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}

// Mid desc lookup TLS cache (mid bench opt-in; default OFF)
static inline int hak_mid_desc_cache_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_MID_DESC_CACHE_ENABLED");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}
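/*
 * Usage sketch (illustrative, not compiled): the helpers above all follow the
 * same latch-once pattern: read the environment variable on first call, cache
 * the result in a function-local static, and treat any value other than empty
 * or "0" as ON. The variable name below is a placeholder, not a real HAKMEM
 * knob:
 *
 *   #include <stdlib.h>
 *   static inline int demo_flag_enabled(void) {
 *       static int g = -1;                       // -1 = not yet resolved
 *       if (g == -1) {
 *           const char* e = getenv("DEMO_FLAG"); // hypothetical variable
 *           g = (e && *e && *e != '0') ? 1 : 0;  // unset/empty/"0" => OFF
 *       }
 *       return g;
 *   }
 *
 * The first resolution is racy only in the benign sense: concurrent callers
 * may both call getenv(), but they store the same value.
 */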
typedef struct MidDescCache {
    void*        last_page;
    MidPageDesc* last_desc;
} MidDescCache;

static __thread MidDescCache g_mid_desc_cache = {0};

// Single-entry, thread-local memo over mid_desc_lookup(), keyed by the page
// base of the last successful lookup.
static inline MidPageDesc* mid_desc_lookup_cached(void* addr) {
    if (!hak_mid_desc_cache_enabled()) return mid_desc_lookup(addr);
    void* page = (void*)((uintptr_t)addr & ~((uintptr_t)POOL_PAGE_SIZE - 1));
    if (g_mid_desc_cache.last_desc && g_mid_desc_cache.last_page == page) {
        return g_mid_desc_cache.last_desc;
    }
    MidPageDesc* d = mid_desc_lookup(addr);
    if (d) {
        g_mid_desc_cache.last_page = page;
        g_mid_desc_cache.last_desc = d;
    }
    return d;
}

typedef struct PoolV1FlattenStats {
    _Atomic uint64_t alloc_tls_hit;
    _Atomic uint64_t alloc_fallback_v1;
    _Atomic uint64_t free_tls_hit;
    _Atomic uint64_t free_fallback_v1;
    _Atomic uint64_t free_fb_page_null;
    _Atomic uint64_t free_fb_not_mine;
    _Atomic uint64_t free_fb_other;
} PoolV1FlattenStats;

static PoolV1FlattenStats g_pool_v1_flat_stats = {0};

static inline void pool_v1_flat_stats_dump(void) {
    if (!hak_pool_v1_flatten_stats_enabled()) return;
    fprintf(stderr,
            "[POOL_V1_FLAT] alloc_tls_hit=%llu alloc_fb=%llu free_tls_hit=%llu "
            "free_fb=%llu page_null=%llu not_mine=%llu other=%llu\n",
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_tls_hit, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other, memory_order_relaxed));
}

__attribute__((destructor)) static void pool_v1_flatten_stats_destructor(void) {
    pool_v1_flat_stats_dump();
}

// Thin helper to keep the hot path straight-line when converting a PoolBlock to
// a user pointer. All sampling/stat updates remain here so the callers stay
// small.
static inline void* hak_pool_block_to_user(PoolBlock* b, int class_idx, uintptr_t site_id) {
    void* raw = (void*)b;
    AllocHeader* hdr = (AllocHeader*)raw;
    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
    void* user = (char*)raw + HEADER_SIZE;
    mid_page_inuse_inc(raw);
    // xorshift32 step; count roughly 1 in 2^g_count_sample_exp allocations.
    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
        g_pool.hits[class_idx]++;
    }
    pagefault_telemetry_touch(PF_BUCKET_MID, user);
    return user;
}
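/*
 * Sampling sketch (illustrative, not compiled): the three xorshift lines above
 * are Marsaglia's xorshift32 PRNG. Masking with (2^k - 1) and comparing to 0
 * accepts roughly 1 in 2^k events, so stat counters are bumped on a sampled
 * subset of allocations instead of on every call. Standalone form, with k
 * standing in for g_count_sample_exp:
 *
 *   #include <stdint.h>
 *   static uint32_t rng = 0x9E3779B9u;            // any nonzero seed
 *   static int sampled(unsigned k) {
 *       rng ^= rng << 13;
 *       rng ^= rng >> 17;
 *       rng ^= rng << 5;
 *       return (rng & ((1u << k) - 1u)) == 0u;    // true ~1/2^k of the time
 *   }
 */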
// Legacy inline conversion used when the v2 helper is disabled.
static inline void* hak_pool_block_to_user_legacy(PoolBlock* b, int class_idx, uintptr_t site_id) {
    void* raw = (void*)b;
    AllocHeader* hdr = (AllocHeader*)raw;
    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
    void* user = (char*)raw + HEADER_SIZE;
    mid_page_inuse_inc(raw);
    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
        g_pool.hits[class_idx]++;
    }
    pagefault_telemetry_touch(PF_BUCKET_MID, user);
    return user;
}

static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
    // Debug: IMMEDIATE output to verify function is called
    static int first_call = 1;
    if (__builtin_expect(first_call, 0)) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc FIRST CALL EVER!\n");
        first_call = 0;
    }
    if (__builtin_expect(size == 40960, 0)) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc called with 40KB (Bridge class 5)\n");
    }

    hak_pool_init();  // pthread_once() ensures thread-safe init (no data race!)

    // Debug for 33-41KB allocations
    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size);
    }

    // P1.7 guard: allow pool by default even when called from wrappers.
    // Only block if explicitly disabled via env or during nested recursion.
    extern int hak_in_wrapper(void);
    extern __thread int g_hakmem_lock_depth;
    int in_wrapper = hak_in_wrapper();
    if (in_wrapper && g_hakmem_lock_depth > 1) {
        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
            HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth);
        }
        return NULL;
    }
    if (in_wrapper && !g_wrap_l2_enabled) {
        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
            HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled);
        }
        return NULL;
    }
    if (!hak_pool_is_poolable(size)) {
        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
            HAKMEM_LOG("[Pool] REJECTED: not poolable (min=%d, max=%d)\n", POOL_MIN_SIZE, POOL_MAX_SIZE);
        }
        return NULL;
    }

    // Get class and shard indices
    int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) {
        if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
            HAKMEM_LOG("[Pool] REJECTED: class_idx=%d (size=%zu not mapped)\n", class_idx, size);
        }
        return NULL;
    }

    // Experimental PoolHotBox v2 (Hot path) — currently structure only.
    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
        if (p) return p;
        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
    }
    if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
        HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
    }

    // MF2: Per-Page Sharding path
    if (g_mf2_enabled) {
        return mf2_alloc_fast(class_idx, size, site_id);
    }

    // OLD PATH: TLS fast path (ring then local LIFO); drain TC only when needed
    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
        HKM_TIME_START(t_tc_drain);
        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
            if (ring->top > 0) {
                HKM_TIME_START(t_ring_pop0);
                PoolBlock* tlsb = ring->items[--ring->top];
                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
                return hak_pool_block_to_user(tlsb, class_idx, site_id);
            }
        } else {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
        }
    }
    if (g_tls_ring_enabled) {
        if (ring->top == 0) {
            atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
        }
        if (ring->top > 0) {
            HKM_TIME_START(t_ring_pop1);
            PoolBlock* tlsb = ring->items[--ring->top];
            HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
            return hak_pool_block_to_user(tlsb, class_idx, site_id);
        }
    }
    if (g_tls_bin[class_idx].lo_head) {
        HKM_TIME_START(t_lifo_pop0);
        PoolBlock* b = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = b->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
        return hak_pool_block_to_user(b, class_idx, site_id);
    }

    // Compute shard only when we need to access shared structures
    int shard_idx = hak_pool_get_shard_index(site_id);

    // Try to batch-pop from a non-empty shard using trylock to fill TLS ring
    if (g_tls_ring_enabled) {
        int s0 = choose_nonempty_shard(class_idx, shard_idx);
        for (int probe = 0; probe < g_trylock_probes; ++probe) {
            int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
            pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
            atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
            if (pthread_mutex_trylock(l) == 0) {
                atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
                // First, drain any remote frees into freelist
                if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) {
                    drain_remote_locked(class_idx, s);
                }
                PoolBlock* head = g_pool.freelist[class_idx][s];
                int to_ring = POOL_L2_RING_CAP - ring->top;
                if (to_ring < 0) to_ring = 0;
                while (head && to_ring-- > 0) {
                    PoolBlock* nxt = head->next;
                    ring->items[ring->top++] = head;
                    head = nxt;
                }
                while (head) {
                    PoolBlock* nxt = head->next;
                    head->next = g_tls_bin[class_idx].lo_head;
                    g_tls_bin[class_idx].lo_head = head;
                    g_tls_bin[class_idx].lo_count++;
                    head = nxt;
                }
                g_pool.freelist[class_idx][s] = head;
                if (!head) clear_nonempty_bit(class_idx, s);
                pthread_mutex_unlock(l);
                if (ring->top > 0) {
                    PoolBlock* tlsb = ring->items[--ring->top];
                    return hak_pool_block_to_user(tlsb, class_idx, site_id);
                }
            }
        }
    }
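/*
 * Contention sketch (illustrative, not compiled): the batch-pop above uses
 * pthread_mutex_trylock() so an allocating thread never blocks on a busy
 * shard; it just probes the next one. Reduced to its skeleton, with toy types
 * and the NSHARDS/NPROBES constants standing in for the pool's real
 * structures:
 *
 *   #include <pthread.h>
 *   typedef struct Node { struct Node* next; } Node;
 *   Node*           shard_head[NSHARDS];        // shared freelists
 *   pthread_mutex_t shard_lock[NSHARDS];
 *
 *   // Move up to 'cap' nodes from the first lockable shard into 'ring'.
 *   static int batch_pop(int s0, Node** ring, int cap) {
 *       int n = 0;
 *       for (int probe = 0; probe < NPROBES; ++probe) {
 *           int s = (s0 + probe) & (NSHARDS - 1);  // NSHARDS is a power of two
 *           if (pthread_mutex_trylock(&shard_lock[s]) != 0) continue;
 *           Node* head = shard_head[s];
 *           while (head && n < cap) { ring[n++] = head; head = head->next; }
 *           shard_head[s] = head;
 *           pthread_mutex_unlock(&shard_lock[s]);
 *           if (n) break;
 *       }
 *       return n;
 *   }
 *
 * The real path additionally drains remote frees under the lock and spills the
 * remainder of the list to a thread-local LIFO.
 */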
    // Try TLS active pages (owner-only local bump-run, up to 3)
    PoolTLSPage* ap = NULL;
    if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0
        && g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end)
        ap = &g_tls_active_page_a[class_idx];
    else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0
             && g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end)
        ap = &g_tls_active_page_b[class_idx];
    else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0
             && g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end)
        ap = &g_tls_active_page_c[class_idx];
    if (ap) {
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            int need = POOL_L2_RING_CAP - ring->top;
            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
        }
        PoolBlock* b = NULL;
        if (ring->top > 0) {
            b = ring->items[--ring->top];
        } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
            b = (PoolBlock*)(void*)ap->bump;
            ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
            ap->count--;
            if (ap->bump >= ap->end || ap->count <= 0) { ap->page = NULL; ap->count = 0; }
        }
        if (b) {
            g_pool.hits[class_idx]++;
            return hak_pool_block_to_user(b, class_idx, site_id);
        }
    }

    // Lock the shard freelist for this (class, shard)
    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
    HKM_TIME_START(t_lock);
    struct timespec ts_lk1;
    int lk1 = hkm_prof_begin(&ts_lk1);
    (void)ts_lk1; (void)lk1;  // Unused profiling variables
    pthread_mutex_lock(lock);
    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);

    // Try to pop from freelist
    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
    if (!block) {
        // Before refilling, try draining remote stack and simple shard steal
        int stole = 0;
        const FrozenPolicy* pol = hkm_policy_get();
        if (pol) {
            uint16_t cap = 0;
            if (class_idx < 5) cap = pol->mid_cap[class_idx];
            else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
            else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
            // Drain remotes
            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
                drain_remote_locked(class_idx, shard_idx);
                block = g_pool.freelist[class_idx][shard_idx];
            }
            // Light shard steal when over cap
            if (!block && cap > 0 && g_pool.pages_by_class[class_idx] >= cap) {
                HKM_TIME_START(t_steal);
                for (int d = 1; d <= 4 && !stole; d++) {
                    int s1 = (shard_idx + d) & (POOL_NUM_SHARDS - 1);
                    int s2 = (shard_idx - d) & (POOL_NUM_SHARDS - 1);
                    if (is_shard_nonempty(class_idx, s1)) {
                        pthread_mutex_t* l2 = &g_pool.freelist_locks[class_idx][s1].m;
                        pthread_mutex_lock(l2);
                        PoolBlock* b2 = g_pool.freelist[class_idx][s1];
                        if (b2) {
                            g_pool.freelist[class_idx][s1] = b2->next;
                            if (!g_pool.freelist[class_idx][s1]) clear_nonempty_bit(class_idx, s1);
                            block = b2;
                            stole = 1;
                        }
                        pthread_mutex_unlock(l2);
                    }
                    if (!stole && is_shard_nonempty(class_idx, s2)) {
                        pthread_mutex_t* l3 = &g_pool.freelist_locks[class_idx][s2].m;
                        pthread_mutex_lock(l3);
                        PoolBlock* b3 = g_pool.freelist[class_idx][s2];
                        if (b3) {
                            g_pool.freelist[class_idx][s2] = b3->next;
                            if (!g_pool.freelist[class_idx][s2]) clear_nonempty_bit(class_idx, s2);
                            block = b3;
                            stole = 1;
                        }
                        pthread_mutex_unlock(l3);
                    }
                }
                HKM_TIME_END(HKM_CAT_SHARD_STEAL, t_steal);
            }
        }
        if (!stole && !block) {
            // Freelist empty, refill page
            PoolTLSPage* tap = NULL;
            if (g_tls_active_page_a[class_idx].page == NULL || g_tls_active_page_a[class_idx].count == 0)
                tap = &g_tls_active_page_a[class_idx];
            else if (g_tls_active_page_b[class_idx].page == NULL || g_tls_active_page_b[class_idx].count == 0)
                tap = &g_tls_active_page_b[class_idx];
            else if (g_tls_active_page_c[class_idx].page == NULL || g_tls_active_page_c[class_idx].count == 0)
                tap = &g_tls_active_page_c[class_idx];
            else
                tap = &g_tls_active_page_a[class_idx];
            HKM_TIME_START(t_alloc_page);
            if (alloc_tls_page(class_idx, tap)) {
                HKM_TIME_END(HKM_CAT_POOL_ALLOC_TLS_PAGE, t_alloc_page);
                pthread_mutex_unlock(lock);
                // Top-up ring and return
                ap = tap;
                if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                    int need = POOL_L2_RING_CAP - ring->top;
                    (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
                }
                PoolBlock* takeb = NULL;
                if (ring->top > 0) {
                    HKM_TIME_START(t_ring_pop2);
                    takeb = ring->items[--ring->top];
                    HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop2);
                } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
                    takeb = (PoolBlock*)(void*)ap->bump;
                    ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
                    ap->count--;
                    if (ap->bump >= ap->end || ap->count == 0) { ap->page = NULL; ap->count = 0; }
                }
                return hak_pool_block_to_user(takeb, class_idx, site_id);
            }
            HKM_TIME_START(t_refill);
            struct timespec ts_rf;
            int rf = hkm_prof_begin(&ts_rf);
            (void)ts_rf; (void)rf;
            int ok = refill_freelist(class_idx, shard_idx);
            HKM_TIME_END(HKM_CAT_POOL_REFILL, t_refill);
            hkm_prof_end(rf, HKP_POOL_REFILL, &ts_rf);
            if (!ok) {
                t_pool_rng ^= t_pool_rng << 13;
                t_pool_rng ^= t_pool_rng >> 17;
                t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
                    /* sampled miss counter (elided in the source) */
                }
                pthread_mutex_unlock(lock);
                return NULL;
            }
            block = g_pool.freelist[class_idx][shard_idx];
        }
    }

    // Pop the block and adopt its page for this thread
    g_pool.freelist[class_idx][shard_idx] = block->next;
    mid_desc_adopt(block, class_idx, (uint64_t)(uintptr_t)pthread_self());
    if (g_pool.freelist[class_idx][shard_idx] == NULL) clear_nonempty_bit(class_idx, shard_idx);
    pthread_mutex_unlock(lock);

    // Store to TLS then pop
    PoolBlock* take;
    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
        ring->items[ring->top++] = block;
        take = ring->items[--ring->top];
    } else {
        block->next = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = block;
        g_tls_bin[class_idx].lo_count++;
        if (g_tls_ring_enabled && ring->top > 0) {
            take = ring->items[--ring->top];
        } else {
            take = g_tls_bin[class_idx].lo_head;
            g_tls_bin[class_idx].lo_head = take->next;
            if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        }
    }
    return hak_pool_block_to_user(take, class_idx, site_id);
}
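/*
 * Layout sketch (illustrative, not compiled): every pool block is handed out
 * with an AllocHeader prepended, so the raw block and the user pointer are a
 * fixed HEADER_SIZE apart and either can be recovered from the other:
 *
 *   block (raw)                 user pointer
 *   v                           v
 *   +---------------------------+----------------------------+
 *   | AllocHeader (HEADER_SIZE) | payload (g_class_sizes[c]) |
 *   +---------------------------+----------------------------+
 *
 *   void* user = (char*)raw + HEADER_SIZE;   // alloc path, above
 *   void* raw  = (char*)user - HEADER_SIZE;  // free path, below
 */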
static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_id) {
    if (!ptr) return;
    hak_pool_init();
    if (!hak_pool_is_poolable(size)) return;
    if (g_mf2_enabled) { mf2_free(ptr); return; }

    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
    int mid_by_desc = (d_desc != NULL);
    if (!mid_by_desc && g_hdr_light_enabled < 2) {
        if (hdr->magic != HAKMEM_MAGIC) {
            MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC);
            return;
        }
        if (hdr->method != ALLOC_METHOD_POOL) {
            MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL);
            return;
        }
    }
    int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
    if (class_idx < 0) return;

    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
        pool_hotbox_v2_record_free_call((uint32_t)class_idx);
        PoolBlock* raw_block_for_v2 = (PoolBlock*)raw;
        if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) {
            return;
        }
        pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);
    }

    PoolBlock* block = (PoolBlock*)raw;
    uint64_t owner_tid = 0;
    if (d_desc) owner_tid = d_desc->owner_tid;
    else if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid;
    const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();

    if (g_pool.tls_free_enabled) {
        const int same_thread = (owner_tid != 0 && owner_tid == self_tid);
        if (same_thread) {
            PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
            if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                ring->items[ring->top++] = block;
            } else {
                block->next = g_tls_bin[class_idx].lo_head;
                g_tls_bin[class_idx].lo_head = block;
                g_tls_bin[class_idx].lo_count++;
                if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
                    size_t spill = g_tls_bin[class_idx].lo_count / 2;
                    int shard = hak_pool_get_shard_index(site_id);
                    // Spill half of local freelist to remote freelist
                    while (spill-- && g_tls_bin[class_idx].lo_head) {
                        PoolBlock* b = g_tls_bin[class_idx].lo_head;
                        g_tls_bin[class_idx].lo_head = b->next;
                        g_tls_bin[class_idx].lo_count--;
                        HKM_TIME_START(t_remote_push1);
                        uintptr_t old_head;
                        do {
                            old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                            b->next = (PoolBlock*)old_head;
                        } while (!atomic_compare_exchange_weak_explicit(
                                     &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b,
                                     memory_order_release, memory_order_relaxed));
                        atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
                        HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1);
                    }
                    set_nonempty_bit(class_idx, shard);
                }
            }
        } else {
            if (g_tc_enabled && owner_tid != 0) {
                MidTC* otc = mid_tc_lookup_by_tid(owner_tid);
                if (otc) {
                    mid_tc_push(otc, class_idx, block);
                    return;
                }
            }
            int shard = hak_pool_get_shard_index(site_id);
            uintptr_t old_head;
            HKM_TIME_START(t_remote_push2);
            do {
                old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                block->next = (PoolBlock*)old_head;
            } while (!atomic_compare_exchange_weak_explicit(
                         &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block,
                         memory_order_release, memory_order_relaxed));
            atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
            HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2);
            set_nonempty_bit(class_idx, shard);
        }
    } else {
        int shard_idx2 = hak_pool_get_shard_index(site_id);
        pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m;
        pthread_mutex_lock(lock);
        block->next = g_pool.freelist[class_idx][shard_idx2];
        g_pool.freelist[class_idx][shard_idx2] = block;
        set_nonempty_bit(class_idx, shard_idx2);
        pthread_mutex_unlock(lock);
    }

    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
        /* sampled free counter (elided in the source) */
    }
}

static inline int hak_pool_mid_lookup_v2_impl(void* ptr, size_t* out_size) {
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            int c = (int)page->class_idx;
            if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
            size_t sz = g_class_sizes[c];
            if (sz == 0) return 0;
            if (out_size) *out_size = sz;
            return 1;
        }
    }
    MidPageDesc* d = mid_desc_lookup_cached(ptr);
    if (!d) return 0;
    int c = (int)d->class_idx;
    if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
    size_t sz = g_class_sizes[c];
    if (sz == 0) return 0;
    if (out_size) *out_size = sz;
    return 1;
}
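/*
 * Remote-free sketch (illustrative, not compiled): cross-thread frees above
 * are pushed onto a per-(class, shard) lock-free LIFO (a Treiber stack) with
 * a CAS retry loop; the owning shard later drains it under its mutex. A
 * minimal standalone form with C11 atomics:
 *
 *   #include <stdatomic.h>
 *   #include <stdint.h>
 *   typedef struct Node { struct Node* next; } Node;
 *   static _Atomic uintptr_t remote_head;        // 0 = empty
 *
 *   static void remote_push(Node* n) {
 *       uintptr_t old;
 *       do {
 *           old = atomic_load_explicit(&remote_head, memory_order_acquire);
 *           n->next = (Node*)old;
 *       } while (!atomic_compare_exchange_weak_explicit(
 *                    &remote_head, &old, (uintptr_t)n,
 *                    memory_order_release, memory_order_relaxed));
 *   }
 *
 *   static Node* remote_drain(void) {            // single consumer, under lock
 *       return (Node*)atomic_exchange_explicit(&remote_head, 0,
 *                                              memory_order_acquire);
 *   }
 *
 * Push-only CAS sidesteps the classic ABA problem of lock-free pops; the pool
 * only ever drains the whole stack at once, as remote_drain() does here.
 */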
static inline void hak_pool_free_fast_v2_impl(void* ptr, uintptr_t site_id) {
    if (!ptr || !g_pool.initialized) return;
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) { mf2_free(ptr); return; }
    }
    MidPageDesc* d = mid_desc_lookup_cached(ptr);
    if (!d) return;
    size_t sz = g_class_sizes[(int)d->class_idx];
    if (sz == 0) return;
    hak_pool_free(ptr, sz, site_id);
}

static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) {
    // Debug: IMMEDIATE output to verify function is called
    static int first_call = 1;
    if (first_call) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc FIRST CALL EVER!\n");
        first_call = 0;
    }
    if (size == 40960) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc called with 40KB (Bridge class 5)\n");
    }

    hak_pool_init();  // pthread_once() ensures thread-safe init (no data race!)

    // Debug for 33-41KB allocations
    if (size >= 33000 && size <= 41000) {
        HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size);
    }

    // P1.7 guard: allow pool by default even when called from wrappers.
    // Only block if explicitly disabled via env or during nested recursion.
    extern int hak_in_wrapper(void);
    extern __thread int g_hakmem_lock_depth;
    int in_wrapper = hak_in_wrapper();
    if (in_wrapper && g_hakmem_lock_depth > 1) {
        if (size >= 33000 && size <= 41000) {
            HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth);
        }
        return NULL;
    }
    if (in_wrapper && !g_wrap_l2_enabled) {
        if (size >= 33000 && size <= 41000) {
            HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled);
        }
        return NULL;
    }
    if (!hak_pool_is_poolable(size)) {
        if (size >= 33000 && size <= 41000) {
            HAKMEM_LOG("[Pool] REJECTED: not poolable (min=%d, max=%d)\n", POOL_MIN_SIZE, POOL_MAX_SIZE);
        }
        return NULL;
    }

    // Get class and shard indices
    int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) {
        if (size >= 33000 && size <= 41000) {
            HAKMEM_LOG("[Pool] REJECTED: class_idx=%d (size=%zu not mapped)\n", class_idx, size);
        }
        return NULL;
    }
    if (size >= 33000 && size <= 41000) {
        HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
    }

    // MF2: Per-Page Sharding path
    if (g_mf2_enabled) {
        return mf2_alloc_fast(class_idx, size, site_id);
    }

    // OLD PATH: TLS fast path (ring then local LIFO); drain TC only when needed
    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
        HKM_TIME_START(t_tc_drain);
        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
            if (ring->top > 0) {
                HKM_TIME_START(t_ring_pop0);
                PoolBlock* tlsb = ring->items[--ring->top];
                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
                void* raw = (void*)tlsb;
                AllocHeader* hdr = (AllocHeader*)raw;
                mid_set_header(hdr, g_class_sizes[class_idx], site_id);
                void* user0 = (char*)raw + HEADER_SIZE;
                mid_page_inuse_inc(raw);
                t_pool_rng ^= t_pool_rng << 13;
                t_pool_rng ^= t_pool_rng >> 17;
                t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
                    g_pool.hits[class_idx]++;
                }
                pagefault_telemetry_touch(PF_BUCKET_MID, user0);
                return user0;
            }
        } else {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
        }
    }
    if (g_tls_ring_enabled) {
        if (ring->top == 0) {
            atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
        }
        if (ring->top > 0) {
            HKM_TIME_START(t_ring_pop1);
            PoolBlock* tlsb = ring->items[--ring->top];
            HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
            void* raw = (void*)tlsb;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            void* user1 = (char*)raw + HEADER_SIZE;
            mid_page_inuse_inc(raw);
            t_pool_rng ^= t_pool_rng << 13;
            t_pool_rng ^= t_pool_rng >> 17;
            t_pool_rng ^= t_pool_rng << 5;
            if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
                g_pool.hits[class_idx]++;
            }
            pagefault_telemetry_touch(PF_BUCKET_MID, user1);
            return user1;
        }
    }
    if (g_tls_bin[class_idx].lo_head) {
        HKM_TIME_START(t_lifo_pop0);
        PoolBlock* b = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = b->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
        void* raw = (void*)b;
        AllocHeader* hdr = (AllocHeader*)raw;
        mid_set_header(hdr, g_class_sizes[class_idx], site_id);
        void* user2 = (char*)raw + HEADER_SIZE;
        mid_page_inuse_inc(raw);
        t_pool_rng ^= t_pool_rng << 13;
        t_pool_rng ^= t_pool_rng >> 17;
        t_pool_rng ^= t_pool_rng << 5;
        if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
            g_pool.hits[class_idx]++;
        }
        pagefault_telemetry_touch(PF_BUCKET_MID, user2);
        return user2;
    }

    // Compute shard only when we need to access shared structures
    int shard_idx = hak_pool_get_shard_index(site_id);

    // Try to batch-pop from a non-empty shard using trylock to fill TLS ring
    if (g_tls_ring_enabled) {
        int s0 = choose_nonempty_shard(class_idx, shard_idx);
        for (int probe = 0; probe < g_trylock_probes; ++probe) {
            int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
            pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
            atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
            if (pthread_mutex_trylock(l) == 0) {
                atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
                if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) {
                    drain_remote_locked(class_idx, s);
                }
                PoolBlock* head = g_pool.freelist[class_idx][s];
                int to_ring = POOL_L2_RING_CAP - ring->top;
                if (to_ring < 0) to_ring = 0;
                while (head && to_ring-- > 0) {
                    PoolBlock* nxt = head->next;
                    ring->items[ring->top++] = head;
                    head = nxt;
                }
                while (head) {
                    PoolBlock* nxt = head->next;
                    head->next = g_tls_bin[class_idx].lo_head;
                    g_tls_bin[class_idx].lo_head = head;
                    g_tls_bin[class_idx].lo_count++;
                    head = nxt;
                }
                g_pool.freelist[class_idx][s] = head;
                if (!head) clear_nonempty_bit(class_idx, s);
                pthread_mutex_unlock(l);
                if (ring->top > 0) {
                    PoolBlock* tlsb = ring->items[--ring->top];
                    void* raw = (void*)tlsb;
                    AllocHeader* hdr = (AllocHeader*)raw;
                    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
                    mid_page_inuse_inc(raw);
                    t_pool_rng ^= t_pool_rng << 13;
                    t_pool_rng ^= t_pool_rng >> 17;
                    t_pool_rng ^= t_pool_rng << 5;
                    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
                        g_pool.hits[class_idx]++;
                    }
                    void* user = (char*)raw + HEADER_SIZE;
                    pagefault_telemetry_touch(PF_BUCKET_MID, user);
                    return user;
                }
            }
        }
    }

    // Try TLS active pages (owner-only local bump-run, up to 3)
    PoolTLSPage* ap = NULL;
    if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0
        && g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end)
        ap = &g_tls_active_page_a[class_idx];
    else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0
             && g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end)
        ap = &g_tls_active_page_b[class_idx];
    else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0
             && g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end)
        ap = &g_tls_active_page_c[class_idx];
    if (ap) {
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            int need = POOL_L2_RING_CAP - ring->top;
            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
        }
        PoolBlock* b = NULL;
        if (ring->top > 0) {
            b = ring->items[--ring->top];
        } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
            b = (PoolBlock*)(void*)ap->bump;
            ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
            ap->count--;
            if (ap->bump >= ap->end || ap->count <= 0) { ap->page = NULL; ap->count = 0; }
        }
        if (b) {
            void* raw = (void*)b;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            mid_page_inuse_inc(raw);
            g_pool.hits[class_idx]++;
            return (char*)raw + HEADER_SIZE;
        }
    }
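/*
 * Bump-run sketch (illustrative, not compiled): an "active page" above is a
 * run of equally sized slots carved out of one page; allocation is a pointer
 * bump plus a count decrement, with no lists and no locks. Toy version with
 * placeholder fields mirroring PoolTLSPage's page/bump/end/count:
 *
 *   typedef struct {
 *       char* page;   // page base (NULL = inactive)
 *       char* bump;   // next free slot
 *       char* end;    // one past the last slot
 *       int   count;  // slots remaining
 *   } BumpRun;
 *
 *   static void* bump_alloc(BumpRun* r, size_t slot_size) {
 *       if (!r->page || r->count <= 0 || r->bump >= r->end) return NULL;
 *       void* p = r->bump;
 *       r->bump += slot_size;                      // header + payload in the pool
 *       if (--r->count == 0 || r->bump >= r->end)  // run exhausted: retire it
 *           r->page = NULL;
 *       return p;
 *   }
 */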
    // Lock the shard freelist for this (class, shard)
    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
    HKM_TIME_START(t_lock);
    struct timespec ts_lk1;
    int lk1 = hkm_prof_begin(&ts_lk1);
    (void)ts_lk1; (void)lk1;  // Unused profiling variables
    pthread_mutex_lock(lock);
    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);

    // Try to pop from freelist
    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
    if (!block) {
        // Before refilling, try draining remote stack and simple shard steal
        int stole = 0;
        const FrozenPolicy* pol = hkm_policy_get();
        if (pol) {
            uint16_t cap = 0;
            if (class_idx < 5) cap = pol->mid_cap[class_idx];
            else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
            else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
            // Drain remotes
            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
                drain_remote_locked(class_idx, shard_idx);
                block = g_pool.freelist[class_idx][shard_idx];
            }
            // Light shard steal when over cap
            if (!block && cap > 0 && g_pool.pages_by_class[class_idx] >= cap) {
                HKM_TIME_START(t_steal);
                for (int d = 1; d <= 4 && !stole; d++) {
                    int s1 = (shard_idx + d) & (POOL_NUM_SHARDS - 1);
                    int s2 = (shard_idx - d) & (POOL_NUM_SHARDS - 1);
                    if (is_shard_nonempty(class_idx, s1)) {
                        pthread_mutex_t* l2 = &g_pool.freelist_locks[class_idx][s1].m;
                        pthread_mutex_lock(l2);
                        PoolBlock* b2 = g_pool.freelist[class_idx][s1];
                        if (b2) {
                            g_pool.freelist[class_idx][s1] = b2->next;
                            if (!g_pool.freelist[class_idx][s1]) clear_nonempty_bit(class_idx, s1);
                            block = b2;
                            stole = 1;
                        }
                        pthread_mutex_unlock(l2);
                    }
                    if (!stole && is_shard_nonempty(class_idx, s2)) {
                        pthread_mutex_t* l3 = &g_pool.freelist_locks[class_idx][s2].m;
                        pthread_mutex_lock(l3);
                        PoolBlock* b3 = g_pool.freelist[class_idx][s2];
                        if (b3) {
                            g_pool.freelist[class_idx][s2] = b3->next;
                            if (!g_pool.freelist[class_idx][s2]) clear_nonempty_bit(class_idx, s2);
                            block = b3;
                            stole = 1;
                        }
                        pthread_mutex_unlock(l3);
                    }
                }
                HKM_TIME_END(HKM_CAT_SHARD_STEAL, t_steal);
            }
        }
        if (!stole && !block) {
            // Freelist empty, refill page
            PoolTLSPage* tap = NULL;
            if (g_tls_active_page_a[class_idx].page == NULL || g_tls_active_page_a[class_idx].count == 0)
                tap = &g_tls_active_page_a[class_idx];
            else if (g_tls_active_page_b[class_idx].page == NULL || g_tls_active_page_b[class_idx].count == 0)
                tap = &g_tls_active_page_b[class_idx];
            else if (g_tls_active_page_c[class_idx].page == NULL || g_tls_active_page_c[class_idx].count == 0)
                tap = &g_tls_active_page_c[class_idx];
            else
                tap = &g_tls_active_page_a[class_idx];
            HKM_TIME_START(t_alloc_page);
            if (alloc_tls_page(class_idx, tap)) {
                HKM_TIME_END(HKM_CAT_POOL_ALLOC_TLS_PAGE, t_alloc_page);
                pthread_mutex_unlock(lock);
                // Top-up ring and return
                ap = tap;
                if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                    int need = POOL_L2_RING_CAP - ring->top;
                    (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
                }
                PoolBlock* takeb = NULL;
                if (ring->top > 0) {
                    HKM_TIME_START(t_ring_pop2);
                    takeb = ring->items[--ring->top];
                    HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop2);
                } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
                    takeb = (PoolBlock*)(void*)ap->bump;
                    ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
                    ap->count--;
                    if (ap->bump >= ap->end || ap->count == 0) { ap->page = NULL; ap->count = 0; }
                }
                void* raw2 = (void*)takeb;
                AllocHeader* hdr2 = (AllocHeader*)raw2;
                mid_set_header(hdr2, g_class_sizes[class_idx], site_id);
                void* user3 = (char*)raw2 + HEADER_SIZE;
                mid_page_inuse_inc(raw2);
                g_pool.hits[class_idx]++;
                pagefault_telemetry_touch(PF_BUCKET_MID, user3);
                return user3;
            }
            HKM_TIME_START(t_refill);
            struct timespec ts_rf;
            int rf = hkm_prof_begin(&ts_rf);
            (void)ts_rf; (void)rf;
            int ok = refill_freelist(class_idx, shard_idx);
            HKM_TIME_END(HKM_CAT_POOL_REFILL, t_refill);
            hkm_prof_end(rf, HKP_POOL_REFILL, &ts_rf);
            if (!ok) {
                t_pool_rng ^= t_pool_rng << 13;
                t_pool_rng ^= t_pool_rng >> 17;
                t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
                    /* sampled miss counter (elided in the source) */
                }
                pthread_mutex_unlock(lock);
                return NULL;
            }
            block = g_pool.freelist[class_idx][shard_idx];
        }
    }

    // Pop the block and adopt its page for this thread
    g_pool.freelist[class_idx][shard_idx] = block->next;
    mid_desc_adopt(block, class_idx, (uint64_t)(uintptr_t)pthread_self());
    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
        g_pool.hits[class_idx]++;
    }
    if (g_pool.freelist[class_idx][shard_idx] == NULL) clear_nonempty_bit(class_idx, shard_idx);
    pthread_mutex_unlock(lock);

    // Store to TLS then pop
    PoolBlock* take;
    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
        ring->items[ring->top++] = block;
        take = ring->items[--ring->top];
    } else {
        block->next = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = block;
        g_tls_bin[class_idx].lo_count++;
        if (g_tls_ring_enabled && ring->top > 0) {
            take = ring->items[--ring->top];
        } else {
            take = g_tls_bin[class_idx].lo_head;
            g_tls_bin[class_idx].lo_head = take->next;
            if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        }
    }
    void* raw = (void*)take;
    AllocHeader* hdr = (AllocHeader*)raw;
    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
    void* user4 = (char*)raw + HEADER_SIZE;
    mid_page_inuse_inc(raw);
    pagefault_telemetry_touch(PF_BUCKET_MID, user4);
    return user4;
}

static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_id) {
    if (!ptr) return;
    hak_pool_init();
    if (!hak_pool_is_poolable(size)) return;
    if (g_mf2_enabled) { mf2_free(ptr); return; }

    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    int mid_by_desc = 0;
    MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
    if (d_desc) mid_by_desc = 1;
    if (!mid_by_desc && g_hdr_light_enabled < 2) {
        if (hdr->magic != HAKMEM_MAGIC) {
            MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC);
            return;
        }
        if (hdr->method != ALLOC_METHOD_POOL) {
            MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL);
            return;
        }
    }
    int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
    if (class_idx < 0) return;

    PoolBlock* block = (PoolBlock*)raw;
    if (g_pool.tls_free_enabled) {
        int same_thread = 0;
        if (g_hdr_light_enabled >= 1) {
            MidPageDesc* d = mid_desc_lookup_cached(raw);
            if (d && d->owner_tid != 0 && d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) {
                same_thread = 1;
            }
        } else if (hdr->owner_tid != 0 && hdr->owner_tid == (uint64_t)(uintptr_t)pthread_self()) {
            same_thread = 1;
        }
        if (same_thread) {
            PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
            if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                ring->items[ring->top++] = block;
            } else {
                block->next = g_tls_bin[class_idx].lo_head;
                g_tls_bin[class_idx].lo_head = block;
                g_tls_bin[class_idx].lo_count++;
                if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
                    size_t spill = g_tls_bin[class_idx].lo_count / 2;
                    int shard = hak_pool_get_shard_index(site_id);
                    // Spill half of local freelist to remote freelist
                    while (spill-- && g_tls_bin[class_idx].lo_head) {
                        PoolBlock* b = g_tls_bin[class_idx].lo_head;
                        g_tls_bin[class_idx].lo_head = b->next;
                        g_tls_bin[class_idx].lo_count--;
                        HKM_TIME_START(t_remote_push1);
                        uintptr_t old_head;
                        do {
                            old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                            b->next = (PoolBlock*)old_head;
                        } while (!atomic_compare_exchange_weak_explicit(
                                     &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b,
                                     memory_order_release, memory_order_relaxed));
                        atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
                        HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1);
                    }
                    set_nonempty_bit(class_idx, shard);
                }
            }
        } else {
            if (g_tc_enabled) {
                uint64_t owner_tid = 0;
                if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid;
                if (owner_tid == 0) {
                    MidPageDesc* d = mid_desc_lookup_cached(raw);
                    if (d) owner_tid = d->owner_tid;
                }
                if (owner_tid != 0) {
                    MidTC* otc = mid_tc_lookup_by_tid(owner_tid);
                    if (otc) {
                        mid_tc_push(otc, class_idx, block);
                        return;
                    }
                }
            }
            int shard = hak_pool_get_shard_index(site_id);
            uintptr_t old_head;
            HKM_TIME_START(t_remote_push2);
            do {
                old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                block->next = (PoolBlock*)old_head;
            } while (!atomic_compare_exchange_weak_explicit(
                         &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block,
                         memory_order_release, memory_order_relaxed));
            atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
            HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2);
            set_nonempty_bit(class_idx, shard);
        }
    } else {
        int shard_idx2 = hak_pool_get_shard_index(site_id);
        pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m;
        pthread_mutex_lock(lock);
        block->next = g_pool.freelist[class_idx][shard_idx2];
        g_pool.freelist[class_idx][shard_idx2] = block;
        set_nonempty_bit(class_idx, shard_idx2);
        pthread_mutex_unlock(lock);
    }

    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1u)) == 0u) {
        /* sampled free counter (elided in the source) */
    }
}

// v1 flatten alloc: TLS ring/LIFO fast path only; anything else falls back to
// the full v1 implementation.
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
    if (!hak_pool_is_poolable(size)) return NULL;
    int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) return NULL;
    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
    if (g_tls_ring_enabled && ring->top > 0) {
        PoolBlock* tlsb = ring->items[--ring->top];
        // Adopt shared pages to this thread so free can stay on the fast path.
        mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
        }
        return hak_pool_block_to_user(tlsb, class_idx, site_id);
    }
    if (g_tls_bin[class_idx].lo_head) {
        PoolBlock* b = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = b->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
        }
        return hak_pool_block_to_user(b, class_idx, site_id);
    }
    if (hak_pool_v1_flatten_stats_enabled()) {
        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
    }
    return hak_pool_try_alloc_v1_impl(size, site_id);
}

static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
    if (!ptr) return;
    if (!hak_pool_is_poolable(size)) return;
    void* raw = (char*)ptr - HEADER_SIZE;
    MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
    if (!d_desc) {
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
        }
        hak_pool_free_v1_impl(ptr, size, site_id);
        return;
    }
    int class_idx = (int)d_desc->class_idx;
    if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
        }
        hak_pool_free_v1_impl(ptr, size, site_id);
        return;
    }
    const uint64_t owner_tid = d_desc->owner_tid;
    const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
    if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
        PoolBlock* block = (PoolBlock*)raw;
        PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            ring->items[ring->top++] = block;
        } else {
            block->next = g_tls_bin[class_idx].lo_head;
            g_tls_bin[class_idx].lo_head = block;
            g_tls_bin[class_idx].lo_count++;
            if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
                size_t spill = g_tls_bin[class_idx].lo_count / 2;
                int shard = hak_pool_get_shard_index(site_id);
                while (spill-- && g_tls_bin[class_idx].lo_head) {
                    PoolBlock* b = g_tls_bin[class_idx].lo_head;
                    g_tls_bin[class_idx].lo_head = b->next;
                    g_tls_bin[class_idx].lo_count--;
                    uintptr_t old_head;
                    do {
                        old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                        b->next = (PoolBlock*)old_head;
                    } while (!atomic_compare_exchange_weak_explicit(
                                 &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b,
                                 memory_order_release, memory_order_relaxed));
                    atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
                }
                set_nonempty_bit(class_idx, shard);
            }
        }
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
        }
        return;
    }
    if (hak_pool_v1_flatten_stats_enabled()) {
        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
    }
    hak_pool_free_v1_impl(ptr, size, site_id);
}
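/*
 * Flatten fast-path sketch (illustrative, not compiled): the v1 "flatten"
 * pair above collapses the hot path to (1) page-descriptor lookup, (2) owner
 * check, (3) TLS push/pop, and punts everything else to the full v1 code.
 * The shape, with toy names standing in for the real machinery:
 *
 *   void fast_free(void* p) {
 *       Desc* d = desc_lookup(p);                  // page descriptor
 *       if (d && d->owner == self()) {
 *           tls_push(d->cls, p);                   // straight-line hit
 *           return;
 *       }
 *       slow_free(p);                              // no desc / not mine: full path
 *   }
 *
 * Desc, desc_lookup, tls_push, and slow_free are placeholders for
 * mid_desc_lookup_cached, the TLS ring/LIFO, and hak_pool_free_v1_impl.
 */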
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            int c = (int)page->class_idx;
            if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
            size_t sz = g_class_sizes[c];
            if (sz == 0) return 0;
            if (out_size) *out_size = sz;
            return 1;
        }
    }
    MidPageDesc* d = mid_desc_lookup_cached(ptr);
    if (!d) return 0;
    int c = (int)d->class_idx;
    if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
    size_t sz = g_class_sizes[c];
    if (sz == 0) return 0;
    if (out_size) *out_size = sz;
    return 1;
}

static inline void hak_pool_free_fast_v1_impl(void* ptr, uintptr_t site_id) {
    if (!ptr || !g_pool.initialized) return;
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) { mf2_free(ptr); return; }
    }
    MidPageDesc* d = mid_desc_lookup_cached(ptr);
    if (!d) return;
    size_t sz = g_class_sizes[(int)d->class_idx];
    if (sz == 0) return;
    hak_pool_free(ptr, sz, site_id);
}

// --- Public wrappers (env-gated) ----------------------------------------------

static inline int hak_pool_v2_route(void) { return hak_pool_v2_enabled(); }

void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
    if (!hak_pool_v2_route()) {
        if (hak_pool_v1_flatten_enabled()) {
            return hak_pool_try_alloc_v1_flat(size, site_id);
        }
        return hak_pool_try_alloc_v1_impl(size, site_id);
    }
    return hak_pool_try_alloc_v2_impl(size, site_id);
}

void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
    // Phase FREE-LEGACY-BREAKDOWN-1: pool v1 counter
    extern void free_path_stat_inc_pool_v1_fast(void);
    free_path_stat_inc_pool_v1_fast();
    if (!hak_pool_v2_route()) {
        if (hak_pool_v1_flatten_enabled()) {
            hak_pool_free_v1_flat(ptr, size, site_id);
        } else {
            hak_pool_free_v1_impl(ptr, size, site_id);
        }
        return;
    }
    hak_pool_free_v2_impl(ptr, size, site_id);
}

void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
    if (!hak_pool_v2_route()) {
        // Fast path lacks size; keep the existing v1 fast implementation even
        // when flatten is enabled, to avoid behavior drift.
        hak_pool_free_fast_v1_impl(ptr, site_id);
        return;
    }
    hak_pool_free_fast_v2_impl(ptr, site_id);
}

int hak_pool_mid_lookup(void* ptr, size_t* out_size) {
    if (!hak_pool_v2_route()) {
        return hak_pool_mid_lookup_v1_impl(ptr, out_size);
    }
    return hak_pool_mid_lookup_v2_impl(ptr, out_size);
}

#endif  // POOL_API_INC_H
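/*
 * Call-site sketch (illustrative, not compiled): the public wrappers above
 * route once, at first use, on HAKMEM_POOL_V2_ENABLED and
 * HAKMEM_POOL_V1_FLATTEN_ENABLED. Callers see try-alloc semantics: NULL means
 * "pool declined" (size not poolable, class unmapped, nested wrapper), not
 * out-of-memory. The site id derivation below is one plausible convention,
 * not something this header mandates:
 *
 *   uintptr_t site = (uintptr_t)__builtin_return_address(0);
 *   void* p = hak_pool_try_alloc(4096, site);
 *   if (p) {
 *       // ... use p ...
 *       hak_pool_free(p, 4096, site);
 *   } else {
 *       // pool declined; fall back to another allocation path
 *   }
 */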