// pool_core_api.inc.h — Box: L2 Pool core state and basic config
#ifndef POOL_CORE_API_INC_H
#define POOL_CORE_API_INC_H

// Global knobs (env-configurable)
static int g_wrap_l2_enabled = 0;   // env: HAKMEM_WRAP_L2=1 to allow in wrappers
static int g_shard_mix_enabled = 0; // env: HAKMEM_SHARD_MIX=1 to enable stronger hashing
static int g_tls_ring_enabled = 1;  // env: HAKMEM_POOL_TLS_RING=1 to enable TLS ring
static int g_trylock_probes = 3;    // env: HAKMEM_TRYLOCK_PROBES (1..8)
static int g_ring_return_div = 2;   // env: HAKMEM_RING_RETURN_DIV (2=half, 3=third)
static int g_tls_lo_max = 256;      // env: HAKMEM_TLS_LO_MAX (LIFO size cap)
int g_hdr_light_enabled = 0;        // env: HAKMEM_HDR_LIGHT=1/2
static int g_pool_min_bundle = 2;   // env: HAKMEM_POOL_MIN_BUNDLE
static int g_count_sample_exp = 10; // env: HAKMEM_POOL_COUNT_SAMPLE (0..16)
static __thread uint32_t t_pool_rng = 0x243f6a88u; // per-thread RNG for sampling

// Size class table (for O(1) lookup). Index 5/6 are Bridge classes for 32-64KB gap.
static size_t g_class_sizes[POOL_NUM_CLASSES] = {
    POOL_CLASS_2KB, POOL_CLASS_4KB, POOL_CLASS_8KB, POOL_CLASS_16KB,
    POOL_CLASS_32KB, POOL_CLASS_40KB, POOL_CLASS_52KB
};

__attribute__((unused)) static const int g_blocks_per_page[POOL_NUM_CLASSES] = {
    POOL_PAGE_SIZE / POOL_CLASS_2KB,  POOL_PAGE_SIZE / POOL_CLASS_4KB,
    POOL_PAGE_SIZE / POOL_CLASS_8KB,  POOL_PAGE_SIZE / POOL_CLASS_16KB,
    POOL_PAGE_SIZE / POOL_CLASS_32KB, POOL_PAGE_SIZE / POOL_CLASS_40KB,
    POOL_PAGE_SIZE / POOL_CLASS_52KB
};

// Global pool state
typedef struct {
    PoolBlock* freelist[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
    PaddedMutex freelist_locks[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
    atomic_uint_fast64_t nonempty_mask[POOL_NUM_CLASSES];
    atomic_uintptr_t remote_head[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
    atomic_uint remote_count[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
    uint64_t hits[POOL_NUM_CLASSES] __attribute__((aligned(64)));
    uint64_t misses[POOL_NUM_CLASSES] __attribute__((aligned(64)));
    uint64_t refills[POOL_NUM_CLASSES] __attribute__((aligned(64)));
    uint64_t frees[POOL_NUM_CLASSES] __attribute__((aligned(64)));
    uint64_t total_bytes_allocated __attribute__((aligned(64)));
    uint64_t total_pages_allocated __attribute__((aligned(64)));
    uint64_t pages_by_class[POOL_NUM_CLASSES] __attribute__((aligned(64)));
    int bundle_factor[POOL_NUM_CLASSES];
    uint64_t last_hits[POOL_NUM_CLASSES];
    uint64_t last_misses[POOL_NUM_CLASSES];
    int initialized;
    int tls_free_enabled;
    atomic_uint_fast64_t trylock_attempts __attribute__((aligned(64)));
    atomic_uint_fast64_t trylock_success __attribute__((aligned(64)));
    atomic_uint_fast64_t ring_underflow __attribute__((aligned(64)));
} PoolGlobal;

static PoolGlobal g_pool;

// --- Boxed Public/Core API implementations moved from hakmem_pool.c ---

// Adjust bundle factor based on window stats
static inline void pool_update_bundle_factor(int class_idx) {
    uint64_t h = g_pool.hits[class_idx];
    uint64_t m = g_pool.misses[class_idx];
    uint64_t dh = h - g_pool.last_hits[class_idx];
    uint64_t dm = m - g_pool.last_misses[class_idx];
    uint64_t dt = dh + dm;
    if (dt < 256) return;
    int bf = g_pool.bundle_factor[class_idx];
    if (bf <= 0) bf = 1;
    if (dt > 0) {
        double hit_rate = (double)dh / (double)dt;
        if (hit_rate < 0.60 && dm > (dh + 16)) {
            if (bf < 4) bf++;
        } else if (hit_rate > 0.90 && dh > (dm + 32)) {
            if (bf > 1) bf--;
        }
    }
    g_pool.bundle_factor[class_idx] = bf;
    g_pool.last_hits[class_idx] = h;
    g_pool.last_misses[class_idx] = m;
}
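// Illustrative sketch (not used by the pool): the same grow/shrink rule as
// pool_update_bundle_factor, shown in isolation. With dh=120 hits and dm=200
// misses the window hit rate is 0.375, so the factor steps toward 4; a later
// window with dh=600 and dm=20 is ~0.97 and steps it back toward 1.
// pool_example_next_bundle is illustrative only and not part of the pool API.
__attribute__((unused)) static inline int pool_example_next_bundle(int bf, uint64_t dh, uint64_t dm) {
    uint64_t dt = dh + dm;
    if (bf <= 0) bf = 1;
    if (dt < 256) return bf;                                              // window too small: keep current factor
    double hit_rate = (double)dh / (double)dt;
    if (hit_rate < 0.60 && dm > (dh + 16)) return (bf < 4) ? bf + 1 : bf; // struggling: refill bigger bundles
    if (hit_rate > 0.90 && dh > (dm + 32)) return (bf > 1) ? bf - 1 : bf; // comfortable: shrink bundles
    return bf;
}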
// Refill freelist by allocating a new 64KiB page and splitting it into blocks
static int refill_freelist(int class_idx, int shard_idx) {
    if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
    if (shard_idx < 0 || shard_idx >= POOL_NUM_SHARDS) return 0;
    size_t user_size = g_class_sizes[class_idx];
    size_t block_size = HEADER_SIZE + user_size;
    int blocks_per_page = POOL_PAGE_SIZE / block_size;
    if (blocks_per_page == 0) return 0;
    void* page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (page == MAP_FAILED) return 0;   // mmap reports failure as MAP_FAILED, not NULL
    pool_update_bundle_factor(class_idx);
    int bundles = g_pool.bundle_factor[class_idx];
    if (bundles < 1) bundles = 1;
    if (bundles > 4) bundles = 4;
    const FrozenPolicy* pol = hkm_policy_get();
    if (pol) {
        uint16_t cap = 0;
        if (class_idx < 5) cap = pol->mid_cap[class_idx];
        else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
        else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
        if (cap > 0) {
            uint64_t have = g_pool.pages_by_class[class_idx];
            if (have >= cap) {
                bundles = 1;
            } else {
                uint64_t deficit = (cap - have);
                if (deficit < (uint64_t)bundles) bundles = (int)deficit;
                if (bundles < 1) bundles = 1;
                if (bundles > 4) bundles = 4;
                if (deficit >= (uint64_t)g_pool_min_bundle && bundles < g_pool_min_bundle)
                    bundles = g_pool_min_bundle;
            }
        }
    }
    int pages_allocated_this_call = 0;
    for (int b = 0; b < bundles; b++) {
        PoolBlock* freelist_head = NULL;
        for (int i = 0; i < blocks_per_page; i++) {
            void* raw_block = (char*)page + (i * block_size);
            __builtin_prefetch((char*)raw_block + block_size, 1, 1);
            PoolBlock* block = (PoolBlock*)raw_block;
            block->next = freelist_head;
            freelist_head = block;
        }
        if (g_pool.freelist[class_idx][shard_idx]) {
            PoolBlock* tail = freelist_head;
            while (tail->next) tail = tail->next;
            tail->next = g_pool.freelist[class_idx][shard_idx];
        }
        g_pool.freelist[class_idx][shard_idx] = freelist_head;
        mid_desc_register(page, class_idx, 0);
        pages_allocated_this_call++;   // count the page just carved before attempting the next one
        if (b + 1 < bundles) {
            page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (page == MAP_FAILED) break;
        }
    }
    set_nonempty_bit(class_idx, shard_idx);
    g_pool.refills[class_idx]++;
    g_pool.total_pages_allocated += pages_allocated_this_call;
    g_pool.pages_by_class[class_idx] += pages_allocated_this_call;
    g_pool.total_bytes_allocated += (uint64_t)pages_allocated_this_call * (uint64_t)POOL_PAGE_SIZE;
    return 1;
}
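// Worked example (illustrative): how many blocks one 64KiB page yields for a
// class, matching the block_size math in refill_freelist above. With a 2KB
// payload and a HEADER_SIZE of, say, 64 bytes, that is 65536 / 2112 = 31 blocks
// per page; the 40KB/52KB bridge classes fit a single block per page. The
// helper below is a sketch, not part of the pool API.
__attribute__((unused)) static inline int pool_example_blocks_per_page(size_t user_size) {
    size_t block_size = HEADER_SIZE + user_size;   // header + payload, as carved by refill_freelist
    return (int)(POOL_PAGE_SIZE / block_size);     // 0 means the class cannot fit in one page
}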
// Initialization and teardown
#ifndef HAKMEM_POOL_API_NO_PUBLIC
static pthread_once_t hak_pool_init_once_control = PTHREAD_ONCE_INIT;

static void hak_pool_init_impl(void) {
    const FrozenPolicy* pol = hkm_policy_get();
    if (pol && pol->mid_dyn1_bytes >= POOL_MIN_SIZE && pol->mid_dyn1_bytes <= POOL_MAX_SIZE)
        g_class_sizes[5] = pol->mid_dyn1_bytes;
    else
        g_class_sizes[5] = 0;
    if (pol && pol->mid_dyn2_bytes >= POOL_MIN_SIZE && pol->mid_dyn2_bytes <= POOL_MAX_SIZE)
        g_class_sizes[6] = pol->mid_dyn2_bytes;
    else
        g_class_sizes[6] = 0;

    for (int c = 0; c < POOL_NUM_CLASSES; c++) {
        for (int s = 0; s < POOL_NUM_SHARDS; s++) {
            g_pool.freelist[c][s] = NULL;
        }
        atomic_store(&g_pool.nonempty_mask[c], 0);
        for (int s = 0; s < POOL_NUM_SHARDS; s++) {
            pthread_mutex_init(&g_pool.freelist_locks[c][s].m, NULL);
            atomic_store(&g_pool.remote_head[c][s], (uintptr_t)0);
            atomic_store(&g_pool.remote_count[c][s], 0);
        }
        g_pool.hits[c] = 0;
        g_pool.misses[c] = 0;
        g_pool.refills[c] = 0;
        g_pool.frees[c] = 0;
        g_pool.pages_by_class[c] = 0;
        g_pool.bundle_factor[c] = 1;
        g_pool.last_hits[c] = 0;
        g_pool.last_misses[c] = 0;
    }
    g_pool.total_bytes_allocated = 0;
    g_pool.total_pages_allocated = 0;
    atomic_store(&g_pool.trylock_attempts, 0);
    atomic_store(&g_pool.trylock_success, 0);
    atomic_store(&g_pool.ring_underflow, 0);

    const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE");
    g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0);
    const char* e_wrap = getenv("HAKMEM_WRAP_L2");
    g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
    const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE");
    if (e_minb) { int v = atoi(e_minb); if (v >= 1 && v <= 8) g_pool_min_bundle = v; }
    const char* e_mix = getenv("HAKMEM_SHARD_MIX");
    g_shard_mix_enabled = (e_mix && atoi(e_mix) != 0) ? 1 : 0;
    const char* e_ring = getenv("HAKMEM_POOL_TLS_RING");
    if (e_ring) g_tls_ring_enabled = (atoi(e_ring) != 0);
    const char* e_hdr = getenv("HAKMEM_HDR_LIGHT");
    if (e_hdr) g_hdr_light_enabled = atoi(e_hdr);
    const char* e_probe = getenv("HAKMEM_TRYLOCK_PROBES");
    if (e_probe) { int v = atoi(e_probe); if (v >= 1 && v <= 8) g_trylock_probes = v; }
    const char* e_div = getenv("HAKMEM_RING_RETURN_DIV");
    if (e_div) { int v = atoi(e_div); if (v >= 2 && v <= 4) g_ring_return_div = v; }
    const char* e_lo = getenv("HAKMEM_TLS_LO_MAX");
    if (e_lo) { int v = atoi(e_lo); if (v >= 32 && v <= 16384) g_tls_lo_max = v; }
    const char* e_cs = getenv("HAKMEM_POOL_COUNT_SAMPLE");
    if (e_cs) { int v = atoi(e_cs); if (v >= 0 && v <= 16) g_count_sample_exp = v; }
    const char* e_tc = getenv("HAKMEM_TC_ENABLE");
    if (e_tc) g_tc_enabled = (atoi(e_tc) != 0);
    const char* e_tcu = getenv("HAKMEM_TC_UNBOUNDED");
    if (e_tcu) g_tc_drain_unbounded = (atoi(e_tcu) != 0);
    const char* e_tcm = getenv("HAKMEM_TC_DRAIN_MAX");
    if (e_tcm) { int v = atoi(e_tcm); if (v >= 0 && v <= 65536) g_tc_drain_max = v; }
    const char* e_tct = getenv("HAKMEM_TC_DRAIN_TRIGGER");
    if (e_tct) { int v = atoi(e_tct); if (v >= 0 && v <= POOL_L2_RING_CAP) g_tc_drain_trigger = v; }

    const char* e_mf2 = getenv("HAKMEM_MF2_ENABLE");
    if (e_mf2 && atoi(e_mf2) != 0) {
        g_mf2_enabled = 1;
        mf2_page_registry_init();
        const char* e_maxq = getenv("HAKMEM_MF2_MAX_QUEUES");
        if (e_maxq) { int v = atoi(e_maxq); if (v >= 1 && v <= 256) g_mf2_max_queues = v; }
        const char* e_lease = getenv("HAKMEM_MF2_LEASE_MS");
        if (e_lease) { int v = atoi(e_lease); if (v >= 0 && v <= 1000) g_mf2_lease_ms = v; }
        const char* e_idle = getenv("HAKMEM_MF2_IDLE_THRESHOLD_US");
        if (e_idle) { int v = atoi(e_idle); if (v >= 0 && v <= 10000) g_mf2_idle_threshold_us = v; }
        HAKMEM_LOG("[Pool] MF2 Per-Page Sharding enabled\n");
        HAKMEM_LOG("[MF2] max_queues=%d, lease_ms=%d, idle_threshold_us=%d\n",
                   g_mf2_max_queues, g_mf2_lease_ms, g_mf2_idle_threshold_us);
    }

    g_pool.initialized = 1;
    HAKMEM_LOG("[Pool] Initialized (L2 Hybrid Pool)\n");
    if (g_class_sizes[5] != 0 || g_class_sizes[6] != 0) {
        HAKMEM_LOG("[Pool] Classes: 2KB, 4KB, 8KB, 16KB, 32KB, dyn1=%zu, dyn2=%zu\n",
                   g_class_sizes[5], g_class_sizes[6]);
    } else {
        HAKMEM_LOG("[Pool] Classes: 2KB, 4KB, 8KB, 16KB, 32KB\n");
    }
    HAKMEM_LOG("[Pool] Page size: %d KB\n", POOL_PAGE_SIZE / 1024);
    HAKMEM_LOG("[Pool] Shards: %d (site-based)\n", POOL_NUM_SHARDS);
}
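// Example configuration (assumed shell usage): every knob above is a plain
// environment variable read once at init, e.g.
//   HAKMEM_POOL_TLS_RING=1 HAKMEM_TRYLOCK_PROBES=4 HAKMEM_RING_RETURN_DIV=3 ./app
// Out-of-range values are ignored and the compiled-in defaults stay in effect.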
"Remote frees: %12lu\n", (unsigned long)atomic_load(&g_mf2_free_remote_count)); fprintf(stderr, "Slow checked: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_checked_drain)); fprintf(stderr, "Slow found rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_found_remote)); fprintf(stderr, "Full scan chk: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_checked)); fprintf(stderr, "Full scan rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_found_remote)); fprintf(stderr, "Eager scan: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_scanned)); fprintf(stderr, "Eager found: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_found)); fprintf(stderr, "Drain attempts: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_attempts)); fprintf(stderr, "Drain successes: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_success)); fprintf(stderr, "Remote drains: %12lu (blocks: %lu)\n", (unsigned long)atomic_load(&g_mf2_drain_count), (unsigned long)atomic_load(&g_mf2_drain_blocks)); fprintf(stderr, "\n[PENDING QUEUE]\n"); fprintf(stderr, "Pending enqueued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_enqueued)); fprintf(stderr, "Pending drained: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_drained)); fprintf(stderr, "Pending requeued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_requeued)); uint64_t total_allocs = atomic_load(&g_mf2_alloc_fast_hit) + atomic_load(&g_mf2_alloc_slow_hit); uint64_t total_frees = atomic_load(&g_mf2_free_owner_count) + atomic_load(&g_mf2_free_remote_count); if (total_allocs > 0) fprintf(stderr, "\nFast path hit rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_alloc_fast_hit) / total_allocs); if (total_frees > 0) fprintf(stderr, "Owner free rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_free_owner_count) / total_frees); fflush(stderr); } __attribute__((destructor)) static void mf2_destructor(void) { mf2_print_debug_stats(); } void hak_pool_init(void) { pthread_once(&hak_pool_init_once_control, hak_pool_init_impl); } void hak_pool_shutdown(void) { if (!g_pool.initialized) return; extern void hak_pool_print_stats(void); hak_pool_print_stats(); mf2_print_debug_stats(); g_pool.initialized = 0; } // Try-alloc: legacy TLS path or MF2 void* hak_pool_try_alloc(size_t size, uintptr_t site_id) { hak_pool_init(); extern int hak_in_wrapper(void); if (hak_in_wrapper() && !g_wrap_l2_enabled) return NULL; if (!hak_pool_is_poolable(size)) return NULL; int class_idx = hak_pool_get_class_index(size); if (class_idx < 0) return NULL; if (g_mf2_enabled) { return mf2_alloc_fast(class_idx, size, site_id); } PoolTLSRing* ring = &g_tls_bin[class_idx].ring; if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) { HKM_TIME_START(t_tc_drain); if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain); if (ring->top > 0) { HKM_TIME_START(t_ring_pop0); PoolBlock* tlsb = ring->items[--ring->top]; HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0); void* raw = (void*)tlsb; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<top == 0) { atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed); } if (ring->top > 0) { HKM_TIME_START(t_ring_pop1); PoolBlock* tlsb = ring->items[--ring->top]; HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1); void* raw = (void*)tlsb; AllocHeader* hdr = 
// Try-alloc: legacy TLS path or MF2
void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
    hak_pool_init();
    extern int hak_in_wrapper(void);
    if (hak_in_wrapper() && !g_wrap_l2_enabled) return NULL;
    if (!hak_pool_is_poolable(size)) return NULL;
    int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) return NULL;
    if (g_mf2_enabled) {
        return mf2_alloc_fast(class_idx, size, site_id);
    }

    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;

    // Transfer-cache drain: top up the TLS ring from blocks freed back to this thread
    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
        HKM_TIME_START(t_tc_drain);
        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
            if (ring->top > 0) {
                HKM_TIME_START(t_ring_pop0);
                PoolBlock* tlsb = ring->items[--ring->top];
                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
                void* raw = (void*)tlsb;
                AllocHeader* hdr = (AllocHeader*)raw;
                mid_set_header(hdr, g_class_sizes[class_idx], site_id);
                mid_page_inuse_inc(raw);
                t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
                    g_pool.hits[class_idx] += (1u << g_count_sample_exp); // sampled hit accounting
                return (char*)raw + HEADER_SIZE;
            }
        }
    }

    if (ring->top == 0) {
        atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
    }

    // TLS ring pop
    if (ring->top > 0) {
        HKM_TIME_START(t_ring_pop1);
        PoolBlock* tlsb = ring->items[--ring->top];
        HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
        void* raw = (void*)tlsb;
        AllocHeader* hdr = (AllocHeader*)raw;
        mid_set_header(hdr, g_class_sizes[class_idx], site_id);
        t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
        if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
            g_pool.hits[class_idx] += (1u << g_count_sample_exp);
        return (char*)raw + HEADER_SIZE;
    }

    // TLS LIFO pop
    if (g_tls_bin[class_idx].lo_head) {
        HKM_TIME_START(t_lifo_pop0);
        PoolBlock* b = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = b->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
        void* raw = (void*)b;
        AllocHeader* hdr = (AllocHeader*)raw;
        mid_set_header(hdr, g_class_sizes[class_idx], site_id);
        mid_page_inuse_inc(raw);
        t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
        if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
            g_pool.hits[class_idx] += (1u << g_count_sample_exp);
        return (char*)raw + HEADER_SIZE;
    }

    // Trylock probe: if the home shard's freelist is non-empty and its lock is
    // uncontended, pull a batch of blocks into the TLS ring/LIFO
    int shard_idx = hak_pool_get_shard_index(site_id);
    int s = shard_idx;
    pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
    atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
    if (is_shard_nonempty(class_idx, s) && pthread_mutex_trylock(l) == 0) {
        atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
        PoolBlock* head = g_pool.freelist[class_idx][s];
        int to_ring = POOL_L2_RING_CAP - ring->top;
        if (to_ring < 0) to_ring = 0;
        while (head && to_ring-- > 0) {
            PoolBlock* nxt = head->next;
            ring->items[ring->top++] = head;
            head = nxt;
        }
        while (head) {
            PoolBlock* nxt = head->next;
            head->next = g_tls_bin[class_idx].lo_head;
            g_tls_bin[class_idx].lo_head = head;
            g_tls_bin[class_idx].lo_count++;
            head = nxt;
        }
        g_pool.freelist[class_idx][s] = head;
        if (!head) clear_nonempty_bit(class_idx, s);
        pthread_mutex_unlock(l);
        if (ring->top > 0) {
            PoolBlock* tlsb = ring->items[--ring->top];
            void* raw = (void*)tlsb;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            mid_page_inuse_inc(raw);
            t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
            if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
                g_pool.hits[class_idx] += (1u << g_count_sample_exp);
            return (char*)raw + HEADER_SIZE;
        }
    }

    // TLS active pages: bump-allocate from a page this thread is still carving
    __typeof__(g_tls_active_page_a[0])* ap = NULL;
    if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0 &&
        g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end)
        ap = &g_tls_active_page_a[class_idx];
    else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0 &&
             g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end)
        ap = &g_tls_active_page_b[class_idx];
    else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0 &&
             g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end)
        ap = &g_tls_active_page_c[class_idx];
    if (ap) {
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            int need = POOL_L2_RING_CAP - ring->top;
            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
        }
        PoolBlock* b = NULL;
        if (ring->top > 0) {
            b = ring->items[--ring->top];
        } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
            b = (PoolBlock*)(void*)ap->bump;
            ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
            ap->count--;
            if (ap->bump >= ap->end || ap->count <= 0) { ap->page = NULL; ap->count = 0; }
        }
        if (b) {
            void* raw = (void*)b;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            mid_page_inuse_inc(raw);
            g_pool.hits[class_idx]++;
            return (char*)raw + HEADER_SIZE;
        }
    }
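    // Slow path: take the shard lock; if the shard freelist is empty, drain any
    // cross-thread (remote) frees, try to steal a block from the neighboring
    // shard, and as a last resort refill from a freshly mmap'd page.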
    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
    HKM_TIME_START(t_lock);
    struct timespec ts_lk1;
    int lk1 = hkm_prof_begin(&ts_lk1);
    (void)ts_lk1; (void)lk1;
    pthread_mutex_lock(lock);
    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);
    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
    if (!block) {
        int stole = 0;
        const FrozenPolicy* pol2 = hkm_policy_get();
        if (pol2) {
            uint16_t cap = 0;
            if (class_idx < 5) cap = pol2->mid_cap[class_idx];
            else if (class_idx == 5 && pol2->mid_dyn1_bytes != 0) cap = pol2->mid_cap_dyn1;
            else if (class_idx == 6 && pol2->mid_dyn2_bytes != 0) cap = pol2->mid_cap_dyn2;
            (void)cap;  // cap is not consulted on this path
            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
                drain_remote_locked(class_idx, shard_idx);
            }
            int neighbor = (shard_idx + 1) & (POOL_NUM_SHARDS - 1);
            if (is_shard_nonempty(class_idx, neighbor)) {
                PoolBlock* nb = g_pool.freelist[class_idx][neighbor];
                if (nb) {
                    g_pool.freelist[class_idx][neighbor] = nb->next;
                    nb->next = NULL;
                    block = nb;
                    stole = 1;
                }
                if (!g_pool.freelist[class_idx][neighbor]) clear_nonempty_bit(class_idx, neighbor);
            }
        }
        if (!stole && !block) {
            (void)refill_freelist(class_idx, shard_idx);
            block = g_pool.freelist[class_idx][shard_idx];
        }
    }
    if (!block) {
        pthread_mutex_unlock(lock);
        g_pool.misses[class_idx]++;
        return NULL;
    }
    g_pool.freelist[class_idx][shard_idx] = block->next;
    if (!g_pool.freelist[class_idx][shard_idx]) clear_nonempty_bit(class_idx, shard_idx);
    pthread_mutex_unlock(lock);
    void* raw = (void*)block;
    AllocHeader* hdr = (AllocHeader*)raw;
    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
    mid_page_inuse_inc(raw);
    t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
        g_pool.hits[class_idx] += (1u << g_count_sample_exp);
    return (char*)raw + HEADER_SIZE;
}

// Free: return a block to this thread's TLS cache, to the owning thread's
// transfer cache, or to the shard's remote stack / freelist
void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
    if (!ptr || !g_pool.initialized) return;
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    MidPageDesc* d_desc = NULL;
    int mid_by_desc = 0;
    if (g_hdr_light_enabled >= 2) {
        d_desc = mid_desc_lookup(raw);
        mid_by_desc = (d_desc != NULL);
    }
    if (!mid_by_desc) {
        if (hdr->magic != HAKMEM_MAGIC) {
            MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC);
            return;
        }
        if (hdr->method != ALLOC_METHOD_POOL) {
            MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL);
            return;
        }
    }
    int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
    if (class_idx < 0) return;
    PoolBlock* block = (PoolBlock*)raw;

    if (g_pool.tls_free_enabled) {
        int same_thread = 0;
        if (g_hdr_light_enabled >= 1) {
            MidPageDesc* d = mid_desc_lookup(raw);
            if (d && d->owner_tid != 0 && d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) {
                same_thread = 1;
            }
        } else if (hdr->owner_tid != 0 && hdr->owner_tid == (uint64_t)(uintptr_t)pthread_self()) {
            same_thread = 1;
        }
        if (same_thread) {
            // Same-thread free: keep the block in TLS (ring first, then LIFO)
            PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
            if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                ring->items[ring->top++] = block;
            } else {
                block->next = g_tls_bin[class_idx].lo_head;
                g_tls_bin[class_idx].lo_head = block;
                g_tls_bin[class_idx].lo_count++;
                if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
                    // LIFO overflow: spill half of it to the shard's remote stack
                    size_t spill = g_tls_bin[class_idx].lo_count / 2;
                    int shard = hak_pool_get_shard_index(site_id);
                    while (spill-- && g_tls_bin[class_idx].lo_head) {
                        PoolBlock* b = g_tls_bin[class_idx].lo_head;
                        g_tls_bin[class_idx].lo_head = b->next;
                        g_tls_bin[class_idx].lo_count--;
                        HKM_TIME_START(t_remote_push1);
                        uintptr_t old_head;
                        do {
                            old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                            b->next = (PoolBlock*)old_head;
                        } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard],
                                                                        &old_head, (uintptr_t)b,
                                                                        memory_order_release, memory_order_relaxed));
                        atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
                        HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1);
                    }
                    set_nonempty_bit(class_idx, shard);
                }
            }
        } else {
            // Cross-thread free: prefer handing the block back to its owner's transfer cache
            if (g_tc_enabled) {
                uint64_t owner_tid = 0;
                if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid;
                if (owner_tid == 0) {
                    MidPageDesc* d = mid_desc_lookup(raw);
                    if (d) owner_tid = d->owner_tid;
                }
                if (owner_tid != 0) {
                    MidTC* otc = mid_tc_lookup_by_tid(owner_tid);
                    if (otc) {
                        mid_tc_push(otc, class_idx, block);
                        return;
                    }
                }
            }
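            // No owner transfer cache: push onto the shard's remote stack instead.
            // This is a lock-free MPSC (Treiber) stack; it is drained under the
            // shard lock in the alloc path, so only the push needs a CAS retry loop.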
            int shard = hak_pool_get_shard_index(site_id);
            uintptr_t old_head;
            HKM_TIME_START(t_remote_push2);
            do {
                old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
                block->next = (PoolBlock*)old_head;
            } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard],
                                                            &old_head, (uintptr_t)block,
                                                            memory_order_release, memory_order_relaxed));
            atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
            HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2);
            set_nonempty_bit(class_idx, shard);
        }
    } else {
        // TLS free disabled: push straight back to the shard freelist under its lock
        int shard_idx2 = hak_pool_get_shard_index(site_id);
        pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m;
        pthread_mutex_lock(lock);
        block->next = g_pool.freelist[class_idx][shard_idx2];
        g_pool.freelist[class_idx][shard_idx2] = block;
        set_nonempty_bit(class_idx, shard_idx2);
        pthread_mutex_unlock(lock);
    }
    t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << g_count_sample_exp) - 1)) == 0)
        g_pool.frees[class_idx] += (1u << g_count_sample_exp); // sampled free accounting
}

// Look up the usable (class) size for a pool-owned pointer; returns 1 on success
int hak_pool_usable_size(void* ptr, size_t* out_size) {
    if (!ptr || !g_pool.initialized) return 0;
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            int c = (int)page->class_idx;
            if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
            size_t sz = g_class_sizes[c];
            if (sz == 0) return 0;
            if (out_size) *out_size = sz;
            return 1;
        }
    }
    MidPageDesc* d = mid_desc_lookup(ptr);
    if (!d) return 0;
    int c = (int)d->class_idx;
    if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
    size_t sz = g_class_sizes[c];
    if (sz == 0) return 0;
    if (out_size) *out_size = sz;
    return 1;
}

void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
    if (!ptr || !g_pool.initialized) return;
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            mf2_free(ptr);
            return;
        }
    }
    MidPageDesc* d = mid_desc_lookup(ptr);
    if (!d) return;
    size_t sz = g_class_sizes[(int)d->class_idx];
    if (sz == 0) return;
    hak_pool_free(ptr, sz, site_id);
}

#endif // HAKMEM_POOL_API_NO_PUBLIC
#endif // POOL_CORE_API_INC_H