// pool_free_v1_box.h — Box: Pool V1 Free Implementation (Fast/Slow Split)
//
// Purpose: Pool v1 free path with L0-SplitBox + L1-FastBox/SlowBox
// Pattern: Header-based predicate for same-thread fast path
// Phase: POOL-FREE-V1-OPT Steps 1-2 (reject stats + fast split)
// Dependencies: Assumes pool_api.inc.h includes this after hakmem_internal.h
//               (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.)

#ifndef POOL_FREE_V1_BOX_H
#define POOL_FREE_V1_BOX_H

#include "pool_config_box.h"             // For hak_pool_v1_free_fastsplit_enabled, etc.
#include "pool_stats_box.h"              // For g_pool_v1_flat_stats
#include "pool_mid_desc_cache_box.h"     // For mid_desc_lookup_cached
#include "pool_mid_inuse_deferred_box.h" // For mid_inuse_dec_deferred

// NOTE: the next two include targets were lost in extraction; <stdatomic.h>
// and <pthread.h> are restored from usage below (atomic_* / pthread_*).
#include <stdatomic.h>
#include <pthread.h>

// NOTE: the sampling mask width used in the free tails below was lost in
// extraction; 10 (sample ~1 in 1024 frees) is an assumed placeholder value.
#ifndef POOL_V1_SAMPLE_SHIFT
#define POOL_V1_SAMPLE_SHIFT 10
#endif

// Forward declaration only (full definitions available from hakmem_internal.h)
struct MidPageDesc;
typedef struct MidPageDesc MidPageDesc;

// External functions
extern void hak_pool_init(void);
extern int  hak_pool_is_poolable(size_t size);
extern int  hak_pool_get_class_index(size_t size);
extern int  hak_pool_get_shard_index(uintptr_t site_id);
extern void set_nonempty_bit(int class_idx, int shard);
extern void mid_page_inuse_dec_and_maybe_dn(void* raw);
extern void mf2_free(void* ptr);

// Assumed available from caller includes:
// - AllocHeader (from hakmem_internal.h)
// - PoolBlock (from pool_tls_types.inc.h or hakmem_pool.c)
// - PoolTLSRing (from pool_tls_types.inc.h)
// - g_pool, g_tls_bin, g_pool_v1_flat_stats (from hakmem_pool.c)
// - g_tls_ring_enabled, g_tls_lo_max, g_hdr_light_enabled, g_mf2_enabled, t_pool_rng
// - g_tc_enabled, MidTC, mid_tc_lookup_by_tid, mid_tc_push (used by SlowBox)
// - HEADER_SIZE, ALLOC_METHOD_POOL, POOL_L2_RING_CAP, HAKMEM_MAGIC

// ============================================================================
// Phase POOL-FREE-V1-OPT: L1-FastBox (same-thread TLS free, no mid_desc_lookup)
// ============================================================================
// Precondition: fast predicate already verified by caller
//   - g_pool.tls_free_enabled == true
//   - g_hdr_light_enabled == 0 (header owner_tid trusted)
//   - hdr->owner_tid == self (same-thread)
// Effect: Skips 2x mid_desc_lookup calls vs slow path
static inline void hak_pool_free_v1_fast_impl(void* raw, int class_idx, uintptr_t site_id) {
    PoolBlock* block = (PoolBlock*)raw;

    // Same-thread TLS free path (ring → lo_head → spill)
    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
        ring->items[ring->top++] = block;
    } else {
        block->next = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = block;
        g_tls_bin[class_idx].lo_count++;
        if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
            // Spill half of the local list to the shared remote stack
            size_t spill = g_tls_bin[class_idx].lo_count / 2;
            int shard = hak_pool_get_shard_index(site_id);
            while (spill-- && g_tls_bin[class_idx].lo_head) {
                PoolBlock* b = g_tls_bin[class_idx].lo_head;
                g_tls_bin[class_idx].lo_head = b->next;
                g_tls_bin[class_idx].lo_count--;
                HKM_TIME_START(t_remote_push1);
                uintptr_t old_head;
                do {
                    old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard],
                                                    memory_order_acquire);
                    b->next = (PoolBlock*)old_head;
                } while (!atomic_compare_exchange_weak_explicit(
                             &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b,
                             memory_order_release, memory_order_relaxed));
                atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1,
                                          memory_order_relaxed);
                HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1);
            }
            set_nonempty_bit(class_idx, shard);
        }
    }

    // Common tail: sample counter + inuse dec (deferred)
    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << POOL_V1_SAMPLE_SHIFT) - 1)) == 0) {
        // Sampled counter update (body lost in extraction)
    }
    mid_inuse_dec_deferred(raw);
}
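// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the box): the common tail above steps a
// thread-local xorshift32 PRNG and samples work when the low bits are zero.
// The standalone helper below shows the same pattern in isolation; the helper
// name and the 10-bit mask are example assumptions, matching the placeholder
// POOL_V1_SAMPLE_SHIFT above.
// ----------------------------------------------------------------------------
static inline int pool_free_v1_sample_sketch(uint32_t* rng_state) {
    uint32_t x = *rng_state;
    x ^= x << 13;   // xorshift32 step (Marsaglia): full-period 32-bit generator
    x ^= x >> 17;
    x ^= x << 5;
    *rng_state = x;
    // Low 10 bits == 0 fires with probability 1/1024: a branch-cheap sampling
    // gate with no division and no extra state beyond the PRNG word.
    return (x & ((1u << 10) - 1)) == 0;
}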
// ============================================================================
// L1-SlowBox: full free path (cross-thread push, header-light, locked fallback)
// ============================================================================
// NOTE: the prologue up to the magic check was lost in extraction; the raw/hdr
// derivation, the d_desc/mid_by_desc lookup, and the guard around the header
// validation below are reconstructions inferred from later uses in this
// function.
static void hak_pool_free_v1_slow_impl(void* ptr, size_t size, uintptr_t site_id) {
    void* raw = (char*)ptr - HEADER_SIZE;
    AllocHeader* hdr = (AllocHeader*)raw;
    MidPageDesc* d_desc = mid_desc_lookup_cached(raw);
    int mid_by_desc = (d_desc != NULL);
    if (g_hdr_light_enabled == 0) { // assumed guard: header fields only trusted in full-header mode
        if (hdr->magic != HAKMEM_MAGIC) {
            MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X",
                          hdr->magic, HAKMEM_MAGIC);
            return;
        }
        if (hdr->method != ALLOC_METHOD_POOL) {
            MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)",
                          hdr->method, ALLOC_METHOD_POOL);
            return;
        }
    }

    int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
    if (class_idx < 0) return;

    PoolBlock* block = (PoolBlock*)raw;

    if (g_pool.tls_free_enabled) {
        // Decide same-thread vs cross-thread (desc-based when headers are light)
        int same_thread = 0;
        if (g_hdr_light_enabled >= 1) {
            MidPageDesc* d = mid_desc_lookup_cached(raw);
            if (d && d->owner_tid != 0 &&
                d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) {
                same_thread = 1;
            }
        } else if (hdr->owner_tid != 0 &&
                   hdr->owner_tid == (uintptr_t)pthread_self()) {
            same_thread = 1;
        }

        if (same_thread) {
            // Same-thread TLS free path (ring → lo_head → spill)
            PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
            if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                ring->items[ring->top++] = block;
            } else {
                block->next = g_tls_bin[class_idx].lo_head;
                g_tls_bin[class_idx].lo_head = block;
                g_tls_bin[class_idx].lo_count++;
                if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
                    size_t spill = g_tls_bin[class_idx].lo_count / 2;
                    int shard = hak_pool_get_shard_index(site_id);
                    while (spill-- && g_tls_bin[class_idx].lo_head) {
                        PoolBlock* b = g_tls_bin[class_idx].lo_head;
                        g_tls_bin[class_idx].lo_head = b->next;
                        g_tls_bin[class_idx].lo_count--;
                        HKM_TIME_START(t_remote_push1);
                        uintptr_t old_head;
                        do {
                            old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard],
                                                            memory_order_acquire);
                            b->next = (PoolBlock*)old_head;
                        } while (!atomic_compare_exchange_weak_explicit(
                                     &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b,
                                     memory_order_release, memory_order_relaxed));
                        atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1,
                                                  memory_order_relaxed);
                        HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1);
                    }
                    set_nonempty_bit(class_idx, shard);
                }
            }
        } else {
            // Cross-thread: try pushing to the owner's thread cache first
            if (g_tc_enabled) {
                uint64_t owner_tid = 0;
                if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid;
                if (owner_tid == 0) {
                    MidPageDesc* d = mid_desc_lookup_cached(raw);
                    if (d) owner_tid = d->owner_tid;
                }
                if (owner_tid != 0) {
                    MidTC* otc = mid_tc_lookup_by_tid(owner_tid);
                    if (otc) {
                        mid_tc_push(otc, class_idx, block);
                        goto tail;
                    }
                }
            }
            // Otherwise push onto the shared lock-free remote stack
            int shard = hak_pool_get_shard_index(site_id);
            uintptr_t old_head;
            HKM_TIME_START(t_remote_push2);
            do {
                old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard],
                                                memory_order_acquire);
                block->next = (PoolBlock*)old_head;
            } while (!atomic_compare_exchange_weak_explicit(
                         &g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block,
                         memory_order_release, memory_order_relaxed));
            atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1,
                                      memory_order_relaxed);
            HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2);
            set_nonempty_bit(class_idx, shard);
        }
    } else {
        // TLS free disabled: locked global freelist fallback
        int shard_idx2 = hak_pool_get_shard_index(site_id);
        pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m;
        pthread_mutex_lock(lock);
        block->next = g_pool.freelist[class_idx][shard_idx2];
        g_pool.freelist[class_idx][shard_idx2] = block;
        set_nonempty_bit(class_idx, shard_idx2);
        pthread_mutex_unlock(lock);
    }

tail:
    // Common tail: sample counter + inuse dec
    t_pool_rng ^= t_pool_rng << 13;
    t_pool_rng ^= t_pool_rng >> 17;
    t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u << POOL_V1_SAMPLE_SHIFT) - 1)) == 0) {
        // Sampled counter update (body lost in extraction)
    }
    // Assumed call (span lost in extraction): the non-deferred counterpart of
    // the FastBox tail, matching the extern declared above.
    mid_page_inuse_dec_and_maybe_dn(raw);
}
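// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the box): both paths above push remote
// frees with the same lock-free Treiber-stack CAS loop. The standalone pair
// below shows the ordering contract in isolation: the release CAS publishes
// the node's next link, and the consumer's acquire exchange observes it.
// ExNode, ex_remote_push, and ex_remote_drain are names local to this sketch.
// ----------------------------------------------------------------------------
typedef struct ExNode { struct ExNode* next; } ExNode;

static inline void ex_remote_push(_Atomic(uintptr_t)* head, ExNode* n) {
    uintptr_t old = atomic_load_explicit(head, memory_order_acquire);
    do {
        n->next = (ExNode*)old;                 // link before publishing
    } while (!atomic_compare_exchange_weak_explicit(
                 head, &old, (uintptr_t)n,
                 memory_order_release,          // success: publish n->next
                 memory_order_relaxed));        // failure: CAS reloaded old, retry
}

static inline ExNode* ex_remote_drain(_Atomic(uintptr_t)* head) {
    // Consumer takes the whole stack in one exchange (nodes come back LIFO);
    // acquire pairs with the producers' release CAS above.
    return (ExNode*)atomic_exchange_explicit(head, (uintptr_t)0,
                                             memory_order_acquire);
}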
// ============================================================================
// L0-SplitBox: dispatch between FastBox and SlowBox
// ============================================================================
// NOTE: the entry's name and the lines up to the header predicate were lost
// in extraction; hak_pool_free_v1_impl, the gate condition, and the
// raw/hdr/self setup are reconstructions inferred from the FastBox
// preconditions above and the visible body below.
static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_id) {
    if (hak_pool_v1_free_fastsplit_enabled() &&
        g_pool.tls_free_enabled && g_hdr_light_enabled == 0) {
        void* raw = (char*)ptr - HEADER_SIZE;
        AllocHeader* hdr = (AllocHeader*)raw;
        uintptr_t self = (uintptr_t)pthread_self();
        // Fast predicate: trusted header says pool block owned by this thread
        if (hdr->magic == HAKMEM_MAGIC &&
            hdr->method == ALLOC_METHOD_POOL &&
            hdr->owner_tid != 0 && hdr->owner_tid == self) {
            // class_idx from size (caller provided)
            int class_idx = hak_pool_get_class_index(size);
            if (class_idx >= 0) {
                if (__builtin_expect(hak_pool_v1_flatten_stats_enabled(), 0)) {
                    atomic_fetch_add_explicit(&g_pool_v1_flat_stats.fastsplit_fast_hit, 1,
                                              memory_order_relaxed);
                }
                hak_pool_free_v1_fast_impl(raw, class_idx, site_id);
                return;
            }
        }
        // Fast predicate failed, fall through to slow
        if (__builtin_expect(hak_pool_v1_flatten_stats_enabled(), 0)) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.fastsplit_slow_hit, 1,
                                      memory_order_relaxed);
        }
    }
    // Fallback to slow path
    hak_pool_free_v1_slow_impl(ptr, size, site_id);
}

#endif // POOL_FREE_V1_BOX_H
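// Usage sketch (illustrative): pool_api.inc.h is assumed to include this box
// after hakmem_internal.h and route pool frees through the L0-SplitBox entry.
// hak_pool_free and the poolability pre-check are hypothetical wiring, and
// hak_pool_free_v1_impl is the assumed entry name reconstructed above.
//
//   void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
//       if (!hak_pool_is_poolable(size)) return; // route to another allocator box
//       hak_pool_free_v1_impl(ptr, size, site_id);
//   }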