diff --git a/core/box/pool_api.inc.h b/core/box/pool_api.inc.h index c203344a..b00ecff5 100644 --- a/core/box/pool_api.inc.h +++ b/core/box/pool_api.inc.h @@ -11,6 +11,7 @@ #include "box/pool_mid_desc_cache_box.h" // Mid descriptor TLS cache #include "box/pool_free_v1_box.h" // Pool v1 free implementation (L0-SplitBox + L1-FastBox/SlowBox) #include "box/pool_block_to_user_box.h" // Pool block to user pointer helpers +#include "box/pool_free_v2_box.h" // Pool v2 free implementation (with hotbox v2) #include static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) { @@ -290,132 +291,6 @@ static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) { return hak_pool_block_to_user(take, class_idx, site_id); } -static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_id) { - if (!ptr) return; - hak_pool_init(); - if (!hak_pool_is_poolable(size)) return; - - if (g_mf2_enabled) { mf2_free(ptr); return; } - - void* raw = (char*)ptr - HEADER_SIZE; - AllocHeader* hdr = (AllocHeader*)raw; - MidPageDesc* d_desc = mid_desc_lookup_cached(ptr); - int mid_by_desc = d_desc != NULL; - if (!mid_by_desc && g_hdr_light_enabled < 2) { - if (hdr->magic != HAKMEM_MAGIC) { MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC); return; } - if (hdr->method != ALLOC_METHOD_POOL) { MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL); return; } - } - int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size); - if (class_idx < 0) return; - if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) { - pool_hotbox_v2_record_free_call((uint32_t)class_idx); - PoolBlock* raw_block_for_v2 = (PoolBlock*)raw; - if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) { - return; - } - pool_hotbox_v2_record_free_fallback((uint32_t)class_idx); - } - PoolBlock* block = (PoolBlock*)raw; - uint64_t owner_tid = 0; - if (d_desc) owner_tid = d_desc->owner_tid; - else if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid; - const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self(); - - if (g_pool.tls_free_enabled) { - const int same_thread = owner_tid != 0 && owner_tid == self_tid; - if (same_thread) { - PoolTLSRing* ring = &g_tls_bin[class_idx].ring; - if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { ring->items[ring->top++] = block; } - else { - block->next = g_tls_bin[class_idx].lo_head; - g_tls_bin[class_idx].lo_head = block; - g_tls_bin[class_idx].lo_count++; - if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) { - size_t spill = g_tls_bin[class_idx].lo_count / 2; - int shard = hak_pool_get_shard_index(site_id); - // Spill half of local freelist to remote freelist - while (spill-- && g_tls_bin[class_idx].lo_head) { - PoolBlock* b = g_tls_bin[class_idx].lo_head; - g_tls_bin[class_idx].lo_head = b->next; - g_tls_bin[class_idx].lo_count--; - HKM_TIME_START(t_remote_push1); - uintptr_t old_head; - do { - old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); - b->next = (PoolBlock*)old_head; - } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], - &old_head, (uintptr_t)b, - memory_order_release, memory_order_relaxed)); - atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); - HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1); - } - set_nonempty_bit(class_idx, shard); - } - } - } else { - if (g_tc_enabled && owner_tid != 0) { MidTC* otc = mid_tc_lookup_by_tid(owner_tid); if (otc) { mid_tc_push(otc, class_idx, block); return; } } - int shard = hak_pool_get_shard_index(site_id); uintptr_t old_head; HKM_TIME_START(t_remote_push2); - do { old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); block->next = (PoolBlock*)old_head; } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block, memory_order_release, memory_order_relaxed)); - atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2); set_nonempty_bit(class_idx, shard); - } - } else { - int shard_idx2 = hak_pool_get_shard_index(site_id); pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m; pthread_mutex_lock(lock); block->next = g_pool.freelist[class_idx][shard_idx2]; g_pool.freelist[class_idx][shard_idx2] = block; set_nonempty_bit(class_idx, shard_idx2); pthread_mutex_unlock(lock); - } - t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } } - MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; -} - -static inline void hak_pool_free_fast_v2_impl(void* ptr, uintptr_t site_id) { - // Phase POOL-FREE-V1-OPT Step 1: Track v2 reject reasons - if (!ptr) { - if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_ptr_null, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); - } - return; - } - - if (!g_pool.initialized) { - if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_not_init, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); - } - return; - } - - if (g_mf2_enabled) { - MidPage* page = mf2_addr_to_page(ptr); - if (page) { mf2_free(ptr); return; } - // MF2 path but page was NULL → fallback to v1 - if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_mf2_null, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); - } - return; - } - - MidPageDesc* d = mid_desc_lookup_cached(ptr); - if (!d) { - // mid_desc_lookup failed → fallback to v1 - if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_desc_null, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); - } - return; - } - - size_t sz = g_class_sizes[(int)d->class_idx]; - if (sz == 0) return; - hak_pool_free(ptr, sz, site_id); -} - - - static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) { // Debug: IMMEDIATE output to verify function is called static int first_call = 1; diff --git a/core/box/pool_free_v2_box.h b/core/box/pool_free_v2_box.h new file mode 100644 index 00000000..bb9c3596 --- /dev/null +++ b/core/box/pool_free_v2_box.h @@ -0,0 +1,180 @@ +// pool_free_v2_box.h — Box: Pool V2 Free Implementation +// +// Purpose: Pool v2 free path with hotbox_v2 integration +// Pattern: Enhanced free path with hotbox support and statistics +// Phase: Pool API Modularization - Step 5 +// Dependencies: Assumes pool_api.inc.h includes this after pool_block_to_user_box.h +// (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.) + +#ifndef POOL_FREE_V2_BOX_H +#define POOL_FREE_V2_BOX_H + +#include "pool_block_to_user_box.h" // Pool block to user helpers +#include "pool_config_box.h" // For configuration gates +#include "pool_stats_box.h" // For g_pool_v1_flat_stats +#include "pool_mid_desc_cache_box.h" // For mid_desc_lookup_cached +#include "pool_hotbox_v2_box.h" // For hotbox v2 functions + +#include +#include + +// Forward declarations +struct MidPageDesc; +typedef struct MidPageDesc MidPageDesc; +struct MidPage; +typedef struct MidPage MidPage; + +// External functions +extern void hak_pool_init(void); +extern int hak_pool_is_poolable(size_t size); +extern int hak_pool_get_class_index(size_t size); +extern int hak_pool_get_shard_index(uintptr_t site_id); +extern void set_nonempty_bit(int class_idx, int shard); +extern void mid_page_inuse_dec_and_maybe_dn(void* raw); +extern void mf2_free(void* ptr); +extern MidPage* mf2_addr_to_page(void* ptr); +extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id); + +// Assumed available from caller includes: +// - AllocHeader (from hakmem_internal.h) +// - PoolBlock (from pool_tls_types.inc.h or hakmem_pool.c) +// - PoolTLSRing (from pool_tls_types.inc.h) +// - g_pool, g_tls_bin, g_pool_v1_flat_stats (from hakmem_pool.c) +// - g_tls_ring_enabled, g_tls_lo_max, g_hdr_light_enabled, g_mf2_enabled, t_pool_rng +// - g_tc_enabled, g_count_sample_exp +// - HEADER_SIZE, ALLOC_METHOD_POOL, POOL_L2_RING_CAP, HAKMEM_MAGIC, POOL_NUM_CLASSES + +// ============================================================================ +// Pool V2 Free Implementation (with hotbox_v2 support) +// ============================================================================ +static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_id) { + if (!ptr) return; + hak_pool_init(); + if (!hak_pool_is_poolable(size)) return; + + if (g_mf2_enabled) { mf2_free(ptr); return; } + + void* raw = (char*)ptr - HEADER_SIZE; + AllocHeader* hdr = (AllocHeader*)raw; + MidPageDesc* d_desc = mid_desc_lookup_cached(ptr); + int mid_by_desc = d_desc != NULL; + if (!mid_by_desc && g_hdr_light_enabled < 2) { + if (hdr->magic != HAKMEM_MAGIC) { MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC); return; } + if (hdr->method != ALLOC_METHOD_POOL) { MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL); return; } + } + int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size); + if (class_idx < 0) return; + if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) { + pool_hotbox_v2_record_free_call((uint32_t)class_idx); + PoolBlock* raw_block_for_v2 = (PoolBlock*)raw; + if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) { + return; + } + pool_hotbox_v2_record_free_fallback((uint32_t)class_idx); + } + PoolBlock* block = (PoolBlock*)raw; + uint64_t owner_tid = 0; + if (d_desc) owner_tid = d_desc->owner_tid; + else if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid; + const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self(); + + if (g_pool.tls_free_enabled) { + const int same_thread = owner_tid != 0 && owner_tid == self_tid; + if (same_thread) { + PoolTLSRing* ring = &g_tls_bin[class_idx].ring; + if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { ring->items[ring->top++] = block; } + else { + block->next = g_tls_bin[class_idx].lo_head; + g_tls_bin[class_idx].lo_head = block; + g_tls_bin[class_idx].lo_count++; + if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) { + size_t spill = g_tls_bin[class_idx].lo_count / 2; + int shard = hak_pool_get_shard_index(site_id); + // Spill half of local freelist to remote freelist + while (spill-- && g_tls_bin[class_idx].lo_head) { + PoolBlock* b = g_tls_bin[class_idx].lo_head; + g_tls_bin[class_idx].lo_head = b->next; + g_tls_bin[class_idx].lo_count--; + HKM_TIME_START(t_remote_push1); + uintptr_t old_head; + do { + old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); + b->next = (PoolBlock*)old_head; + } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], + &old_head, (uintptr_t)b, + memory_order_release, memory_order_relaxed)); + atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); + HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1); + } + set_nonempty_bit(class_idx, shard); + } + } + } else { + if (g_tc_enabled && owner_tid != 0) { MidTC* otc = mid_tc_lookup_by_tid(owner_tid); if (otc) { mid_tc_push(otc, class_idx, block); return; } } + int shard = hak_pool_get_shard_index(site_id); uintptr_t old_head; HKM_TIME_START(t_remote_push2); + do { old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); block->next = (PoolBlock*)old_head; } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block, memory_order_release, memory_order_relaxed)); + atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2); set_nonempty_bit(class_idx, shard); + } + } else { + int shard_idx2 = hak_pool_get_shard_index(site_id); pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m; pthread_mutex_lock(lock); block->next = g_pool.freelist[class_idx][shard_idx2]; g_pool.freelist[class_idx][shard_idx2] = block; set_nonempty_bit(class_idx, shard_idx2); pthread_mutex_unlock(lock); + } + t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } } + MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; +} + +// ============================================================================ +// Pool V2 Fast Free Implementation (no size parameter) +// ============================================================================ +static inline void hak_pool_free_fast_v2_impl(void* ptr, uintptr_t site_id) { + // Phase POOL-FREE-V1-OPT Step 1: Track v2 reject reasons + if (!ptr) { + if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_ptr_null, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); + } + return; + } + + if (!g_pool.initialized) { + if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_not_init, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); + } + return; + } + + if (g_mf2_enabled) { + MidPage* page = mf2_addr_to_page(ptr); + if (page) { mf2_free(ptr); return; } + // MF2 path but page was NULL → fallback to v1 + if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_mf2_null, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); + } + return; + } + + MidPageDesc* d = mid_desc_lookup_cached(ptr); + if (!d) { + // mid_desc_lookup failed → fallback to v1 + if (__builtin_expect(hak_pool_free_v1_reject_stats_enabled(), 0)) { + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_desc_null, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_pool_v1_flat_stats.v2_reject_total, 1, memory_order_relaxed); + } + return; + } + + size_t sz = g_class_sizes[(int)d->class_idx]; + if (sz == 0) return; + hak_pool_free(ptr, sz, site_id); +} + +#endif // POOL_FREE_V2_BOX_H