Phase: Pool API Modularization - Step 6: Extract pool_alloc_v1_flat_box.h
Extract 103 lines: hak_pool_try_alloc_v1_flat() + hak_pool_free_v1_flat() - New box: core/box/pool_alloc_v1_flat_box.h (v1 flatten TLS-only fast path) - Updated: pool_api.inc.h (add include, remove extracted functions) - Build: OK, bench_mid_large_mt_hakmem: 9.17M ops/s (baseline ~8M, within ±2%) - Risk: MINIMAL (TLS-only path, well-isolated) - Note: Added forward declarations for v1_impl functions (defined later) Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
152
core/box/pool_alloc_v1_flat_box.h
Normal file
152
core/box/pool_alloc_v1_flat_box.h
Normal file
@@ -0,0 +1,152 @@
|
||||
// pool_alloc_v1_flat_box.h — Box: Pool V1 Flatten (TLS-only fast path)
|
||||
//
|
||||
// Purpose: Pool v1 flatten optimization (TLS-only alloc/free)
|
||||
// Pattern: TLS-only fast path with fallback to full v1 implementation
|
||||
// Phase: Pool API Modularization - Step 6
|
||||
// Dependencies: Assumes pool_api.inc.h includes this after pool_free_v1_box.h
|
||||
// (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.)
|
||||
|
||||
#ifndef POOL_ALLOC_V1_FLAT_BOX_H
|
||||
#define POOL_ALLOC_V1_FLAT_BOX_H
|
||||
|
||||
#include "pool_block_to_user_box.h" // Pool block to user helpers
|
||||
#include "pool_config_box.h" // For configuration gates
|
||||
#include "pool_stats_box.h" // For g_pool_v1_flat_stats
|
||||
#include "pool_mid_desc_cache_box.h" // For mid_desc_lookup_cached
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdatomic.h>
|
||||
|
||||
// Forward declarations
|
||||
struct MidPageDesc;
|
||||
typedef struct MidPageDesc MidPageDesc;
|
||||
|
||||
// External functions
|
||||
extern int hak_pool_is_poolable(size_t size);
|
||||
extern int hak_pool_get_class_index(size_t size);
|
||||
extern int hak_pool_get_shard_index(uintptr_t site_id);
|
||||
extern void set_nonempty_bit(int class_idx, int shard);
|
||||
extern void mid_desc_adopt(void* block, int class_idx, uint64_t owner_tid);
|
||||
|
||||
// Forward declarations for functions defined later in pool_api.inc.h
|
||||
static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id);
|
||||
static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_id);
|
||||
|
||||
// Assumed available from caller includes:
|
||||
// - AllocHeader (from hakmem_internal.h)
|
||||
// - PoolBlock (from pool_tls_types.inc.h or hakmem_pool.c)
|
||||
// - PoolTLSRing (from pool_tls_types.inc.h)
|
||||
// - g_pool, g_tls_bin, g_pool_v1_flat_stats (from hakmem_pool.c)
|
||||
// - g_tls_ring_enabled, g_tls_lo_max
|
||||
// - HEADER_SIZE, POOL_L2_RING_CAP, POOL_NUM_CLASSES
|
||||
|
||||
// ============================================================================
|
||||
// Pool V1 Flatten Alloc (TLS-only fast path)
|
||||
// ============================================================================
|
||||
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
|
||||
if (!hak_pool_is_poolable(size)) return NULL;
|
||||
int class_idx = hak_pool_get_class_index(size);
|
||||
if (class_idx < 0) return NULL;
|
||||
|
||||
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
|
||||
if (g_tls_ring_enabled && ring->top > 0) {
|
||||
PoolBlock* tlsb = ring->items[--ring->top];
|
||||
// Adopt shared pages to this thread so free can stay on the fast path.
|
||||
mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_block_to_user(tlsb, class_idx, site_id);
|
||||
}
|
||||
|
||||
if (g_tls_bin[class_idx].lo_head) {
|
||||
PoolBlock* b = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = b->next;
|
||||
if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
|
||||
mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_block_to_user(b, class_idx, site_id);
|
||||
}
|
||||
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_try_alloc_v1_impl(size, site_id);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Pool V1 Flatten Free (TLS-only fast path)
|
||||
// ============================================================================
|
||||
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
|
||||
if (!ptr) return;
|
||||
if (!hak_pool_is_poolable(size)) return;
|
||||
|
||||
void* raw = (char*)ptr - HEADER_SIZE;
|
||||
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
|
||||
if (!d_desc) {
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
return;
|
||||
}
|
||||
|
||||
int class_idx = (int)d_desc->class_idx;
|
||||
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint64_t owner_tid = d_desc->owner_tid;
|
||||
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
|
||||
|
||||
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
|
||||
PoolBlock* block = (PoolBlock*)raw;
|
||||
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
|
||||
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
|
||||
ring->items[ring->top++] = block;
|
||||
} else {
|
||||
block->next = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = block;
|
||||
g_tls_bin[class_idx].lo_count++;
|
||||
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
|
||||
size_t spill = g_tls_bin[class_idx].lo_count / 2;
|
||||
int shard = hak_pool_get_shard_index(site_id);
|
||||
while (spill-- && g_tls_bin[class_idx].lo_head) {
|
||||
PoolBlock* b = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = b->next;
|
||||
g_tls_bin[class_idx].lo_count--;
|
||||
uintptr_t old_head;
|
||||
do {
|
||||
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
|
||||
b->next = (PoolBlock*)old_head;
|
||||
} while (!atomic_compare_exchange_weak_explicit(
|
||||
&g_pool.remote_head[class_idx][shard],
|
||||
&old_head, (uintptr_t)b,
|
||||
memory_order_release, memory_order_relaxed));
|
||||
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
|
||||
}
|
||||
set_nonempty_bit(class_idx, shard);
|
||||
}
|
||||
}
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
}
|
||||
|
||||
#endif // POOL_ALLOC_V1_FLAT_BOX_H
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "box/pool_free_v1_box.h" // Pool v1 free implementation (L0-SplitBox + L1-FastBox/SlowBox)
|
||||
#include "box/pool_block_to_user_box.h" // Pool block to user pointer helpers
|
||||
#include "box/pool_free_v2_box.h" // Pool v2 free implementation (with hotbox v2)
|
||||
#include "box/pool_alloc_v1_flat_box.h" // Pool v1 flatten (TLS-only fast path)
|
||||
#include <stdint.h>
|
||||
|
||||
static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
|
||||
@@ -577,111 +578,6 @@ static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) {
|
||||
return user4;
|
||||
}
|
||||
|
||||
// --- v1 flatten (opt-in) ----------------------------------------------------
|
||||
|
||||
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
|
||||
if (!hak_pool_is_poolable(size)) return NULL;
|
||||
int class_idx = hak_pool_get_class_index(size);
|
||||
if (class_idx < 0) return NULL;
|
||||
|
||||
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
|
||||
if (g_tls_ring_enabled && ring->top > 0) {
|
||||
PoolBlock* tlsb = ring->items[--ring->top];
|
||||
// Adopt shared pages to this thread so free can stay on the fast path.
|
||||
mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_block_to_user(tlsb, class_idx, site_id);
|
||||
}
|
||||
|
||||
if (g_tls_bin[class_idx].lo_head) {
|
||||
PoolBlock* b = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = b->next;
|
||||
if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
|
||||
mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_block_to_user(b, class_idx, site_id);
|
||||
}
|
||||
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
return hak_pool_try_alloc_v1_impl(size, site_id);
|
||||
}
|
||||
|
||||
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
|
||||
if (!ptr) return;
|
||||
if (!hak_pool_is_poolable(size)) return;
|
||||
|
||||
void* raw = (char*)ptr - HEADER_SIZE;
|
||||
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
|
||||
if (!d_desc) {
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
return;
|
||||
}
|
||||
|
||||
int class_idx = (int)d_desc->class_idx;
|
||||
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint64_t owner_tid = d_desc->owner_tid;
|
||||
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
|
||||
|
||||
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
|
||||
PoolBlock* block = (PoolBlock*)raw;
|
||||
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
|
||||
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
|
||||
ring->items[ring->top++] = block;
|
||||
} else {
|
||||
block->next = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = block;
|
||||
g_tls_bin[class_idx].lo_count++;
|
||||
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
|
||||
size_t spill = g_tls_bin[class_idx].lo_count / 2;
|
||||
int shard = hak_pool_get_shard_index(site_id);
|
||||
while (spill-- && g_tls_bin[class_idx].lo_head) {
|
||||
PoolBlock* b = g_tls_bin[class_idx].lo_head;
|
||||
g_tls_bin[class_idx].lo_head = b->next;
|
||||
g_tls_bin[class_idx].lo_count--;
|
||||
uintptr_t old_head;
|
||||
do {
|
||||
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
|
||||
b->next = (PoolBlock*)old_head;
|
||||
} while (!atomic_compare_exchange_weak_explicit(
|
||||
&g_pool.remote_head[class_idx][shard],
|
||||
&old_head, (uintptr_t)b,
|
||||
memory_order_release, memory_order_relaxed));
|
||||
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
|
||||
}
|
||||
set_nonempty_bit(class_idx, shard);
|
||||
}
|
||||
}
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (hak_pool_v1_flatten_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
|
||||
}
|
||||
hak_pool_free_v1_impl(ptr, size, site_id);
|
||||
}
|
||||
|
||||
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
|
||||
if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
|
||||
MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
|
||||
|
||||
Reference in New Issue
Block a user