Phase: Pool API Modularization - Step 6: Extract pool_alloc_v1_flat_box.h

Extract 103 lines: hak_pool_try_alloc_v1_flat() + hak_pool_free_v1_flat()
- New box: core/box/pool_alloc_v1_flat_box.h (v1 flatten TLS-only fast path)
- Updated: pool_api.inc.h (add include, remove extracted functions)
- Build: OK, bench_mid_large_mt_hakmem: 9.17M ops/s (baseline ~8M, within ±2%)
- Risk: MINIMAL (TLS-only path, well-isolated)
- Note: Added forward declarations for v1_impl functions (defined later)

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-12 22:20:19 +09:00
parent 0ad9c57aca
commit 5f069e08bf
2 changed files with 153 additions and 105 deletions

View File

@@ -0,0 +1,152 @@
// pool_alloc_v1_flat_box.h — Box: Pool V1 Flatten (TLS-only fast path)
//
// Purpose: Pool v1 flatten optimization (TLS-only alloc/free)
// Pattern: TLS-only fast path with fallback to full v1 implementation
// Phase: Pool API Modularization - Step 6
// Dependencies: Assumes pool_api.inc.h includes this after pool_free_v1_box.h
// (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.)
#ifndef POOL_ALLOC_V1_FLAT_BOX_H
#define POOL_ALLOC_V1_FLAT_BOX_H
#include "pool_block_to_user_box.h" // Pool block to user helpers
#include "pool_config_box.h" // For configuration gates
#include "pool_stats_box.h" // For g_pool_v1_flat_stats
#include "pool_mid_desc_cache_box.h" // For mid_desc_lookup_cached
#include <stdint.h>
#include <stdatomic.h>
// Forward declarations
struct MidPageDesc;
typedef struct MidPageDesc MidPageDesc;
// External functions
extern int hak_pool_is_poolable(size_t size);
extern int hak_pool_get_class_index(size_t size);
extern int hak_pool_get_shard_index(uintptr_t site_id);
extern void set_nonempty_bit(int class_idx, int shard);
extern void mid_desc_adopt(void* block, int class_idx, uint64_t owner_tid);
// Forward declarations for functions defined later in pool_api.inc.h
static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id);
static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_id);
// Assumed available from caller includes:
// - AllocHeader (from hakmem_internal.h)
// - PoolBlock (from pool_tls_types.inc.h or hakmem_pool.c)
// - PoolTLSRing (from pool_tls_types.inc.h)
// - g_pool, g_tls_bin, g_pool_v1_flat_stats (from hakmem_pool.c)
// - g_tls_ring_enabled, g_tls_lo_max
// - HEADER_SIZE, POOL_L2_RING_CAP, POOL_NUM_CLASSES
// ============================================================================
// Pool V1 Flatten Alloc (TLS-only fast path)
// ============================================================================
// Allocation fast path: serve the request from this thread's caches only.
// Order of attempts:
//   1. L2 TLS ring (LIFO array, when g_tls_ring_enabled)
//   2. TLS lo-list (singly linked overflow list)
//   3. full v1 allocator (hak_pool_try_alloc_v1_impl)
// Stats (when flatten stats are enabled): TLS hits bump alloc_tls_hit,
// the fallback bumps alloc_fallback_v1.
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
    if (!hak_pool_is_poolable(size)) return NULL;
    const int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) return NULL;

    PoolTLSRing* l2 = &g_tls_bin[class_idx].ring;
    PoolBlock* blk = NULL;

    if (g_tls_ring_enabled && l2->top > 0) {
        // Tier 1: pop the L2 ring.
        blk = l2->items[--l2->top];
    } else if (g_tls_bin[class_idx].lo_head) {
        // Tier 2: pop the lo-list head (guard the count against underflow).
        blk = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = blk->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
    }

    if (blk) {
        // Adopt shared pages to this thread so free can stay on the fast path.
        mid_desc_adopt(blk, class_idx, (uint64_t)(uintptr_t)pthread_self());
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
        }
        return hak_pool_block_to_user(blk, class_idx, site_id);
    }

    // Tier 3: both TLS caches empty — defer to the full v1 implementation.
    if (hak_pool_v1_flatten_stats_enabled()) {
        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
    }
    return hak_pool_try_alloc_v1_impl(size, site_id);
}
// ============================================================================
// Pool V1 Flatten Free (TLS-only fast path)
// ============================================================================
// Free fast path: when the calling thread owns the block's page (per the
// MidPageDesc owner_tid), the block is cached thread-locally — first into
// the L2 ring, overflowing into the lo-list, which spills half its contents
// to the shared remote stack once it exceeds g_tls_lo_max.
// All other cases fall back to hak_pool_free_v1_impl(); when flatten stats
// are enabled, each fallback reason is counted separately
// (free_fb_page_null / free_fb_other / free_fb_not_mine).
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
if (!ptr) return;
if (!hak_pool_is_poolable(size)) return;
// Recover the block base: the user pointer sits HEADER_SIZE past it.
void* raw = (char*)ptr - HEADER_SIZE;
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
if (!d_desc) {
// No page descriptor — not a page this fast path understands.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
int class_idx = (int)d_desc->class_idx;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
// Corrupt or foreign descriptor — defer to the full implementation.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
const uint64_t owner_tid = d_desc->owner_tid;
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
// TLS path only when the feature is on, the page has an owner, and that
// owner is this thread (owner_tid == 0 means "unowned" here).
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
PoolBlock* block = (PoolBlock*)raw;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
// L2 ring has room: cheapest possible free.
ring->items[ring->top++] = block;
} else {
// Ring full (or disabled): push onto the TLS lo-list.
block->next = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = block;
g_tls_bin[class_idx].lo_count++;
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
// Over the TLS cap: spill half the lo-list to the shared
// per-(class, shard) remote stack via lock-free CAS push.
size_t spill = g_tls_bin[class_idx].lo_count / 2;
int shard = hak_pool_get_shard_index(site_id);
while (spill-- && g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
g_tls_bin[class_idx].lo_count--;
uintptr_t old_head;
do {
// acquire pairs with the release below; on success the
// release publishes b->next to whoever pops the stack.
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
b->next = (PoolBlock*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&g_pool.remote_head[class_idx][shard],
&old_head, (uintptr_t)b,
memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
}
// Advertise that this shard now has blocks available.
set_nonempty_bit(class_idx, shard);
}
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
}
return;
}
// Page owned by another thread (or TLS free disabled): full v1 free.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
}
#endif // POOL_ALLOC_V1_FLAT_BOX_H

View File

@@ -12,6 +12,7 @@
#include "box/pool_free_v1_box.h" // Pool v1 free implementation (L0-SplitBox + L1-FastBox/SlowBox)
#include "box/pool_block_to_user_box.h" // Pool block to user pointer helpers
#include "box/pool_free_v2_box.h" // Pool v2 free implementation (with hotbox v2)
#include "box/pool_alloc_v1_flat_box.h" // Pool v1 flatten (TLS-only fast path)
#include <stdint.h>
static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
@@ -577,111 +578,6 @@ static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) {
return user4;
}
// --- v1 flatten (opt-in) ----------------------------------------------------
// TLS-only allocation fast path: L2 ring first, then the lo-list, then the
// full v1 allocator. TLS hits count alloc_tls_hit; the fallback counts
// alloc_fallback_v1 (flatten stats only).
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
    if (!hak_pool_is_poolable(size)) return NULL;
    const int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) return NULL;

    PoolTLSRing* l2 = &g_tls_bin[class_idx].ring;
    PoolBlock* blk = NULL;

    if (g_tls_ring_enabled && l2->top > 0) {
        // Tier 1: pop the L2 ring.
        blk = l2->items[--l2->top];
    } else if (g_tls_bin[class_idx].lo_head) {
        // Tier 2: pop the lo-list head (guard the count against underflow).
        blk = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = blk->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
    }

    if (blk) {
        // Adopt shared pages to this thread so free can stay on the fast path.
        mid_desc_adopt(blk, class_idx, (uint64_t)(uintptr_t)pthread_self());
        if (hak_pool_v1_flatten_stats_enabled()) {
            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
        }
        return hak_pool_block_to_user(blk, class_idx, site_id);
    }

    // Tier 3: both TLS caches empty — defer to the full v1 implementation.
    if (hak_pool_v1_flatten_stats_enabled()) {
        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
    }
    return hak_pool_try_alloc_v1_impl(size, site_id);
}
// Free fast path: when the calling thread owns the block's page (per the
// MidPageDesc owner_tid), the block is cached thread-locally — first into
// the L2 ring, overflowing into the lo-list, which spills half its contents
// to the shared remote stack once it exceeds g_tls_lo_max. All other cases
// fall back to hak_pool_free_v1_impl(); when flatten stats are enabled, each
// fallback reason is counted (free_fb_page_null / free_fb_other /
// free_fb_not_mine).
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
if (!ptr) return;
if (!hak_pool_is_poolable(size)) return;
// Recover the block base: the user pointer sits HEADER_SIZE past it.
void* raw = (char*)ptr - HEADER_SIZE;
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
if (!d_desc) {
// No page descriptor — not a page this fast path understands.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
int class_idx = (int)d_desc->class_idx;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
// Corrupt or foreign descriptor — defer to the full implementation.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
const uint64_t owner_tid = d_desc->owner_tid;
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
// TLS path only when the feature is on, the page has an owner, and that
// owner is this thread (owner_tid == 0 means "unowned" here).
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
PoolBlock* block = (PoolBlock*)raw;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
// L2 ring has room: cheapest possible free.
ring->items[ring->top++] = block;
} else {
// Ring full (or disabled): push onto the TLS lo-list.
block->next = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = block;
g_tls_bin[class_idx].lo_count++;
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
// Over the TLS cap: spill half the lo-list to the shared
// per-(class, shard) remote stack via lock-free CAS push.
size_t spill = g_tls_bin[class_idx].lo_count / 2;
int shard = hak_pool_get_shard_index(site_id);
while (spill-- && g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
g_tls_bin[class_idx].lo_count--;
uintptr_t old_head;
do {
// acquire pairs with the release below; on success the
// release publishes b->next to whoever pops the stack.
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
b->next = (PoolBlock*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&g_pool.remote_head[class_idx][shard],
&old_head, (uintptr_t)b,
memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
}
// Advertise that this shard now has blocks available.
set_nonempty_bit(class_idx, shard);
}
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
}
return;
}
// Page owned by another thread (or TLS free disabled): full v1 free.
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
}
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;