Files
hakmem/core/box/pool_alloc_v1_box.h
Moe Charm (CI) 506d8f2e5e Phase: Pool API Modularization - Step 8 (FINAL): Extract pool_alloc_v1_box.h
Extract 288 lines: hak_pool_try_alloc_v1_impl() - LARGEST SIZE
- New box: core/box/pool_alloc_v1_box.h (v1 alloc baseline, no hotbox_v2)
- Updated: pool_api.inc.h (add include, remove extracted function)
- Build: OK, bench_mid_large_mt_hakmem: 8.01M ops/s (baseline ~8M, within ±2%)
- Risk: MEDIUM (simpler than v2 but large function, validated)
- Result: pool_api.inc.h reduced from 909 lines to ~40 lines (95% reduction)

ALL 5 STEPS COMPLETE (Steps 4-8):
- Step 4: pool_block_to_user_box.h (30 lines) - helpers
- Step 5: pool_free_v2_box.h (121 lines) - v2 free with hotbox
- Step 6: pool_alloc_v1_flat_box.h (103 lines) - v1 flatten TLS
- Step 7: pool_alloc_v2_box.h (277 lines) - v2 alloc with hotbox
- Step 8: pool_alloc_v1_box.h (288 lines) - v1 alloc baseline

Total extracted: 819 lines
Final pool_api.inc.h size: ~40 lines (public wrappers only)
Performance: MAINTAINED (8M ops/s baseline)

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-12 22:28:13 +09:00

343 lines
18 KiB
C

// pool_alloc_v1_box.h — Box: Pool V1 Alloc Implementation
//
// Purpose: Pool v1 alloc path (baseline without hotbox_v2)
// Pattern: Standard alloc path with MF2, TC drain, and TLS support
// Phase: Pool API Modularization - Step 8 (LARGEST SIZE - 288 lines, FINAL STEP)
// Dependencies: Assumes pool_api.inc.h includes this after pool_alloc_v2_box.h
// (provides AllocHeader, PoolBlock, PoolTLSRing, g_pool, etc.)
#ifndef POOL_ALLOC_V1_BOX_H
#define POOL_ALLOC_V1_BOX_H
#include "pool_block_to_user_box.h" // Pool block to user helpers (no longer used inline but provides mid_set_header)
#include "pool_config_box.h" // For configuration gates
#include "pool_stats_box.h" // For statistics
#include "pool_mid_desc_cache_box.h" // For mid_desc_lookup_cached
#include "pagefault_telemetry_box.h" // For pagefault_telemetry_touch
#include <stdint.h>
#include <stdatomic.h>
// Prototypes for routines defined outside this header. Per the original note
// this is the same set the v2 box uses, minus the hotbox_v2 entry points.

// Pool setup and request classification (size -> class, site -> shard).
extern void  hak_pool_init(void);
extern int   hak_pool_is_poolable(size_t size);
extern int   hak_pool_get_class_index(size_t size);
extern int   hak_pool_get_shard_index(uintptr_t site_id);

// MF2 allocation entry point, taken when g_mf2_enabled is set.
extern void *mf2_alloc_fast(int class_idx, size_t size, uintptr_t site_id);

// Per-(class, shard) freelist helpers.
extern int   choose_nonempty_shard(int class_idx, int shard_idx);
extern int   is_shard_nonempty(int class_idx, int shard_idx);
extern void  set_nonempty_bit(int class_idx, int shard);
extern void  clear_nonempty_bit(int class_idx, int shard);
extern void  drain_remote_locked(int class_idx, int shard_idx);
extern int   refill_freelist(int class_idx, int shard_idx);

// Per-block / per-page bookkeeping applied to a block before it is returned.
extern void  mid_set_header(AllocHeader *hdr, size_t size, uintptr_t site_id);
extern void  mid_page_inuse_inc(void *raw);
extern void  mid_desc_adopt(void *block, int class_idx, uint64_t owner_tid);
// Note: The following functions/macros/types are assumed to be available from the
// caller's compilation unit (hakmem_pool.c):
// - PoolTLSPage, PoolTLSBin, FrozenPolicy (types from pool_tls_types.inc.h)
// - mid_tc_has_items, mid_tc_drain_into_tls (from pool_mid_tc.inc.h)
// - refill_tls_from_active_page, alloc_tls_page (from pool_tls_core.inc.h)
// - hkm_policy_get (from hakmem_policy.h)
// - hkm_prof_begin, hkm_prof_end (macros from hakmem_prof.h)
// Assumed available from caller includes:
// - AllocHeader, PoolBlock, PoolTLSRing, PoolTLSPage (from hakmem_internal.h / pool_tls_types.inc.h)
// - g_pool, g_tls_bin, g_class_sizes, t_pool_rng, g_count_sample_exp (from hakmem_pool.c)
// - g_tls_ring_enabled, g_tls_active_page_a/b/c, g_tc_enabled, g_tc_drain_trigger
// - g_mf2_enabled, g_wrap_l2_enabled, g_trylock_probes
// - HEADER_SIZE, POOL_L2_RING_CAP, POOL_NUM_SHARDS, POOL_MIN_SIZE, POOL_MAX_SIZE
// - HKM_TIME_START, HKM_TIME_END, HKM_CAT_*, HKP_* macros
// ============================================================================
// Pool V1 Alloc Implementation (baseline with MF2, TC drain, TLS support)
// ============================================================================
/**
 * hak_pool_try_alloc_v1_impl - Pool v1 allocation path (no hotbox_v2).
 *
 * Sources are tried in this order; the first one that yields a block wins:
 *   1. MF2 (when g_mf2_enabled): fully delegated to mf2_alloc_fast().
 *   2. TLS: optional TC drain into the ring, ring pop, then local LIFO pop.
 *   3. Trylock batch-pop of a shared shard freelist into the TLS ring/LIFO.
 *   4. Bump allocation from one of the three TLS active pages.
 *   5. Locked shard freelist: remote drain, neighbour-shard steal, a fresh
 *      TLS page via alloc_tls_page(), and finally refill_freelist().
 *
 * @param size     Requested size in bytes.
 * @param site_id  Call-site identifier; passed to mid_set_header() and used
 *                 to pick the shard via hak_pool_get_shard_index().
 * @return Pointer HEADER_SIZE bytes past the start of the chosen block, or
 *         NULL when the request is rejected (nested wrapper, wrapper with
 *         L2 disabled, not poolable, unmapped class) or no block could be
 *         obtained.
 */
static inline void* hak_pool_try_alloc_v1_impl(size_t size, uintptr_t site_id) {
    // Debug: one-shot trace on the very first call.
    // NOTE(review): first_call is a plain static updated without
    // synchronization, so concurrent first calls may each log once - confirm
    // this is acceptable for a debug-only trace.
    static int first_call = 1;
    if (first_call) { HAKMEM_LOG("[Pool] hak_pool_try_alloc FIRST CALL EVER!\n"); first_call = 0; }
    if (size == 40960) { HAKMEM_LOG("[Pool] hak_pool_try_alloc called with 40KB (Bridge class 5)\n"); }

    // Original author's note: pthread_once() inside makes this thread-safe.
    hak_pool_init();

    // Extra tracing is emitted only for requests in the 33000..41000 byte range.
    const int trace_size = (size >= 33000 && size <= 41000);
    if (trace_size) { HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size); }

    // P1.7 guard: allow pool by default even when called from wrappers.
    // Only block if explicitly disabled via env or during nested recursion.
    extern int hak_in_wrapper(void);
    extern __thread int g_hakmem_lock_depth;
    int in_wrapper = hak_in_wrapper();
    if (in_wrapper && g_hakmem_lock_depth > 1) {
        if (trace_size) { HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth); }
        return NULL;
    }
    if (in_wrapper && !g_wrap_l2_enabled) {
        if (trace_size) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled); }
        return NULL;
    }
    if (!hak_pool_is_poolable(size)) {
        if (trace_size) { HAKMEM_LOG("[Pool] REJECTED: not poolable (min=%d, max=%d)\n", POOL_MIN_SIZE, POOL_MAX_SIZE); }
        return NULL;
    }

    // Get class index (the shard index is computed later, only if needed)
    int class_idx = hak_pool_get_class_index(size);
    if (class_idx < 0) {
        if (trace_size) { HAKMEM_LOG("[Pool] REJECTED: class_idx=%d (size=%zu not mapped)\n", class_idx, size); }
        return NULL;
    }
    if (trace_size) { HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx); }

    // MF2: Per-Page Sharding path
    if (g_mf2_enabled) {
        return mf2_alloc_fast(class_idx, size, site_id);
    }

    // OLD PATH: TLS fast path (ring then local LIFO); drain TC only when needed
    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
    if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
        HKM_TIME_START(t_tc_drain);
        if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
            if (ring->top > 0) {
                HKM_TIME_START(t_ring_pop0);
                PoolBlock* tlsb = ring->items[--ring->top];
                HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0);
                void* raw = (void*)tlsb;
                AllocHeader* hdr = (AllocHeader*)raw;
                mid_set_header(hdr, g_class_sizes[class_idx], site_id);
                void* user0 = (char*)raw + HEADER_SIZE;
                mid_page_inuse_inc(raw);
                // xorshift step on the thread-local RNG; the hit counter is
                // bumped only when the low g_count_sample_exp bits are zero.
                t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
                pagefault_telemetry_touch(PF_BUCKET_MID, user0);
                return user0;
            }
        } else {
            HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain);
        }
    }

    if (g_tls_ring_enabled) {
        if (ring->top == 0) {
            atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed);
        }
        if (ring->top > 0) {
            HKM_TIME_START(t_ring_pop1);
            PoolBlock* tlsb = ring->items[--ring->top];
            HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1);
            void* raw = (void*)tlsb;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            void* user1 = (char*)raw + HEADER_SIZE;
            // NOTE(review): unlike every other return path, this one does not
            // call mid_page_inuse_inc(raw) - confirm whether that is intended.
            t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
            if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
            pagefault_telemetry_touch(PF_BUCKET_MID, user1);
            return user1;
        }
    }

    if (g_tls_bin[class_idx].lo_head) {
        HKM_TIME_START(t_lifo_pop0);
        PoolBlock* b = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = b->next;
        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0);
        void* raw = (void*)b;
        AllocHeader* hdr = (AllocHeader*)raw;
        mid_set_header(hdr, g_class_sizes[class_idx], site_id);
        void* user2 = (char*)raw + HEADER_SIZE;
        mid_page_inuse_inc(raw);
        t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
        if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
        pagefault_telemetry_touch(PF_BUCKET_MID, user2);
        return user2;
    }

    // Compute shard only when we need to access shared structures
    int shard_idx = hak_pool_get_shard_index(site_id);

    // Try to batch-pop from a non-empty shard using trylock to fill TLS ring
    if (g_tls_ring_enabled) {
        int s0 = choose_nonempty_shard(class_idx, shard_idx);
        for (int probe = 0; probe < g_trylock_probes; ++probe) {
            int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
            pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
            atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
            if (pthread_mutex_trylock(l) == 0) {
                atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
                // First, drain any remote frees into freelist
                if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) {
                    drain_remote_locked(class_idx, s);
                }
                // Move the whole shard freelist into TLS: ring first (up to its
                // free capacity), the remainder onto the local LIFO.
                PoolBlock* head = g_pool.freelist[class_idx][s];
                int to_ring = POOL_L2_RING_CAP - ring->top;
                if (to_ring < 0) to_ring = 0;
                while (head && to_ring-- > 0) {
                    PoolBlock* nxt = head->next;
                    ring->items[ring->top++] = head;
                    head = nxt;
                }
                while (head) {
                    PoolBlock* nxt = head->next;
                    head->next = g_tls_bin[class_idx].lo_head;
                    g_tls_bin[class_idx].lo_head = head;
                    g_tls_bin[class_idx].lo_count++;
                    head = nxt;
                }
                // head is NULL here, so the shard freelist is left empty.
                g_pool.freelist[class_idx][s] = head;
                if (!head) clear_nonempty_bit(class_idx, s);
                pthread_mutex_unlock(l);
                if (ring->top > 0) {
                    PoolBlock* tlsb = ring->items[--ring->top];
                    void* raw = (void*)tlsb;
                    AllocHeader* hdr = (AllocHeader*)raw;
                    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
                    mid_page_inuse_inc(raw);
                    t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
                    if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
                    return (char*)raw + HEADER_SIZE;
                }
            }
        }
    }

    // Try TLS active pages (owner-only local bump-run, up to 3)
    PoolTLSPage* ap = NULL;
    if (g_tls_active_page_a[class_idx].page &&
        g_tls_active_page_a[class_idx].count > 0 &&
        g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end) {
        ap = &g_tls_active_page_a[class_idx];
    } else if (g_tls_active_page_b[class_idx].page &&
               g_tls_active_page_b[class_idx].count > 0 &&
               g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end) {
        ap = &g_tls_active_page_b[class_idx];
    } else if (g_tls_active_page_c[class_idx].page &&
               g_tls_active_page_c[class_idx].count > 0 &&
               g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end) {
        ap = &g_tls_active_page_c[class_idx];
    }
    if (ap) {
        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
            int need = POOL_L2_RING_CAP - ring->top;
            (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
        }
        PoolBlock* b = NULL;
        if (ring->top > 0) {
            b = ring->items[--ring->top];
        } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
            // Bump-allocate one block; retire the page once it is exhausted.
            b = (PoolBlock*)(void*)ap->bump;
            ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
            ap->count--;
            if (ap->bump >= ap->end || ap->count<=0) { ap->page=NULL; ap->count=0; }
        }
        if (b) {
            void* raw = (void*)b;
            AllocHeader* hdr = (AllocHeader*)raw;
            mid_set_header(hdr, g_class_sizes[class_idx], site_id);
            mid_page_inuse_inc(raw);
            g_pool.hits[class_idx]++;
            return (char*)raw + HEADER_SIZE;
        }
    }

    // Lock the shard freelist for this (class, shard)
    pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m;
    HKM_TIME_START(t_lock);
    struct timespec ts_lk1; int lk1 = hkm_prof_begin(&ts_lk1);
    (void)ts_lk1; (void)lk1; // Unused profiling variables
    pthread_mutex_lock(lock);
    HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock);
    hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);

    // Try to pop from freelist
    PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
    // Set when `block` was unlinked from a neighbouring shard rather than
    // taken from this shard's own freelist.
    int stole = 0;
    if (!block) {
        // Before refilling, try draining remote stack and simple shard steal
        const FrozenPolicy* pol = hkm_policy_get();
        if (pol) {
            uint16_t cap = 0;
            if (class_idx < 5) cap = pol->mid_cap[class_idx];
            else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
            else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
            // Drain remotes
            if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) {
                drain_remote_locked(class_idx, shard_idx);
                block = g_pool.freelist[class_idx][shard_idx];
            }
            // Light shard steal when over cap
            // NOTE(review): neighbour locks are taken with a blocking lock while
            // this shard's lock is held, with no fixed lock order - confirm two
            // threads stealing from each other's shards cannot deadlock.
            if (!block && cap > 0 && g_pool.pages_by_class[class_idx] >= cap) {
                HKM_TIME_START(t_steal);
                for (int d = 1; d <= 4 && !stole; d++) {
                    int s1 = (shard_idx + d) & (POOL_NUM_SHARDS - 1);
                    int s2 = (shard_idx - d) & (POOL_NUM_SHARDS - 1);
                    if (is_shard_nonempty(class_idx, s1)) {
                        pthread_mutex_t* l2 = &g_pool.freelist_locks[class_idx][s1].m;
                        pthread_mutex_lock(l2);
                        PoolBlock* b2 = g_pool.freelist[class_idx][s1];
                        if (b2) {
                            g_pool.freelist[class_idx][s1] = b2->next;
                            if (!g_pool.freelist[class_idx][s1]) clear_nonempty_bit(class_idx, s1);
                            block = b2; stole = 1;
                        }
                        pthread_mutex_unlock(l2);
                    }
                    if (!stole && is_shard_nonempty(class_idx, s2)) {
                        pthread_mutex_t* l3 = &g_pool.freelist_locks[class_idx][s2].m;
                        pthread_mutex_lock(l3);
                        PoolBlock* b3 = g_pool.freelist[class_idx][s2];
                        if (b3) {
                            g_pool.freelist[class_idx][s2] = b3->next;
                            if (!g_pool.freelist[class_idx][s2]) clear_nonempty_bit(class_idx, s2);
                            block = b3; stole = 1;
                        }
                        pthread_mutex_unlock(l3);
                    }
                }
                HKM_TIME_END(HKM_CAT_SHARD_STEAL, t_steal);
            }
        }
        if (!stole && !block) {
            // Freelist empty, refill page: pick the first unused active-page
            // slot, falling back to slot A when all three are in use.
            PoolTLSPage* tap = NULL;
            if (g_tls_active_page_a[class_idx].page == NULL || g_tls_active_page_a[class_idx].count == 0) tap = &g_tls_active_page_a[class_idx];
            else if (g_tls_active_page_b[class_idx].page == NULL || g_tls_active_page_b[class_idx].count == 0) tap = &g_tls_active_page_b[class_idx];
            else if (g_tls_active_page_c[class_idx].page == NULL || g_tls_active_page_c[class_idx].count == 0) tap = &g_tls_active_page_c[class_idx];
            else tap = &g_tls_active_page_a[class_idx];
            HKM_TIME_START(t_alloc_page);
            if (alloc_tls_page(class_idx, tap)) {
                HKM_TIME_END(HKM_CAT_POOL_ALLOC_TLS_PAGE, t_alloc_page);
                pthread_mutex_unlock(lock);
                // Top-up ring and return
                ap = tap;
                if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
                    int need = POOL_L2_RING_CAP - ring->top;
                    (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need);
                }
                PoolBlock* takeb = NULL;
                if (ring->top > 0) {
                    HKM_TIME_START(t_ring_pop2);
                    takeb = ring->items[--ring->top];
                    HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop2);
                } else if (ap->page && ap->count > 0 && ap->bump < ap->end) {
                    takeb = (PoolBlock*)(void*)ap->bump;
                    ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]);
                    ap->count--;
                    if (ap->bump >= ap->end || ap->count==0) { ap->page=NULL; ap->count=0; }
                }
                // Neither the ring nor the new page produced a block: fail the
                // allocation instead of writing a header through a NULL pointer.
                if (!takeb) return NULL;
                void* raw2 = (void*)takeb;
                AllocHeader* hdr2 = (AllocHeader*)raw2;
                mid_set_header(hdr2, g_class_sizes[class_idx], site_id);
                void* user3 = (char*)raw2 + HEADER_SIZE;
                mid_page_inuse_inc(raw2);
                g_pool.hits[class_idx]++;
                pagefault_telemetry_touch(PF_BUCKET_MID, user3);
                return user3;
            }
            HKM_TIME_START(t_refill);
            struct timespec ts_rf; int rf = hkm_prof_begin(&ts_rf);
            (void)ts_rf; (void)rf;
            int ok = refill_freelist(class_idx, shard_idx);
            HKM_TIME_END(HKM_CAT_POOL_REFILL, t_refill);
            hkm_prof_end(rf, HKP_POOL_REFILL, &ts_rf);
            // Re-read the head after a successful refill: `block` was NULL on
            // entry to this branch and is dereferenced below.
            if (ok) block = g_pool.freelist[class_idx][shard_idx];
            if (!ok || !block) {
                t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
                if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.misses[class_idx]++;
                pthread_mutex_unlock(lock);
                return NULL;
            }
        }
    }

    // Pop block and adopt page.
    // A stolen block was already unlinked from its donor shard and its `next`
    // still points into the donor's list, so it must not become this shard's
    // head; only pop when the block came from this shard's own freelist.
    if (!stole) {
        g_pool.freelist[class_idx][shard_idx] = block->next;
    }
    mid_desc_adopt(block, class_idx, (uint64_t)(uintptr_t)pthread_self());
    t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5;
    if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
    if (g_pool.freelist[class_idx][shard_idx] == NULL) clear_nonempty_bit(class_idx, shard_idx);
    pthread_mutex_unlock(lock);

    // Store to TLS then pop
    PoolBlock* take;
    if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
        ring->items[ring->top++] = block;
        take = ring->items[--ring->top];
    } else {
        block->next = g_tls_bin[class_idx].lo_head;
        g_tls_bin[class_idx].lo_head = block;
        g_tls_bin[class_idx].lo_count++;
        if (g_tls_ring_enabled && ring->top > 0) {
            take = ring->items[--ring->top];
        } else {
            take = g_tls_bin[class_idx].lo_head;
            g_tls_bin[class_idx].lo_head = take->next;
            if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
        }
    }
    void* raw = (void*)take;
    AllocHeader* hdr = (AllocHeader*)raw;
    mid_set_header(hdr, g_class_sizes[class_idx], site_id);
    void* user4 = (char*)raw + HEADER_SIZE;
    mid_page_inuse_inc(raw);
    pagefault_telemetry_touch(PF_BUCKET_MID, user4);
    return user4;
}
#endif // POOL_ALLOC_V1_BOX_H