Files
hakmem/core/box/pool_core_api.inc.h
Moe Charm (CI) 1da8754d45 CRITICAL FIX: eliminate the 4T SEGV caused by uninitialized TLS
**Problem:**
- Larson with 4 threads SEGVs 100% of the time (1 thread completes at 2.09M ops/s)
- System malloc / mimalloc run fine at 4T (33.52M ops/s)
- Still SEGVs at 4T even with SS OFF + Remote OFF

**Root cause (Task agent ultrathink investigation):**
```
CRASH: mov (%r15),%r13
R15 = 0x6261  ← ASCII "ba" (garbage value, uninitialized TLS)
```

TLS variables in worker threads were uninitialized:
- `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];`  ← no initializer
- Not zero-initialized in threads spawned via pthread_create()
- The NULL check passes (0x6261 != NULL) → dereference → SEGV
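
A minimal sketch of that failure mode, for illustration only (names and types are simplified stand-ins for the real arrays in core/hakmem_tiny.c):

```c
/* Sketch: a non-zero garbage value in an uninitialized TLS slot defeats the NULL check. */
#define TINY_NUM_CLASSES 8   /* placeholder value for the sketch */
typedef struct Block { struct Block* next; } Block;

/* Without "= {0}" the worker threads were observed to see garbage here (e.g. 0x6261). */
static __thread Block* g_tls_sll_head[TINY_NUM_CLASSES];

void* pop_fast(int cls) {
    Block* b = g_tls_sll_head[cls];
    if (b == NULL) return NULL;      /* 0x6261 != NULL, so the check passes...    */
    g_tls_sll_head[cls] = b->next;   /* ...and this load faults: mov (%r15),%r13  */
    return b;
}
```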

**Fix:**
Added an explicit initializer `= {0}` to every TLS array (before/after sketch follows the list):

1. **core/hakmem_tiny.c:**
   - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}`
   - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}`
   - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bcur[TINY_NUM_CLASSES] = {0}`
   - `g_tls_bend[TINY_NUM_CLASSES] = {0}`

2. **core/tiny_fastcache.c:**
   - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}`
   - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}`

3. **core/hakmem_tiny_magazine.c:**
   - `g_tls_mags[TINY_NUM_CLASSES] = {0}`

4. **core/tiny_sticky.c:**
   - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}`
   - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}`
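
The change itself is mechanical; a before/after sketch of the pattern (the array length macro value and element types other than `g_tls_sll_head` are illustrative assumptions):

```c
#include <stdint.h>
#define TINY_NUM_CLASSES 8   /* placeholder value for the sketch */

/* Before (crashed at 4T): __thread void* g_tls_sll_head[TINY_NUM_CLASSES];    */
/* After: every TLS array gets an explicit zero initializer, so threads spawned */
/* with pthread_create() start with all-NULL heads and zero counts.             */
__thread void*    g_tls_sll_head[TINY_NUM_CLASSES]  = {0};
__thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES] = {0};   /* element type assumed */
```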

**Result:**
```
Before: 1T: 2.09M   |  4T: SEGV 💀
After:  1T: 2.41M   |  4T: 4.19M   (+15% 1T, SEGV eliminated)
```

**Testing:**
```bash
# 1 thread: completes
./larson_hakmem 2 8 128 1024 1 12345 1
→ Throughput = 2,407,597 ops/s 

# 4 threads: completes (previously SEGV)
./larson_hakmem 2 8 128 1024 1 12345 4
→ Throughput = 4,192,155 ops/s 
```

**Investigation credit:** root cause pinpointed by the Task agent (ultrathink mode)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00

328 lines
26 KiB
C

// pool_core_api.inc.h — Box: L2 Pool core state and basic config
#ifndef POOL_CORE_API_INC_H
#define POOL_CORE_API_INC_H
// Global knobs (env-configurable)
static int g_wrap_l2_enabled = 0; // env: HAKMEM_WRAP_L2=1 to allow in wrappers
static int g_shard_mix_enabled = 0; // env: HAKMEM_SHARD_MIX=1 to enable stronger hashing
static int g_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING=1 to enable TLS ring
static int g_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES (1..8)
static int g_ring_return_div = 2; // env: HAKMEM_RING_RETURN_DIV (2=half, 3=third)
static int g_tls_lo_max = 256; // env: HAKMEM_TLS_LO_MAX (LIFO size cap)
int g_hdr_light_enabled = 0; // env: HAKMEM_HDR_LIGHT=1/2
static int g_pool_min_bundle = 2; // env: HAKMEM_POOL_MIN_BUNDLE
static int g_count_sample_exp = 10; // env: HAKMEM_POOL_COUNT_SAMPLE (0..16)
static __thread uint32_t t_pool_rng = 0x243f6a88u; // per-thread RNG for sampling
// Size class table (for O(1) lookup). Index 5/6 are Bridge classes for 32-64KB gap.
static size_t g_class_sizes[POOL_NUM_CLASSES] = {
POOL_CLASS_2KB, POOL_CLASS_4KB, POOL_CLASS_8KB, POOL_CLASS_16KB,
POOL_CLASS_32KB, POOL_CLASS_40KB, POOL_CLASS_52KB
};
__attribute__((unused)) static const int g_blocks_per_page[POOL_NUM_CLASSES] = {
POOL_PAGE_SIZE / POOL_CLASS_2KB,
POOL_PAGE_SIZE / POOL_CLASS_4KB,
POOL_PAGE_SIZE / POOL_CLASS_8KB,
POOL_PAGE_SIZE / POOL_CLASS_16KB,
POOL_PAGE_SIZE / POOL_CLASS_32KB,
POOL_PAGE_SIZE / POOL_CLASS_40KB,
POOL_PAGE_SIZE / POOL_CLASS_52KB
};
// Global pool state
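// Layout notes:
//  - freelist[class][shard]    : per-class, per-shard singly linked freelists, guarded by freelist_locks
//  - nonempty_mask[class]      : one bit per shard hinting which shard freelists are non-empty
//  - remote_head/remote_count  : lock-free MPSC stacks for cross-thread frees, drained under the shard lock
//  - hits/misses/refills/frees : counters, 64-byte aligned to reduce false sharing
//  - bundle_factor/last_*      : adaptive refill-bundling state (see pool_update_bundle_factor below)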
typedef struct {
PoolBlock* freelist[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
PaddedMutex freelist_locks[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
atomic_uint_fast64_t nonempty_mask[POOL_NUM_CLASSES];
atomic_uintptr_t remote_head[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
atomic_uint remote_count[POOL_NUM_CLASSES][POOL_NUM_SHARDS];
uint64_t hits[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t misses[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t refills[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t frees[POOL_NUM_CLASSES] __attribute__((aligned(64)));
uint64_t total_bytes_allocated __attribute__((aligned(64)));
uint64_t total_pages_allocated __attribute__((aligned(64)));
uint64_t pages_by_class[POOL_NUM_CLASSES] __attribute__((aligned(64)));
int bundle_factor[POOL_NUM_CLASSES];
uint64_t last_hits[POOL_NUM_CLASSES];
uint64_t last_misses[POOL_NUM_CLASSES];
int initialized;
int tls_free_enabled;
atomic_uint_fast64_t trylock_attempts __attribute__((aligned(64)));
atomic_uint_fast64_t trylock_success __attribute__((aligned(64)));
atomic_uint_fast64_t ring_underflow __attribute__((aligned(64)));
} PoolGlobal;
static PoolGlobal g_pool;
// --- Boxed Public/Core API implementations moved from hakmem_pool.c ---
// Adjust bundle factor based on window stats
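// Policy (as implemented below): evaluate only after a window of >= 256 hit+miss events.
// If the window hit rate falls below 60% and misses exceed hits by more than 16, grow the
// bundle factor (up to 4 pages per refill); if it rises above 90% and hits exceed misses
// by more than 32, shrink it back toward 1.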
static inline void pool_update_bundle_factor(int class_idx) {
uint64_t h = g_pool.hits[class_idx];
uint64_t m = g_pool.misses[class_idx];
uint64_t dh = h - g_pool.last_hits[class_idx];
uint64_t dm = m - g_pool.last_misses[class_idx];
uint64_t dt = dh + dm;
if (dt < 256) return;
int bf = g_pool.bundle_factor[class_idx];
if (bf <= 0) bf = 1;
if (dt > 0) {
double hit_rate = (double)dh / (double)dt;
if (hit_rate < 0.60 && dm > (dh + 16)) { if (bf < 4) bf++; }
else if (hit_rate > 0.90 && dh > (dm + 32)) { if (bf > 1) bf--; }
}
g_pool.bundle_factor[class_idx] = bf;
g_pool.last_hits[class_idx] = h;
g_pool.last_misses[class_idx] = m;
}
// Refill freelist by allocating a new 64KiB page and splitting to blocks
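// Each refill mmap()s one POOL_PAGE_SIZE page per bundle, carves it into
// (HEADER_SIZE + class size) blocks, prepends them to the shard freelist, and registers
// the page via mid_desc_register(). The bundle count comes from pool_update_bundle_factor()
// and is further capped by the FrozenPolicy per-class page caps (mid_cap / mid_cap_dyn1/2).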
static int refill_freelist(int class_idx, int shard_idx) {
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
if (shard_idx < 0 || shard_idx >= POOL_NUM_SHARDS) return 0;
size_t user_size = g_class_sizes[class_idx];
size_t block_size = HEADER_SIZE + user_size;
int blocks_per_page = POOL_PAGE_SIZE / block_size;
if (blocks_per_page == 0) return 0;
void* page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (page == MAP_FAILED) return 0;
pool_update_bundle_factor(class_idx);
int bundles = g_pool.bundle_factor[class_idx];
if (bundles < 1) bundles = 1; if (bundles > 4) bundles = 4;
const FrozenPolicy* pol = hkm_policy_get();
if (pol) {
uint16_t cap = 0;
if (class_idx < 5) cap = pol->mid_cap[class_idx];
else if (class_idx == 5 && pol->mid_dyn1_bytes != 0) cap = pol->mid_cap_dyn1;
else if (class_idx == 6 && pol->mid_dyn2_bytes != 0) cap = pol->mid_cap_dyn2;
if (cap > 0) {
uint64_t have = g_pool.pages_by_class[class_idx];
if (have >= cap) bundles = 1; else {
uint64_t deficit = (cap - have);
if (deficit < (uint64_t)bundles) bundles = (int)deficit;
if (bundles < 1) bundles = 1; if (bundles > 4) bundles = 4;
if (deficit >= (uint64_t)g_pool_min_bundle && bundles < g_pool_min_bundle) bundles = g_pool_min_bundle;
}
}
}
int pages_allocated_this_call = 0;
for (int b = 0; b < bundles; b++) {
PoolBlock* freelist_head = NULL;
for (int i = 0; i < blocks_per_page; i++) {
void* raw_block = (char*)page + (i * block_size);
__builtin_prefetch((char*)raw_block + block_size, 1, 1);
PoolBlock* block = (PoolBlock*)raw_block;
block->next = freelist_head; freelist_head = block;
}
if (g_pool.freelist[class_idx][shard_idx]) {
PoolBlock* tail = freelist_head; while (tail->next) tail = tail->next;
tail->next = g_pool.freelist[class_idx][shard_idx];
}
g_pool.freelist[class_idx][shard_idx] = freelist_head;
mid_desc_register(page, class_idx, 0);
pages_allocated_this_call++;
if (b + 1 < bundles) {
page = mmap(NULL, POOL_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (page == MAP_FAILED) break; // stop bundling; blocks carved so far are already on the freelist
}
}
set_nonempty_bit(class_idx, shard_idx);
g_pool.refills[class_idx]++;
g_pool.total_pages_allocated += pages_allocated_this_call;
g_pool.pages_by_class[class_idx] += pages_allocated_this_call;
g_pool.total_bytes_allocated += (uint64_t)pages_allocated_this_call * (uint64_t)POOL_PAGE_SIZE;
return 1;
}
// Initialization and teardown
#ifndef HAKMEM_POOL_API_NO_PUBLIC
static pthread_once_t hak_pool_init_once_control = PTHREAD_ONCE_INIT;
static void hak_pool_init_impl(void) {
const FrozenPolicy* pol = hkm_policy_get();
if (pol && pol->mid_dyn1_bytes >= POOL_MIN_SIZE && pol->mid_dyn1_bytes <= POOL_MAX_SIZE) g_class_sizes[5] = pol->mid_dyn1_bytes; else g_class_sizes[5] = 0;
if (pol && pol->mid_dyn2_bytes >= POOL_MIN_SIZE && pol->mid_dyn2_bytes <= POOL_MAX_SIZE) g_class_sizes[6] = pol->mid_dyn2_bytes; else g_class_sizes[6] = 0;
for (int c = 0; c < POOL_NUM_CLASSES; c++) {
for (int s = 0; s < POOL_NUM_SHARDS; s++) { g_pool.freelist[c][s] = NULL; }
atomic_store(&g_pool.nonempty_mask[c], 0);
for (int s = 0; s < POOL_NUM_SHARDS; s++) {
pthread_mutex_init(&g_pool.freelist_locks[c][s].m, NULL);
atomic_store(&g_pool.remote_head[c][s], (uintptr_t)0);
atomic_store(&g_pool.remote_count[c][s], 0);
}
g_pool.hits[c] = 0; g_pool.misses[c] = 0; g_pool.refills[c] = 0; g_pool.frees[c] = 0; g_pool.pages_by_class[c] = 0;
g_pool.bundle_factor[c] = 1; g_pool.last_hits[c] = 0; g_pool.last_misses[c] = 0;
}
g_pool.total_bytes_allocated = 0; g_pool.total_pages_allocated = 0;
atomic_store(&g_pool.trylock_attempts, 0); atomic_store(&g_pool.trylock_success, 0); atomic_store(&g_pool.ring_underflow, 0);
const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE"); g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0);
const char* e_wrap = getenv("HAKMEM_WRAP_L2"); g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE"); if (e_minb) { int v = atoi(e_minb); if (v>=1 && v<=8) g_pool_min_bundle = v; }
const char* e_mix = getenv("HAKMEM_SHARD_MIX"); g_shard_mix_enabled = (e_mix && atoi(e_mix) != 0) ? 1 : 0;
const char* e_ring = getenv("HAKMEM_POOL_TLS_RING"); if (e_ring) g_tls_ring_enabled = (atoi(e_ring) != 0);
const char* e_hdr = getenv("HAKMEM_HDR_LIGHT"); if (e_hdr) g_hdr_light_enabled = atoi(e_hdr);
const char* e_probe = getenv("HAKMEM_TRYLOCK_PROBES"); if (e_probe) { int v = atoi(e_probe); if (v>=1 && v<=8) g_trylock_probes = v; }
const char* e_div = getenv("HAKMEM_RING_RETURN_DIV"); if (e_div) { int v = atoi(e_div); if (v>=2 && v<=4) g_ring_return_div = v; }
const char* e_lo = getenv("HAKMEM_TLS_LO_MAX"); if (e_lo) { int v = atoi(e_lo); if (v>=32 && v<=16384) g_tls_lo_max = v; }
const char* e_cs = getenv("HAKMEM_POOL_COUNT_SAMPLE"); if (e_cs) { int v = atoi(e_cs); if (v>=0 && v<=16) g_count_sample_exp = v; }
const char* e_tc = getenv("HAKMEM_TC_ENABLE"); if (e_tc) g_tc_enabled = (atoi(e_tc) != 0);
const char* e_tcu = getenv("HAKMEM_TC_UNBOUNDED"); if (e_tcu) g_tc_drain_unbounded = (atoi(e_tcu) != 0);
const char* e_tcm = getenv("HAKMEM_TC_DRAIN_MAX"); if (e_tcm) { int v = atoi(e_tcm); if (v>=0 && v<=65536) g_tc_drain_max = v; }
const char* e_tct = getenv("HAKMEM_TC_DRAIN_TRIGGER"); if (e_tct) { int v = atoi(e_tct); if (v>=0 && v<=POOL_L2_RING_CAP) g_tc_drain_trigger = v; }
const char* e_mf2 = getenv("HAKMEM_MF2_ENABLE");
if (e_mf2 && atoi(e_mf2) != 0) {
g_mf2_enabled = 1; mf2_page_registry_init();
const char* e_maxq = getenv("HAKMEM_MF2_MAX_QUEUES"); if (e_maxq) { int v = atoi(e_maxq); if (v>=1 && v<=256) g_mf2_max_queues = v; }
const char* e_lease = getenv("HAKMEM_MF2_LEASE_MS"); if (e_lease) { int v = atoi(e_lease); if (v>=0 && v<=1000) g_mf2_lease_ms = v; }
const char* e_idle = getenv("HAKMEM_MF2_IDLE_THRESHOLD_US"); if (e_idle) { int v = atoi(e_idle); if (v>=0 && v<=10000) g_mf2_idle_threshold_us = v; }
HAKMEM_LOG("[Pool] MF2 Per-Page Sharding enabled\n");
HAKMEM_LOG("[MF2] max_queues=%d, lease_ms=%d, idle_threshold_us=%d\n", g_mf2_max_queues, g_mf2_lease_ms, g_mf2_idle_threshold_us);
}
g_pool.initialized = 1;
HAKMEM_LOG("[Pool] Initialized (L2 Hybrid Pool)\n");
if (g_class_sizes[5] != 0 || g_class_sizes[6] != 0) {
HAKMEM_LOG("[Pool] Classes: 2KB, 4KB, 8KB, 16KB, 32KB, dyn1=%zu B, dyn2=%zu B\n", g_class_sizes[5], g_class_sizes[6]);
} else {
HAKMEM_LOG("[Pool] Classes: 2KB, 4KB, 8KB, 16KB, 32KB\n");
}
HAKMEM_LOG("[Pool] Page size: %d KB\n", POOL_PAGE_SIZE/1024);
HAKMEM_LOG("[Pool] Shards: %d (site-based)\n", POOL_NUM_SHARDS);
}
static void mf2_print_debug_stats(void) {
if (!g_mf2_enabled) return;
fprintf(stderr, "\n[MF2 DEBUG STATS]\n");
fprintf(stderr, "Alloc fast hits: %12lu\n", (unsigned long)atomic_load(&g_mf2_alloc_fast_hit));
fprintf(stderr, "Alloc slow hits: %12lu\n", (unsigned long)atomic_load(&g_mf2_alloc_slow_hit));
fprintf(stderr, "Page reuses: %12lu\n", (unsigned long)atomic_load(&g_mf2_page_reuse_count));
fprintf(stderr, "New pages: %12lu\n", (unsigned long)atomic_load(&g_mf2_new_page_count));
fprintf(stderr, "Owner frees: %12lu\n", (unsigned long)atomic_load(&g_mf2_free_owner_count));
fprintf(stderr, "Remote frees: %12lu\n", (unsigned long)atomic_load(&g_mf2_free_remote_count));
fprintf(stderr, "Slow checked: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_checked_drain));
fprintf(stderr, "Slow found rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_slow_found_remote));
fprintf(stderr, "Full scan chk: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_checked));
fprintf(stderr, "Full scan rem: %12lu\n", (unsigned long)atomic_load(&g_mf2_full_scan_found_remote));
fprintf(stderr, "Eager scan: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_scanned));
fprintf(stderr, "Eager found: %12lu\n", (unsigned long)atomic_load(&g_mf2_eager_drain_found));
fprintf(stderr, "Drain attempts: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_attempts));
fprintf(stderr, "Drain successes: %12lu\n", (unsigned long)atomic_load(&g_mf2_drain_success));
fprintf(stderr, "Remote drains: %12lu (blocks: %lu)\n",
(unsigned long)atomic_load(&g_mf2_drain_count), (unsigned long)atomic_load(&g_mf2_drain_blocks));
fprintf(stderr, "\n[PENDING QUEUE]\n");
fprintf(stderr, "Pending enqueued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_enqueued));
fprintf(stderr, "Pending drained: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_drained));
fprintf(stderr, "Pending requeued: %12lu\n", (unsigned long)atomic_load(&g_mf2_pending_requeued));
uint64_t total_allocs = atomic_load(&g_mf2_alloc_fast_hit) + atomic_load(&g_mf2_alloc_slow_hit);
uint64_t total_frees = atomic_load(&g_mf2_free_owner_count) + atomic_load(&g_mf2_free_remote_count);
if (total_allocs > 0) fprintf(stderr, "\nFast path hit rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_alloc_fast_hit) / total_allocs);
if (total_frees > 0) fprintf(stderr, "Owner free rate: %.2f%%\n", 100.0 * atomic_load(&g_mf2_free_owner_count) / total_frees);
fflush(stderr);
}
__attribute__((destructor)) static void mf2_destructor(void) { mf2_print_debug_stats(); }
void hak_pool_init(void) { pthread_once(&hak_pool_init_once_control, hak_pool_init_impl); }
void hak_pool_shutdown(void) {
if (!g_pool.initialized) return; extern void hak_pool_print_stats(void); hak_pool_print_stats(); mf2_print_debug_stats(); g_pool.initialized = 0;
}
// Try-alloc: legacy TLS path or MF2
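// Fast-path order on the legacy (non-MF2) path: (1) optional TC drain into the TLS ring,
// (2) TLS ring pop, (3) TLS LIFO pop, (4) trylock probes over non-empty shards, draining
// remote frees and batch-stealing blocks into the ring, (5) TLS active-page bump allocation,
// (6) blocking shard lock with neighbor steal and refill_freelist() as the last resort.
// Most counter updates are sampled via the per-thread xorshift RNG (HAKMEM_POOL_COUNT_SAMPLE).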
void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
hak_pool_init(); extern int hak_in_wrapper(void); if (hak_in_wrapper() && !g_wrap_l2_enabled) return NULL; if (!hak_pool_is_poolable(size)) return NULL;
int class_idx = hak_pool_get_class_index(size); if (class_idx < 0) return NULL;
if (g_mf2_enabled) { return mf2_alloc_fast(class_idx, size, site_id); }
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tc_enabled && ring->top < g_tc_drain_trigger && mid_tc_has_items(class_idx)) {
HKM_TIME_START(t_tc_drain); if (mid_tc_drain_into_tls(class_idx, ring, &g_tls_bin[class_idx])) { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain); if (ring->top > 0) { HKM_TIME_START(t_ring_pop0); PoolBlock* tlsb = ring->items[--ring->top]; HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop0); void* raw = (void*)tlsb; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++; return (char*)raw + HEADER_SIZE; } } else { HKM_TIME_END(HKM_CAT_TC_DRAIN, t_tc_drain); } }
if (g_tls_ring_enabled) { if (ring->top == 0) { atomic_fetch_add_explicit(&g_pool.ring_underflow, 1, memory_order_relaxed); } if (ring->top > 0) { HKM_TIME_START(t_ring_pop1); PoolBlock* tlsb = ring->items[--ring->top]; HKM_TIME_END(HKM_CAT_POOL_TLS_RING_POP, t_ring_pop1); void* raw = (void*)tlsb; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++; return (char*)raw + HEADER_SIZE; } }
if (g_tls_bin[class_idx].lo_head) { HKM_TIME_START(t_lifo_pop0); PoolBlock* b = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = b->next; if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--; HKM_TIME_END(HKM_CAT_POOL_TLS_LIFO_POP, t_lifo_pop0); void* raw = (void*)b; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++; return (char*)raw + HEADER_SIZE; }
int shard_idx = hak_pool_get_shard_index(site_id);
if (g_tls_ring_enabled) {
int s0 = choose_nonempty_shard(class_idx, shard_idx);
for (int probe = 0; probe < g_trylock_probes; ++probe) {
int s = (s0 + probe) & (POOL_NUM_SHARDS - 1);
pthread_mutex_t* l = &g_pool.freelist_locks[class_idx][s].m;
atomic_fetch_add_explicit(&g_pool.trylock_attempts, 1, memory_order_relaxed);
if (pthread_mutex_trylock(l) == 0) {
atomic_fetch_add_explicit(&g_pool.trylock_success, 1, memory_order_relaxed);
if (atomic_load_explicit(&g_pool.remote_count[class_idx][s], memory_order_relaxed) != 0) drain_remote_locked(class_idx, s);
PoolBlock* head = g_pool.freelist[class_idx][s];
int to_ring = POOL_L2_RING_CAP - ring->top; if (to_ring < 0) to_ring = 0;
while (head && to_ring-- > 0) { PoolBlock* nxt = head->next; ring->items[ring->top++] = head; head = nxt; }
while (head) { PoolBlock* nxt = head->next; head->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = head; g_tls_bin[class_idx].lo_count++; head = nxt; }
g_pool.freelist[class_idx][s] = head; if (!head) clear_nonempty_bit(class_idx, s);
pthread_mutex_unlock(l);
if (ring->top > 0) { PoolBlock* tlsb = ring->items[--ring->top]; void* raw = (void*)tlsb; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++; return (char*)raw + HEADER_SIZE; }
}
}
}
PoolTLSPage* ap = NULL;
if (g_tls_active_page_a[class_idx].page && g_tls_active_page_a[class_idx].count > 0 && g_tls_active_page_a[class_idx].bump < g_tls_active_page_a[class_idx].end) ap = &g_tls_active_page_a[class_idx];
else if (g_tls_active_page_b[class_idx].page && g_tls_active_page_b[class_idx].count > 0 && g_tls_active_page_b[class_idx].bump < g_tls_active_page_b[class_idx].end) ap = &g_tls_active_page_b[class_idx];
else if (g_tls_active_page_c[class_idx].page && g_tls_active_page_c[class_idx].count > 0 && g_tls_active_page_c[class_idx].bump < g_tls_active_page_c[class_idx].end) ap = &g_tls_active_page_c[class_idx];
if (ap) {
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { int need = POOL_L2_RING_CAP - ring->top; (void)refill_tls_from_active_page(class_idx, ring, &g_tls_bin[class_idx], ap, need); }
PoolBlock* b = NULL; if (ring->top > 0) { b = ring->items[--ring->top]; } else if (ap->page && ap->count > 0 && ap->bump < ap->end) { b = (PoolBlock*)(void*)ap->bump; ap->bump += (HEADER_SIZE + g_class_sizes[class_idx]); ap->count--; if (ap->bump >= ap->end || ap->count<=0){ ap->page=NULL; ap->count=0; } }
if (b) { void* raw = (void*)b; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); g_pool.hits[class_idx]++; return (char*)raw + HEADER_SIZE; }
}
pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx].m; HKM_TIME_START(t_lock); struct timespec ts_lk1; int lk1 = hkm_prof_begin(&ts_lk1); (void)ts_lk1; (void)lk1; pthread_mutex_lock(lock); HKM_TIME_END(HKM_CAT_POOL_LOCK, t_lock); hkm_prof_end(lk1, HKP_POOL_LOCK, &ts_lk1);
PoolBlock* block = g_pool.freelist[class_idx][shard_idx];
if (!block) {
int stole = 0; const FrozenPolicy* pol2 = hkm_policy_get();
if (pol2) {
uint16_t cap = 0; if (class_idx < 5) cap = pol2->mid_cap[class_idx]; else if (class_idx == 5 && pol2->mid_dyn1_bytes != 0) cap = pol2->mid_cap_dyn1; else if (class_idx == 6 && pol2->mid_dyn2_bytes != 0) cap = pol2->mid_cap_dyn2;
if (atomic_load_explicit(&g_pool.remote_count[class_idx][shard_idx], memory_order_relaxed) != 0) { drain_remote_locked(class_idx, shard_idx); }
int neighbor = (shard_idx + 1) & (POOL_NUM_SHARDS - 1);
if (is_shard_nonempty(class_idx, neighbor)) {
PoolBlock* nb = g_pool.freelist[class_idx][neighbor]; if (nb) { g_pool.freelist[class_idx][neighbor] = nb->next; nb->next = NULL; block = nb; stole = 1; }
if (!g_pool.freelist[class_idx][neighbor]) clear_nonempty_bit(class_idx, neighbor);
}
}
if (!stole && !block) { (void)refill_freelist(class_idx, shard_idx); block = g_pool.freelist[class_idx][shard_idx]; }
}
if (!block) { pthread_mutex_unlock(lock); g_pool.misses[class_idx]++; return NULL; }
g_pool.freelist[class_idx][shard_idx] = block->next; if (!g_pool.freelist[class_idx][shard_idx]) clear_nonempty_bit(class_idx, shard_idx); pthread_mutex_unlock(lock);
void* raw = (void*)block; AllocHeader* hdr = (AllocHeader*)raw; mid_set_header(hdr, g_class_sizes[class_idx], site_id); mid_page_inuse_inc(raw); t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.hits[class_idx]++;
return (char*)raw + HEADER_SIZE;
}
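// Free path on the legacy (non-MF2) path: same-thread frees (owner check via page descriptor
// or block header) go to the TLS ring or, when the ring is full, to the TLS LIFO, spilling half
// of an oversized LIFO onto the shard's remote MPSC stack. Cross-thread frees go to the owner's
// MidTC queue when HAKMEM_TC_ENABLE is set, otherwise onto the remote stack. With
// HAKMEM_POOL_TLS_FREE=0 every free takes the locked shard freelist.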
void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
if (!ptr) return; hak_pool_init(); if (!hak_pool_is_poolable(size)) return;
if (g_mf2_enabled) { mf2_free(ptr); return; }
void* raw = (char*)ptr - HEADER_SIZE; AllocHeader* hdr = (AllocHeader*)raw; int mid_by_desc = 0; MidPageDesc* d_desc = mid_desc_lookup(ptr); if (d_desc) mid_by_desc = 1;
if (!mid_by_desc && g_hdr_light_enabled < 2) { if (hdr->magic != HAKMEM_MAGIC) { MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC); return; } if (hdr->method != ALLOC_METHOD_POOL) { MF2_ERROR_LOG("Wrong method %d in pool_free, expected POOL (%d)", hdr->method, ALLOC_METHOD_POOL); return; } }
int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size); if (class_idx < 0) return;
PoolBlock* block = (PoolBlock*)raw;
if (g_pool.tls_free_enabled) {
int same_thread = 0;
if (g_hdr_light_enabled >= 1) { MidPageDesc* d = mid_desc_lookup(raw); if (d && d->owner_tid != 0 && d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) { same_thread = 1; } }
else if (hdr->owner_tid != 0 && hdr->owner_tid == (uintptr_t)pthread_self()) { same_thread = 1; }
if (same_thread) {
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) { ring->items[ring->top++] = block; }
else { block->next = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = block; g_tls_bin[class_idx].lo_count++; if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) { size_t spill = g_tls_bin[class_idx].lo_count / 2; int shard = hak_pool_get_shard_index(site_id); while (spill-- && g_tls_bin[class_idx].lo_head) { PoolBlock* b = g_tls_bin[class_idx].lo_head; g_tls_bin[class_idx].lo_head = b->next; g_tls_bin[class_idx].lo_count--; HKM_TIME_START(t_remote_push1); uintptr_t old_head; do { old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); b->next = (PoolBlock*)old_head; } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)b, memory_order_release, memory_order_relaxed)); atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push1); } set_nonempty_bit(class_idx, shard); } }
} else {
if (g_tc_enabled) { uint64_t owner_tid = 0; if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid; if (owner_tid == 0) { MidPageDesc* d = mid_desc_lookup(raw); if (d) owner_tid = d->owner_tid; } if (owner_tid != 0) { MidTC* otc = mid_tc_lookup_by_tid(owner_tid); if (otc) { mid_tc_push(otc, class_idx, block); return; } } }
int shard = hak_pool_get_shard_index(site_id); uintptr_t old_head; HKM_TIME_START(t_remote_push2);
do { old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); block->next = (PoolBlock*)old_head; } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block, memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2); set_nonempty_bit(class_idx, shard);
}
} else {
int shard_idx2 = hak_pool_get_shard_index(site_id); pthread_mutex_t* lock = &g_pool.freelist_locks[class_idx][shard_idx2].m; pthread_mutex_lock(lock); block->next = g_pool.freelist[class_idx][shard_idx2]; g_pool.freelist[class_idx][shard_idx2] = block; set_nonempty_bit(class_idx, shard_idx2); pthread_mutex_unlock(lock);
}
t_pool_rng ^= t_pool_rng << 13; t_pool_rng ^= t_pool_rng >> 17; t_pool_rng ^= t_pool_rng << 5; if ((t_pool_rng & ((1u<<g_count_sample_exp)-1u)) == 0u) g_pool.frees[class_idx]++;
mid_page_inuse_dec_and_maybe_dn(raw);
}
// Mid lookups (MF2-aware) and fast free wrapper
int hak_pool_mid_lookup(void* ptr, size_t* out_size) {
if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
MidPageDesc* d = mid_desc_lookup(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
}
void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
if (!ptr || !g_pool.initialized) return; if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { mf2_free(ptr); return; } }
MidPageDesc* d = mid_desc_lookup(ptr); if (!d) return; size_t sz = g_class_sizes[(int)d->class_idx]; if (sz == 0) return; hak_pool_free(ptr, sz, site_id);
}
#endif // HAKMEM_POOL_API_NO_PUBLIC
#endif // POOL_CORE_API_INC_H