pool v1 flatten: break down free fallback causes and normalize mid_desc keys

Moe Charm (CI)
2025-12-09 19:34:54 +09:00
parent 8f18963ad5
commit e274d5f6a9
4 changed files with 474 additions and 6 deletions


@ -3,6 +3,7 @@
#define POOL_API_INC_H
#include "pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_MID)
#include "box/pool_hotbox_v2_box.h"
// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
@ -35,6 +36,61 @@ static inline int hak_pool_v2_tls_fast_enabled(void) {
return g;
}
// Pool v1 flatten (hot path only) is experimental and opt-in.
static inline int hak_pool_v1_flatten_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
static inline int hak_pool_v1_flatten_stats_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_STATS");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
typedef struct PoolV1FlattenStats {
_Atomic uint64_t alloc_tls_hit;      // alloc served from the TLS ring or TLS lo list
_Atomic uint64_t alloc_fallback_v1;  // TLS empty: fell through to hak_pool_try_alloc_v1_impl()
_Atomic uint64_t free_tls_hit;       // free kept on the owner-thread TLS fast path
_Atomic uint64_t free_fallback_v1;   // free routed to hak_pool_free_v1_impl() (sum of the causes below)
_Atomic uint64_t free_fb_page_null;  // cause: mid_desc_lookup() found no page descriptor
_Atomic uint64_t free_fb_not_mine;   // cause: owner check failed (cross-thread free, no owner, or TLS free disabled)
_Atomic uint64_t free_fb_other;      // cause: descriptor class_idx out of range
} PoolV1FlattenStats;
static PoolV1FlattenStats g_pool_v1_flat_stats = {0};
static inline void pool_v1_flat_stats_dump(void) {
if (!hak_pool_v1_flatten_stats_enabled()) return;
fprintf(stderr,
"[POOL_V1_FLAT] alloc_tls_hit=%llu alloc_fb=%llu free_tls_hit=%llu free_fb=%llu page_null=%llu not_mine=%llu other=%llu\n",
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_tls_hit,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_tls_hit,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other,
memory_order_relaxed));
}
__attribute__((destructor)) static void pool_v1_flatten_stats_destructor(void) {
pool_v1_flat_stats_dump();
}
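// Usage sketch (hypothetical driver, not part of this commit): both getenv()
// results above are cached in function-local statics, so the environment
// variables must be set before the allocator is first touched; the stats line
// is printed by the destructor at process exit. The 40000-byte size is only an
// assumption based on the ACCEPTED log range (33000..41000) further below; any
// size hak_pool_is_poolable() accepts behaves the same way.
//
//   #include <stdlib.h>
//   int main(void) {
//       setenv("HAKMEM_POOL_V1_FLATTEN_ENABLED", "1", 1);
//       setenv("HAKMEM_POOL_V1_FLATTEN_STATS", "1", 1);
//       void* p = hak_pool_try_alloc(40000, /*site_id=*/0);
//       if (p) hak_pool_free(p, 40000, /*site_id=*/0);
//       return 0;  // [POOL_V1_FLAT] line is emitted by the destructor
//   }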
// Thin helper to keep the hot path straight-line when converting a PoolBlock to
// a user pointer. All sampling/stat updates remain here so the callers stay
// small.
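// A minimal sketch of the shape such a helper can take (hypothetical body; the
// real hak_pool_block_to_user also performs the sampling/stat updates mentioned
// above, and the exact header layout may differ). The HEADER_SIZE offset is the
// same one the free path reverses with "raw = (char*)ptr - HEADER_SIZE".
//
//   static inline void* hak_pool_block_to_user_sketch(PoolBlock* blk, int class_idx,
//                                                     uintptr_t site_id) {
//       (void)class_idx; (void)site_id;  // consumed by telemetry in the real helper
//       return (char*)blk + HEADER_SIZE; // step over the block header to the user area
//   }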
@ -123,6 +179,13 @@ static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
return NULL;
}
// Experimental PoolHotBox v2 (Hot path) — currently structure only.
if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
if (p) return p;
pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
}
if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
}
@ -357,6 +420,14 @@ static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_
}
int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
if (class_idx < 0) return;
if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
pool_hotbox_v2_record_free_call((uint32_t)class_idx);
PoolBlock* raw_block_for_v2 = (PoolBlock*)raw;
if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) {
return;
}
pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);
}
PoolBlock* block = (PoolBlock*)raw;
uint64_t owner_tid = 0;
if (d_desc) owner_tid = d_desc->owner_tid;
@ -768,6 +839,111 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
mid_page_inuse_dec_and_maybe_dn(raw);
}
// --- v1 flatten (opt-in) ----------------------------------------------------
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
if (!hak_pool_is_poolable(size)) return NULL;
int class_idx = hak_pool_get_class_index(size);
if (class_idx < 0) return NULL;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top > 0) {
PoolBlock* tlsb = ring->items[--ring->top];
// Adopt shared pages to this thread so free can stay on the fast path
// (see the adoption sketch after this function).
mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
}
return hak_pool_block_to_user(tlsb, class_idx, site_id);
}
// Ring empty (or disabled): fall back to the TLS lo overflow list.
if (g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
}
return hak_pool_block_to_user(b, class_idx, site_id);
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
}
return hak_pool_try_alloc_v1_impl(size, site_id);
}
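// Adoption sketch (hypothetical body; the real mid_desc_adopt lives elsewhere):
// stamp the calling thread as owner of the block's page descriptor so the
// owner_tid check in hak_pool_free_v1_flat below takes the TLS fast path.
// Field names are taken from the free path; assumes mid_desc_lookup() resolves
// any address inside the page.
//
//   static inline void mid_desc_adopt_sketch(void* block, int class_idx, uint64_t tid) {
//       MidPageDesc* d = mid_desc_lookup(block);
//       if (!d) return;
//       d->class_idx = class_idx;  // real field type may differ
//       d->owner_tid = tid;
//   }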
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
if (!ptr) return;
if (!hak_pool_is_poolable(size)) return;
void* raw = (char*)ptr - HEADER_SIZE;
MidPageDesc* d_desc = mid_desc_lookup(ptr);
if (!d_desc) {
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
int class_idx = (int)d_desc->class_idx;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
const uint64_t owner_tid = d_desc->owner_tid;
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
// Same-thread free: push the block back onto this thread's TLS ring or lo list.
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
PoolBlock* block = (PoolBlock*)raw;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
ring->items[ring->top++] = block;
} else {
block->next = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = block;
g_tls_bin[class_idx].lo_count++;
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
// TLS lo list overflow: spill half of it to the shared per-class/per-shard
// remote list so other threads can reclaim the blocks.
size_t spill = g_tls_bin[class_idx].lo_count / 2;
int shard = hak_pool_get_shard_index(site_id);
while (spill-- && g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
g_tls_bin[class_idx].lo_count--;
uintptr_t old_head;
do {
// Lock-free push onto the remote free list (CAS retry loop).
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
b->next = (PoolBlock*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&g_pool.remote_head[class_idx][shard],
&old_head, (uintptr_t)b,
memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
}
set_nonempty_bit(class_idx, shard);
}
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
}
return;
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
}
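// Every free_fallback_v1 increment above is paired with exactly one cause
// counter, so the three causes should sum to the fallback total. Optional
// sanity check (hypothetical helper, not part of this commit; never called
// from the hot path):
static inline int pool_v1_flat_stats_causes_consistent(void) {
    uint64_t fb = atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1, memory_order_relaxed);
    uint64_t causes =
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null, memory_order_relaxed) +
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, memory_order_relaxed) +
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other, memory_order_relaxed);
    return fb == causes;  // counters are relaxed, so equality is best-effort under concurrency
}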
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            int c = (int)page->class_idx;
            if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
            size_t sz = g_class_sizes[c];
            if (sz == 0) return 0;
            if (out_size) *out_size = sz;
            return 1;
        }
    }
    MidPageDesc* d = mid_desc_lookup(ptr);
    if (!d) return 0;
    int c = (int)d->class_idx;
    if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
    size_t sz = g_class_sizes[c];
    if (sz == 0) return 0;
    if (out_size) *out_size = sz;
    return 1;
@ -783,6 +959,9 @@ static inline int hak_pool_v2_route(void) { return hak_pool_v2_enabled(); }
void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
if (hak_pool_v1_flatten_enabled()) {
return hak_pool_try_alloc_v1_flat(size, site_id);
}
return hak_pool_try_alloc_v1_impl(size, site_id);
}
return hak_pool_try_alloc_v2_impl(size, site_id);
@ -790,7 +969,11 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
hak_pool_free_v1_impl(ptr, size, site_id);
if (hak_pool_v1_flatten_enabled()) {
hak_pool_free_v1_flat(ptr, size, site_id);
} else {
hak_pool_free_v1_impl(ptr, size, site_id);
}
return;
}
hak_pool_free_v2_impl(ptr, size, site_id);
@ -798,6 +981,8 @@ void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
// fast path lacks size; keep existing v1 fast implementation even when
// flatten is enabled to avoid behavior drift.
hak_pool_free_fast_v1_impl(ptr, site_id);
return;
}