pool v1 flatten: break down free fallback causes and normalize mid_desc keys
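
Both toggles latch their getenv() result in a function-local static on first call, so they must be set in the environment before the process's first pool call. A minimal sketch of exercising the opt-in path (the header name, the 4096-byte size, and the 0 site_id are illustrative, not part of this commit):

    #include <stdlib.h>            /* setenv */
    #include "pool_api_inc.h"      /* hypothetical name for the header guarded by POOL_API_INC_H */

    int main(void) {
        /* must run before the first pool call: the flags are cached in a static */
        setenv("HAKMEM_POOL_V1_FLATTEN_ENABLED", "1", 1); /* any value not starting with '0' enables */
        setenv("HAKMEM_POOL_V1_FLATTEN_STATS",   "1", 1); /* dump counters from the exit destructor */
        void* p = hak_pool_try_alloc(4096, 0);
        if (p) hak_pool_free(p, 4096, 0);
        return 0; /* the stats destructor prints one [POOL_V1_FLAT] line to stderr */
    }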
@@ -3,6 +3,7 @@
 #define POOL_API_INC_H
 
 #include "pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_MID)
+#include "box/pool_hotbox_v2_box.h"
 
 // Pool v2 is experimental. Default OFF (use legacy v1 path).
 static inline int hak_pool_v2_enabled(void) {
@@ -35,6 +36,61 @@ static inline int hak_pool_v2_tls_fast_enabled(void) {
     return g;
 }
 
+// Pool v1 flatten (hot path only) is experimental and opt-in.
+static inline int hak_pool_v1_flatten_enabled(void) {
+    static int g = -1;
+    if (__builtin_expect(g == -1, 0)) {
+        const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
+        g = (e && *e && *e != '0') ? 1 : 0;
+    }
+    return g;
+}
+
+static inline int hak_pool_v1_flatten_stats_enabled(void) {
+    static int g = -1;
+    if (__builtin_expect(g == -1, 0)) {
+        const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_STATS");
+        g = (e && *e && *e != '0') ? 1 : 0;
+    }
+    return g;
+}
+
+typedef struct PoolV1FlattenStats {
+    _Atomic uint64_t alloc_tls_hit;
+    _Atomic uint64_t alloc_fallback_v1;
+    _Atomic uint64_t free_tls_hit;
+    _Atomic uint64_t free_fallback_v1;
+    _Atomic uint64_t free_fb_page_null;
+    _Atomic uint64_t free_fb_not_mine;
+    _Atomic uint64_t free_fb_other;
+} PoolV1FlattenStats;
+
+static PoolV1FlattenStats g_pool_v1_flat_stats = {0};
+
+static inline void pool_v1_flat_stats_dump(void) {
+    if (!hak_pool_v1_flatten_stats_enabled()) return;
+    fprintf(stderr,
+            "[POOL_V1_FLAT] alloc_tls_hit=%llu alloc_fb=%llu free_tls_hit=%llu free_fb=%llu page_null=%llu not_mine=%llu other=%llu\n",
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_tls_hit,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_tls_hit,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine,
+                                                     memory_order_relaxed),
+            (unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other,
+                                                     memory_order_relaxed));
+}
+
+__attribute__((destructor)) static void pool_v1_flatten_stats_destructor(void) {
+    pool_v1_flat_stats_dump();
+}
+
 // Thin helper to keep the hot path straight-line when converting a PoolBlock to
 // a user pointer. All sampling/stat updates remain here so the callers stay
 // small.
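
The new free_fb_* counters split free_fallback_v1 by cause: free_fb_page_null (no MidPageDesc for the pointer), free_fb_not_mine (a descriptor exists but the page is owned by another thread, has no recorded owner, or TLS free is disabled), and free_fb_other (the descriptor carries an out-of-range class index). Each fallback increments exactly one of the three, so once all threads quiesce the parts should sum to the total. A minimal test sketch under that assumption (the function name is illustrative):

    #include <assert.h>
    #include <stdatomic.h>

    static void assert_free_fallback_partition(void) {
        /* relaxed counters: only meaningful once all threads have quiesced */
        unsigned long long fb    = atomic_load(&g_pool_v1_flat_stats.free_fallback_v1);
        unsigned long long pnull = atomic_load(&g_pool_v1_flat_stats.free_fb_page_null);
        unsigned long long nmine = atomic_load(&g_pool_v1_flat_stats.free_fb_not_mine);
        unsigned long long other = atomic_load(&g_pool_v1_flat_stats.free_fb_other);
        assert(fb == pnull + nmine + other);
    }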
@@ -123,6 +179,13 @@ static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
         return NULL;
     }
 
+    // Experimental PoolHotBox v2 (Hot path) — currently structure only.
+    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
+        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
+        if (p) return p;
+        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
+    }
+
     if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
         HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
     }
@@ -357,6 +420,14 @@ static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_
     }
     int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
     if (class_idx < 0) return;
+    if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
+        pool_hotbox_v2_record_free_call((uint32_t)class_idx);
+        PoolBlock* raw_block_for_v2 = (PoolBlock*)raw;
+        if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) {
+            return;
+        }
+        pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);
+    }
     PoolBlock* block = (PoolBlock*)raw;
     uint64_t owner_tid = 0;
     if (d_desc) owner_tid = d_desc->owner_tid;
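
The free path keys on the page descriptor when one is present (mid_by_desc) and only falls back to size-based classification otherwise. For reading the diff, the MidPageDesc fields it relies on look roughly like the following; this is an inferred shape, not the authoritative definition from the tree:

    /* Hypothetical layout, inferred from the call sites in this diff: */
    typedef struct MidPageDesc {
        uint32_t class_idx;  /* size class of every block in the page; cast to int
                                and range-checked against POOL_NUM_CLASSES */
        uint64_t owner_tid;  /* owning thread id, 0 when unowned/shared; compared
                                against pthread_self() on the free fast path */
        /* ... other fields not referenced by this diff ... */
    } MidPageDesc;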
@@ -768,6 +839,111 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
     mid_page_inuse_dec_and_maybe_dn(raw);
 }
 
+// --- v1 flatten (opt-in) ----------------------------------------------------
+
+static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
+    if (!hak_pool_is_poolable(size)) return NULL;
+    int class_idx = hak_pool_get_class_index(size);
+    if (class_idx < 0) return NULL;
+
+    PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
+    if (g_tls_ring_enabled && ring->top > 0) {
+        PoolBlock* tlsb = ring->items[--ring->top];
+        // Adopt shared pages to this thread so free can stay on the fast path.
+        mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
+        if (hak_pool_v1_flatten_stats_enabled()) {
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
+        }
+        return hak_pool_block_to_user(tlsb, class_idx, site_id);
+    }
+
+    if (g_tls_bin[class_idx].lo_head) {
+        PoolBlock* b = g_tls_bin[class_idx].lo_head;
+        g_tls_bin[class_idx].lo_head = b->next;
+        if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
+        mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
+        if (hak_pool_v1_flatten_stats_enabled()) {
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
+        }
+        return hak_pool_block_to_user(b, class_idx, site_id);
+    }
+
+    if (hak_pool_v1_flatten_stats_enabled()) {
+        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
+    }
+    return hak_pool_try_alloc_v1_impl(size, site_id);
+}
+
+static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
+    if (!ptr) return;
+    if (!hak_pool_is_poolable(size)) return;
+
+    void* raw = (char*)ptr - HEADER_SIZE;
+    MidPageDesc* d_desc = mid_desc_lookup(ptr);
+    if (!d_desc) {
+        if (hak_pool_v1_flatten_stats_enabled()) {
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
+        }
+        hak_pool_free_v1_impl(ptr, size, site_id);
+        return;
+    }
+
+    int class_idx = (int)d_desc->class_idx;
+    if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
+        if (hak_pool_v1_flatten_stats_enabled()) {
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
+        }
+        hak_pool_free_v1_impl(ptr, size, site_id);
+        return;
+    }
+
+    const uint64_t owner_tid = d_desc->owner_tid;
+    const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
+
+    if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
+        PoolBlock* block = (PoolBlock*)raw;
+        PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
+        if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
+            ring->items[ring->top++] = block;
+        } else {
+            block->next = g_tls_bin[class_idx].lo_head;
+            g_tls_bin[class_idx].lo_head = block;
+            g_tls_bin[class_idx].lo_count++;
+            if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
+                size_t spill = g_tls_bin[class_idx].lo_count / 2;
+                int shard = hak_pool_get_shard_index(site_id);
+                while (spill-- && g_tls_bin[class_idx].lo_head) {
+                    PoolBlock* b = g_tls_bin[class_idx].lo_head;
+                    g_tls_bin[class_idx].lo_head = b->next;
+                    g_tls_bin[class_idx].lo_count--;
+                    uintptr_t old_head;
+                    do {
+                        old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
+                        b->next = (PoolBlock*)old_head;
+                    } while (!atomic_compare_exchange_weak_explicit(
+                        &g_pool.remote_head[class_idx][shard],
+                        &old_head, (uintptr_t)b,
+                        memory_order_release, memory_order_relaxed));
+                    atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
+                }
+                set_nonempty_bit(class_idx, shard);
+            }
+        }
+        if (hak_pool_v1_flatten_stats_enabled()) {
+            atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
+        }
+        return;
+    }
+
+    if (hak_pool_v1_flatten_stats_enabled()) {
+        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
+        atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
+    }
+    hak_pool_free_v1_impl(ptr, size, site_id);
+}
+
 static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
     if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
     MidPageDesc* d = mid_desc_lookup(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
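
The spill loop in hak_pool_free_v1_flat is a push onto a Treiber (lock-free LIFO) list: reload the head, link the node in front of it, and publish with a weak release CAS. The same pattern in isolation, on a toy node type (names here are illustrative, not the allocator's):

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct Node { struct Node* next; } Node;
    static _Atomic uintptr_t g_head; /* 0 == empty list */

    static void lifo_push(Node* n) {
        uintptr_t old;
        do {
            /* re-link in front of the current head on every retry */
            old = atomic_load_explicit(&g_head, memory_order_acquire);
            n->next = (Node*)old;
        } while (!atomic_compare_exchange_weak_explicit(
                     &g_head, &old, (uintptr_t)n,
                     memory_order_release, memory_order_relaxed));
    }

Push-only producers need no ABA defense; that hazard arises on a popping consumer, and the remote list here is presumably drained wholesale by the owning side.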
@@ -783,6 +959,9 @@ static inline int hak_pool_v2_route(void) { return hak_pool_v2_enabled(); }
 
 void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
     if (!hak_pool_v2_route()) {
+        if (hak_pool_v1_flatten_enabled()) {
+            return hak_pool_try_alloc_v1_flat(size, site_id);
+        }
         return hak_pool_try_alloc_v1_impl(size, site_id);
     }
     return hak_pool_try_alloc_v2_impl(size, site_id);
@@ -790,7 +969,11 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
 
 void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
     if (!hak_pool_v2_route()) {
-        hak_pool_free_v1_impl(ptr, size, site_id);
+        if (hak_pool_v1_flatten_enabled()) {
+            hak_pool_free_v1_flat(ptr, size, site_id);
+        } else {
+            hak_pool_free_v1_impl(ptr, size, site_id);
+        }
         return;
     }
     hak_pool_free_v2_impl(ptr, size, site_id);
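
Note the dispatch replaces the old unconditional hak_pool_free_v1_impl call with exactly one implementation per free, and hak_pool_free_v1_flat itself defers every miss (no descriptor, out-of-range class, foreign or unset owner) back to hak_pool_free_v1_impl, so enabling the flag narrows any behavior change to the owner-thread fast case.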
@@ -798,6 +981,8 @@ void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
 
 void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
     if (!hak_pool_v2_route()) {
+        // fast path lacks size; keep existing v1 fast implementation even when
+        // flatten is enabled to avoid behavior drift.
         hak_pool_free_fast_v1_impl(ptr, site_id);
         return;
     }