pool v1 flatten: break down free fallback causes and normalize mid_desc keys

Moe Charm (CI)
2025-12-09 19:34:54 +09:00
parent 8f18963ad5
commit e274d5f6a9
4 changed files with 474 additions and 6 deletions


@ -3,6 +3,7 @@
#define POOL_API_INC_H
#include "pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_MID)
#include "box/pool_hotbox_v2_box.h"
// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
@ -35,6 +36,61 @@ static inline int hak_pool_v2_tls_fast_enabled(void) {
return g;
}
// Pool v1 flatten (hot path only) is experimental and opt-in.
static inline int hak_pool_v1_flatten_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
static inline int hak_pool_v1_flatten_stats_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_STATS");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
typedef struct PoolV1FlattenStats {
_Atomic uint64_t alloc_tls_hit;      // alloc served from the TLS ring or TLS lo list
_Atomic uint64_t alloc_fallback_v1;  // TLS empty: fell through to hak_pool_try_alloc_v1_impl()
_Atomic uint64_t free_tls_hit;       // free kept on the owner-thread TLS fast path
_Atomic uint64_t free_fallback_v1;   // free routed to hak_pool_free_v1_impl() (sum of the causes below)
_Atomic uint64_t free_fb_page_null;  // cause: mid_desc_lookup() found no page descriptor
_Atomic uint64_t free_fb_not_mine;   // cause: owner check failed (cross-thread free, no owner, or TLS free disabled)
_Atomic uint64_t free_fb_other;      // cause: descriptor class_idx out of range
} PoolV1FlattenStats;
static PoolV1FlattenStats g_pool_v1_flat_stats = {0};
static inline void pool_v1_flat_stats_dump(void) {
if (!hak_pool_v1_flatten_stats_enabled()) return;
fprintf(stderr,
"[POOL_V1_FLAT] alloc_tls_hit=%llu alloc_fb=%llu free_tls_hit=%llu free_fb=%llu page_null=%llu not_mine=%llu other=%llu\n",
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_tls_hit,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_tls_hit,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine,
memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other,
memory_order_relaxed));
}
__attribute__((destructor)) static void pool_v1_flatten_stats_destructor(void) {
pool_v1_flat_stats_dump();
}
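// Usage sketch (hypothetical driver, not part of this commit): both getenv()
// results above are cached in function-local statics, so the environment
// variables must be set before the allocator is first touched; the stats line
// is printed by the destructor at process exit. The 40000-byte size is only an
// assumption based on the ACCEPTED log range (33000..41000) further below; any
// size hak_pool_is_poolable() accepts behaves the same way.
//
//   #include <stdlib.h>
//   int main(void) {
//       setenv("HAKMEM_POOL_V1_FLATTEN_ENABLED", "1", 1);
//       setenv("HAKMEM_POOL_V1_FLATTEN_STATS", "1", 1);
//       void* p = hak_pool_try_alloc(40000, /*site_id=*/0);
//       if (p) hak_pool_free(p, 40000, /*site_id=*/0);
//       return 0;  // [POOL_V1_FLAT] line is emitted by the destructor
//   }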
// Thin helper to keep the hot path straight-line when converting a PoolBlock to
// a user pointer. All sampling/stat updates remain here so the callers stay
// small.
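// A minimal sketch of the shape such a helper can take (hypothetical body; the
// real hak_pool_block_to_user also performs the sampling/stat updates mentioned
// above, and the exact header layout may differ). The HEADER_SIZE offset is the
// same one the free path reverses with "raw = (char*)ptr - HEADER_SIZE".
//
//   static inline void* hak_pool_block_to_user_sketch(PoolBlock* blk, int class_idx,
//                                                     uintptr_t site_id) {
//       (void)class_idx; (void)site_id;  // consumed by telemetry in the real helper
//       return (char*)blk + HEADER_SIZE; // step over the block header to the user area
//   }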
@ -123,6 +179,13 @@ static inline void* hak_pool_try_alloc_v2_impl(size_t size, uintptr_t site_id) {
return NULL;
}
// Experimental PoolHotBox v2 (Hot path) — currently structure only.
if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
if (p) return p;
pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);
}
if (__builtin_expect(size >= 33000 && size <= 41000, 0)) {
HAKMEM_LOG("[Pool] ACCEPTED: class_idx=%d, proceeding with allocation\n", class_idx);
}
@ -357,6 +420,14 @@ static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_
}
int class_idx = mid_by_desc ? (int)d_desc->class_idx : hak_pool_get_class_index(size);
if (class_idx < 0) return;
if (__builtin_expect(pool_hotbox_v2_class_enabled(class_idx), 0)) {
pool_hotbox_v2_record_free_call((uint32_t)class_idx);
PoolBlock* raw_block_for_v2 = (PoolBlock*)raw;
if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block_for_v2)) {
return;
}
pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);
}
PoolBlock* block = (PoolBlock*)raw;
uint64_t owner_tid = 0;
if (d_desc) owner_tid = d_desc->owner_tid;
@ -768,6 +839,111 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
mid_page_inuse_dec_and_maybe_dn(raw);
}
// --- v1 flatten (opt-in) ----------------------------------------------------
static inline void* hak_pool_try_alloc_v1_flat(size_t size, uintptr_t site_id) {
if (!hak_pool_is_poolable(size)) return NULL;
int class_idx = hak_pool_get_class_index(size);
if (class_idx < 0) return NULL;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top > 0) {
PoolBlock* tlsb = ring->items[--ring->top];
// Adopt shared pages to this thread so free can stay on the fast path
// (see the adoption sketch after this function).
mid_desc_adopt(tlsb, class_idx, (uint64_t)(uintptr_t)pthread_self());
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
}
return hak_pool_block_to_user(tlsb, class_idx, site_id);
}
// Ring empty (or disabled): fall back to the TLS lo overflow list.
if (g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
if (g_tls_bin[class_idx].lo_count) g_tls_bin[class_idx].lo_count--;
mid_desc_adopt(b, class_idx, (uint64_t)(uintptr_t)pthread_self());
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_tls_hit, 1, memory_order_relaxed);
}
return hak_pool_block_to_user(b, class_idx, site_id);
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.alloc_fallback_v1, 1, memory_order_relaxed);
}
return hak_pool_try_alloc_v1_impl(size, site_id);
}
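// Adoption sketch (hypothetical body; the real mid_desc_adopt lives elsewhere):
// stamp the calling thread as owner of the block's page descriptor so the
// owner_tid check in hak_pool_free_v1_flat below takes the TLS fast path.
// Field names are taken from the free path; assumes mid_desc_lookup() resolves
// any address inside the page.
//
//   static inline void mid_desc_adopt_sketch(void* block, int class_idx, uint64_t tid) {
//       MidPageDesc* d = mid_desc_lookup(block);
//       if (!d) return;
//       d->class_idx = class_idx;  // real field type may differ
//       d->owner_tid = tid;
//   }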
static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_id) {
if (!ptr) return;
if (!hak_pool_is_poolable(size)) return;
void* raw = (char*)ptr - HEADER_SIZE;
MidPageDesc* d_desc = mid_desc_lookup(ptr);
if (!d_desc) {
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_page_null, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
int class_idx = (int)d_desc->class_idx;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) {
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_other, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
return;
}
const uint64_t owner_tid = d_desc->owner_tid;
const uint64_t self_tid = (uint64_t)(uintptr_t)pthread_self();
// Same-thread free: push the block back onto this thread's TLS ring or lo list.
if (g_pool.tls_free_enabled && owner_tid != 0 && owner_tid == self_tid) {
PoolBlock* block = (PoolBlock*)raw;
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
if (g_tls_ring_enabled && ring->top < POOL_L2_RING_CAP) {
ring->items[ring->top++] = block;
} else {
block->next = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = block;
g_tls_bin[class_idx].lo_count++;
if ((int)g_tls_bin[class_idx].lo_count > g_tls_lo_max) {
// TLS lo list overflow: spill half of it to the shared per-class/per-shard
// remote list so other threads can reclaim the blocks.
size_t spill = g_tls_bin[class_idx].lo_count / 2;
int shard = hak_pool_get_shard_index(site_id);
while (spill-- && g_tls_bin[class_idx].lo_head) {
PoolBlock* b = g_tls_bin[class_idx].lo_head;
g_tls_bin[class_idx].lo_head = b->next;
g_tls_bin[class_idx].lo_count--;
uintptr_t old_head;
do {
// Lock-free push onto the remote free list (CAS retry loop).
old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire);
b->next = (PoolBlock*)old_head;
} while (!atomic_compare_exchange_weak_explicit(
&g_pool.remote_head[class_idx][shard],
&old_head, (uintptr_t)b,
memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed);
}
set_nonempty_bit(class_idx, shard);
}
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_tls_hit, 1, memory_order_relaxed);
}
return;
}
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, 1, memory_order_relaxed);
}
hak_pool_free_v1_impl(ptr, size, site_id);
}
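// Every free_fallback_v1 increment above is paired with exactly one cause
// counter, so the three causes should sum to the fallback total. Optional
// sanity check (hypothetical helper, not part of this commit; never called
// from the hot path):
static inline int pool_v1_flat_stats_causes_consistent(void) {
    uint64_t fb = atomic_load_explicit(&g_pool_v1_flat_stats.free_fallback_v1, memory_order_relaxed);
    uint64_t causes =
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_page_null, memory_order_relaxed) +
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_not_mine, memory_order_relaxed) +
        atomic_load_explicit(&g_pool_v1_flat_stats.free_fb_other, memory_order_relaxed);
    return fb == causes;  // counters are relaxed, so equality is best-effort under concurrency
}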
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
    if (g_mf2_enabled) {
        MidPage* page = mf2_addr_to_page(ptr);
        if (page) {
            int c = (int)page->class_idx;
            if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
            size_t sz = g_class_sizes[c];
            if (sz == 0) return 0;
            if (out_size) *out_size = sz;
            return 1;
        }
    }
    MidPageDesc* d = mid_desc_lookup(ptr);
    if (!d) return 0;
    int c = (int)d->class_idx;
    if (c < 0 || c >= POOL_NUM_CLASSES) return 0;
    size_t sz = g_class_sizes[c];
    if (sz == 0) return 0;
    if (out_size) *out_size = sz;
    return 1;
@ -783,6 +959,9 @@ static inline int hak_pool_v2_route(void) { return hak_pool_v2_enabled(); }
void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
if (hak_pool_v1_flatten_enabled()) {
return hak_pool_try_alloc_v1_flat(size, site_id);
}
return hak_pool_try_alloc_v1_impl(size, site_id);
}
return hak_pool_try_alloc_v2_impl(size, site_id);
@ -790,7 +969,11 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
hak_pool_free_v1_impl(ptr, size, site_id);
if (hak_pool_v1_flatten_enabled()) {
hak_pool_free_v1_flat(ptr, size, site_id);
} else {
hak_pool_free_v1_impl(ptr, size, site_id);
}
return;
}
hak_pool_free_v2_impl(ptr, size, site_id);
@ -798,6 +981,8 @@ void hak_pool_free(void* ptr, size_t size, uintptr_t site_id) {
void hak_pool_free_fast(void* ptr, uintptr_t site_id) {
if (!hak_pool_v2_route()) {
// fast path lacks size; keep existing v1 fast implementation even when
// flatten is enabled to avoid behavior drift.
hak_pool_free_fast_v1_impl(ptr, site_id);
return;
}