Phase POOL-MID-DN-BATCH Step 4: Deferred API implementation with thread cleanup

This commit is contained in:
Moe Charm (CI)
2025-12-12 23:00:00 +09:00
parent d45729f063
commit cba444b943


@@ -0,0 +1,157 @@
// pool_mid_inuse_deferred_box.h — Box: Deferred inuse_dec API
//
// Purpose: Eliminate mid_desc_lookup from hot path by batching decrements
// Pattern: TLS map accumulates page→count, drain performs batched lookup
// Phase: POOL-MID-DN-BATCH Step 3 (deferred API) + Step 4 (thread cleanup)
//
// Design:
// - Hot path (mid_inuse_dec_deferred): bounded scan of a small TLS map, no desc lookup, no atomic, no lock
// - Cold path (mid_inuse_deferred_drain): Batched lookup + atomic subtract
// - Safety: Deferred dec only delays DONTNEED (safe direction)
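//
// Worked example (illustrative): 1,000 frees landing on 4 distinct 64KB pages
// accumulate into 4 TLS map entries, so the drain performs 4 mid_desc_lookup calls
// and 4 atomic subtracts instead of 1,000 of each.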
#ifndef POOL_MID_INUSE_DEFERRED_BOX_H
#define POOL_MID_INUSE_DEFERRED_BOX_H
#include "pool_mid_inuse_deferred_env_box.h"
#include "pool_mid_inuse_tls_pagemap_box.h"
#include "pool_mid_inuse_deferred_stats_box.h"
#include <stddef.h>
#include <stdint.h>
#include <stdatomic.h>
#include <pthread.h>
// Forward declarations. The full MidPageDesc definition (from hakmem_internal.h /
// the pool includes) must be in scope before this header is included, since the
// drain path below reads d->in_use, d->pending_dn and d->page directly.
struct MidPageDesc;
typedef struct MidPageDesc MidPageDesc;
// External functions
extern MidPageDesc* mid_desc_lookup(void* addr);
extern void mid_page_inuse_dec_and_maybe_dn(void* raw);
extern int hak_batch_add_page(void* page, size_t size);
// POOL_PAGE_SIZE (defined in hakmem_pool.h, typically 64KB)
#ifndef POOL_PAGE_SIZE
#define POOL_PAGE_SIZE (64 * 1024)
#endif
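// Illustrative mask arithmetic (assuming the 64KB default): raw = 0x7f3a00012a40
// yields page base 0x7f3a00010000 via raw & ~((uintptr_t)POOL_PAGE_SIZE - 1).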
// Forward declaration of drain (needed by mid_inuse_dec_deferred)
static inline void mid_inuse_deferred_drain(void);
// Thread exit cleanup (ensures all deferred ops are processed)
static void mid_inuse_deferred_thread_cleanup(void* arg) {
    (void)arg;
    if (hak_pool_mid_inuse_deferred_enabled()) {
        mid_inuse_deferred_drain();
    }
}
// Global key for thread cleanup
static pthread_key_t g_mid_inuse_deferred_cleanup_key;
static pthread_once_t g_mid_inuse_deferred_key_once = PTHREAD_ONCE_INIT;
static void mid_inuse_deferred_init_key(void) {
    (void)pthread_key_create(&g_mid_inuse_deferred_cleanup_key, mid_inuse_deferred_thread_cleanup);
}
// Register thread cleanup once per thread
static __thread int g_mid_inuse_deferred_cleanup_registered = 0;
static inline void mid_inuse_deferred_ensure_cleanup(void) {
    if (!g_mid_inuse_deferred_cleanup_registered) {
        pthread_once(&g_mid_inuse_deferred_key_once, mid_inuse_deferred_init_key);
        // Value must be non-NULL, otherwise pthread skips the destructor at thread exit
        pthread_setspecific(g_mid_inuse_deferred_cleanup_key, (void*)1);
        g_mid_inuse_deferred_cleanup_registered = 1;
    }
}
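// Example (illustrative): a short-lived worker thread that frees mid-pool blocks and
// exits without ever hitting a drain boundary still gets its pending decrements
// flushed, because pthread invokes mid_inuse_deferred_thread_cleanup for the
// non-NULL value stored above.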
// ============================================================================
// Hot API: mid_inuse_dec_deferred - Defer inuse decrement to TLS map
// ============================================================================
// Called from: hak_pool_free_v1_fast_impl, hak_pool_free_v1_slow_impl
// Replaces: mid_page_inuse_dec_and_maybe_dn(raw)
//
// Fast path: No lookup, no atomic, no lock - just TLS map update
// Map full: Trigger drain (rare, amortized cost)
static inline void mid_inuse_dec_deferred(void* raw) {
    // ENV gate: If disabled, fall back to immediate decrement
    if (!hak_pool_mid_inuse_deferred_enabled()) {
        mid_page_inuse_dec_and_maybe_dn(raw);
        return;
    }
    // Ensure cleanup is registered (first-time only)
    mid_inuse_deferred_ensure_cleanup();
    // Calculate page base (POOL_PAGE_SIZE = 64KB, power of 2)
    void* page = (void*)((uintptr_t)raw & ~((uintptr_t)POOL_PAGE_SIZE - 1));
    // Search TLS map for an existing page entry
    MidInuseTlsPageMap* map = &g_mid_inuse_tls_map;
    for (uint32_t i = 0; i < map->used; i++) {
        if (map->pages[i] == page) {
            // Page already in map, increment count
            map->counts[i]++;
            MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
            return;
        }
    }
    // New page entry: check if map is full
    if (map->used >= MID_INUSE_TLS_MAP_SIZE) {
        // Map full, drain to free space (cold boundary); drain resets map->used to 0
        mid_inuse_deferred_drain();
    }
    // Add new entry to map
    uint32_t idx = map->used++;
    map->pages[idx] = page;
    map->counts[idx] = 1;
    MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
}
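// Call-site sketch (illustrative): only the caller names come from the list above;
// their bodies here are assumed, not taken from the real free paths.
//
//   void hak_pool_free_v1_fast_impl(void* raw) {
//       /* ... push block onto the TLS freelist ... */
//       mid_inuse_dec_deferred(raw);   // was: mid_page_inuse_dec_and_maybe_dn(raw);
//   }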
// ============================================================================
// Cold API: mid_inuse_deferred_drain - Batch process TLS map
// ============================================================================
// Called from:
// 1. mid_inuse_dec_deferred (when map full - rare)
// 2. Refill/slow boundaries (future optimization; see the sketch after this function)
// 3. Thread exit (via the pthread key destructor registered above)
//
// Effect: Performs all pending inuse decrements with a SINGLE lookup per page
// Safety: Exact same logic as mid_page_inuse_dec_and_maybe_dn, just batched
static inline void mid_inuse_deferred_drain(void) {
    MidInuseTlsPageMap* map = &g_mid_inuse_tls_map;
    // Track drain call
    MID_INUSE_DEFERRED_STAT_INC(drain_calls);
    // Process each entry in map
    for (uint32_t i = 0; i < map->used; i++) {
        void* page = map->pages[i];
        uint32_t n = map->counts[i];
        // ONLY lookup happens here (once per page, not once per free)
        MidPageDesc* d = mid_desc_lookup(page);
        if (!d) continue;
        // Track pages drained
        MID_INUSE_DEFERRED_STAT_ADD(pages_drained, n);
        // Atomic subtract (batched count)
        int old = atomic_fetch_sub_explicit(&d->in_use, (int)n, memory_order_relaxed);
        int nv = old - (int)n;
        // Check for empty transition (COPIED from mid_page_inuse_dec_and_maybe_dn)
        if (nv <= 0) {
            // Fire once per empty transition
            // Use atomic_exchange to ensure only ONE thread enqueues DONTNEED
            if (atomic_exchange_explicit(&d->pending_dn, 1, memory_order_acq_rel) == 0) {
                MID_INUSE_DEFERRED_STAT_INC(empty_transitions);
                hak_batch_add_page(d->page, POOL_PAGE_SIZE);
            }
        }
    }
    // Clear map (reset for next batch)
    map->used = 0;
}
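// Drain-boundary sketch (illustrative): hak_pool_refill_slow is a hypothetical
// caller, named only to show where the "refill/slow boundary" drain from the list
// above could sit once wired up.
//
//   static void* hak_pool_refill_slow(size_t need) {
//       if (hak_pool_mid_inuse_deferred_enabled()) {
//           mid_inuse_deferred_drain();   // already off the hot path, so batched lookups are cheap
//       }
//       /* ... existing refill logic using `need` ... */
//       return NULL;
//   }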
#endif // POOL_MID_INUSE_DEFERRED_BOX_H