Files
hakmem/core/box/tiny_alloc_gate_box.h
Moe Charm (CI) 5685c2f4c9 Implement Warm Pool Secondary Prefill Optimization (Phase B-2c Complete)
Problem: Warm pool had 0% hit rate (only 1 hit per 3976 misses) despite being
implemented, causing all cache misses to go through expensive superslab_refill
registry scans.

Root Cause Analysis:
- Warm pool was initialized once and pushed a single slab after each refill
- When that slab was exhausted, it was discarded (not pushed back)
- Next refill would push another single slab, which was immediately exhausted
- Pool would oscillate between 0 and 1 items, yielding 0% hit rate

Solution: Secondary Prefill on Cache Miss
When the warm pool becomes empty, we now do multiple superslab_refills and prefill
the pool with 3 additional HOT superslabs before attempting to carve. This
builds a working set of slabs that can sustain allocation pressure.

Implementation Details:
- Modified unified_cache_refill() cold path to detect empty pool
- Added prefill loop: when pool count == 0, load 3 extra superslabs
- Store extra slabs in warm pool, keep 1 in TLS for immediate carving
- Track prefill events in g_warm_pool_stats[].prefilled counter

Results (1M Random Mixed 256B allocations):
- Before: C7 hits=1, misses=3976, hit_rate=0.0%
- After:  C7 hits=3929, misses=3143, hit_rate=55.6%
- Throughput: 4.055M ops/s (maintained vs 4.07M baseline)
- Stability: Consistent 55.6% hit rate at 5M allocations (4.102M ops/s)

Performance Impact:
- No regression: throughput remained stable at ~4.1M ops/s
- Registry scan avoided in 55.6% of cache misses (significant savings)
- Warm pool now functioning as intended with strong locality

Configuration:
- TINY_WARM_POOL_MAX_PER_CLASS increased from 4 to 16 to support prefill
- Prefill budget hardcoded to 3 (tunable via env var if needed later)
- All statistics always compiled, ENV-gated printing via HAKMEM_WARM_POOL_STATS=1

Next Steps:
- Monitor for further optimization opportunities (prefill budget tuning)
- Consider adaptive prefill budget based on class-specific hit rates
- Validate at larger allocation counts (10M+ pending registry size fix)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 23:31:54 +09:00

221 lines
8.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_alloc_gate_box.h - Box: Tiny Alloc Gatekeeper
//
// 役割:
// - malloc 側の Tiny フロントエンドで、Tiny 向け割り当ての「入口箱」として振る舞う。
// - いまは既存の malloc_tiny_fast(size) を薄くラップしつつ、
// 将来の BASE/USER 変換・Bridge・Guard を 1 箱に集約できるフックを提供する。
//
// Box 理論:
// - Single Responsibility:
// 「Tiny alloc の入口で、返す USER ポインタを一度だけ検査/正規化する」。
// - Clear Boundary:
// malloc ラッパ (hak_wrappers) から Tiny Fast Path への入口を
// tiny_alloc_gate_fast() に一本化する。
// - Reversible:
// 診断は ENV (HAKMEM_TINY_ALLOC_GATE_DIAG) で ON/OFF 切替可能。
// OFF 時は従来どおり malloc_tiny_fast の挙動・コストを保つ。
#ifndef HAKMEM_TINY_ALLOC_GATE_BOX_H
#define HAKMEM_TINY_ALLOC_GATE_BOX_H
#include "../hakmem_build_flags.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../hakmem_tiny.h" // hak_tiny_size_to_class
#include "ptr_type_box.h"
#include "ptr_conversion_box.h" // USER↔BASE 変換
#include "tiny_ptr_bridge_box.h" // Tiny Superslab Bridge
#include "../tiny_region_id.h" // Header 読み出し
#include "../front/malloc_tiny_fast.h" // 既存 Tiny Fast Path
#include "tiny_route_box.h" // Tiny Front Routing Policy
// Context passed to the Tiny alloc gate validation (kept as a struct so that
// future extensions can be added in one place).
// The caller fills size / class_idx / user; tiny_alloc_gate_validate() fills
// bridge and, on success, base.
typedef struct TinyAllocGateContext {
// Requested allocation size in bytes.
size_t size;
// Expected class index derived from the size -> class mapping.
int class_idx;
// USER pointer returned by the Tiny allocation path.
hak_user_ptr_t user;
// BASE pointer obtained from the USER -> BASE conversion.
hak_base_ptr_t base;
// Superslab / slab / meta / class information for the pointer.
TinyPtrBridgeInfo bridge;
} TinyAllocGateContext;
// Reports whether the diagnostic Gatekeeper extension is switched on
// (ENV: HAKMEM_TINY_ALLOC_GATE_DIAG).
//
// The answer is resolved once per thread and cached in a thread-local flag.
// - Non-release builds: enabled when the variable is set, non-empty, and does
//   not start with '0'.
// - Release builds (HAKMEM_BUILD_RELEASE): always disabled; the environment is
//   never consulted.
//
// Returns 1 when diagnostics are enabled, 0 otherwise.
static inline int tiny_alloc_gate_diag_enabled(void)
{
    // -1 means "not resolved yet on this thread".
    static __thread int cached_flag = -1;

    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

#if !HAKMEM_BUILD_RELEASE
    const char* env_value = getenv("HAKMEM_TINY_ALLOC_GATE_DIAG");
    int enabled = 0;
    if (env_value != NULL && env_value[0] != '\0' && env_value[0] != '0') {
        enabled = 1;
    }
    cached_flag = enabled;
#else
    (void)getenv;  // keep the symbol referenced even though it is not called
    cached_flag = 0;
#endif

    return cached_flag;
}
// Diagnostic check: verifies that a USER pointer lives on a Tiny Superslab and
// compares its class information against the expected class in ctx.
//
// On the way it records the bridge lookup result in ctx->bridge and, when the
// check passes, stores the USER -> BASE converted pointer in ctx->base.
//
// Returns:
//   1: the pointer resolved to a Superslab/meta/slab with a valid meta class.
//      A mismatch between the expected class (or, in non-release builds, the
//      header class) and the meta class is only logged to stderr (first 8
//      occurrences per counter); it does NOT turn the result into 0.
//   0: ctx is NULL, the raw pointer is NULL or outside [4096, 0x00007fffffffffff],
//      the bridge lookup failed, or the meta class is >= TINY_NUM_CLASSES.
static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx)
{
    if (ctx == NULL) {
        return 0;
    }

    void* raw = HAK_USER_TO_RAW(ctx->user);
    if (raw == NULL) {
        return 0;
    }

    // Addresses that are obviously out of range are treated as not Tiny-managed.
    const uintptr_t raw_addr = (uintptr_t)raw;
    const int addr_plausible =
        (raw_addr >= 4096) && (raw_addr <= 0x00007fffffffffffULL);
    if (!addr_plausible) {
        return 0;
    }

    // Bridge: fetch Superslab / slab / meta / class in one lookup and keep the
    // result in the context regardless of the outcome below.
    TinyPtrBridgeInfo bridge = tiny_ptr_bridge_classify_raw(raw);
    ctx->bridge = bridge;

    const int bridge_resolved =
        bridge.ss && bridge.meta && !(bridge.slab_idx < 0);
    if (!bridge_resolved) {
        return 0;
    }

    uint8_t meta_cls = bridge.meta_cls;
    if (meta_cls >= TINY_NUM_CLASSES) {
        return 0;
    }

    // Expected class (derived from size) vs. meta class.
    const int expected_cls = ctx->class_idx;
    if (expected_cls >= 0 && (uint8_t)expected_cls != meta_cls) {
        static _Atomic uint32_t g_alloc_gate_cls_mis = 0;
        uint32_t seen = atomic_fetch_add_explicit(&g_alloc_gate_cls_mis, 1, memory_order_relaxed);
        if (seen < 8) {
            fprintf(stderr,
                    "[TINY_ALLOC_GATE_CLASS_MISMATCH] size=%zu cls_expect=%d meta_cls=%u user=%p ss=%p slab=%d\n",
                    ctx->size,
                    expected_cls,
                    (unsigned)meta_cls,
                    raw,
                    (void*)bridge.ss,
                    bridge.slab_idx);
            fflush(stderr);
        }
        // A class mismatch is not fail-fast: it is only logged (insertion
        // point for a future Guard).
    }

#if !HAKMEM_BUILD_RELEASE
    // Also cross-check the class read from the region header against the meta class.
    int hdr_cls = tiny_region_id_read_header(raw);
    if (hdr_cls >= 0 && hdr_cls != (int)meta_cls) {
        static _Atomic uint32_t g_alloc_gate_hdr_meta_mis = 0;
        uint32_t seen = atomic_fetch_add_explicit(&g_alloc_gate_hdr_meta_mis, 1, memory_order_relaxed);
        if (seen < 8) {
            fprintf(stderr,
                    "[TINY_ALLOC_GATE_HDR_META_MISMATCH] size=%zu hdr_cls=%d meta_cls=%u user=%p ss=%p slab=%d\n",
                    ctx->size,
                    hdr_cls,
                    (unsigned)meta_cls,
                    raw,
                    (void*)bridge.ss,
                    bridge.slab_idx);
            fflush(stderr);
        }
    }
#endif

    // Do the USER -> BASE conversion (through the conversion Box) exactly once,
    // so that future BASE-based guards can reuse it.
    ctx->base = ptr_user_to_base(ctx->user, meta_cls);
    return 1;
}
#if !HAKMEM_BUILD_RELEASE
// Debug-only post-allocation check shared by every route that hands out a
// pointer from the Tiny fast path.
//  - A NULL pointer is ignored (allocation failure is handled by the caller).
//  - An address below 4096 is reported on stderr and aborts the process.
//  - When tiny_alloc_gate_diag_enabled() is true, the pointer is additionally
//    run through tiny_alloc_gate_validate(); the result is intentionally
//    ignored, so validation is log-only.
static inline void tiny_alloc_gate_debug_check(size_t size, int class_idx, void* user_ptr)
{
    if (!user_ptr) {
        return;
    }

    uintptr_t addr = (uintptr_t)user_ptr;
    if (__builtin_expect(addr < 4096, 0)) {
        fprintf(stderr,
                "[TINY_ALLOC_GATE_RANGE_INVALID] size=%zu user=%p\n",
                size, user_ptr);
        fflush(stderr);
        abort();
    }

    if (__builtin_expect(tiny_alloc_gate_diag_enabled(), 0)) {
        TinyAllocGateContext ctx;
        ctx.size = size;
        ctx.user = HAK_USER_FROM_RAW(user_ptr);
        ctx.class_idx = class_idx;
        ctx.base = HAK_BASE_FROM_RAW(NULL);
        ctx.bridge.ss = NULL;
        ctx.bridge.meta = NULL;
        ctx.bridge.slab_idx = -1;
        ctx.bridge.meta_cls = 0xffu;
        (void)tiny_alloc_gate_validate(&ctx);
    }
}
#endif

// Tiny Alloc Gatekeeper entry point:
// - Called from the malloc wrapper (hak_wrappers) as the single entrance to the
//   Tiny fast allocation path.
// - Dispatches according to the routing policy of the size class, and in
//   non-release builds adds a range check plus (when diagnostics are enabled)
//   a Bridge/layout validation of the returned USER pointer.
//
// Parameters:
//   size: requested allocation size in bytes.
//
// Returns the USER pointer from malloc_tiny_fast(size), or NULL when
//   - the size does not map to a Tiny class,
//   - the class is routed ROUTE_POOL_ONLY (Tiny front is skipped entirely), or
//   - malloc_tiny_fast() itself returned NULL.
// In every NULL case the caller is expected to fall back to the Pool/backend.
//
// Declared `static inline` in addition to always_inline so that including this
// header without calling the function does not trigger unused-function or
// "always_inline function might not be inlinable" warnings.
static inline __attribute__((always_inline)) void* tiny_alloc_gate_fast(size_t size)
{
    int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        // Size is outside Tiny management: leave the gate with NULL.
        return NULL;
    }

    TinyRoutePolicy route = tiny_route_get(class_idx);
    if (__builtin_expect(route == ROUTE_POOL_ONLY, 0)) {
        // Pool-only: from the gate's point of view "Tiny could not serve it".
        return NULL;
    }

    // Every remaining route (ROUTE_TINY_ONLY and the Tiny-first fallback route)
    // tries the Tiny fast path and returns its result unchanged.
    void* user_ptr = malloc_tiny_fast(size);

#if !HAKMEM_BUILD_RELEASE
    // Layer 3a (alloc side): catch obviously broken pointers early in debug builds.
    tiny_alloc_gate_debug_check(size, class_idx, user_ptr);
#endif

    return user_ptr;
}
#endif // HAKMEM_TINY_ALLOC_GATE_BOX_H