## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
  - Remote drain → Freelist → Carve priority
  - Header restoration for C1-C6 (NOT C0/C7)
  - ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
  - Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
  - Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
406 lines
13 KiB
C
406 lines
13 KiB
C
// hakmem_tiny_refill.inc.h
// Phase 12: Minimal refill helpers needed by Box fast path.
//
// This header provides:
//   - superslab_tls_bump_fast:   TLS bump window built from TinyTLSSlab + SuperSlab metadata
//   - tiny_fast_refill_and_take: minimal refill from FastCache / TLS SLL, then take one block
//   - bulk_mag_to_sll_if_room:   bulk transfer Magazine -> SLL (with capacity check)
//   - sll_refill_small_from_ss:  minimal implementation for the Phase 12 shared SuperSlab pool
//
// It deliberately contains none of the legacy multi-path logic based on
// g_sll_cap_override / getenv.

#ifndef HAKMEM_TINY_REFILL_INC_H
#define HAKMEM_TINY_REFILL_INC_H

#include "hakmem_tiny.h"
#include "hakmem_tiny_superslab.h"
#include "hakmem_tiny_tls_list.h"
#include "tiny_box_geometry.h"
#include "superslab/superslab_inline.h"
#include "box/tls_sll_box.h"
#include "hakmem_tiny_integrity.h"
#include "box/tiny_next_ptr_box.h"
#include "tiny_region_id.h"  // For HEADER_MAGIC/HEADER_CLASS_MASK (prepare header before SLL push)
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>   // fprintf (debug guard)
#include <stdlib.h>  // abort (debug guard)

// ========= Externs from hakmem_tiny.c and friends =========
|
||
|
||
extern int g_use_superslab;
|
||
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
|
||
|
||
extern int g_fastcache_enable;
|
||
extern uint16_t g_fast_cap[TINY_NUM_CLASSES];
|
||
extern __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
|
||
|
||
extern int g_tls_sll_enable;
|
||
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
|
||
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
|
||
|
||
extern _Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
|
||
|
||
extern int g_ultra_bump_shadow;
|
||
extern int g_bump_chunk;
|
||
extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES];
|
||
extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES];
|
||
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
extern uint64_t g_bump_hits[TINY_NUM_CLASSES];
|
||
extern uint64_t g_bump_arms[TINY_NUM_CLASSES];
|
||
extern uint64_t g_path_refill_calls[TINY_NUM_CLASSES];
|
||
extern uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES];
|
||
extern int g_path_debug_enabled;
|
||
#endif
|
||
|
||
// ========= From other units =========
|
||
|
||
SuperSlab* superslab_refill(int class_idx);
|
||
|
||
void ss_active_inc(SuperSlab* ss);
|
||
void ss_active_add(SuperSlab* ss, uint32_t n);
|
||
|
||
size_t tiny_stride_for_class(int class_idx);
|
||
uint8_t* tiny_slab_base_for_geometry(SuperSlab* ss, int slab_idx);
|
||
|
||
extern uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
|
||
|
||
/* ultra_* 系は hakmem_tiny.c 側に定義があるため、ここでは宣言しない */
|
||
/* tls_sll_push は box/tls_sll_box.h で static inline bool tls_sll_push(...) 提供済み */
|
||
/* tiny_small_mags_init_once / tiny_mag_init_if_needed も hakmem_tiny_magazine.h で宣言済みなので、ここでは再宣言しない */
|
||
/* tiny_fast_pop / tiny_fast_push / fastcache_* は hakmem_tiny_fastcache.inc.h 側の static inline なので、ここでは未宣言でOK */
|
||
|
||
/*
 * Sanity-check a node pointer before it enters a front-end cache.
 *
 * Non-release builds (HAKMEM_BUILD_RELEASE == 0): a pointer whose address is
 * below 4096 is reported on stderr and the process is aborted.
 * Release builds: the function is a no-op.
 *
 * class_idx  size class of the node (used only in the diagnostic message)
 * node       pointer to validate
 * where      caller tag for the diagnostic; NULL is printed as "(unknown)"
 */
static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where)
{
#if !HAKMEM_BUILD_RELEASE
    // Minimal defence: reject implausibly small addresses.
    if ((uintptr_t)node < 4096) {
        // Passing NULL to %s is undefined behaviour, so substitute a placeholder.
        fprintf(stderr,
                "[TINY_REFILL_GUARD] %s: suspicious node=%p cls=%d\n",
                where ? where : "(unknown)", node, class_idx);
        abort();
    }
#else
    (void)class_idx;
    (void)node;
    (void)where;
#endif
}

// ========= superslab_tls_bump_fast =========
|
||
//
|
||
// Ultra bump shadow: current slabが freelist 空で carved<capacity のとき、
|
||
// 連続領域を TLS window としてまとめ予約する。
|
||
// tiny_hot_pop_class{0..3} から呼ばれる。
|
||
|
||
static inline void* superslab_tls_bump_fast(int class_idx) {
|
||
if (!g_ultra_bump_shadow || !g_use_superslab) return NULL;
|
||
|
||
uint8_t* cur = g_tls_bcur[class_idx];
|
||
if (cur) {
|
||
uint8_t* end = g_tls_bend[class_idx];
|
||
size_t stride = tiny_stride_for_class(class_idx);
|
||
if (cur + stride <= end) {
|
||
g_tls_bcur[class_idx] = cur + stride;
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_bump_hits[class_idx]++;
|
||
#endif
|
||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||
// Headerは呼び出し元で書く or strideに含め済み想定。ここでは生ポインタ返す。
|
||
#endif
|
||
return cur;
|
||
}
|
||
g_tls_bcur[class_idx] = NULL;
|
||
g_tls_bend[class_idx] = NULL;
|
||
}
|
||
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
TinySlabMeta* meta = tls->meta;
|
||
if (!tls->ss || !meta || meta->freelist) return NULL;
|
||
|
||
uint16_t carved = meta->carved;
|
||
uint16_t cap = meta->capacity;
|
||
if (carved >= cap) return NULL;
|
||
|
||
uint32_t avail = (uint32_t)cap - (uint32_t)carved;
|
||
uint32_t chunk = (g_bump_chunk > 0) ? (uint32_t)g_bump_chunk : 1u;
|
||
if (chunk > avail) chunk = avail;
|
||
|
||
size_t stride = tiny_stride_for_class(class_idx);
|
||
uint8_t* base = tls->slab_base
|
||
? tls->slab_base
|
||
: tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||
uint8_t* start = base + (size_t)carved * stride;
|
||
|
||
meta->carved = (uint16_t)(carved + (uint16_t)chunk);
|
||
meta->used = (uint16_t)(meta->used + (uint16_t)chunk);
|
||
ss_active_add(tls->ss, chunk);
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_bump_arms[class_idx]++;
|
||
#endif
|
||
|
||
// 1個目を即返し、残りをTLS windowとして保持
|
||
g_tls_bcur[class_idx] = start + stride;
|
||
g_tls_bend[class_idx] = start + (size_t)chunk * stride;
|
||
return start;
|
||
}
|
||
|
||
// ========= tiny_fast_refill_and_take =========
|
||
//
|
||
// FCが空の時に、TLS list/superslab からバッチ取得して一つ返す。
|
||
// 旧来の複雑な経路を削り、FC/SLLのみの最小ロジックにする。
|
||
|
||
static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) {
|
||
// 1) Front FastCache から直接
|
||
if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) {
|
||
void* fc = fastcache_pop(class_idx);
|
||
if (fc) {
|
||
extern unsigned long long g_front_fc_hit[TINY_NUM_CLASSES];
|
||
g_front_fc_hit[class_idx]++;
|
||
return fc;
|
||
}
|
||
}
|
||
|
||
// 2) ローカルfast list
|
||
{
|
||
void* p = tiny_fast_pop(class_idx);
|
||
if (p) return p;
|
||
}
|
||
|
||
uint16_t cap = g_fast_cap[class_idx];
|
||
if (cap == 0) return NULL;
|
||
TinyFastCache* fc = &g_fast_cache[class_idx];
|
||
int room = (int)cap - fc->top;
|
||
if (room <= 0) return NULL;
|
||
|
||
// 3) TLS SLL から詰め替え
|
||
int filled = 0;
|
||
while (room > 0 && g_tls_sll_enable) {
|
||
void* h = NULL;
|
||
if (!tls_sll_pop(class_idx, &h)) break;
|
||
tiny_debug_validate_node_base(class_idx, h, "tiny_fast_refill_and_take");
|
||
fc->items[fc->top++] = h;
|
||
room--;
|
||
filled++;
|
||
}
|
||
|
||
if (filled == 0) {
|
||
// 4) Superslab bump (optional)
|
||
void* bump = superslab_tls_bump_fast(class_idx);
|
||
if (bump) return bump;
|
||
return NULL;
|
||
}
|
||
|
||
// 5) 1個返す
|
||
return fc->items[--fc->top];
|
||
}
|
||
|
||
// ========= bulk_mag_to_sll_if_room =========
|
||
//
|
||
// Magazine → SLL への安全な流し込み。
|
||
// tiny_free_magazine.inc.h から参照される。
|
||
|
||
static inline int bulk_mag_to_sll_if_room(int class_idx, TinyTLSMag* mag, int n) {
|
||
if (!g_tls_sll_enable || n <= 0) return 0;
|
||
|
||
uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
|
||
uint32_t have = g_tls_sll_count[class_idx];
|
||
if (have >= cap) return 0;
|
||
|
||
int room = (int)(cap - have);
|
||
int take = n < room ? n : room;
|
||
if (take <= 0) return 0;
|
||
if (take > mag->top) take = mag->top;
|
||
if (take <= 0) return 0;
|
||
|
||
int pushed = 0;
|
||
for (int i = 0; i < take; i++) {
|
||
void* p = mag->items[--mag->top].ptr;
|
||
if (!tls_sll_push(class_idx, p, cap)) {
|
||
mag->top++; // rollback last
|
||
break;
|
||
}
|
||
pushed++;
|
||
}
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
if (pushed > 0) g_path_refill_calls[class_idx]++;
|
||
#endif
|
||
return pushed;
|
||
}
|
||
|
||
/*
|
||
* ========= Minimal Phase 12 sll_refill_small_from_ss =========
|
||
*
|
||
* Box化方針:
|
||
* - フロントエンド(tiny_fast_refill 等)は:
|
||
* - TLS SLL: tls_sll_box.h API のみを使用
|
||
* - Superslab: 本関数を唯一の「小サイズ SLL 補充 Box」として利用
|
||
* - バックエンド:
|
||
* - 現段階(Stage A/B)では既存 TLS Superslab/TinySlabMeta を直接利用
|
||
* - 将来(Stage C)に shared_pool_acquire_slab() に差し替え可能なよう、
|
||
* ここに Superslab 内部アクセスを閉じ込める
|
||
*
|
||
* 契約:
|
||
* - Tiny classes のみ (0 <= class_idx < TINY_NUM_CLASSES)
|
||
* - max_take は「この呼び出しで SLL に積みたい最大個数」
|
||
* - 戻り値は実際に SLL に積んだ個数(0 以上)
|
||
* - 呼び出し側は head/count/meta 等に触れず、Box API (tls_sll_box) のみ利用する
|
||
*/
|
||
|
||
__attribute__((noinline))
|
||
int sll_refill_small_from_ss(int class_idx, int max_take)
|
||
{
|
||
// Hard defensive gate: Tiny classes only, never trust caller.
|
||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
||
return 0;
|
||
}
|
||
|
||
HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
|
||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||
|
||
// Phase12: 起動直後など、shared pool / superslab 未有効時は絶対に動かさない。
|
||
if (!g_use_superslab || max_take <= 0) {
|
||
return 0;
|
||
}
|
||
|
||
// TLS slab 未構成状態 (ss/meta/slab_base すべて NULL) のときは、ここでは触らない。
|
||
// superslab_refill は「本当に必要になったタイミング」でのみ呼ぶ。
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
if (!tls) {
|
||
return 0;
|
||
}
|
||
|
||
bool tls_uninitialized =
|
||
(tls->ss == NULL) &&
|
||
(tls->meta == NULL) &&
|
||
(tls->slab_base == NULL);
|
||
|
||
if (tls_uninitialized) {
|
||
// 初回は、呼び出し元の上位ロジックが superslab_refill を呼ぶことを期待し、ここでは何もしない。
|
||
return 0;
|
||
}
|
||
|
||
// Ensure we have a valid TLS slab for this class via shared pool.
|
||
// superslab_refill() 契約:
|
||
// - 成功: g_tls_slabs[class_idx] に ss/meta/slab_base/slab_idx を一貫して設定
|
||
// - 失敗: TLS は不変 or 巻き戻し、NULL を返す
|
||
if (!tls->ss || !tls->meta ||
|
||
tls->meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base) {
|
||
if (!superslab_refill(class_idx)) {
|
||
return 0;
|
||
}
|
||
tls = &g_tls_slabs[class_idx];
|
||
if (!tls->ss || !tls->meta ||
|
||
tls->meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base) {
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
TinySlabMeta* meta = tls->meta;
|
||
// Meta invariants: class & capacity は妥当であること
|
||
if (!meta ||
|
||
meta->class_idx != (uint8_t)class_idx ||
|
||
meta->capacity == 0) {
|
||
return 0;
|
||
}
|
||
|
||
const uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||
const uint32_t cur = g_tls_sll_count[class_idx];
|
||
if (cur >= cap) {
|
||
return 0;
|
||
}
|
||
|
||
int room = (int)(cap - cur);
|
||
int target = (max_take < room) ? max_take : room;
|
||
if (target <= 0) {
|
||
return 0;
|
||
}
|
||
|
||
int taken = 0;
|
||
const size_t stride = tiny_stride_for_class(class_idx);
|
||
|
||
while (taken < target) {
|
||
void* p = NULL;
|
||
|
||
// freelist 優先
|
||
if (meta->freelist) {
|
||
p = meta->freelist;
|
||
meta->freelist = tiny_next_read(class_idx, p);
|
||
meta->used++;
|
||
if (__builtin_expect(meta->used > meta->capacity, 0)) {
|
||
// 異常検出時はロールバックして終了(fail-fast 回避のため静かに中断)
|
||
meta->used = meta->capacity;
|
||
break;
|
||
}
|
||
ss_active_inc(tls->ss);
|
||
}
|
||
// freelist が尽きていて carved < capacity なら線形 carve
|
||
else if (meta->carved < meta->capacity) {
|
||
uint8_t* base = tls->slab_base
|
||
? tls->slab_base
|
||
: tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||
if (!base) {
|
||
break;
|
||
}
|
||
uint16_t idx = meta->carved;
|
||
if (idx >= meta->capacity) {
|
||
break;
|
||
}
|
||
uint8_t* addr = base + ((size_t)idx * stride);
|
||
meta->carved++;
|
||
meta->used++;
|
||
if (__builtin_expect(meta->used > meta->capacity, 0)) {
|
||
meta->used = meta->capacity;
|
||
break;
|
||
}
|
||
ss_active_inc(tls->ss);
|
||
p = addr;
|
||
}
|
||
// freelist も carve も尽きたら、新しい slab を shared pool から取得
|
||
else {
|
||
if (!superslab_refill(class_idx)) {
|
||
break;
|
||
}
|
||
tls = &g_tls_slabs[class_idx];
|
||
meta = tls->meta;
|
||
if (!tls->ss || !meta ||
|
||
meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base ||
|
||
meta->capacity == 0) {
|
||
break;
|
||
}
|
||
continue;
|
||
}
|
||
|
||
if (!p) {
|
||
break;
|
||
}
|
||
|
||
tiny_debug_validate_node_base(class_idx, p, "sll_refill_small_from_ss");
|
||
|
||
// Prepare header for header-classes so that safeheader mode accepts the push
|
||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||
if (class_idx != 0 && class_idx != 7) {
|
||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||
}
|
||
#endif
|
||
// SLL push 失敗時はそれ以上積まない(p はTLS slab管理下なので破棄でOK)
|
||
if (!tls_sll_push(class_idx, p, cap)) {
|
||
break;
|
||
}
|
||
|
||
taken++;
|
||
}
|
||
|
||
return taken;
|
||
}
|
||
|
||
#endif // HAKMEM_TINY_REFILL_INC_H
|