Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified TinyTLSSLL struct to improve L1D cache locality. Expected performance gain: +12-18% from reducing cache line splits (2 loads → 1 load per operation). Changes: - core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad) - core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8] - core/box/tls_sll_box.h: Update Box API (13 sites) for unified access - Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head - Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count - core/hakmem_tiny_integrity.h: Unified canary guards - core/box/integrity_box.c: Simplified canary validation - Makefile: Added core/box/tiny_sizeclass_hist_box.o to link Build: ✅ PASS (10K ops sanity test) Warnings: Only pre-existing LTO type mismatches (unrelated) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
405 lines
13 KiB
C
405 lines
13 KiB
C
// hakmem_tiny_refill.inc.h
|
||
// Phase 12: Minimal refill helpers needed by Box fast path.
|
||
//
|
||
// 本ヘッダは、以下を提供する:
|
||
// - superslab_tls_bump_fast: TinyTLSSlab + SuperSlab メタからのTLSバンプ窓
|
||
// - tiny_fast_refill_and_take: FastCache/TLS SLL からの最小 refill + 1個取得
|
||
// - bulk_mag_to_sll_if_room: Magazine→SLL へのバルク移送(容量チェック付き)
|
||
// - sll_refill_small_from_ss: Phase12 shared SuperSlab pool 向けの最小実装
|
||
//
|
||
// 旧来の g_sll_cap_override / getenv ベースの多経路ロジックは一切含めない。
|
||
|
||
#ifndef HAKMEM_TINY_REFILL_INC_H
|
||
#define HAKMEM_TINY_REFILL_INC_H
|
||
|
||
#include "hakmem_tiny.h"
|
||
#include "hakmem_tiny_superslab.h"
|
||
#include "hakmem_tiny_tls_list.h"
|
||
#include "tiny_box_geometry.h"
|
||
#include "superslab/superslab_inline.h"
|
||
#include "box/tls_sll_box.h"
|
||
#include "hakmem_tiny_integrity.h"
|
||
#include "box/tiny_next_ptr_box.h"
|
||
#include "tiny_region_id.h" // For HEADER_MAGIC/HEADER_CLASS_MASK (prepare header before SLL push)
|
||
#include <stdint.h>
|
||
#include <stdatomic.h>
|
||
|
||
// ========= Externs from hakmem_tiny.c and friends =========
|
||
|
||
extern int g_use_superslab;
|
||
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
|
||
|
||
extern int g_fastcache_enable;
|
||
extern uint16_t g_fast_cap[TINY_NUM_CLASSES];
|
||
extern __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
|
||
|
||
extern int g_tls_sll_enable;
|
||
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
||
|
||
extern _Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
|
||
|
||
extern int g_ultra_bump_shadow;
|
||
extern int g_bump_chunk;
|
||
extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES];
|
||
extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES];
|
||
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
extern uint64_t g_bump_hits[TINY_NUM_CLASSES];
|
||
extern uint64_t g_bump_arms[TINY_NUM_CLASSES];
|
||
extern uint64_t g_path_refill_calls[TINY_NUM_CLASSES];
|
||
extern uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES];
|
||
extern int g_path_debug_enabled;
|
||
#endif
|
||
|
||
// ========= From other units =========
|
||
|
||
SuperSlab* superslab_refill(int class_idx);
|
||
|
||
void ss_active_inc(SuperSlab* ss);
|
||
void ss_active_add(SuperSlab* ss, uint32_t n);
|
||
|
||
size_t tiny_stride_for_class(int class_idx);
|
||
uint8_t* tiny_slab_base_for_geometry(SuperSlab* ss, int slab_idx);
|
||
|
||
extern uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
|
||
|
||
/* ultra_* 系は hakmem_tiny.c 側に定義があるため、ここでは宣言しない */
|
||
/* tls_sll_push は box/tls_sll_box.h で static inline bool tls_sll_push(...) 提供済み */
|
||
/* tiny_small_mags_init_once / tiny_mag_init_if_needed も hakmem_tiny_magazine.h で宣言済みなので、ここでは再宣言しない */
|
||
/* tiny_fast_pop / tiny_fast_push / fastcache_* は hakmem_tiny_fastcache.inc.h 側の static inline なので、ここでは未宣言でOK */
|
||
|
||
/*
 * tiny_debug_validate_node_base - cheap sanity check on a node pointer.
 *
 *   class_idx  size class the node is handled for (printed in the diagnostic)
 *   node       pointer about to be stored in a cache / list
 *   where      caller tag printed in the diagnostic (NULL is tolerated)
 *
 * Non-release builds (HAKMEM_BUILD_RELEASE == 0): a node address below 4096
 * is reported on stderr and the process is aborted.
 * Release builds: the function is a no-op.
 */
static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where)
{
#if !HAKMEM_BUILD_RELEASE
    // Minimal defence: reject implausibly small addresses.
    if ((uintptr_t)node < 4096) {
        fprintf(stderr,
                "[TINY_REFILL_GUARD] %s: suspicious node=%p cls=%d\n",
                where ? where : "(null)", node, class_idx);
        abort();
    }
#else
    (void)class_idx;
    (void)node;
    (void)where;
#endif
}
|
||
|
||
// ========= superslab_tls_bump_fast =========
|
||
//
|
||
// Ultra bump shadow: current slabが freelist 空で carved<capacity のとき、
|
||
// 連続領域を TLS window としてまとめ予約する。
|
||
// tiny_hot_pop_class{0..3} から呼ばれる。
|
||
|
||
static inline void* superslab_tls_bump_fast(int class_idx) {
|
||
if (!g_ultra_bump_shadow || !g_use_superslab) return NULL;
|
||
|
||
uint8_t* cur = g_tls_bcur[class_idx];
|
||
if (cur) {
|
||
uint8_t* end = g_tls_bend[class_idx];
|
||
size_t stride = tiny_stride_for_class(class_idx);
|
||
if (cur + stride <= end) {
|
||
g_tls_bcur[class_idx] = cur + stride;
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_bump_hits[class_idx]++;
|
||
#endif
|
||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||
// Headerは呼び出し元で書く or strideに含め済み想定。ここでは生ポインタ返す。
|
||
#endif
|
||
return cur;
|
||
}
|
||
g_tls_bcur[class_idx] = NULL;
|
||
g_tls_bend[class_idx] = NULL;
|
||
}
|
||
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
TinySlabMeta* meta = tls->meta;
|
||
if (!tls->ss || !meta || meta->freelist) return NULL;
|
||
|
||
uint16_t carved = meta->carved;
|
||
uint16_t cap = meta->capacity;
|
||
if (carved >= cap) return NULL;
|
||
|
||
uint32_t avail = (uint32_t)cap - (uint32_t)carved;
|
||
uint32_t chunk = (g_bump_chunk > 0) ? (uint32_t)g_bump_chunk : 1u;
|
||
if (chunk > avail) chunk = avail;
|
||
|
||
size_t stride = tiny_stride_for_class(class_idx);
|
||
uint8_t* base = tls->slab_base
|
||
? tls->slab_base
|
||
: tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||
uint8_t* start = base + (size_t)carved * stride;
|
||
|
||
meta->carved = (uint16_t)(carved + (uint16_t)chunk);
|
||
meta->used = (uint16_t)(meta->used + (uint16_t)chunk);
|
||
ss_active_add(tls->ss, chunk);
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
g_bump_arms[class_idx]++;
|
||
#endif
|
||
|
||
// 1個目を即返し、残りをTLS windowとして保持
|
||
g_tls_bcur[class_idx] = start + stride;
|
||
g_tls_bend[class_idx] = start + (size_t)chunk * stride;
|
||
return start;
|
||
}
|
||
|
||
// ========= tiny_fast_refill_and_take =========
|
||
//
|
||
// FCが空の時に、TLS list/superslab からバッチ取得して一つ返す。
|
||
// 旧来の複雑な経路を削り、FC/SLLのみの最小ロジックにする。
|
||
|
||
static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) {
|
||
// 1) Front FastCache から直接
|
||
if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) {
|
||
void* fc = fastcache_pop(class_idx);
|
||
if (fc) {
|
||
extern unsigned long long g_front_fc_hit[TINY_NUM_CLASSES];
|
||
g_front_fc_hit[class_idx]++;
|
||
return fc;
|
||
}
|
||
}
|
||
|
||
// 2) ローカルfast list
|
||
{
|
||
void* p = tiny_fast_pop(class_idx);
|
||
if (p) return p;
|
||
}
|
||
|
||
uint16_t cap = g_fast_cap[class_idx];
|
||
if (cap == 0) return NULL;
|
||
TinyFastCache* fc = &g_fast_cache[class_idx];
|
||
int room = (int)cap - fc->top;
|
||
if (room <= 0) return NULL;
|
||
|
||
// 3) TLS SLL から詰め替え
|
||
int filled = 0;
|
||
while (room > 0 && g_tls_sll_enable) {
|
||
void* h = NULL;
|
||
if (!tls_sll_pop(class_idx, &h)) break;
|
||
tiny_debug_validate_node_base(class_idx, h, "tiny_fast_refill_and_take");
|
||
fc->items[fc->top++] = h;
|
||
room--;
|
||
filled++;
|
||
}
|
||
|
||
if (filled == 0) {
|
||
// 4) Superslab bump (optional)
|
||
void* bump = superslab_tls_bump_fast(class_idx);
|
||
if (bump) return bump;
|
||
return NULL;
|
||
}
|
||
|
||
// 5) 1個返す
|
||
return fc->items[--fc->top];
|
||
}
|
||
|
||
// ========= bulk_mag_to_sll_if_room =========
|
||
//
|
||
// Magazine → SLL への安全な流し込み。
|
||
// tiny_free_magazine.inc.h から参照される。
|
||
|
||
static inline int bulk_mag_to_sll_if_room(int class_idx, TinyTLSMag* mag, int n) {
|
||
if (!g_tls_sll_enable || n <= 0) return 0;
|
||
|
||
uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
|
||
uint32_t have = g_tls_sll[class_idx].count;
|
||
if (have >= cap) return 0;
|
||
|
||
int room = (int)(cap - have);
|
||
int take = n < room ? n : room;
|
||
if (take <= 0) return 0;
|
||
if (take > mag->top) take = mag->top;
|
||
if (take <= 0) return 0;
|
||
|
||
int pushed = 0;
|
||
for (int i = 0; i < take; i++) {
|
||
void* p = mag->items[--mag->top].ptr;
|
||
if (!tls_sll_push(class_idx, p, cap)) {
|
||
mag->top++; // rollback last
|
||
break;
|
||
}
|
||
pushed++;
|
||
}
|
||
#if HAKMEM_DEBUG_COUNTERS
|
||
if (pushed > 0) g_path_refill_calls[class_idx]++;
|
||
#endif
|
||
return pushed;
|
||
}
|
||
|
||
/*
|
||
* ========= Minimal Phase 12 sll_refill_small_from_ss =========
|
||
*
|
||
* Box化方針:
|
||
* - フロントエンド(tiny_fast_refill 等)は:
|
||
* - TLS SLL: tls_sll_box.h API のみを使用
|
||
* - Superslab: 本関数を唯一の「小サイズ SLL 補充 Box」として利用
|
||
* - バックエンド:
|
||
* - 現段階(Stage A/B)では既存 TLS Superslab/TinySlabMeta を直接利用
|
||
* - 将来(Stage C)に shared_pool_acquire_slab() に差し替え可能なよう、
|
||
* ここに Superslab 内部アクセスを閉じ込める
|
||
*
|
||
* 契約:
|
||
* - Tiny classes のみ (0 <= class_idx < TINY_NUM_CLASSES)
|
||
* - max_take は「この呼び出しで SLL に積みたい最大個数」
|
||
* - 戻り値は実際に SLL に積んだ個数(0 以上)
|
||
* - 呼び出し側は head/count/meta 等に触れず、Box API (tls_sll_box) のみ利用する
|
||
*/
|
||
|
||
__attribute__((noinline))
|
||
int sll_refill_small_from_ss(int class_idx, int max_take)
|
||
{
|
||
// Hard defensive gate: Tiny classes only, never trust caller.
|
||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
||
return 0;
|
||
}
|
||
|
||
HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
|
||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||
|
||
// Phase12: 起動直後など、shared pool / superslab 未有効時は絶対に動かさない。
|
||
if (!g_use_superslab || max_take <= 0) {
|
||
return 0;
|
||
}
|
||
|
||
// TLS slab 未構成状態 (ss/meta/slab_base すべて NULL) のときは、ここでは触らない。
|
||
// superslab_refill は「本当に必要になったタイミング」でのみ呼ぶ。
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
if (!tls) {
|
||
return 0;
|
||
}
|
||
|
||
bool tls_uninitialized =
|
||
(tls->ss == NULL) &&
|
||
(tls->meta == NULL) &&
|
||
(tls->slab_base == NULL);
|
||
|
||
if (tls_uninitialized) {
|
||
// 初回は、呼び出し元の上位ロジックが superslab_refill を呼ぶことを期待し、ここでは何もしない。
|
||
return 0;
|
||
}
|
||
|
||
// Ensure we have a valid TLS slab for this class via shared pool.
|
||
// superslab_refill() 契約:
|
||
// - 成功: g_tls_slabs[class_idx] に ss/meta/slab_base/slab_idx を一貫して設定
|
||
// - 失敗: TLS は不変 or 巻き戻し、NULL を返す
|
||
if (!tls->ss || !tls->meta ||
|
||
tls->meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base) {
|
||
if (!superslab_refill(class_idx)) {
|
||
return 0;
|
||
}
|
||
tls = &g_tls_slabs[class_idx];
|
||
if (!tls->ss || !tls->meta ||
|
||
tls->meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base) {
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
TinySlabMeta* meta = tls->meta;
|
||
// Meta invariants: class & capacity は妥当であること
|
||
if (!meta ||
|
||
meta->class_idx != (uint8_t)class_idx ||
|
||
meta->capacity == 0) {
|
||
return 0;
|
||
}
|
||
|
||
const uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||
const uint32_t cur = g_tls_sll[class_idx].count;
|
||
if (cur >= cap) {
|
||
return 0;
|
||
}
|
||
|
||
int room = (int)(cap - cur);
|
||
int target = (max_take < room) ? max_take : room;
|
||
if (target <= 0) {
|
||
return 0;
|
||
}
|
||
|
||
int taken = 0;
|
||
const size_t stride = tiny_stride_for_class(class_idx);
|
||
|
||
while (taken < target) {
|
||
void* p = NULL;
|
||
|
||
// freelist 優先
|
||
if (meta->freelist) {
|
||
p = meta->freelist;
|
||
meta->freelist = tiny_next_read(class_idx, p);
|
||
meta->used++;
|
||
if (__builtin_expect(meta->used > meta->capacity, 0)) {
|
||
// 異常検出時はロールバックして終了(fail-fast 回避のため静かに中断)
|
||
meta->used = meta->capacity;
|
||
break;
|
||
}
|
||
ss_active_inc(tls->ss);
|
||
}
|
||
// freelist が尽きていて carved < capacity なら線形 carve
|
||
else if (meta->carved < meta->capacity) {
|
||
uint8_t* base = tls->slab_base
|
||
? tls->slab_base
|
||
: tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||
if (!base) {
|
||
break;
|
||
}
|
||
uint16_t idx = meta->carved;
|
||
if (idx >= meta->capacity) {
|
||
break;
|
||
}
|
||
uint8_t* addr = base + ((size_t)idx * stride);
|
||
meta->carved++;
|
||
meta->used++;
|
||
if (__builtin_expect(meta->used > meta->capacity, 0)) {
|
||
meta->used = meta->capacity;
|
||
break;
|
||
}
|
||
ss_active_inc(tls->ss);
|
||
p = addr;
|
||
}
|
||
// freelist も carve も尽きたら、新しい slab を shared pool から取得
|
||
else {
|
||
if (!superslab_refill(class_idx)) {
|
||
break;
|
||
}
|
||
tls = &g_tls_slabs[class_idx];
|
||
meta = tls->meta;
|
||
if (!tls->ss || !meta ||
|
||
meta->class_idx != (uint8_t)class_idx ||
|
||
!tls->slab_base ||
|
||
meta->capacity == 0) {
|
||
break;
|
||
}
|
||
continue;
|
||
}
|
||
|
||
if (!p) {
|
||
break;
|
||
}
|
||
|
||
tiny_debug_validate_node_base(class_idx, p, "sll_refill_small_from_ss");
|
||
|
||
// Prepare header for header-classes so that safeheader mode accepts the push
|
||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||
if (class_idx != 0 && class_idx != 7) {
|
||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||
}
|
||
#endif
|
||
// SLL push 失敗時はそれ以上積まない(p はTLS slab管理下なので破棄でOK)
|
||
if (!tls_sll_push(class_idx, p, cap)) {
|
||
break;
|
||
}
|
||
|
||
taken++;
|
||
}
|
||
|
||
return taken;
|
||
}
|
||
|
||
#endif // HAKMEM_TINY_REFILL_INC_H
|