// hakmem_tiny_refill.inc.h // Phase 12: Minimal refill helpers needed by Box fast path. // // 本ヘッダは、以下を提供する: // - superslab_tls_bump_fast: TinyTLSSlab + SuperSlab メタからのTLSバンプ窓 // - tiny_fast_refill_and_take: FastCache/TLS SLL からの最小 refill + 1個取得 // - bulk_mag_to_sll_if_room: Magazine→SLL へのバルク移送(容量チェック付き) // - sll_refill_small_from_ss: Phase12 shared SuperSlab pool 向けの最小実装 // // 旧来の g_sll_cap_override / getenv ベースの多経路ロジックは一切含めない。 #ifndef HAKMEM_TINY_REFILL_INC_H #define HAKMEM_TINY_REFILL_INC_H #include "hakmem_tiny.h" #include "hakmem_tiny_superslab.h" #include "hakmem_tiny_tls_list.h" #include "tiny_box_geometry.h" #include "superslab/superslab_inline.h" // Provides hak_super_lookup() and SUPERSLAB_MAGIC #include "box/tls_sll_box.h" #include "box/tiny_header_box.h" // Header Box: Single Source of Truth for header operations #include "box/tiny_front_config_box.h" // Phase 7-Step6-Fix: Config macros for dead code elimination #include "hakmem_tiny_integrity.h" #include "box/tiny_next_ptr_box.h" #include "tiny_region_id.h" // For HEADER_MAGIC/HEADER_CLASS_MASK (prepare header before SLL push) #include #include #include // For fprintf diagnostics // ========= Externs from hakmem_tiny.c and friends ========= extern int g_use_superslab; extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; extern int g_fastcache_enable; extern uint16_t g_fast_cap[TINY_NUM_CLASSES]; extern __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES]; // Phase 7-Step7: g_tls_sll_enable now accessed via TINY_FRONT_TLS_SLL_ENABLED macro extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES]; extern _Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES]; extern int g_ultra_bump_shadow; extern int g_bump_chunk; extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES]; extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES]; #if HAKMEM_DEBUG_COUNTERS extern uint64_t g_bump_hits[TINY_NUM_CLASSES]; extern uint64_t g_bump_arms[TINY_NUM_CLASSES]; extern uint64_t g_path_refill_calls[TINY_NUM_CLASSES]; extern uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES]; extern int g_path_debug_enabled; #endif // ========= From other units ========= SuperSlab* superslab_refill(int class_idx); void ss_active_inc(SuperSlab* ss); void ss_active_add(SuperSlab* ss, uint32_t n); size_t tiny_stride_for_class(int class_idx); uint8_t* tiny_slab_base_for_geometry(SuperSlab* ss, int slab_idx); extern uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap); /* ultra_* 系は hakmem_tiny.c 側に定義があるため、ここでは宣言しない */ /* tls_sll_push は box/tls_sll_box.h で static inline bool tls_sll_push(...) 提供済み */ /* tiny_small_mags_init_once / tiny_mag_init_if_needed も hakmem_tiny_magazine.h で宣言済みなので、ここでは再宣言しない */ /* tiny_fast_pop / tiny_fast_push / fastcache_* は hakmem_tiny_fastcache.inc.h 側の static inline なので、ここでは未宣言でOK */ #if !HAKMEM_BUILD_RELEASE static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where) { (void)class_idx; (void)where; // 最低限の防御: 異常に小さいアドレスを弾く if ((uintptr_t)node < 4096) { fprintf(stderr, "[TINY_REFILL_GUARD] %s: suspicious node=%p cls=%d\n", where, node, class_idx); abort(); } } #else static inline void tiny_debug_validate_node_base(int class_idx, void* node, const char* where) { (void)class_idx; (void)node; (void)where; } #endif // ========= superslab_tls_bump_fast ========= // // Ultra bump shadow: current slabが freelist 空で carvedmeta; if (!tls->ss || !meta || meta->freelist) return NULL; uint16_t carved = meta->carved; uint16_t cap = meta->capacity; if (carved >= cap) return NULL; uint32_t avail = (uint32_t)cap - (uint32_t)carved; uint32_t chunk = (g_bump_chunk > 0) ? (uint32_t)g_bump_chunk : 1u; if (chunk > avail) chunk = avail; size_t stride = tiny_stride_for_class(class_idx); uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx); uint8_t* start = base + (size_t)carved * stride; meta->carved = (uint16_t)(carved + (uint16_t)chunk); meta->used = (uint16_t)(meta->used + (uint16_t)chunk); ss_active_add(tls->ss, chunk); #if HAKMEM_DEBUG_COUNTERS g_bump_arms[class_idx]++; #endif // 1個目を即返し、残りをTLS windowとして保持 g_tls_bcur[class_idx] = start + stride; g_tls_bend[class_idx] = start + (size_t)chunk * stride; return start; } // ========= tiny_fast_refill_and_take ========= // // FCが空の時に、TLS list/superslab からバッチ取得して一つ返す。 // 旧来の複雑な経路を削り、FC/SLLのみの最小ロジックにする。 static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) { // 1) Front FastCache から直接 // Phase 7-Step6-Fix: Use config macro for dead code elimination in PGO mode if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) { hak_base_ptr_t fc = fastcache_pop(class_idx); if (!hak_base_is_null(fc)) { extern unsigned long long g_front_fc_hit[TINY_NUM_CLASSES]; g_front_fc_hit[class_idx]++; return HAK_BASE_TO_RAW(fc); } } // 2) ローカルfast list { hak_base_ptr_t p = tiny_fast_pop(class_idx); if (!hak_base_is_null(p)) return HAK_BASE_TO_RAW(p); } uint16_t cap = g_fast_cap[class_idx]; if (cap == 0) return NULL; TinyFastCache* fc = &g_fast_cache[class_idx]; int room = (int)cap - fc->top; if (room <= 0) return NULL; // 3) TLS SLL から詰め替え int filled = 0; // Phase 7-Step7: Use config macro for dead code elimination in PGO mode while (room > 0 && TINY_FRONT_TLS_SLL_ENABLED) { void* h = NULL; if (!tls_sll_pop(class_idx, &h)) break; tiny_debug_validate_node_base(class_idx, h, "tiny_fast_refill_and_take"); fc->items[fc->top++] = h; room--; filled++; } if (filled == 0) { // 4) Superslab bump (optional) void* bump = superslab_tls_bump_fast(class_idx); if (bump) return bump; return NULL; } // 5) 1個返す return fc->items[--fc->top]; } // ========= bulk_mag_to_sll_if_room ========= // // Magazine → SLL への安全な流し込み。 // tiny_free_magazine.inc.h から参照される。 static inline int bulk_mag_to_sll_if_room(int class_idx, TinyTLSMag* mag, int n) { // Phase 7-Step7: Use config macro for dead code elimination in PGO mode if (!TINY_FRONT_TLS_SLL_ENABLED || n <= 0) return 0; uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)mag->cap); uint32_t have = g_tls_sll[class_idx].count; if (have >= cap) return 0; int room = (int)(cap - have); int take = n < room ? n : room; if (take <= 0) return 0; if (take > mag->top) take = mag->top; if (take <= 0) return 0; int pushed = 0; for (int i = 0; i < take; i++) { void* p = mag->items[--mag->top].ptr; hak_base_ptr_t base_p = HAK_BASE_FROM_RAW(p); if (!tls_sll_push(class_idx, base_p, cap)) { mag->top++; // rollback last break; } pushed++; } #if HAKMEM_DEBUG_COUNTERS if (pushed > 0) g_path_refill_calls[class_idx]++; #endif return pushed; } /* * ========= Minimal Phase 12 sll_refill_small_from_ss ========= * * Box化方針: * - フロントエンド(tiny_fast_refill 等)は: * - TLS SLL: tls_sll_box.h API のみを使用 * - Superslab: 本関数を唯一の「小サイズ SLL 補充 Box」として利用 * - バックエンド: * - 現段階(Stage A/B)では既存 TLS Superslab/TinySlabMeta を直接利用 * - 将来(Stage C)に shared_pool_acquire_slab() に差し替え可能なよう、 * ここに Superslab 内部アクセスを閉じ込める * * 契約: * - Tiny classes のみ (0 <= class_idx < TINY_NUM_CLASSES) * - max_take は「この呼び出しで SLL に積みたい最大個数」 * - 戻り値は実際に SLL に積んだ個数(0 以上) * - 呼び出し側は head/count/meta 等に触れず、Box API (tls_sll_box) のみ利用する */ __attribute__((noinline)) int sll_refill_small_from_ss(int class_idx, int max_take) { // Hard defensive gate: Tiny classes only, never trust caller. if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) { return 0; } HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss"); atomic_fetch_add(&g_integrity_check_class_bounds, 1); // Phase12: 起動直後など、shared pool / superslab 未有効時は絶対に動かさない。 if (!g_use_superslab || max_take <= 0) { return 0; } // TLS slab 未構成状態 (ss/meta/slab_base すべて NULL) のときは、ここでは触らない。 // superslab_refill は「本当に必要になったタイミング」でのみ呼ぶ。 TinyTLSSlab* tls = &g_tls_slabs[class_idx]; if (!tls) { return 0; } // FIX: TLS未初期化時も superslab_refill() で初期化する(早期リターン削除) // 以前は tls_uninitialized の場合に return 0 していたが、これだと // TLS SLL が永遠に空のままになり、Larson ベンチで 70x slowdown が発生していた。 // Ensure we have a valid TLS slab for this class via shared pool. // superslab_refill() 契約: // - 成功: g_tls_slabs[class_idx] に ss/meta/slab_base/slab_idx を一貫して設定 // - 失敗: TLS は不変 or 巻き戻し、NULL を返す if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx || !tls->slab_base) { if (!superslab_refill(class_idx)) { return 0; } tls = &g_tls_slabs[class_idx]; if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx || !tls->slab_base) { return 0; } } TinySlabMeta* meta = tls->meta; // Meta invariants: class & capacity は妥当であること if (!meta || meta->class_idx != (uint8_t)class_idx || meta->capacity == 0) { return 0; } const uint32_t cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP); const uint32_t cur = g_tls_sll[class_idx].count; if (cur >= cap) { return 0; } int room = (int)(cap - cur); int target = (max_take < room) ? max_take : room; if (target <= 0) { return 0; } int taken = 0; const size_t stride = tiny_stride_for_class(class_idx); while (taken < target) { void* p = NULL; // freelist 優先 if (meta->freelist) { p = meta->freelist; // Point 4: Freelist chain integrity check (CRITICAL - detect corruption early) void* next_raw = tiny_next_read(class_idx, p); uintptr_t next_addr = (uintptr_t)next_raw; // Check 4a: NULL is valid (end of freelist) if (next_raw != NULL) { // Check 4b: Valid address range (not obviously corrupted) if (next_addr < 4096 || next_addr > 0x00007fffffffffffULL) { fprintf(stderr, "[FREELIST_NEXT_INVALID] cls=%d p=%p next=%p addr=%#lx (out of valid range)\n", class_idx, p, next_raw, next_addr); fprintf(stderr, "[FREELIST_NEXT_INVALID] ss=%p meta=%p freelist_head=%p\n", (void*)tls->ss, (void*)meta, p); abort(); } // Check 4c: SuperSlab ownership validation SuperSlab* ss_check = hak_super_lookup(next_raw); if (!ss_check || ss_check->magic != SUPERSLAB_MAGIC) { fprintf(stderr, "[FREELIST_NEXT_INVALID] cls=%d p=%p next=%p ss_check=%p (not in valid SuperSlab)\n", class_idx, p, next_raw, (void*)ss_check); if (ss_check) { fprintf(stderr, "[FREELIST_NEXT_INVALID] ss_check->magic=%#llx (expected %#llx)\n", (unsigned long long)ss_check->magic, (unsigned long long)SUPERSLAB_MAGIC); } abort(); } } meta->freelist = next_raw; meta->used++; if (__builtin_expect(meta->used > meta->capacity, 0)) { // 異常検出時はロールバックして終了(fail-fast 回避のため静かに中断) meta->used = meta->capacity; break; } ss_active_inc(tls->ss); } // freelist が尽きていて carved < capacity なら線形 carve else if (meta->carved < meta->capacity) { uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx); if (!base) { break; } uint16_t idx = meta->carved; if (idx >= meta->capacity) { break; } // Point 5: Stride calculation bounds check (CRITICAL - prevent out-of-bounds carving) // Check 5a: Stride must be valid (not 0, not suspiciously large) if (stride == 0 || stride > 100000) { fprintf(stderr, "[STRIDE_INVALID] cls=%d stride=%zu idx=%u cap=%u\n", class_idx, stride, idx, meta->capacity); fprintf(stderr, "[STRIDE_INVALID] ss=%p meta=%p base=%p\n", (void*)tls->ss, (void*)meta, (void*)base); abort(); } uint8_t* addr = base + ((size_t)idx * stride); // Check 5b: Calculated address must be within slab bounds uintptr_t base_addr = (uintptr_t)base; uintptr_t addr_addr = (uintptr_t)addr; size_t max_offset = (size_t)meta->capacity * stride; if (addr_addr < base_addr || (addr_addr - base_addr) > max_offset) { fprintf(stderr, "[ADDR_OUT_OF_BOUNDS] cls=%d base=%p addr=%p offset=%zu max=%zu\n", class_idx, (void*)base, (void*)addr, (addr_addr - base_addr), max_offset); fprintf(stderr, "[ADDR_OUT_OF_BOUNDS] idx=%u cap=%u stride=%zu\n", idx, meta->capacity, stride); abort(); } meta->carved++; meta->used++; if (__builtin_expect(meta->used > meta->capacity, 0)) { meta->used = meta->capacity; break; } ss_active_inc(tls->ss); p = addr; } // freelist も carve も尽きたら、新しい slab を shared pool から取得 else { if (!superslab_refill(class_idx)) { break; } tls = &g_tls_slabs[class_idx]; meta = tls->meta; if (!tls->ss || !meta || meta->class_idx != (uint8_t)class_idx || !tls->slab_base || meta->capacity == 0) { break; } continue; } if (!p) { break; } tiny_debug_validate_node_base(class_idx, p, "sll_refill_small_from_ss"); // Prepare header for header-classes so that safeheader mode accepts the push // Uses Header Box API (C1-C6 only; C0/C7 skip - offset=0 overwrites header) tiny_header_write_if_preserved(p, class_idx); // SLL push 失敗時はそれ以上積まない(p はTLS slab管理下なので破棄でOK) if (!tls_sll_push(class_idx, p, cap)) { break; } taken++; } return taken; } #endif // HAKMEM_TINY_REFILL_INC_H