Add TinyHeap class mask and extend routing
This commit is contained in:
@ -237,6 +237,12 @@
|
||||
- 長時間 delta debug(META_LIGHT=1 DELTA_DEBUG=1):
|
||||
- 100k/200k: `[C7_DELTA_SUMMARY] nonzero_pages=0 used_delta_sum=0 active_delta_sum=0`(delta 残なし)
|
||||
|
||||
### Phase 7: TinyHeap クラス選択(C6 載せ替えの土台)
|
||||
- ENV `HAKMEM_TINY_HEAP_CLASSES`(bitmask、デフォルト 0x80=C7 のみ)を追加。`tiny_heap_class_route_enabled(cls)` で TinyHeap front を使うクラスを切替。
|
||||
- Front gate: `malloc_tiny_fast` / `free_tiny_fast` でクラスごとに TinyHeap ルートを選択。C7 は従来通り `tiny_c7_heap_mode_enabled()`(HAKMEM_TINY_C7_HOT 連動)でガードし、それ以外のクラスは `tiny_heap_alloc/free_class_fast()` を呼ぶ経路を追加。
|
||||
- TLS SLL との分離をクラス単位に拡張: `sll_refill_small_from_ss` / `sll_refill_batch_from_ss` / `hak_tiny_prewarm_tls_cache` は `tiny_heap_class_route_enabled(cls)` のとき即 return/skip(C6 も TinyHeap に載せたら SLL を経由しない)。
|
||||
- ドキュメント: TinyHeapBox/C7HotBox 設計にクラス bitmask と multi-class 対応の方針を追記。ベンチは今後 C6/C7 切替パターンで再計測予定。
|
||||
|
||||
ホットパス perf フェーズの TODO(案)
|
||||
1. tiny_alloc_fast / tiny_free_fast_v2 の再プロファイル:残存分岐・間接呼び出し・重い箱を特定。
|
||||
2. Unified Cache ヒットパスを最短化:ヒット時を 1–2 load + 軽分岐に近づける(必要なら C7 専用インライン版検討)。
|
||||
|
||||
@ -41,6 +41,7 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
|
||||
core/box/../hakmem_shared_pool.h core/box/../hakmem_internal.h \
|
||||
core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/tiny_heap_env_box.h core/box/c7_hotpath_env_box.h \
|
||||
core/box/../tiny_refill_opt.h core/box/../box/tls_sll_box.h \
|
||||
core/box/../tiny_box_geometry.h core/box/c7_meta_used_counter_box.h
|
||||
core/box/../hakmem_tiny.h:
|
||||
@ -115,6 +116,8 @@ core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_tiny_integrity.h:
|
||||
core/box/../box/slab_freelist_atomic.h:
|
||||
core/box/tiny_header_box.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/box/../tiny_refill_opt.h:
|
||||
core/box/../box/tls_sll_box.h:
|
||||
core/box/../tiny_box_geometry.h:
|
||||
|
||||
@ -35,6 +35,7 @@ core/box/front_gate_box.o: core/box/front_gate_box.c \
|
||||
core/box/../hakmem_internal.h core/box/../tiny_region_id.h \
|
||||
core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/tiny_heap_env_box.h core/box/c7_hotpath_env_box.h \
|
||||
core/box/ptr_conversion_box.h core/box/ptr_type_box.h
|
||||
core/box/front_gate_box.h:
|
||||
core/hakmem_tiny.h:
|
||||
@ -107,5 +108,7 @@ core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_tiny_integrity.h:
|
||||
core/box/../box/slab_freelist_atomic.h:
|
||||
core/box/tiny_header_box.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/box/ptr_conversion_box.h:
|
||||
core/box/ptr_type_box.h:
|
||||
|
||||
@ -6,6 +6,8 @@
|
||||
#include "../hakmem_tiny.h" // For tiny_get_max_size() + hak_lane_classify.inc.h
|
||||
#include "../hakmem_pool.h" // Phase 2: For hak_pool_try_alloc() (Pool lane 1025B-52KB)
|
||||
#include "../hakmem_smallmid.h" // For Small-Mid Front Box (Phase 17-1)
|
||||
#include "tiny_heap_env_box.h" // TinyHeap front gate (C7)
|
||||
#include "tiny_c7_hotbox.h" // tiny_c7_alloc_fast wrapper
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
#include "../pool_tls.h"
|
||||
@ -86,6 +88,12 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
// PERF_OPT: likely hint - tiny allocations usually succeed (hot path)
|
||||
if (__builtin_expect(tiny_ptr != NULL, 1)) { hkm_ace_track_alloc(); return tiny_ptr; }
|
||||
|
||||
// TinyHeap front (C7) は Tiny lane の成功として扱う
|
||||
if (__builtin_expect(size == 1024 && tiny_c7_heap_mode_enabled(), 0)) {
|
||||
void* c7_ptr = tiny_c7_alloc_fast(size);
|
||||
if (c7_ptr) { hkm_ace_track_alloc(); return c7_ptr; }
|
||||
}
|
||||
|
||||
// PHASE 7 CRITICAL FIX: No malloc fallback for Tiny failures
|
||||
// If Tiny fails for size <= tiny_get_max_size(), let it flow to Mid/ACE layers
|
||||
// This prevents mixed HAKMEM/libc allocation bugs
|
||||
@ -222,6 +230,15 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
||||
// LANE_TINY failed - this is a design bug!
|
||||
HAK_LANE_ASSERT_NO_FALLBACK(LANE_FALLBACK, size);
|
||||
static _Atomic int oom_count = 0;
|
||||
const int c7_heap_on = (size == 1024 && tiny_heap_box_enabled());
|
||||
if (__builtin_expect(c7_heap_on, 0)) {
|
||||
if (tiny_c7_hot_enabled()) {
|
||||
void* retry = tiny_c7_alloc_fast(size);
|
||||
if (retry) { hkm_ace_track_alloc(); return retry; }
|
||||
}
|
||||
errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
int count = atomic_fetch_add(&oom_count, 1);
|
||||
if (count < 10) {
|
||||
fprintf(stderr, "[HAKMEM] BUG: Tiny lane failed for size=%zu (should not happen)\n", size);
|
||||
|
||||
56
core/box/tiny_c7_hotbox.h
Normal file
56
core/box/tiny_c7_hotbox.h
Normal file
@ -0,0 +1,56 @@
|
||||
// tiny_c7_hotbox.h - C7 専用 TinyHeap(TinyHeapBox 上の薄ラッパ)
|
||||
// Box 方針:
|
||||
// - C7 (≈1KiB) だけを担当するホットパス入口をここで閉じ込める。
|
||||
// - 実体の管理は core/box/tiny_heap_box.h の汎用 TinyHeapBox に委譲し、下層 Box との
|
||||
// 接続(Superslab/Warm/Tier)は TinyHeapBox の slow 境界に集約する。
|
||||
#pragma once
|
||||
|
||||
#include "tiny_heap_box.h" // 共通 TinyHeap コンテキスト
|
||||
#include "c7_hotpath_env_box.h" // HAKMEM_TINY_C7_HOT gate
|
||||
|
||||
// 旧 C7HotBox の型名互換(TinyHeapBox の型をエイリアス)
|
||||
typedef tiny_heap_page_t tiny_c7_page_t;
|
||||
typedef tiny_heap_ctx_t tiny_c7_heap_t;
|
||||
|
||||
// 旧 MAX 設定の互換マクロ
|
||||
#ifndef TINY_C7_HOTBOX_MAX_PAGES
|
||||
#define TINY_C7_HOTBOX_MAX_PAGES TINY_HEAP_MAX_PAGES_PER_CLASS
|
||||
#endif
|
||||
|
||||
// Returns the calling thread's TinyHeap context under its legacy C7 alias type.
// Pure forwarder to tiny_heap_ctx_for_thread().
static inline tiny_c7_heap_t* tiny_c7_heap_for_thread(void) {
    tiny_c7_heap_t* thread_heap = tiny_heap_ctx_for_thread();
    return thread_heap;
}
|
||||
|
||||
static inline tiny_c7_page_t* tiny_c7_page_of(void* base_ptr) {
|
||||
return tiny_heap_page_of(tiny_c7_heap_for_thread(), 7, base_ptr);
|
||||
}
|
||||
|
||||
static inline tiny_c7_page_t* tiny_c7_heap_attach_page(tiny_c7_heap_t* heap,
|
||||
SuperSlab* ss,
|
||||
int slab_idx) {
|
||||
return tiny_heap_attach_page(heap, 7, ss, slab_idx);
|
||||
}
|
||||
|
||||
// Notifies the generic TinyHeap that a class-7 page has become empty.
// A NULL `heap` is replaced by the calling thread's context.
static inline void tiny_c7_page_becomes_empty(tiny_c7_heap_t* heap, tiny_c7_page_t* page) {
    if (!heap) {
        heap = tiny_c7_heap_for_thread();
    }
    tiny_heap_page_becomes_empty(heap, 7, page);
}
|
||||
|
||||
// Slow-path class-7 allocation through tiny_heap_alloc_slow_from_class().
// A NULL `heap` is replaced by the calling thread's context.
static inline void* tiny_c7_alloc_slow_from_heap(tiny_c7_heap_t* heap) {
    if (!heap) {
        heap = tiny_c7_heap_for_thread();
    }
    return tiny_heap_alloc_slow_from_class(heap, 7);
}
|
||||
|
||||
// C7 alloc ホットパス(size は Gate で 1024 確定済み)
|
||||
__attribute__((always_inline)) static inline void* tiny_c7_alloc_fast(size_t size) {
|
||||
(void)size;
|
||||
return tiny_heap_alloc_class_fast(tiny_c7_heap_for_thread(), 7, size);
|
||||
}
|
||||
|
||||
// Superslab/Slab メタが既に分かっている場合の free(Gate から渡されるホットパス用)
|
||||
static inline void tiny_c7_free_fast_with_meta(SuperSlab* ss, int slab_idx, void* base) {
|
||||
tiny_heap_free_class_fast_with_meta(tiny_c7_heap_for_thread(), 7, ss, slab_idx, base);
|
||||
}
|
||||
|
||||
// C7 free ホットパス(ptr は USER ポインタ)
|
||||
static inline void tiny_c7_free_fast(void* ptr) {
|
||||
tiny_heap_free_class_fast(tiny_c7_heap_for_thread(), 7, ptr);
|
||||
}
|
||||
@ -1,63 +0,0 @@
|
||||
// C7 専用の実験的ホットパス。HAKMEM_TINY_C7_HOT=1 でのみ有効化し、
|
||||
// デフォルト(未設定/0)のときは従来経路に完全フォールバックする。
|
||||
// 本番デフォルトで ON にしない前提の A/B 用スイッチ。
|
||||
#pragma once
|
||||
|
||||
#include "../hakmem_build_flags.h"
|
||||
#include "c7_hotpath_env_box.h"
|
||||
#include "tiny_c7_uc_hit_box.h"
|
||||
#include "tiny_c7_warm_spill_box.h"
|
||||
#include "tiny_c7_stats_sample_box.h"
|
||||
#include "tiny_front_hot_box.h"
|
||||
#include "tiny_front_cold_box.h"
|
||||
#include "front_gate_box.h"
|
||||
#include "tls_sll_box.h"
|
||||
#include "ptr_conversion_box.h"
|
||||
|
||||
// C7 alloc hot path.
// Lookup order:
//   1) TLS/SFC via front_gate_try_pop()
//   2) Unified Cache hit-only pop, tiny_uc_pop_c7_hit_only()
//   3) otherwise the regular cold refill, tiny_cold_refill_and_alloc(7)
static inline void* tiny_c7_alloc_hot(size_t size) {
    (void)size;  // unused: the class index is fixed at 7 on this path

    // 1) Front gate pop; on success the popped pointer is returned as-is.
    void* from_front = NULL;
    if (front_gate_try_pop(/*class_idx=*/7, &from_front)) {
        return from_front;
    }

    // 2) Unified Cache hit; 3) cold refill when the cache misses.
    void* from_uc = tiny_uc_pop_c7_hit_only();
    if (__builtin_expect(from_uc == NULL, 0)) {
        return tiny_cold_refill_and_alloc(7);
    }
    return from_uc;
}
|
||||
|
||||
// C7 free hot path. Takes a BASE pointer and tries, in order:
//   1) TLS SLL push (only when g_tls_sll_enable is set)
//   2) Unified Cache push (lightweight hit-only variant)
//   3) Warm spill hook
//   4) cold free path, whose result is returned directly
// Returns 1 as soon as one of the first three stages accepts the block.
static inline int tiny_c7_free_hot(void* base) {
    extern int g_tls_sll_enable;

    if (__builtin_expect(g_tls_sll_enable, 1) &&
        tls_sll_push(7, HAK_BASE_FROM_RAW(base), UINT32_MAX)) {
        return 1;
    }

    if (tiny_uc_push_c7_hot(base) || tiny_c7_warm_spill_one(base)) {
        return 1;
    }

    return tiny_cold_drain_and_free(7, base);
}
|
||||
@ -1,9 +0,0 @@
|
||||
// tiny_c7_stats_sample_box.h - Lightweight sampling helper for C7 stats
|
||||
// 現状は簡易 1/16 サンプリング。hot path から #if を排除するための小箱。
|
||||
#pragma once
|
||||
|
||||
// Per-thread sampling tick: returns 1 on every 16th call made by the calling
// thread (when the low four bits of the thread-local counter are zero), else 0.
static inline int tiny_c7_stats_sample(void) {
    static __thread unsigned tick = 0;
    tick += 1;
    if ((tick & 0xF) != 0) {
        return 0;
    }
    return 1;  // roughly 1 in 16
}
|
||||
@ -1,58 +0,0 @@
|
||||
// tiny_c7_uc_hit_box.h - C7 専用 Unified Cache hit-only helpers
|
||||
// 契約: ヒット時のみ処理。ミス時は NULL/0 を返し、refill・統計は行わない。
|
||||
#pragma once
|
||||
|
||||
#include "../front/tiny_unified_cache.h"
|
||||
#include "tiny_layout_box.h"
|
||||
|
||||
// C7 UC ヒット専用 pop
|
||||
static inline void* tiny_uc_pop_c7_hit_only(void) {
|
||||
TinyUnifiedCache* cache = &g_unified_cache[7];
|
||||
|
||||
#if !HAKMEM_TINY_FRONT_PGO
|
||||
if (__builtin_expect(cache->slots == NULL, 0)) {
|
||||
unified_cache_init();
|
||||
if (cache->slots == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (__builtin_expect(cache->head == cache->tail, 0)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void* base = cache->slots[cache->head];
|
||||
cache->head = (cache->head + 1) & cache->mask;
|
||||
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
tiny_region_id_write_header(base, 7);
|
||||
size_t user_offset = tiny_user_offset(7);
|
||||
return (void*)((char*)base + user_offset);
|
||||
#else
|
||||
return base;
|
||||
#endif
|
||||
}
|
||||
|
||||
// C7 UC ヒット専用 push
|
||||
static inline int tiny_uc_push_c7_hot(void* base) {
|
||||
TinyUnifiedCache* cache = &g_unified_cache[7];
|
||||
|
||||
#if !HAKMEM_TINY_FRONT_PGO
|
||||
if (__builtin_expect(cache->slots == NULL, 0)) {
|
||||
unified_cache_init();
|
||||
if (cache->slots == NULL) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
uint16_t next_tail = (cache->tail + 1) & cache->mask;
|
||||
if (__builtin_expect(next_tail == cache->head, 0)) {
|
||||
return 0; // full
|
||||
}
|
||||
|
||||
cache->slots[cache->tail] = base;
|
||||
cache->tail = next_tail;
|
||||
return 1;
|
||||
}
|
||||
@ -1,9 +0,0 @@
|
||||
// tiny_c7_warm_spill_box.h - C7 Warm spill hook (placeholder)
|
||||
// Purpose: allow swapping spill実装 without touchingホットパス。
|
||||
#pragma once
|
||||
|
||||
// Placeholder warm-spill hook: currently a no-op that ignores `base` and always
// reports 0 (nothing spilled). Meant to be swapped for a real implementation.
static inline int tiny_c7_warm_spill_one(void* base) {
    const int spilled = 0;
    (void)base;
    return spilled;
}
|
||||
54
core/box/tiny_heap_env_box.h
Normal file
54
core/box/tiny_heap_env_box.h
Normal file
@ -0,0 +1,54 @@
|
||||
// tiny_heap_env_box.h - ENV gate for TinyHeap front (A/B 切り替え)
|
||||
// 役割:
|
||||
// - 新しい TinyHeap front を ON/OFF する環境変数の読み出しをホットパス外に分離。
|
||||
// - デフォルト OFF(環境変数が未設定または 0 のとき)。
|
||||
#pragma once
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "c7_hotpath_env_box.h" // tiny_c7_hot_enabled()
|
||||
|
||||
// ENV: HAKMEM_TINY_HEAP_BOX enables the TinyHeap front.
// The variable is read once on the first call and the answer is cached:
// 1 when it is set, non-empty, and does not start with '0'; otherwise 0.
static inline int tiny_heap_box_enabled(void) {
    static int g_enable = -1;
    if (__builtin_expect(g_enable != -1, 1)) {
        return g_enable;
    }

    const char* env = getenv("HAKMEM_TINY_HEAP_BOX");
    int on = 0;
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        on = 1;
    }
    g_enable = on;
    return on;
}
|
||||
|
||||
// ENV: HAKMEM_TINY_HEAP_CLASSES (bitmask, bit i が 1 のクラスを TinyHeap 経路に載せる)
|
||||
// 例: 0x80 (デフォルト) → C7 のみ / 0xC0 → C6 + C7 / 0xFF → 全クラス
|
||||
static inline int tiny_heap_class_enabled(int class_idx) {
|
||||
static int g_parsed = 0;
|
||||
static unsigned g_mask = 0;
|
||||
|
||||
if (__builtin_expect(!g_parsed, 0)) {
|
||||
g_mask = 0;
|
||||
const char* e = getenv("HAKMEM_TINY_HEAP_CLASSES");
|
||||
if (e && *e) {
|
||||
unsigned v = (unsigned)strtoul(e, NULL, 0);
|
||||
g_mask = v & 0xFFu;
|
||||
} else {
|
||||
// デフォルト: C7 のみ
|
||||
g_mask = 1u << 7;
|
||||
}
|
||||
g_parsed = 1;
|
||||
}
|
||||
|
||||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return 0;
|
||||
return (g_mask & (1u << class_idx)) != 0;
|
||||
}
|
||||
|
||||
// A class uses the TinyHeap route only when the TinyHeap front is enabled and
// its bit is set in the class mask. Class 7 additionally requires
// tiny_c7_hot_enabled(). Checks run in that order and stop at the first failure.
static inline int tiny_heap_class_route_enabled(int class_idx) {
    if (!tiny_heap_box_enabled()) {
        return 0;
    }
    if (class_idx == 7 && !tiny_c7_hot_enabled()) {
        return 0;
    }
    return tiny_heap_class_enabled(class_idx) ? 1 : 0;
}
|
||||
|
||||
// Helper: non-zero when class 7 is routed through TinyHeap, i.e. the result of
// tiny_heap_class_route_enabled(7).
static inline int tiny_c7_heap_mode_enabled(void) {
    const int c7_routed = tiny_heap_class_route_enabled(7);
    return c7_routed;
}
|
||||
@ -40,6 +40,7 @@
|
||||
#include "tiny_ptr_bridge_box.h" // Box: ptr→(ss,slab,meta,class) bridge
|
||||
#include "tiny_next_ptr_box.h"
|
||||
#include "tiny_header_box.h" // Header Box: Single Source of Truth for header operations
|
||||
#include "tiny_heap_env_box.h" // TinyHeap/C7 gate (SLL bypass for C7 TinyHeapBox)
|
||||
|
||||
// ============================================================================
|
||||
// Performance Measurement: TLS SLL Hit Rate (ENV-gated)
|
||||
@ -80,6 +81,41 @@ extern __thread uint64_t g_tls_canary_after_sll;
|
||||
extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
|
||||
extern int g_tls_sll_class_mask; // bit i=1 → SLL allowed for class i
|
||||
|
||||
// Narrow triage: ENV-gated logging for the class-1 push path.
// The release and debug variants were byte-identical, so a single definition
// serves both build modes.
// ENV: HAKMEM_TINY_SLL_LOG_C1 is read once on first call and cached:
// returns 1 when set, non-empty, and not starting with '0'; otherwise 0.
static inline int tiny_sll_log_c1_enabled(void)
{
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_SLL_LOG_C1");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}
|
||||
|
||||
// Optional broad triage switch for push logging.
// ENV: HAKMEM_TINY_SLL_LOG_ANY is read once on first call and cached:
// returns 1 when set, non-empty, and not starting with '0'; otherwise 0.
static inline int tiny_sll_log_any_enabled(void)
{
    static int g = -1;
    if (__builtin_expect(g != -1, 1)) {
        return g;
    }

    const char* env = getenv("HAKMEM_TINY_SLL_LOG_ANY");
    int on = 0;
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        on = 1;
    }
    g = on;
    return on;
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Global callsite record (debug only; zero overhead in release)
|
||||
static const char* g_tls_sll_push_file[TINY_NUM_CLASSES] = {0};
|
||||
@ -581,6 +617,11 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
|
||||
}
|
||||
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
|
||||
|
||||
// C7 TinyHeap front (TinyHeapBox) 有効時は C7 を TLS SLL に積まない。
|
||||
if (__builtin_expect(class_idx == 7 && tiny_c7_heap_mode_enabled(), 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Class mask gate (narrow triage): if disallowed, reject push
|
||||
if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
|
||||
return false;
|
||||
@ -605,6 +646,66 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
|
||||
// Base pointer only (callers must pass BASE; this is a no-op by design).
|
||||
ptr = tls_sll_normalize_base(class_idx, ptr);
|
||||
void* raw_ptr = HAK_BASE_TO_RAW(ptr);
|
||||
uintptr_t ptr_addr = (uintptr_t)raw_ptr;
|
||||
|
||||
// Broad triage logging (ENV: HAKMEM_TINY_SLL_LOG_ANY=1)
|
||||
int log_any = tiny_sll_log_any_enabled();
|
||||
if (log_any && class_idx != 7 && class_idx != 1) {
|
||||
log_any = 0; // Focus triage on C7 (and C1 if enabled) to avoid noise
|
||||
}
|
||||
uint32_t log_any_seq = 0;
|
||||
if (__builtin_expect(log_any, 0)) {
|
||||
static _Atomic uint32_t g_tls_sll_log_any = 0;
|
||||
log_any_seq = atomic_fetch_add_explicit(&g_tls_sll_log_any, 1, memory_order_relaxed);
|
||||
if (log_any_seq < 512) {
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH_ANY] n=%u cls=%d base=%p cap=%u count=%u head=%p mask=0x%08x where=%s\n",
|
||||
log_any_seq,
|
||||
class_idx,
|
||||
raw_ptr,
|
||||
(unsigned)capacity,
|
||||
(unsigned)g_tls_sll[class_idx].count,
|
||||
HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
|
||||
(unsigned)g_tls_sll_class_mask,
|
||||
where ? where : "(null)");
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
// Narrow scoped logging for class1 triage (ENV: HAKMEM_TINY_SLL_LOG_C1=1)
|
||||
int log_c1 = (class_idx == 1 && tiny_sll_log_c1_enabled());
|
||||
uint32_t log_seq = 0;
|
||||
SuperSlab* log_ss = NULL;
|
||||
int log_slab_idx = -1;
|
||||
uint8_t log_meta_cls = 0xff;
|
||||
if (__builtin_expect(log_c1, 0)) {
|
||||
static _Atomic uint32_t g_tls_sll_log_c1 = 0;
|
||||
log_seq = atomic_fetch_add_explicit(&g_tls_sll_log_c1, 1, memory_order_relaxed);
|
||||
if (log_seq < 64) {
|
||||
if (ptr_addr >= 4096) {
|
||||
log_ss = ss_fast_lookup(raw_ptr);
|
||||
if (log_ss && log_ss->magic == SUPERSLAB_MAGIC) {
|
||||
int idx = slab_index_for(log_ss, raw_ptr);
|
||||
if (idx >= 0 && idx < ss_slabs_capacity(log_ss)) {
|
||||
log_slab_idx = idx;
|
||||
log_meta_cls = log_ss->slabs[idx].class_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH_C1] n=%u base=%p cap=%u count=%u mask=0x%08x where=%s ss=%p slab_idx=%d meta_cls=%u\n",
|
||||
log_seq,
|
||||
raw_ptr,
|
||||
(unsigned)capacity,
|
||||
(unsigned)g_tls_sll[class_idx].count,
|
||||
(unsigned)g_tls_sll_class_mask,
|
||||
where ? where : "(null)",
|
||||
(void*)log_ss,
|
||||
log_slab_idx,
|
||||
(unsigned)log_meta_cls);
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
// TWO-SPEED: Full validation with hak_super_lookup is DEBUG-ONLY.
|
||||
// Release builds use ss_fast_lookup (O(1) mask arithmetic) for pinning only.
|
||||
@ -660,6 +761,49 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
|
||||
ss_ptr = ss_fast_lookup(raw_ptr);
|
||||
#endif // !HAKMEM_BUILD_RELEASE
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Minimal range guard before we touch memory.
|
||||
if (!validate_ptr_range(raw_ptr, "tls_sll_push_base")) {
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
|
||||
class_idx, raw_ptr);
|
||||
abort();
|
||||
}
|
||||
#else
|
||||
// Release: drop malformed ptrs but keep running. Place this before header touch.
|
||||
if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
|
||||
extern _Atomic uint64_t g_tls_sll_invalid_push[];
|
||||
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
|
||||
static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
|
||||
if (__builtin_expect(log_c1 && log_seq < 32, 0)) {
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH_C1_DROP] n=%u cls=%d base=%p cap=%u count=%u mask=0x%08x where=%s\n",
|
||||
log_seq,
|
||||
class_idx,
|
||||
raw_ptr,
|
||||
(unsigned)capacity,
|
||||
(unsigned)g_tls_sll[class_idx].count,
|
||||
(unsigned)g_tls_sll_class_mask,
|
||||
where ? where : "(null)");
|
||||
} else if (__builtin_expect(log_any && log_any_seq < 512, 0)) {
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH_DROP] n=%u cls=%d base=%p cap=%u count=%u mask=0x%08x where=%s\n",
|
||||
log_any_seq,
|
||||
class_idx,
|
||||
raw_ptr,
|
||||
(unsigned)capacity,
|
||||
(unsigned)g_tls_sll[class_idx].count,
|
||||
(unsigned)g_tls_sll_class_mask,
|
||||
where ? where : "(null)");
|
||||
} else if (s_log_limit_push[class_idx] < 4) {
|
||||
fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
|
||||
class_idx, raw_ptr, (unsigned long long)cnt + 1);
|
||||
s_log_limit_push[class_idx]++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// Validate header on push - detect blocks pushed without header write
|
||||
// Enabled via HAKMEM_DEBUG_LEVEL >= 3 (INFO level) or in debug builds
|
||||
@ -694,30 +838,6 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
|
||||
} while (0);
|
||||
#endif
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Minimal range guard before we touch memory.
|
||||
if (!validate_ptr_range(raw_ptr, "tls_sll_push_base")) {
|
||||
fprintf(stderr,
|
||||
"[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
|
||||
class_idx, raw_ptr);
|
||||
abort();
|
||||
}
|
||||
#else
|
||||
// Release: drop malformed ptrs but keep running.
|
||||
uintptr_t ptr_addr = (uintptr_t)raw_ptr;
|
||||
if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
|
||||
extern _Atomic uint64_t g_tls_sll_invalid_push[];
|
||||
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
|
||||
static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
|
||||
if (s_log_limit_push[class_idx] < 4) {
|
||||
fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
|
||||
class_idx, raw_ptr, (unsigned long long)cnt + 1);
|
||||
s_log_limit_push[class_idx]++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Capacity check BEFORE any writes.
|
||||
uint32_t cur = g_tls_sll[class_idx].count;
|
||||
if (!unlimited && cur >= capacity) {
|
||||
|
||||
@ -36,7 +36,9 @@
|
||||
#include "../hakmem_tiny.h" // For hak_tiny_size_to_class
|
||||
#include "../box/tiny_front_hot_box.h" // Phase 4-Step2: Hot Path Box
|
||||
#include "../box/tiny_front_cold_box.h" // Phase 4-Step2: Cold Path Box
|
||||
#include "../box/tiny_c7_hotpath_box.h" // Optional: C7 専用ホットパス
|
||||
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
|
||||
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
|
||||
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
|
||||
|
||||
// Helper: current thread id (low 32 bits) for owner check
|
||||
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
||||
@ -99,9 +101,16 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
// 1. size → class_idx (inline table lookup, 1-2 instructions)
|
||||
int class_idx = hak_tiny_size_to_class(size);
|
||||
|
||||
// Optional: C7 専用ホットパス(環境変数 HAKMEM_TINY_C7_HOT でON)
|
||||
if (__builtin_expect(class_idx == 7 && tiny_c7_hot_enabled(), 0)) {
|
||||
return tiny_c7_alloc_hot(size);
|
||||
// Optional: TinyHeap front(ENV: HAKMEM_TINY_HEAP_BOX=1 + HAKMEM_TINY_HEAP_CLASSES bitmask)
|
||||
const int use_tiny_heap = (class_idx == 7)
|
||||
? tiny_c7_heap_mode_enabled()
|
||||
: tiny_heap_class_route_enabled(class_idx);
|
||||
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||
if (class_idx == 7 && size == 1024) {
|
||||
return tiny_c7_alloc_fast(size);
|
||||
}
|
||||
return tiny_heap_alloc_class_fast(ctx, class_idx, size);
|
||||
}
|
||||
|
||||
// 2. Phase 4-Step2: Hot/Cold Path Box
|
||||
@ -171,7 +180,7 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
}
|
||||
#endif // !HAKMEM_BUILD_RELEASE
|
||||
|
||||
// Cross-thread free detection (Larson MT crash fix, ENV gated)
|
||||
// Cross-thread free detection (Larson MT crash fix, ENV gated) + TinyHeap free path
|
||||
{
|
||||
static __thread int g_larson_fix = -1;
|
||||
if (__builtin_expect(g_larson_fix == -1, 0)) {
|
||||
@ -183,7 +192,10 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
if (__builtin_expect(g_larson_fix, 0)) {
|
||||
const int use_tiny_heap = (class_idx == 7)
|
||||
? tiny_c7_heap_mode_enabled()
|
||||
: tiny_heap_class_route_enabled(class_idx);
|
||||
if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
|
||||
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
||||
SuperSlab* ss = ss_fast_lookup(base);
|
||||
if (ss) {
|
||||
@ -219,9 +231,22 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
return 1; // handled via remote queue
|
||||
}
|
||||
return 0; // remote push failed; fall back to normal path
|
||||
} else if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||
if (class_idx == 7) {
|
||||
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
|
||||
} else {
|
||||
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (use_tiny_heap) {
|
||||
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
|
||||
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,14 +266,6 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Optional: C7 専用ホットパス(キャッシュのみで完了させる)
|
||||
if (__builtin_expect(class_idx == 7 && tiny_c7_hot_enabled(), 0)) {
|
||||
if (tiny_c7_free_hot(base)) {
|
||||
return 1;
|
||||
}
|
||||
// fallthrough to unified cache push on failure
|
||||
}
|
||||
|
||||
int pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base));
|
||||
if (__builtin_expect(pushed, 1)) {
|
||||
return 1; // Success
|
||||
|
||||
@ -26,6 +26,8 @@
|
||||
#include "tiny_tls_guard.h"
|
||||
#include "tiny_ready.h"
|
||||
#include "box/c7_meta_used_counter_box.h"
|
||||
#include "box/tiny_c7_hotbox.h"
|
||||
#include "box/tiny_heap_box.h"
|
||||
#include "box/super_reg_box.h"
|
||||
#include "hakmem_tiny_tls_list.h"
|
||||
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
|
||||
@ -45,6 +47,50 @@ extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c
|
||||
// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
|
||||
// ============================================================================
|
||||
__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
|
||||
__thread tiny_heap_ctx_t g_tiny_heap_ctx;
|
||||
__thread int g_tiny_heap_ctx_init = 0;
|
||||
TinyC7HeapStats g_c7_heap_stats = {0};
|
||||
|
||||
// ENV gate for the exit-time C7 heap stats dump.
// HAKMEM_TINY_C7_HEAP_STATS_DUMP is read once on first call and cached:
// returns 1 when set, non-empty, and not starting with '0'; otherwise 0.
static int tiny_c7_heap_stats_dump_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g != -1, 1)) {
        return g;
    }

    const char* env = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");
    int on = 0;
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        on = 1;
    }
    g = on;
    return on;
}
|
||||
|
||||
__attribute__((destructor))
|
||||
static void tiny_c7_heap_stats_dump(void) {
|
||||
if (!tiny_c7_heap_stats_enabled() || !tiny_c7_heap_stats_dump_enabled()) {
|
||||
return;
|
||||
}
|
||||
TinyC7HeapStats snap = {
|
||||
.alloc_fast_current = atomic_load_explicit(&g_c7_heap_stats.alloc_fast_current, memory_order_relaxed),
|
||||
.alloc_slow_prepare = atomic_load_explicit(&g_c7_heap_stats.alloc_slow_prepare, memory_order_relaxed),
|
||||
.free_fast_local = atomic_load_explicit(&g_c7_heap_stats.free_fast_local, memory_order_relaxed),
|
||||
.free_slow_fallback = atomic_load_explicit(&g_c7_heap_stats.free_slow_fallback, memory_order_relaxed),
|
||||
.alloc_prepare_fail = atomic_load_explicit(&g_c7_heap_stats.alloc_prepare_fail, memory_order_relaxed),
|
||||
.alloc_fail = atomic_load_explicit(&g_c7_heap_stats.alloc_fail, memory_order_relaxed),
|
||||
};
|
||||
fprintf(stderr,
|
||||
"[C7_HEAP_STATS] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
|
||||
(unsigned long long)snap.alloc_fast_current,
|
||||
(unsigned long long)snap.alloc_slow_prepare,
|
||||
(unsigned long long)snap.free_fast_local,
|
||||
(unsigned long long)snap.free_slow_fallback,
|
||||
(unsigned long long)snap.alloc_prepare_fail,
|
||||
(unsigned long long)snap.alloc_fail);
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
// Destructor hook: calls tiny_c7_heap_debug_dump_deltas() only when both
// tiny_c7_meta_light_enabled() and tiny_c7_delta_debug_enabled() are on.
__attribute__((destructor))
static void tiny_c7_delta_debug_destructor(void) {
    if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) {
        tiny_c7_heap_debug_dump_deltas();
    }
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Helper to dump last push from core/hakmem.c (SEGV handler)
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
#include "box/c7_meta_used_counter_box.h"
|
||||
#include "box/tiny_header_box.h" // Header Box: Single Source of Truth for header operations
|
||||
#include "box/tiny_front_config_box.h" // Phase 7-Step6-Fix: Config macros for dead code elimination
|
||||
#include "box/tiny_heap_env_box.h" // TinyHeap front gate (C7 TinyHeapBox)
|
||||
#include "hakmem_tiny_integrity.h"
|
||||
#include "box/tiny_next_ptr_box.h"
|
||||
#include "tiny_region_id.h" // For HEADER_MAGIC/HEADER_CLASS_MASK (prepare header before SLL push)
|
||||
@ -305,6 +306,11 @@ int sll_refill_small_from_ss(int class_idx, int max_take)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TinyHeap front で扱うクラスは TLS SLL を使わない(TinyHeapBox 内で完結)。
|
||||
if (tiny_heap_class_route_enabled(class_idx)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
|
||||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#include "superslab/superslab_inline.h" // For _ss_remote_drain_to_freelist_unsafe()
|
||||
#include "box/integrity_box.h" // Box I: Integrity verification (Priority ALPHA)
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
|
||||
#include "box/tiny_heap_env_box.h" // TinyHeap front gate (C7 TinyHeapBox)
|
||||
|
||||
// Debug counters (compile-time gated)
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
@ -44,6 +45,11 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TinyHeap front に載せるクラスは TLS SLL 経由で補充しない。
|
||||
if (tiny_heap_class_route_enabled(class_idx)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!g_use_superslab || max_take <= 0) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
if (!g_use_superslab) g_rf_early_no_ss[class_idx]++;
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
// hakmem_tiny_slow.inc
|
||||
// Slow path allocation implementation
|
||||
|
||||
#include "box/tiny_heap_env_box.h" // tiny_c7_heap_mode_enabled()
|
||||
#include "box/tiny_c7_hotbox.h" // tiny_c7_alloc_fast (TinyHeapBox 経由)
|
||||
|
||||
// Slow path allocation function
|
||||
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
@ -14,6 +17,11 @@ static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, in
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// C7 TinyHeap front ON のときは旧 slow path を通さず TinyHeapBox に委譲する。
|
||||
if (__builtin_expect(class_idx == 7 && tiny_c7_heap_mode_enabled(), 0)) {
|
||||
return tiny_c7_alloc_fast(size);
|
||||
}
|
||||
|
||||
// Try refilling from HotMag
|
||||
if (g_hotmag_enable && class_idx <= 3) {
|
||||
TinyHotMag* hm = &g_tls_hot_mag[class_idx];
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
// Global atomic counters for TLS SLL performance measurement
|
||||
// ENV: HAKMEM_MEASURE_UNIFIED_CACHE=1 to enable (default: OFF)
|
||||
#include <stdatomic.h>
|
||||
#include "box/tiny_heap_env_box.h" // TinyHeap/C7 gate for TLS SLL skips
|
||||
_Atomic uint64_t g_tls_sll_push_count_global = 0;
|
||||
_Atomic uint64_t g_tls_sll_pop_count_global = 0;
|
||||
_Atomic uint64_t g_tls_sll_pop_empty_count_global = 0;
|
||||
@ -180,6 +181,11 @@ void hak_tiny_prewarm_tls_cache(void) {
|
||||
// This reduces the first-allocation miss penalty by populating TLS cache
|
||||
// Phase E1-CORRECT: all classes (including C7) use TLS SLL, except classes routed to the TinyHeap front (skipped below)
|
||||
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
|
||||
// TinyHeap front ON では対象クラスを TLS SLL へ積まず、TinyHeapBox 側に任せる。
|
||||
if (tiny_heap_class_route_enabled(class_idx)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class
|
||||
|
||||
// Trigger refill to populate TLS cache
|
||||
|
||||
@ -38,6 +38,7 @@ core/tiny_alloc_fast_push.o: core/tiny_alloc_fast_push.c \
|
||||
core/box/../hakmem_internal.h core/box/../tiny_region_id.h \
|
||||
core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/tiny_heap_env_box.h core/box/c7_hotpath_env_box.h \
|
||||
core/box/front_gate_box.h core/hakmem_tiny.h
|
||||
core/hakmem_tiny_config.h:
|
||||
core/box/tls_sll_box.h:
|
||||
@ -105,5 +106,7 @@ core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_tiny_integrity.h:
|
||||
core/box/../box/slab_freelist_atomic.h:
|
||||
core/box/tiny_header_box.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/box/front_gate_box.h:
|
||||
core/hakmem_tiny.h:
|
||||
|
||||
@ -99,6 +99,12 @@ Phase 6: delta 閾値 flush + attach clamp(bench)
|
||||
- 100k: `[C7_DELTA_SUMMARY] nonzero_pages=0 used_delta_sum=0 active_delta_sum=0`
|
||||
- 200k: 同上 (delta 0) → delta が無制限に積もらないことを確認。
|
||||
|
||||
Phase 7: クラス選択式 TinyHeap(C6/C5 拡張のためのゲート)
|
||||
------------------------------------------------------
|
||||
- ENV `HAKMEM_TINY_HEAP_CLASSES` を追加(bitmask、デフォルト 0x80=C7 のみ)。`tiny_heap_class_route_enabled(cls)` で TinyHeap front を使うクラスを判定し、C6/C5 も段階的に TinyHeap へ載せ替え可能にした。
|
||||
- Gate: `malloc_tiny_fast` / `free_tiny_fast` がクラスごとに TinyHeap 経路を選択。C7 は `tiny_c7_heap_mode_enabled()`(`HAKMEM_TINY_C7_HOT` 連動)を維持しつつ、他クラスは `tiny_heap_alloc_class_fast()` / `tiny_heap_free_class_fast()` を使う薄ラッパで扱う。
|
||||
- TLS SLL 側もクラス単位で分離し、`sll_refill_small_from_ss` / `sll_refill_batch_from_ss` / `hak_tiny_prewarm_tls_cache` が TinyHeap クラスを早期 return/skip。C7 は「TinyHeapBox ↔ Superslab/Tier/Guard」だけを踏む二層構造のまま。
|
||||
|
||||
TinyHeapBox への載せ替え(Phase 1.0 構造)
|
||||
------------------------------------------
|
||||
- C7HotBox の実体を `core/box/tiny_heap_box.h` の汎用 TinyHeapBox 上に配置し、型は `tiny_heap_ctx_t` / `tiny_heap_page_t` へ統一。
|
||||
|
||||
@ -93,6 +93,13 @@ Phase 6: delta 閾値 flush + attach 時 clamp(bench)
|
||||
- TinyHeap HEAP_BOX=1 HOT=1 LARSON_FIX=1 META_LIGHT=0: ≈43.1M ops/s
|
||||
- TinyHeap META_LIGHT=1 (閾値 flush/clamp): ≈42.6M ops/s、delta debug なしでも off と同等レンジに戻った。
|
||||
|
||||
Phase 7: クラス選択式 TinyHeap(C6 拡張の足場)
|
||||
------------------------------------------------
|
||||
- ENV `HAKMEM_TINY_HEAP_CLASSES` を追加(bitmask, デフォルト 0x80=C7 のみ)。`tiny_heap_class_route_enabled(cls)` で TinyHeap front を使うクラスを判定し、C6/C5 を段階的に載せ替える A/B を可能にした。
|
||||
- Front gate: `malloc_tiny_fast` / `free_tiny_fast` がクラスごとに TinyHeap 経路を選択。C7 は従来通り `tiny_c7_heap_mode_enabled()`(`HAKMEM_TINY_C7_HOT` 連動)でガードし、C6 などは `tiny_heap_alloc_class_fast()` / `tiny_heap_free_class_fast()` に直行。
|
||||
- TLS SLL との切り離しをクラス単位に拡張: `sll_refill_small_from_ss` / `sll_refill_batch_from_ss` / `hak_tiny_prewarm_tls_cache` は `tiny_heap_class_route_enabled(cls)` なら即 return/skip。TinyHeap クラスは Superslab↔TinyHeapBox のみを通る。
|
||||
- 例: `HAKMEM_TINY_HEAP_CLASSES=0x40` で C6 だけ TinyHeap、`0xC0` で C6+C7 TinyHeap。今後のベンチで C6-only / mixed ワークロードの hit 率と slow_prepare 割合を確認する。
|
||||
|
||||
今後の拡張ステップ
|
||||
------------------
|
||||
- C5〜C6 を TinyHeapBox に移す際は `tiny_heap_alloc_class_fast()` を流用し、Box 境界 (ページ補給/返却) の 1 箇所化を維持する。
|
||||
|
||||
59
hakmem.d
59
hakmem.d
@ -29,18 +29,22 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
|
||||
core/box/hak_core_init.inc.h core/hakmem_phase7_config.h \
|
||||
core/box/ss_hot_prewarm_box.h core/box/hak_alloc_api.inc.h \
|
||||
core/box/../hakmem_tiny.h core/box/../hakmem_pool.h \
|
||||
core/box/../hakmem_smallmid.h core/box/mid_large_config_box.h \
|
||||
core/box/../hakmem_config.h core/box/../hakmem_features.h \
|
||||
core/box/hak_free_api.inc.h core/hakmem_tiny_superslab.h \
|
||||
core/box/../hakmem_trace_master.h core/box/front_gate_v2.h \
|
||||
core/box/external_guard_box.h core/box/../hakmem_stats_master.h \
|
||||
core/box/ss_slab_meta_box.h core/box/../superslab/superslab_types.h \
|
||||
core/box/slab_freelist_atomic.h core/box/fg_tiny_gate_box.h \
|
||||
core/box/tiny_free_gate_box.h core/box/ptr_type_box.h \
|
||||
core/box/ptr_conversion_box.h core/box/tiny_ptr_bridge_box.h \
|
||||
core/box/../hakmem_smallmid.h core/box/tiny_heap_env_box.h \
|
||||
core/box/c7_hotpath_env_box.h core/box/tiny_c7_hotbox.h \
|
||||
core/box/tiny_heap_box.h core/box/../hakmem_tiny_superslab.h \
|
||||
core/box/../superslab/superslab_inline.h core/box/../tiny_tls.h \
|
||||
core/box/../hakmem_tiny_superslab.h core/box/../tiny_box_geometry.h \
|
||||
core/box/mid_large_config_box.h core/box/../hakmem_config.h \
|
||||
core/box/../hakmem_features.h core/box/hak_free_api.inc.h \
|
||||
core/hakmem_tiny_superslab.h core/box/../hakmem_trace_master.h \
|
||||
core/box/front_gate_v2.h core/box/external_guard_box.h \
|
||||
core/box/../hakmem_stats_master.h core/box/ss_slab_meta_box.h \
|
||||
core/box/../superslab/superslab_types.h core/box/slab_freelist_atomic.h \
|
||||
core/box/fg_tiny_gate_box.h core/box/tiny_free_gate_box.h \
|
||||
core/box/ptr_type_box.h core/box/ptr_conversion_box.h \
|
||||
core/box/tiny_ptr_bridge_box.h \
|
||||
core/box/../hakmem_tiny_superslab_internal.h \
|
||||
core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_superslab.h \
|
||||
core/box/../box/ss_hot_cold_box.h \
|
||||
core/box/../hakmem_build_flags.h core/box/../box/ss_hot_cold_box.h \
|
||||
core/box/../box/../superslab/superslab_types.h \
|
||||
core/box/../box/ss_allocation_box.h core/box/../hakmem_debug_master.h \
|
||||
core/box/../hakmem_tiny.h core/box/../hakmem_tiny_config.h \
|
||||
@ -87,19 +91,9 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
|
||||
core/box/../front/../box/../front/tiny_unified_cache.h \
|
||||
core/box/../front/../box/tiny_layout_box.h \
|
||||
core/box/../front/../box/tiny_front_cold_box.h \
|
||||
core/box/../front/../box/tiny_c7_hotpath_box.h \
|
||||
core/box/../front/../box/c7_hotpath_env_box.h \
|
||||
core/box/../front/../box/tiny_c7_uc_hit_box.h \
|
||||
core/box/../front/../box/tiny_c7_warm_spill_box.h \
|
||||
core/box/../front/../box/tiny_c7_stats_sample_box.h \
|
||||
core/box/../front/../box/tiny_front_hot_box.h \
|
||||
core/box/../front/../box/tiny_front_cold_box.h \
|
||||
core/box/../front/../box/front_gate_box.h \
|
||||
core/box/../front/../box/tls_sll_box.h \
|
||||
core/box/../front/../box/ptr_conversion_box.h \
|
||||
core/box/tiny_alloc_gate_box.h core/box/tiny_route_box.h \
|
||||
core/box/tiny_front_config_box.h core/box/wrapper_env_box.h \
|
||||
core/box/../hakmem_internal.h core/box/../superslab/superslab_inline.h
|
||||
core/box/../hakmem_internal.h
|
||||
core/hakmem.h:
|
||||
core/hakmem_build_flags.h:
|
||||
core/hakmem_config.h:
|
||||
@ -171,6 +165,15 @@ core/box/hak_alloc_api.inc.h:
|
||||
core/box/../hakmem_tiny.h:
|
||||
core/box/../hakmem_pool.h:
|
||||
core/box/../hakmem_smallmid.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/box/tiny_c7_hotbox.h:
|
||||
core/box/tiny_heap_box.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/../superslab/superslab_inline.h:
|
||||
core/box/../tiny_tls.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/../tiny_box_geometry.h:
|
||||
core/box/mid_large_config_box.h:
|
||||
core/box/../hakmem_config.h:
|
||||
core/box/../hakmem_features.h:
|
||||
@ -190,7 +193,6 @@ core/box/ptr_conversion_box.h:
|
||||
core/box/tiny_ptr_bridge_box.h:
|
||||
core/box/../hakmem_tiny_superslab_internal.h:
|
||||
core/box/../hakmem_build_flags.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/../box/ss_hot_cold_box.h:
|
||||
core/box/../box/../superslab/superslab_types.h:
|
||||
core/box/../box/ss_allocation_box.h:
|
||||
@ -256,19 +258,8 @@ core/box/../front/../box/../tiny_region_id.h:
|
||||
core/box/../front/../box/../front/tiny_unified_cache.h:
|
||||
core/box/../front/../box/tiny_layout_box.h:
|
||||
core/box/../front/../box/tiny_front_cold_box.h:
|
||||
core/box/../front/../box/tiny_c7_hotpath_box.h:
|
||||
core/box/../front/../box/c7_hotpath_env_box.h:
|
||||
core/box/../front/../box/tiny_c7_uc_hit_box.h:
|
||||
core/box/../front/../box/tiny_c7_warm_spill_box.h:
|
||||
core/box/../front/../box/tiny_c7_stats_sample_box.h:
|
||||
core/box/../front/../box/tiny_front_hot_box.h:
|
||||
core/box/../front/../box/tiny_front_cold_box.h:
|
||||
core/box/../front/../box/front_gate_box.h:
|
||||
core/box/../front/../box/tls_sll_box.h:
|
||||
core/box/../front/../box/ptr_conversion_box.h:
|
||||
core/box/tiny_alloc_gate_box.h:
|
||||
core/box/tiny_route_box.h:
|
||||
core/box/tiny_front_config_box.h:
|
||||
core/box/wrapper_env_box.h:
|
||||
core/box/../hakmem_internal.h:
|
||||
core/box/../superslab/superslab_inline.h:
|
||||
|
||||
@ -37,6 +37,7 @@ hakmem_shared_pool.o: core/hakmem_shared_pool.c \
|
||||
core/box/../hakmem_internal.h core/box/../tiny_region_id.h \
|
||||
core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/tiny_heap_env_box.h core/box/c7_hotpath_env_box.h \
|
||||
core/box/slab_recycling_box.h core/box/../hakmem_tiny_superslab.h \
|
||||
core/box/ss_hot_cold_box.h core/box/ss_release_guard_box.h \
|
||||
core/box/free_local_box.h core/box/ptr_type_box.h \
|
||||
@ -119,6 +120,8 @@ core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_tiny_integrity.h:
|
||||
core/box/../box/slab_freelist_atomic.h:
|
||||
core/box/tiny_header_box.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/box/slab_recycling_box.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/ss_hot_cold_box.h:
|
||||
|
||||
@ -33,6 +33,7 @@ hakmem_tiny_sfc.o: core/hakmem_tiny_sfc.c core/tiny_alloc_fast_sfc.inc.h \
|
||||
core/box/../hakmem_internal.h core/box/../tiny_region_id.h \
|
||||
core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/tiny_heap_env_box.h core/box/c7_hotpath_env_box.h \
|
||||
core/hakmem_env_cache.h
|
||||
core/tiny_alloc_fast_sfc.inc.h:
|
||||
core/hakmem_tiny.h:
|
||||
@ -105,4 +106,6 @@ core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_tiny_integrity.h:
|
||||
core/box/../box/slab_freelist_atomic.h:
|
||||
core/box/tiny_header_box.h:
|
||||
core/box/tiny_heap_env_box.h:
|
||||
core/box/c7_hotpath_env_box.h:
|
||||
core/hakmem_env_cache.h:
|
||||
|
||||
Reference in New Issue
Block a user