Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified TinyTLSSLL struct to improve L1D cache locality.

Expected performance gain: +12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build: ✅ PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
204 lines
6.5 KiB
C
204 lines
6.5 KiB
C
#include "tiny_ultra_heap.h"
|
||
|
||
#if HAKMEM_TINY_ULTRA_HEAP
|
||
|
||
// The TinyTLS slab array is the existing Tiny layer's "page/local slab view".
// UltraHeap only looks at it through a Box for now (behavior is not changed yet).
|
||
// Thread-local array with TINY_NUM_CLASSES entries, indexed by class in
// tiny_ultra_heap_init(); only declared here, the definition lives outside this file.
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
|
||
|
||
// Unified front removed (A/B test: OFF is faster)
|
||
// #include "../front/tiny_unified_cache.h"
|
||
#include "../tiny_region_id.h"
|
||
#include "../hakmem_tiny_unified_stats.h"
|
||
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
|
||
// Per-thread UltraHeap state. Starts zero-initialized; tiny_ultra_heap_init()
// fills it in the first time it runs on a thread.
__thread TinyUltraHeap g_tiny_ultra_heap = {0};
|
||
|
||
// UltraHeap L0 cache switch (ENV: HAKMEM_TINY_ULTRA_L0).
//
// Returns 1 when the L0 cache is enabled, 0 otherwise. The environment is
// read on the first call only and the answer is cached in a function-local
// static. Default is disabled: an unset or empty variable, or a value whose
// first character is '0', yields 0; any other value yields 1.
static inline int tiny_ultra_l0_enabled(void)
{
    static int cached_flag = -1;  // -1 means "environment not consulted yet"

    // Hot path: the answer has already been computed.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char *value = getenv("HAKMEM_TINY_ULTRA_L0");
    if (value == NULL || value[0] == '\0' || value[0] == '0') {
        cached_flag = 0;
    } else {
        cached_flag = 1;
    }
    return cached_flag;
}
|
||
|
||
// L0 から 1 ブロック取得(BASE を返す)
|
||
static inline void*
|
||
tiny_ultra_heap_l0_pop(TinyUltraHeap* heap, int class_idx)
|
||
{
|
||
if (!tiny_ultra_l0_enabled()) {
|
||
return NULL;
|
||
}
|
||
TinyUltraL0* l0 = &heap->l0[class_idx];
|
||
if (l0->count == 0) {
|
||
return NULL;
|
||
}
|
||
return l0->slots[--l0->count];
|
||
}
|
||
|
||
// L0 を Unified Cache から補充(BASE を複数取り出して slots[] に積む)
|
||
// DELETED (A/B test: Unified Cache OFF is faster)
|
||
static inline void
|
||
tiny_ultra_heap_l0_refill_from_unified(TinyUltraHeap* heap, int class_idx)
|
||
{
|
||
// Unified Cache removed - no refill possible
|
||
(void)heap;
|
||
(void)class_idx;
|
||
return;
|
||
}
|
||
|
||
// Box UH-1: size → class の境界を 1 箇所に集約
|
||
static inline int
|
||
tiny_ultra_heap_class_for_size(size_t size)
|
||
{
|
||
if (__builtin_expect(size == 0 || size > tiny_get_max_size(), 0)) {
|
||
return -1;
|
||
}
|
||
|
||
int class_idx = hak_tiny_size_to_class(size);
|
||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||
return -1;
|
||
}
|
||
|
||
return class_idx;
|
||
}
|
||
|
||
// Box UH-2: Unified front 統合の境界
|
||
// - hit/miss 判定と統計更新、header 書き込みまでを 1 箇所に閉じ込める。
|
||
// DELETED (A/B test: Unified Cache OFF is faster)
|
||
static inline void*
|
||
tiny_ultra_heap_try_unified(TinyUltraHeap* heap, int class_idx)
|
||
{
|
||
// Unified Cache removed - always return NULL
|
||
(void)heap;
|
||
(void)class_idx;
|
||
return NULL;
|
||
}
|
||
|
||
void tiny_ultra_heap_init(void)
|
||
{
|
||
if (__builtin_expect(g_tiny_ultra_heap.initialized, 1)) {
|
||
return;
|
||
}
|
||
|
||
// Box 1: TinyUltraHeap 自体の init
|
||
g_tiny_ultra_heap.initialized = 1;
|
||
|
||
// Box 2: PageLocal ビューの初期化(g_tls_slabs を alias するだけ)
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
g_tiny_ultra_heap.page[cls].tls = &g_tls_slabs[cls];
|
||
g_tiny_ultra_heap.page[cls].cls = (uint8_t)cls;
|
||
g_tiny_ultra_heap.alloc_unified_hit[cls] = 0;
|
||
g_tiny_ultra_heap.alloc_unified_refill[cls] = 0;
|
||
g_tiny_ultra_heap.alloc_fallback_ultrafront[cls] = 0;
|
||
}
|
||
}
|
||
|
||
void* tiny_ultra_heap_alloc(size_t size)
|
||
{
|
||
tiny_ultra_heap_init();
|
||
|
||
// Box UH-1: size→class 変換
|
||
int class_idx = tiny_ultra_heap_class_for_size(size);
|
||
if (__builtin_expect(class_idx < 0, 0)) {
|
||
// UltraHeap は Tiny 範囲のみ担当。範囲外は NULL で Fail-Fast。
|
||
return NULL;
|
||
}
|
||
|
||
TinyUltraHeap* heap = &g_tiny_ultra_heap;
|
||
|
||
// UltraHeap L0 (実験用): ホットクラス (例: C2/C3) だけを対象に、
|
||
// Unified Cache に到達する前にローカル L0 からの供給を試す。
|
||
if (tiny_ultra_l0_enabled() && (class_idx == 2 || class_idx == 3)) {
|
||
void* base = tiny_ultra_heap_l0_pop(heap, class_idx);
|
||
if (!base) {
|
||
tiny_ultra_heap_l0_refill_from_unified(heap, class_idx);
|
||
base = tiny_ultra_heap_l0_pop(heap, class_idx);
|
||
}
|
||
if (base) {
|
||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||
return tiny_region_id_write_header(base, class_idx);
|
||
#else
|
||
return base;
|
||
#endif
|
||
}
|
||
}
|
||
|
||
// Unified Cache removed (A/B test: OFF is faster)
|
||
// Always use UltraFront fallback
|
||
void* fallback = tiny_ultrafront_malloc(size);
|
||
if (fallback) {
|
||
heap->alloc_fallback_ultrafront[class_idx]++;
|
||
}
|
||
return fallback;
|
||
}
|
||
|
||
// Free `ptr` through the UltraHeap front.
//
// Ensures the thread's heap is initialized, then hands the pointer to
// tiny_ultrafront_free() and returns its result unchanged.
int tiny_ultra_heap_free(void* ptr)
{
    tiny_ultra_heap_init();

    // Free is currently delegated entirely to the UltraFront free
    // (Unified push). PageLocal freelist integration and page return are
    // meant to be added here in the future.
    const int rc = tiny_ultrafront_free(ptr);
    return rc;
}
|
||
|
||
void tiny_ultra_heap_stats_snapshot(uint64_t hit[TINY_NUM_CLASSES],
|
||
uint64_t refill[TINY_NUM_CLASSES],
|
||
uint64_t fallback[TINY_NUM_CLASSES],
|
||
int reset)
|
||
{
|
||
tiny_ultra_heap_init();
|
||
if (!hit || !refill || !fallback) {
|
||
return;
|
||
}
|
||
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
hit[cls] = g_tiny_ultra_heap.alloc_unified_hit[cls];
|
||
refill[cls] = g_tiny_ultra_heap.alloc_unified_refill[cls];
|
||
fallback[cls] = g_tiny_ultra_heap.alloc_fallback_ultrafront[cls];
|
||
}
|
||
|
||
if (reset) {
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
g_tiny_ultra_heap.alloc_unified_hit[cls] = 0;
|
||
g_tiny_ultra_heap.alloc_unified_refill[cls] = 0;
|
||
g_tiny_ultra_heap.alloc_fallback_ultrafront[cls] = 0;
|
||
}
|
||
}
|
||
}
|
||
|
||
// オプション: プロセス終了時に UltraHeap front 統計を 1 回だけダンプ(ENV で制御)
|
||
// ENV: HAKMEM_TINY_ULTRA_HEAP_DUMP=1 で有効化(デフォルト: 無効)
|
||
static void tiny_ultra_heap_dump_stats(void) __attribute__((destructor));
|
||
static void tiny_ultra_heap_dump_stats(void)
|
||
{
|
||
const char* dump = getenv("HAKMEM_TINY_ULTRA_HEAP_DUMP");
|
||
if (!dump || !*dump || *dump == '0') {
|
||
return;
|
||
}
|
||
|
||
uint64_t hit[TINY_NUM_CLASSES] = {0};
|
||
uint64_t refill[TINY_NUM_CLASSES] = {0};
|
||
uint64_t fallback[TINY_NUM_CLASSES] = {0};
|
||
|
||
tiny_ultra_heap_stats_snapshot(hit, refill, fallback, 0);
|
||
|
||
fprintf(stderr, "[ULTRA_HEAP_STATS] class hit refill fallback\n");
|
||
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
|
||
if (hit[c] || refill[c] || fallback[c]) {
|
||
fprintf(stderr, " C%d: %llu %llu %llu\n",
|
||
c,
|
||
(unsigned long long)hit[c],
|
||
(unsigned long long)refill[c],
|
||
(unsigned long long)fallback[c]);
|
||
}
|
||
}
|
||
}
|
||
|
||
#endif // HAKMEM_TINY_ULTRA_HEAP
|