Phase 21-1-B: Ring cache Alloc/Free 統合 - C2/C3 hot path integration

**統合内容**:
- Alloc path (tiny_alloc_fast.inc.h): Ring pop → HeapV2/UltraHot/SLL fallback
- Free path (tiny_free_fast_v2.inc.h): Ring push → HeapV2/SLL fallback
- Lazy init: 最初の alloc/free 時にスレッドごとに 1 回だけ自動初期化(Ring は TLS キャッシュのため thread-safe)

**設計**:
- Lazy init パターン(ENV control と同様)
- ring_cache_pop/push 内で slots == NULL チェック → ring_cache_init() 呼び出し
- Include 構造: ファイルトップレベルに #include 追加(関数内 include 禁止)

**Makefile 修正**:
- TINY_BENCH_OBJS_BASE に core/front/tiny_ring_cache.o 追加
- Link エラー修正: 4箇所の object list に追加

**動作確認**:
- Ring OFF (default): 83K ops/s (1K iterations)
- Ring ON (HAKMEM_TINY_HOT_RING_ENABLE=1): 78K ops/s(Ring OFF 比で約 -6%)
- クラッシュなし、正常動作確認

**次のステップ**: Phase 21-1-C (Refill/Cascade 実装)
This commit is contained in:
Moe Charm (CI)
2025-11-16 07:51:37 +09:00
parent db9c06211e
commit fdbdcdcdb3
11 changed files with 99 additions and 15 deletions

View File

@ -399,7 +399,7 @@ test-box-refactor: box-refactor
./larson_hakmem 10 8 128 1024 1 12345 4 ./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem) # Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/front/tiny_ring_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o 
tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1) ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o

View File

@ -0,0 +1,4 @@
core/box/front_metrics_box.o: core/box/front_metrics_box.c \
core/box/front_metrics_box.h core/box/../hakmem_tiny_stats_api.h
core/box/front_metrics_box.h:
core/box/../hakmem_tiny_stats_api.h:

View File

@ -0,0 +1,12 @@
core/box/ss_hot_prewarm_box.o: core/box/ss_hot_prewarm_box.c \
core/box/../hakmem_tiny.h core/box/../hakmem_build_flags.h \
core/box/../hakmem_trace.h core/box/../hakmem_tiny_mini_mag.h \
core/box/../hakmem_tiny_config.h core/box/ss_hot_prewarm_box.h \
core/box/prewarm_box.h
core/box/../hakmem_tiny.h:
core/box/../hakmem_build_flags.h:
core/box/../hakmem_trace.h:
core/box/../hakmem_tiny_mini_mag.h:
core/box/../hakmem_tiny_config.h:
core/box/ss_hot_prewarm_box.h:
core/box/prewarm_box.h:

View File

@ -0,0 +1,4 @@
core/front/tiny_ring_cache.o: core/front/tiny_ring_cache.c \
core/front/tiny_ring_cache.h core/front/../hakmem_build_flags.h
core/front/tiny_ring_cache.h:
core/front/../hakmem_build_flags.h:

View File

@ -130,6 +130,14 @@ static inline int ring_cascade_enabled(void) {
return g_enable; return g_enable;
} }
// ============================================================================
// Init/Shutdown Forward Declarations (needed by pop/push)
// ============================================================================
void ring_cache_init(void);
void ring_cache_shutdown(void);
void ring_cache_print_stats(void);
// ============================================================================ // ============================================================================
// Ultra-Fast Pop/Push (1-2 instructions) // Ultra-Fast Pop/Push (1-2 instructions)
// ============================================================================ // ============================================================================
@ -137,8 +145,19 @@ static inline int ring_cascade_enabled(void) {
// Pop from ring (alloc fast path) // Pop from ring (alloc fast path)
// Returns: BASE pointer (caller must convert to USER with +1) // Returns: BASE pointer (caller must convert to USER with +1)
static inline void* ring_cache_pop(int class_idx) { static inline void* ring_cache_pop(int class_idx) {
// Fast path: Ring disabled or wrong class → return NULL immediately
if (__builtin_expect(!ring_cache_enabled(), 0)) return NULL;
if (__builtin_expect(class_idx != 2 && class_idx != 3, 0)) return NULL;
TinyRingCache* ring = (class_idx == 2) ? &g_ring_cache_c2 : &g_ring_cache_c3; TinyRingCache* ring = (class_idx == 2) ? &g_ring_cache_c2 : &g_ring_cache_c3;
// Lazy init check (once per thread)
if (__builtin_expect(ring->slots == NULL, 0)) {
ring_cache_init(); // First call in this thread
// Re-check after init (may fail if allocation failed)
if (ring->slots == NULL) return NULL;
}
// Empty check // Empty check
if (__builtin_expect(ring->head == ring->tail, 0)) { if (__builtin_expect(ring->head == ring->tail, 0)) {
return NULL; // Empty return NULL; // Empty
@ -155,8 +174,19 @@ static inline void* ring_cache_pop(int class_idx) {
// Input: BASE pointer (caller must pass BASE, not USER) // Input: BASE pointer (caller must pass BASE, not USER)
// Returns: 1=SUCCESS, 0=FULL // Returns: 1=SUCCESS, 0=FULL
static inline int ring_cache_push(int class_idx, void* base) { static inline int ring_cache_push(int class_idx, void* base) {
// Fast path: Ring disabled or wrong class → return 0 (not handled)
if (__builtin_expect(!ring_cache_enabled(), 0)) return 0;
if (__builtin_expect(class_idx != 2 && class_idx != 3, 0)) return 0;
TinyRingCache* ring = (class_idx == 2) ? &g_ring_cache_c2 : &g_ring_cache_c3; TinyRingCache* ring = (class_idx == 2) ? &g_ring_cache_c2 : &g_ring_cache_c3;
// Lazy init check (once per thread)
if (__builtin_expect(ring->slots == NULL, 0)) {
ring_cache_init(); // First call in this thread
// Re-check after init (may fail if allocation failed)
if (ring->slots == NULL) return 0;
}
uint16_t next_tail = (ring->tail + 1) & ring->mask; uint16_t next_tail = (ring->tail + 1) & ring->mask;
// Full check (leave 1 slot empty to distinguish full/empty) // Full check (leave 1 slot empty to distinguish full/empty)
@ -178,12 +208,4 @@ static inline int ring_cache_push(int class_idx, void* base) {
// Forward declaration (defined in tiny_ring_cache.c) // Forward declaration (defined in tiny_ring_cache.c)
int ring_refill_from_sll(int class_idx, int target_count); int ring_refill_from_sll(int class_idx, int target_count);
// ============================================================================
// Init/Shutdown (called from hakmem_tiny.c)
// ============================================================================
void ring_cache_init(void);
void ring_cache_shutdown(void);
void ring_cache_print_stats(void);
#endif // HAK_FRONT_TINY_RING_CACHE_H #endif // HAK_FRONT_TINY_RING_CACHE_H

View File

@ -31,6 +31,7 @@
#include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc) #include "hakmem_syscall.h" // NEW Phase 6.X P0 FIX: Box 3 (dlsym direct libc)
#include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine #include "hakmem_ace_controller.h" // NEW Phase ACE: Adaptive Control Engine
#include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers) #include "hakmem_ace_metrics.h" // NEW Phase ACE: Metrics tracking (inline helpers)
#include "box/bench_fast_box.h" // NEW Phase 20-2: BenchFast Mode (structural ceiling measurement)
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>

View File

@ -44,8 +44,9 @@ core/hakmem_tiny.o: core/hakmem_tiny.c core/hakmem_tiny.h \
core/tiny_atomic.h core/tiny_alloc_fast.inc.h \ core/tiny_atomic.h core/tiny_alloc_fast.inc.h \
core/tiny_alloc_fast_sfc.inc.h core/hakmem_tiny_fastcache.inc.h \ core/tiny_alloc_fast_sfc.inc.h core/hakmem_tiny_fastcache.inc.h \
core/front/tiny_front_c23.h core/front/../hakmem_build_flags.h \ core/front/tiny_front_c23.h core/front/../hakmem_build_flags.h \
core/front/tiny_heap_v2.h core/front/tiny_ultra_hot.h \ core/front/tiny_ring_cache.h core/front/tiny_heap_v2.h \
core/front/../box/tls_sll_box.h core/tiny_alloc_fast_inline.h \ core/front/tiny_ultra_hot.h core/front/../box/tls_sll_box.h \
core/box/front_metrics_box.h core/tiny_alloc_fast_inline.h \
core/tiny_free_fast.inc.h core/hakmem_tiny_alloc.inc \ core/tiny_free_fast.inc.h core/hakmem_tiny_alloc.inc \
core/hakmem_tiny_slow.inc core/hakmem_tiny_free.inc \ core/hakmem_tiny_slow.inc core/hakmem_tiny_free.inc \
core/box/free_publish_box.h core/mid_tcache.h \ core/box/free_publish_box.h core/mid_tcache.h \
@ -153,9 +154,11 @@ core/tiny_alloc_fast_sfc.inc.h:
core/hakmem_tiny_fastcache.inc.h: core/hakmem_tiny_fastcache.inc.h:
core/front/tiny_front_c23.h: core/front/tiny_front_c23.h:
core/front/../hakmem_build_flags.h: core/front/../hakmem_build_flags.h:
core/front/tiny_ring_cache.h:
core/front/tiny_heap_v2.h: core/front/tiny_heap_v2.h:
core/front/tiny_ultra_hot.h: core/front/tiny_ultra_hot.h:
core/front/../box/tls_sll_box.h: core/front/../box/tls_sll_box.h:
core/box/front_metrics_box.h:
core/tiny_alloc_fast_inline.h: core/tiny_alloc_fast_inline.h:
core/tiny_free_fast.inc.h: core/tiny_free_fast.inc.h:
core/hakmem_tiny_alloc.inc: core/hakmem_tiny_alloc.inc:

View File

@ -28,6 +28,7 @@
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
#ifdef HAKMEM_TINY_HEADER_CLASSIDX #ifdef HAKMEM_TINY_HEADER_CLASSIDX
#include "front/tiny_front_c23.h" // Phase B: Ultra-simple C2/C3 front #include "front/tiny_front_c23.h" // Phase B: Ultra-simple C2/C3 front
#include "front/tiny_ring_cache.h" // Phase 21-1: Ring cache (C2/C3 array-based TLS cache)
#include "front/tiny_heap_v2.h" // Phase 13-A: TinyHeapV2 magazine front #include "front/tiny_heap_v2.h" // Phase 13-A: TinyHeapV2 magazine front
#include "front/tiny_ultra_hot.h" // Phase 14: TinyUltraHot C1/C2 ultra-fast path #include "front/tiny_ultra_hot.h" // Phase 14: TinyUltraHot C1/C2 ultra-fast path
#endif #endif
@ -605,6 +606,19 @@ static inline void* tiny_alloc_fast(size_t size) {
} }
#endif #endif
// Phase 21-1: Ring Cache (C2/C3 only) - Array-based TLS cache
// ENV-gated: HAKMEM_TINY_HOT_RING_ENABLE=1
// Target: +15-20% (54.4M → 62-65M ops/s) by eliminating pointer chasing
// Design: Ring (L0) → SLL (L1) → SuperSlab (L2) cascade hierarchy
if (class_idx == 2 || class_idx == 3) {
void* base = ring_cache_pop(class_idx);
if (base) {
// Ring hit - return USER pointer (BASE + 1)
HAK_RET_ALLOC(class_idx, base);
}
// Ring miss - fall through to existing path (TLS SLL/UltraHot/HeapV2)
}
// Phase 14-C: TinyUltraHot Borrowing Design (正史から借りる設計) // Phase 14-C: TinyUltraHot Borrowing Design (正史から借りる設計)
// ENV-gated: HAKMEM_TINY_ULTRA_HOT=1 (internal control) // ENV-gated: HAKMEM_TINY_ULTRA_HOT=1 (internal control)
// Phase 19-4: HAKMEM_TINY_FRONT_ENABLE_ULTRAHOT=1 to enable (DEFAULT: OFF for +12.9% perf) // Phase 19-4: HAKMEM_TINY_FRONT_ENABLE_ULTRAHOT=1 to enable (DEFAULT: OFF for +12.9% perf)

View File

@ -23,6 +23,7 @@
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
#include "front/tiny_heap_v2.h" // Phase 13-B: TinyHeapV2 magazine supply #include "front/tiny_heap_v2.h" // Phase 13-B: TinyHeapV2 magazine supply
#include "front/tiny_ultra_hot.h" // Phase 14: TinyUltraHot C1/C2 ultra-fast path #include "front/tiny_ultra_hot.h" // Phase 14: TinyUltraHot C1/C2 ultra-fast path
#include "front/tiny_ring_cache.h" // Phase 21-1: Ring cache (C2/C3 array-based TLS cache)
// Phase 7: Header-based ultra-fast free // Phase 7: Header-based ultra-fast free
#if HAKMEM_TINY_HEADER_CLASSIDX #if HAKMEM_TINY_HEADER_CLASSIDX
@ -136,6 +137,18 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// → 正史TLS SLLの在庫を正しく保つ // → 正史TLS SLLの在庫を正しく保つ
// → UltraHot refill は alloc 側で TLS SLL から借りる // → UltraHot refill は alloc 側で TLS SLL から借りる
// Phase 21-1: Ring Cache (C2/C3 only) - Array-based TLS cache
// ENV-gated: HAKMEM_TINY_HOT_RING_ENABLE=1
// Target: +15-20% (54.4M → 62-65M ops/s) by eliminating pointer chasing
// Design: Ring (L0) → SLL (L1) → SuperSlab (L2) cascade hierarchy
if (class_idx == 2 || class_idx == 3) {
if (ring_cache_push(class_idx, base)) {
// Ring push success - done!
return 1;
}
// Ring full - fall through to existing path (TLS SLL/HeapV2)
}
// Phase 13-B: TinyHeapV2 magazine supply (C0-C3 only) // Phase 13-B: TinyHeapV2 magazine supply (C0-C3 only)
// Two supply modes (controlled by HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE): // Two supply modes (controlled by HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE):
// Mode 0 (default): L0 gets blocks first ("stealing" design) // Mode 0 (default): L0 gets blocks first ("stealing" design)

View File

@ -14,9 +14,10 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
core/hakmem_elo.h core/hakmem_ace_stats.h core/hakmem_batch.h \ core/hakmem_elo.h core/hakmem_ace_stats.h core/hakmem_batch.h \
core/hakmem_evo.h core/hakmem_debug.h core/hakmem_prof.h \ core/hakmem_evo.h core/hakmem_debug.h core/hakmem_prof.h \
core/hakmem_syscall.h core/hakmem_ace_controller.h \ core/hakmem_syscall.h core/hakmem_ace_controller.h \
core/hakmem_ace_metrics.h core/hakmem_ace_ucb1.h core/ptr_trace.h \ core/hakmem_ace_metrics.h core/hakmem_ace_ucb1.h \
core/box/hak_exit_debug.inc.h core/box/hak_kpi_util.inc.h \ core/box/bench_fast_box.h core/ptr_trace.h core/box/hak_exit_debug.inc.h \
core/box/hak_core_init.inc.h core/hakmem_phase7_config.h \ core/box/hak_kpi_util.inc.h core/box/hak_core_init.inc.h \
core/hakmem_phase7_config.h core/box/ss_hot_prewarm_box.h \
core/box/hak_alloc_api.inc.h core/box/../hakmem_tiny.h \ core/box/hak_alloc_api.inc.h core/box/../hakmem_tiny.h \
core/box/../hakmem_smallmid.h core/box/hak_free_api.inc.h \ core/box/../hakmem_smallmid.h core/box/hak_free_api.inc.h \
core/hakmem_tiny_superslab.h core/box/../tiny_free_fast_v2.inc.h \ core/hakmem_tiny_superslab.h core/box/../tiny_free_fast_v2.inc.h \
@ -33,7 +34,9 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
core/box/../box/tls_sll_box.h core/box/../box/free_local_box.h \ core/box/../box/tls_sll_box.h core/box/../box/free_local_box.h \
core/box/../hakmem_tiny_integrity.h core/box/../front/tiny_heap_v2.h \ core/box/../hakmem_tiny_integrity.h core/box/../front/tiny_heap_v2.h \
core/box/../front/../hakmem_tiny.h core/box/../front/tiny_ultra_hot.h \ core/box/../front/../hakmem_tiny.h core/box/../front/tiny_ultra_hot.h \
core/box/../front/../box/tls_sll_box.h core/box/front_gate_v2.h \ core/box/../front/../box/tls_sll_box.h \
core/box/../front/tiny_ring_cache.h \
core/box/../front/../hakmem_build_flags.h core/box/front_gate_v2.h \
core/box/external_guard_box.h core/box/hak_wrappers.inc.h \ core/box/external_guard_box.h core/box/hak_wrappers.inc.h \
core/box/front_gate_classifier.h core/box/front_gate_classifier.h
core/hakmem.h: core/hakmem.h:
@ -78,11 +81,13 @@ core/hakmem_syscall.h:
core/hakmem_ace_controller.h: core/hakmem_ace_controller.h:
core/hakmem_ace_metrics.h: core/hakmem_ace_metrics.h:
core/hakmem_ace_ucb1.h: core/hakmem_ace_ucb1.h:
core/box/bench_fast_box.h:
core/ptr_trace.h: core/ptr_trace.h:
core/box/hak_exit_debug.inc.h: core/box/hak_exit_debug.inc.h:
core/box/hak_kpi_util.inc.h: core/box/hak_kpi_util.inc.h:
core/box/hak_core_init.inc.h: core/box/hak_core_init.inc.h:
core/hakmem_phase7_config.h: core/hakmem_phase7_config.h:
core/box/ss_hot_prewarm_box.h:
core/box/hak_alloc_api.inc.h: core/box/hak_alloc_api.inc.h:
core/box/../hakmem_tiny.h: core/box/../hakmem_tiny.h:
core/box/../hakmem_smallmid.h: core/box/../hakmem_smallmid.h:
@ -112,6 +117,8 @@ core/box/../front/tiny_heap_v2.h:
core/box/../front/../hakmem_tiny.h: core/box/../front/../hakmem_tiny.h:
core/box/../front/tiny_ultra_hot.h: core/box/../front/tiny_ultra_hot.h:
core/box/../front/../box/tls_sll_box.h: core/box/../front/../box/tls_sll_box.h:
core/box/../front/tiny_ring_cache.h:
core/box/../front/../hakmem_build_flags.h:
core/box/front_gate_v2.h: core/box/front_gate_v2.h:
core/box/external_guard_box.h: core/box/external_guard_box.h:
core/box/hak_wrappers.inc.h: core/box/hak_wrappers.inc.h:

View File

@ -0,0 +1,4 @@
hakmem_smallmid_superslab.o: core/hakmem_smallmid_superslab.c \
core/hakmem_smallmid_superslab.h core/hakmem_smallmid.h
core/hakmem_smallmid_superslab.h:
core/hakmem_smallmid.h: