diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 84898198..456961ac 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -31,7 +31,7 @@ --- -## Phase MID-V3: Mid/Pool HotBox v3 完成(2025-12-12) +## Phase MID-V3: Mid/Pool HotBox v3 完成 → 本線採用(2025-12-12) ### 役割分担の明確化 @@ -50,22 +50,30 @@ Size Range | Allocator | Performance 52KB+ | Large mmap | Existing path ``` -### 実装完了 +### 実装完了 → 本線プロファイル採用 - ✅ MID-V3-0~5: 型定義、RegionIdBox 統合、alloc/free 実装 - ✅ MID-V3-6: hakmem.c メイン経路統合(箱化モジュール化) - ✅ Performance: C6 +11.1%, Mixed (257-768B) +19.8% - ✅ Role separation: C7 を MID v3 から除外、ULTRA に一本化 +- ✅ **Mainline adoption**: C6_HEAVY_LEGACY_POOLV1 と MIXED_TINYV3_C7_SAFE プロファイルでデフォルト ON -### ENV 設定 +### ENV 設定(本線プロファイルでデフォルト ON) ```bash -HAKMEM_MID_V3_ENABLED=1 # Master switch (default: OFF) -HAKMEM_MID_V3_CLASSES=0x40 # C6 only (recommended) -HAKMEM_MID_V3_DEBUG=1 # Debug logging +# Profile 経由で自動有効化: +HAKMEM_PROFILE=C6_HEAVY_LEGACY_POOLV1 +# または +HAKMEM_PROFILE=MIXED_TINYV3_C7_SAFE + +# 明示的に指定する場合: +HAKMEM_MID_V3_ENABLED=1 # Master switch (profiles でデフォルト ON) +HAKMEM_MID_V3_CLASSES=0x40 # C6 only (profiles でデフォルト設定) +HAKMEM_MID_V3_DEBUG=1 # Debug logging (opt-in) ``` **設計 doc**: `docs/analysis/MID_POOL_V3_DESIGN.md` +**Profile doc**: `docs/analysis/ENV_PROFILE_PRESETS.md` --- @@ -279,6 +287,48 @@ HAKMEM_REGION_ID_V6_OBSERVE=0 # デバッグ用 - **今後**: mid/pool v3 による C6-heavy 本格改善に注力 - **参考設計**: RegionIdBox (分類のみ) + TLS-scope cache はマルチ region 対応時の参考に +--- + +## Phase V7-0: SmallObjectHeap v7 / HAKMEM v3 コア設計スケルトン(新規, 設計のみ) + +### 目的 + +ULTRA + MID v3 + V6 C6-only 世代を「第1章 完成」として締めたうえで、 +small〜mid を一体で扱う新コア **SmallObjectHotBox_v7(= HAKMEM v3 small/mid コア)** の設計だけ先に固める。 +このフェーズでは **型とドキュメントのみ** を追加し、挙動は一切変更しない。 + +### やったこと(設計レベル) + +- 新規ドキュメント `docs/analysis/SMALLOBJECT_V7_DESIGN.md` を追加: + - L0: ULTRA (C4–C7, FROZEN) + - L1: SmallObjectHotBox_v7 (small/mid コア) + - L2: SegmentBox_v7 / ColdIface_v7 + - L3: PolicyBox_v7 / RegionIdBox / PageStatsBox + の 4 層構造を明文化。 + - `SmallPageMeta_v7` / 
`SmallClassHeap_v7` / `SmallHeapCtx_v7` / `SmallSegment_v7` の struct ひな形を定義(Hot/cold フィールド分離)。 + - RegionIdBox v7 の API(`RegionLookupResult_v7` / `region_id_lookup_v7()`)と header の扱い(薄く残すが fast path では極力触らない)を整理。 + - small v7 / mid v7 / pool v3 の関係(共通の RegionId/Segment/PageStats の上に parallel な HotBox を置く)を記載。 + - Phase v7-0/1/2 のフェーズ分割(型追加→C6-only stub→C6-only 本実装)をまとめた。 + +### ここまでの前提・ルール + +- ULTRA 世代(C4–C7 ULTRA / Tiny front v3)は FROZEN(本線)として維持する。 +- MID v3 は 257–768B 専任の本線箱として維持する。 +- V6 C6-only headerless は研究箱として凍結(v7 の物理層設計の参考)。 +- v7 は **別章(HAKMEM v3 世代)** として設計し、ENV 経由で opt-in するまで front/gate から一切呼ばない。 + +### 次フェーズ候補(実装は別 AI 向け) + +1. **Phase v7-1**: C6-only v7 stub + - route kind に `TINY_ROUTE_SMALL_HEAP_V7` を追加し、C6 クラスだけ v7 route を返すプロファイルを追加。 + - `small_heap_alloc_fast_v7_stub` / `small_heap_free_fast_v7_stub` を実装し、当面はすべて MID v3 / V6 / pool v1 に即フォールバック。 + - RegionIdBox_v7 は OBSERVE モードで `region_id_lookup_v7(ptr)` を呼び、`REGION_SMALL_V7` の統計だけ取る(挙動不変)。 +2. **Phase v7-2**: C6-only v7 本実装(small帯だけ) + - SegmentBox_v7 / ColdIface_v7 を実装し、C6 pages の refill/retire を Segment v7 経由にする。 + - `small_heap_alloc_fast_v7` / `small_heap_free_fast_v7` を実装し、C6-only small 帯を本当に v7 TLS + Segment で回す。 + - C6-heavy / Mixed で v7 vs MID v3 vs V6 vs v2 本線を A/B し、SmallObjectHotBox_v7 の価値を評価。 + + --- ## Phase V6-HDR-1: RegionIdBox 実配線・OBSERVE(完了) @@ -1435,4 +1485,3 @@ if (unlikely(tls->seg_base == 0)) { - **Option C**: 新規サイズクラス (C3/C2 ULTRA) → TLS L1 汚染リスク 推奨: Option A(v3 backend 最適化)を検討 - diff --git a/Makefile b/Makefile index 535dbcfb..5291ac38 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o 
tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o 
core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/mid_hotbox_v3.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o 
core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o OBJS = $(OBJS_BASE) # Shared library @@ -250,7 +250,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o 
hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/mid_hotbox_v3.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o 
hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o 
core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -427,7 +427,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o 
core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/mid_hotbox_v3.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o 
core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o 
core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/box/free_dispatch_stats_box.h b/core/box/free_dispatch_stats_box.h index e637444b..e7f02a9b 100644 --- a/core/box/free_dispatch_stats_box.h +++ b/core/box/free_dispatch_stats_box.h @@ -18,6 +18,7 @@ typedef struct FreeDispatchStats { uint64_t route_tiny_legacy; // Tiny legacy path uint64_t route_pool_v1; // pool v1 経由 uint64_t route_core_v6; // core v6 (C6-only) + uint64_t route_core_v7; // core v7 stub (Phase v7-1) // Performance counters uint64_t env_checks; // ENV 読み回数(概算) diff --git a/core/box/free_path_stats_box.h b/core/box/free_path_stats_box.h index aa4d3fb5..d4789d62 100644 --- a/core/box/free_path_stats_box.h +++ b/core/box/free_path_stats_box.h @@ -17,6 +17,7 @@ typedef struct FreePathStats { uint64_t c4_ultra_alloc_hit; // Phase 6: C4 ULTRA-alloc (TLS pop) uint64_t smallheap_v3_fast; uint64_t smallheap_v6_fast; + uint64_t smallheap_v7_fast; // Phase v7-2: SmallHeap v7 fast free uint64_t tiny_heap_v1_fast; uint64_t pool_v1_fast; uint64_t remote_free; diff --git a/core/box/region_id_v6_box.h b/core/box/region_id_v6_box.h index 9c81804a..422d6dec 100644 --- a/core/box/region_id_v6_box.h +++ b/core/box/region_id_v6_box.h @@ -24,6 +24,7 @@ typedef enum { REGION_KIND_LARGE, // Large mmap allocation REGION_KIND_TINY_LEGACY, // Legacy tiny heap REGION_KIND_MID_V3, // Mid/Pool v3 page (MID-V3) + REGION_KIND_SMALL_V7, // SmallObject v7 (Phase 
v7-1) REGION_KIND_MAX } region_kind_t;
diff --git a/core/box/smallobject_cold_iface_v7_box.h b/core/box/smallobject_cold_iface_v7_box.h
new file mode 100644
index 00000000..64e978c7
--- /dev/null
+++ b/core/box/smallobject_cold_iface_v7_box.h
@@ -0,0 +1,109 @@
+// smallobject_cold_iface_v7_box.h - SmallObject ColdIface v7 (Phase v7-2)
+//
+// Purpose:
+// - Page refill/retire for SmallHeapCtx_v7
+// - Interfaces between HotBox (TLS alloc/free) and SegmentBox (page management)
+
+#ifndef HAKMEM_SMALLOBJECT_COLD_IFACE_V7_BOX_H
+#define HAKMEM_SMALLOBJECT_COLD_IFACE_V7_BOX_H
+
+#include <stdint.h>  // uintN_t field types; NOTE(review): header name restored - confirm
+#include <stddef.h>  // size_t; NOTE(review): header name restored - confirm
+#include "smallsegment_v7_box.h"
+
+// ============================================================================
+// SmallHeapCtx_v7 Types (fully defined in this header)
+// ============================================================================
+
+// Forward declaration
+typedef struct SmallClassHeap_v7 SmallClassHeap_v7;
+typedef struct SmallHeapCtx_v7 SmallHeapCtx_v7;
+
+// Number of classes supported by v7 (C0-C7, but v7-2 only uses C6)
+#define HAK_SMALL_NUM_CLASSES_V7 8
+
+// SmallClassHeap_v7 - Per-class TLS heap state
+typedef struct SmallClassHeap_v7 {
+    SmallPageMeta_v7* current;        // Currently allocating page
+    SmallPageMeta_v7* partial_head;   // Pages with free slots
+    SmallPageMeta_v7* full_head;      // Full pages (optional tracking)
+
+    void* local_freelist;             // Mini-ULTRA local cache (optional)
+    uint16_t local_freelist_count;
+    uint16_t local_freelist_cap;
+
+    uint16_t class_idx;
+    uint16_t flags;
+} SmallClassHeap_v7;
+
+// SmallHeapCtx_v7 - Thread-local heap context
+typedef struct SmallHeapCtx_v7 {
+    SmallClassHeap_v7 cls[HAK_SMALL_NUM_CLASSES_V7];
+    SmallSegment_v7* segment;         // TLS segment (may be NULL until first alloc)
+} SmallHeapCtx_v7;
+
+// ============================================================================
+// TLS Context Access
+// ============================================================================
+
+/// Get TLS heap context (initializes if needed)
+SmallHeapCtx_v7* small_heap_ctx_v7(void);
+
+// ============================================================================
+// ColdIface API
+// ============================================================================
+
+/// Refill a page for the given class index
+/// - Gets or creates TLS segment
+/// - Takes a page from segment's free stack
+/// - Carves freelist with appropriate block size
+/// - Sets ctx->cls[class_idx].current to new page
+/// @param ctx: TLS heap context
+/// @param class_idx: Size class (v7-2: C6 only)
+/// @return: Refilled page, or NULL on failure
+SmallPageMeta_v7* small_cold_v7_refill_page(SmallHeapCtx_v7* ctx, uint32_t class_idx);
+
+/// Retire a fully empty page
+/// - Publishes stats to PageStatsBox (future: Learner)
+/// - Releases page back to segment's free stack
+/// @param ctx: TLS heap context
+/// @param page: Page to retire (must have used == 0)
+void small_cold_v7_retire_page(SmallHeapCtx_v7* ctx, SmallPageMeta_v7* page);
+
+// ============================================================================
+// Stats Structure (for future Learner integration)
+// ============================================================================
+
+typedef struct SmallPageStatsV7 {
+    uint8_t class_idx;
+    uint8_t reserved0;
+    uint16_t page_idx;
+
+    uint32_t capacity;
+    uint64_t alloc_count;
+    uint64_t free_count;
+    uint64_t remote_free_count;
+
+    uint16_t peak_live;
+    uint16_t remote_burst_max;
+    uint32_t lifetime_ms;             // Approximate lifetime
+} SmallPageStatsV7;
+
+/// Publish page stats (called from retire, future: sends to Learner)
+void small_cold_v7_publish_stats(const SmallPageStatsV7* stats);
+
+// ============================================================================
+// Block Size Lookup (C6-only for v7-2)
+// ============================================================================
+
+/// Get block size for class index
+/// v7-2: Only C6 (512B) is implemented
+static inline size_t
small_v7_block_size(uint32_t class_idx) {
+    // v7-2: C6-only
+    if (class_idx == SMALL_V7_C6_CLASS_IDX) {
+        return SMALL_V7_C6_BLOCK_SIZE;  // 512
+    }
+    return 0;  // Unsupported class
+}
+
+#endif // HAKMEM_SMALLOBJECT_COLD_IFACE_V7_BOX_H
diff --git a/core/box/smallobject_hotbox_v7_box.h b/core/box/smallobject_hotbox_v7_box.h
new file mode 100644
index 00000000..098593c9
--- /dev/null
+++ b/core/box/smallobject_hotbox_v7_box.h
@@ -0,0 +1,240 @@
+// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-2: C6-only impl)
+//
+// Role:
+// - SmallObject v7 fast path for alloc/free
+// - C6-only implementation (512B blocks, 64KiB pages, 2MiB segments)
+// - Uses SmallHeapCtx_v7 + SmallSegment_v7 + ColdIface_v7
+
+#pragma once
+
+// NOTE(review): the four system header names were missing from this patch text.
+// <stddef.h>/<stdint.h>/<stdbool.h> are required by size_t, uint8_t/uintptr_t and
+// bool below; <string.h> is a guess for the fourth - confirm against the real file.
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include "smallsegment_v7_box.h"
+#include "smallobject_cold_iface_v7_box.h"
+#include "region_id_v6_box.h"
+#include "../tiny_region_id.h"  // For HEADER_MAGIC, HEADER_CLASS_MASK
+
+#ifndef likely
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+// ============================================================================
+// Debug/Observe Support
+// ============================================================================
+
+// V7 stats functions (defined in smallobject_cold_iface_v7.c)
+extern void small_v7_stat_alloc(void);
+extern void small_v7_stat_free(void);
+extern void small_v7_stat_refill(void);
+extern void small_v7_stat_retire(void);
+
+// Class mismatch logging (for hint validation).
+// Called when the class hint passed to free differs from the page's class.
+static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t actual) {
+    // TODO: Make this ENV-controlled
+    // For now, silent (Fail-Fast mode would assert here)
+    (void)ptr;
+    (void)hint;
+    (void)actual;
+}
+
+// ============================================================================
+// Alloc Fast Path
+// ============================================================================
+
+// small_v7_pop_block() - pop one block from a page freelist.
+//
+// Shared by the three alloc paths (current page, partial page, refilled page).
+// The caller must pass a page whose free_list is non-NULL.
+// Bumps used/alloc_count/live_current (and peak_live), writes the 1-byte
+// header at BASE, counts the alloc, and returns the USER pointer (BASE + 1).
+static inline void* small_v7_pop_block(SmallPageMeta_v7* page, uint8_t class_idx) {
+    void* base = page->free_list;
+    page->free_list = *(void**)base;  // Next-free link lives in the block's first word
+    page->used++;
+
+    // Update stats
+    page->alloc_count++;
+    page->live_current++;
+    if (page->live_current > page->peak_live) {
+        page->peak_live = page->live_current;
+    }
+
+    // Write header (HEADER_MAGIC | class_idx) for front compatibility
+    ((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
+
+    small_v7_stat_alloc();
+    // Return USER ptr (base + 1 for header compatibility with front)
+    return (uint8_t*)base + 1;
+}
+
+// small_heap_alloc_fast_v7() - v7 alloc (C6-only for v7-2)
+//
+// Flow:
+// 1. Get TLS context
+// 2. Check current page freelist
+// 3. If empty, check partial list
+// 4. If no partial, call ColdIface refill
+// 5. Pop from freelist and return USER ptr
+//
+// @param size: Requested size; not consulted (the class selects the page)
+// @param class_idx: Size class; anything other than C6 returns NULL
+// @return: USER pointer, or NULL so the front can fall back
+//
+static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
+    (void)size;  // Unused: silences -Wunused-parameter, signature kept for callers
+
+    // v7-2: Only C6 is implemented
+    if (unlikely(class_idx != SMALL_V7_C6_CLASS_IDX)) {
+        return NULL;  // Unsupported class -> front falls back
+    }
+
+    SmallHeapCtx_v7* ctx = small_heap_ctx_v7();
+    SmallClassHeap_v7* h = &ctx->cls[class_idx];
+    SmallPageMeta_v7* p = h->current;
+
+    // Fast path: current page has free slots
+    if (likely(p && p->free_list)) {
+        return small_v7_pop_block(p, class_idx);
+    }
+
+    // Current exhausted -> try partial list
+    if (h->partial_head) {
+        p = h->partial_head;
+        h->partial_head = p->segment_next_partial;
+        p->segment_next_partial = NULL;
+        h->current = p;
+
+        if (likely(p->free_list)) {
+            return small_v7_pop_block(p, class_idx);
+        }
+    }
+
+    // Completely exhausted -> ColdIface refill
+    small_v7_stat_refill();
+    p = small_cold_v7_refill_page(ctx, class_idx);
+    if (unlikely(!p || !p->free_list)) {
+        return NULL;  // front falls back to legacy/pool
+    }
+
+    h->current = p;
+    return small_v7_pop_block(p, class_idx);
+}
+
+// ============================================================================
+// Free Fast Path
+// ============================================================================
+
+// small_heap_free_fast_v7() - v7 free (C6-only for v7-2)
+//
+// Flow:
+// 1. RegionIdBox lookup to verify ptr is v7-managed
+// 2. Get page_meta from segment
+// 3. Validate class_idx (hint vs actual)
+// 4. Push to page freelist
+// 5. If page becomes empty, retire to ColdIface
+//
+// @param ptr: USER pointer to free
+// @param class_idx_hint: Class index hint from front/header (may be ignored)
+// @return: true if handled by v7, false if not v7-managed (front should fallback)
+//
+// NOTE(review): nothing here checks that ptr sits at a block boundary + 1, and
+// used/live_current are decremented without an underflow guard - confirm that
+// the front and small_page_v7_valid() rule out interior pointers and double frees.
+//
+static inline bool small_heap_free_fast_v7(void* ptr, uint8_t class_idx_hint) {
+    if (unlikely(!ptr)) {
+        return false;
+    }
+
+    // RegionIdBox lookup
+    RegionLookupV6 lk = region_id_lookup_v6(ptr);
+
+    // Check if this is a v7-managed pointer
+    if (unlikely(lk.kind != REGION_KIND_SMALL_V7)) {
+        return false;  // Not v7 -> front falls back to legacy/pool/ULTRA
+    }
+
+    // Get segment from registry metadata
+    SmallSegment_v7* seg = (SmallSegment_v7*)lk.page_meta;
+    if (unlikely(!seg || !small_segment_v7_valid(seg))) {
+        return false;
+    }
+
+    // Calculate page index from pointer
+    uintptr_t addr = (uintptr_t)ptr;
+    if (unlikely(!small_ptr_in_segment_v7(seg, ptr))) {
+        return false;
+    }
+
+    size_t page_idx = SMALL_V7_PAGE_IDX(seg, addr);
+    if (unlikely(page_idx >= seg->num_pages)) {
+        return false;
+    }
+
+    SmallPageMeta_v7* page = &seg->page_meta[page_idx];
+
+    // Validate page is in use
+    if (unlikely(!small_page_v7_valid(page))) {
+        return false;
+    }
+
+    // Get actual class from page (v7 core uses page_meta.class_idx as truth)
+    uint8_t class_idx = (uint8_t)page->class_idx;
+
+    // OBSERVE: Check hint vs actual
+    if (unlikely(class_idx != class_idx_hint)) {
+        small_v7_log_class_mismatch(ptr, class_idx_hint, class_idx);
+        // Continue with actual class_idx (v7 is header-independent)
+    }
+
+    // v7-2: Only C6 is implemented
+    if (unlikely(class_idx != SMALL_V7_C6_CLASS_IDX)) {
+        return false;  // Should not happen, but fallback
+    }
+
+    // Push BASE ptr to page freelist
+    // ptr is USER ptr (base + 1), convert back to BASE
+    void* base = (uint8_t*)ptr - 1;
+    *(void**)base = page->free_list;
+    page->free_list = base;
+
+    // Update stats
+    page->free_count++;
+    page->live_current--;
+
+    // Decrement used count
+    if (unlikely(--page->used == 0)) {
+        // Page is empty -> retire
+        // NOTE(review): retire runs with the calling thread's ctx - confirm that
+        // cross-thread frees cannot reach this path.
+        small_v7_stat_retire();
+        SmallHeapCtx_v7* ctx = small_heap_ctx_v7();
+        small_cold_v7_retire_page(ctx, page);
+    }
+
+    small_v7_stat_free();
+    return true;
+}
+
+// ============================================================================
+// Stub Functions (for compatibility, forwards to real impl)
+// ============================================================================
+
+// These maintain backward compatibility with v7-1 stub API
+
+static inline void* small_heap_alloc_fast_v7_stub(size_t size, uint8_t class_idx) {
+    // v7-2: Use real implementation
+    return small_heap_alloc_fast_v7(size, class_idx);
+}
+
+static inline bool small_heap_free_fast_v7_stub(void* ptr, uint8_t class_idx) {
+    // v7-2: Use real implementation
+    return small_heap_free_fast_v7(ptr, class_idx);
+}
diff --git a/core/box/smallsegment_v7_box.h b/core/box/smallsegment_v7_box.h
new file mode 100644
index 00000000..2b9291fc
--- /dev/null
+++ b/core/box/smallsegment_v7_box.h
@@ -0,0 +1,158 @@
+// smallsegment_v7_box.h - SmallSegment v7 (Phase
// ============================================================================
// Segment Constants
// ============================================================================

#define SMALL_SEGMENT_V7_SIZE     (2u * 1024u * 1024u)  // 2 MiB
#define SMALL_PAGE_V7_SIZE        (64u * 1024u)         // 64 KiB
#define SMALL_PAGES_PER_SEG_V7    (SMALL_SEGMENT_V7_SIZE / SMALL_PAGE_V7_SIZE)  // 32
#define SMALL_PAGE_V7_SHIFT       16                    // log2(64 KiB)
#define SMALL_SEGMENT_V7_MAGIC    0xC07E57u

// ============================================================================
// C6 Class Configuration (v7-2: C6-only)
// ============================================================================

#define SMALL_V7_C6_CLASS_IDX     6
#define SMALL_V7_C6_BLOCK_SIZE    512

// ============================================================================
// Page Index Calculation
// ============================================================================

// Index of the 64 KiB page containing addr, relative to seg->base.
// The caller must already know addr lies inside the segment.
#define SMALL_V7_PAGE_IDX(seg, addr) (((uintptr_t)(addr) - (seg)->base) >> SMALL_PAGE_V7_SHIFT)

// ============================================================================
// Type names
// ============================================================================
// Each typedef appears exactly once; the struct bodies below are plain
// `struct X { ... };` definitions. Repeating a typedef is only legal from C11
// on, so this keeps the header usable with older -std settings.

typedef struct SmallSegment_v7 SmallSegment_v7;
typedef struct SmallPageMeta_v7 SmallPageMeta_v7;

// ============================================================================
// SmallPageMeta_v7 - per-page metadata (hot fields first, stats after)
// ============================================================================

struct SmallPageMeta_v7 {
    // ---- Hot fields (touched by alloc/free) ----
    void*    free_list;          // LIFO freelist: block -> next
    uint32_t used;               // Blocks currently handed out
    uint32_t capacity;           // Total block slots; 0 means the page is not in use

    uint16_t class_idx;          // Size class
    uint16_t flags;              // HOT/PARTIAL/FULL/REMOTE_PENDING
    uint16_t page_idx;           // Index within the segment
    uint16_t reserved0;          // Padding; holds the next-free-page link while
                                 // the page sits on the segment's free stack

    SmallSegment_v7* segment;    // Back pointer to the owning segment

    // Intrusive list pointer for partial pages (optional)
    SmallPageMeta_v7* segment_next_partial;

    // ---- Cold fields (stats / policy) ----
    uint64_t alloc_count;        // Cumulative allocs
    uint64_t free_count;         // Cumulative frees
    uint64_t remote_free_count;  // Cumulative remote frees

    uint16_t live_current;       // Live objects right now
    uint16_t peak_live;          // Highest live count seen
    uint16_t remote_burst_max;   // Largest remote drain in one pass
    uint16_t reserved1;

    uint32_t epoch_first_alloc;  // Coarse epoch (for L3/Learner)
    uint32_t epoch_last_free;    // Coarse epoch (for L3/Learner)
};

// ============================================================================
// SmallSegment_v7 - 2 MiB segment header with embedded page metadata
// ============================================================================

struct SmallSegment_v7 {
    uintptr_t base;              // Address of the 2 MiB data area
    uint32_t  num_pages;         // Number of pages
    uint32_t  owner_tid;         // Owner id supplied at allocation time

    uint32_t  flags;             // SEGMENT_IN_USE / RETIRED etc.
    uint32_t  magic;             // SMALL_SEGMENT_V7_MAGIC when initialised
    uint32_t  region_kind;       // REGION_KIND_SMALL_V7
    uint32_t  segment_idx;       // RegionIdBox index

    uint32_t  free_page_head;    // Top of the free-page stack (page_idx);
                                 // 0xFFFFFFFF, or any value >= num_pages, = empty
    uint32_t  free_page_count;   // Pages currently on the free stack

    SmallPageMeta_v7 page_meta[SMALL_PAGES_PER_SEG_V7];
};

// ============================================================================
// Inline Helper Functions
// ============================================================================

/// Non-zero when page is non-NULL and has been carved (capacity > 0).
static inline int small_page_v7_valid(const SmallPageMeta_v7* page) {
    return page != NULL && page->capacity > 0;
}

/// Non-zero when ptr lies in [seg->base, seg->base + SMALL_SEGMENT_V7_SIZE).
/// Written as a single unsigned subtraction so the test cannot be fooled by
/// `base + size` wrapping around the address space.
static inline int small_ptr_in_segment_v7(const SmallSegment_v7* seg, const void* ptr) {
    uintptr_t offset = (uintptr_t)ptr - seg->base;
    return offset < (uintptr_t)SMALL_SEGMENT_V7_SIZE;
}

/// Non-zero when seg is non-NULL and carries the v7 magic.
static inline int small_segment_v7_valid(const SmallSegment_v7* seg) {
    return seg != NULL && seg->magic == SMALL_SEGMENT_V7_MAGIC;
}

// ============================================================================
// Segment API (Cold Path)
// ============================================================================

/// Allocate a new segment for thread
/// @param owner_tid: Thread ID of owner
/// @return: Segment pointer on success, NULL on failure
SmallSegment_v7* small_segment_alloc_v7(uint32_t owner_tid);

/// Free a segment and unmap memory
/// @param seg: Segment to free
void small_segment_free_v7(SmallSegment_v7* seg);
metadata pointer on success, NULL if no free pages +SmallPageMeta_v7* small_segment_take_page_v7(SmallSegment_v7* seg, uint32_t class_idx); + +/// Release a page back to segment's free stack +/// @param seg: Segment owning the page +/// @param page: Page to release +void small_segment_release_page_v7(SmallSegment_v7* seg, SmallPageMeta_v7* page); + +// ============================================================================ +// TLS Segment Access +// ============================================================================ + +/// Get or acquire TLS segment for current thread +SmallSegment_v7* small_segment_v7_get_tls(void); + +/// Get page metadata for a pointer (O(1) via TLS segment) +/// @param ptr: Pointer to lookup (USER or BASE pointer) +/// @return: Page metadata if ptr is in v7 segment, NULL otherwise +SmallPageMeta_v7* small_page_meta_v7_of(void* ptr); + +#endif // HAKMEM_SMALLSEGMENT_V7_BOX_H diff --git a/core/box/tiny_route_env_box.h b/core/box/tiny_route_env_box.h index a12d4e9d..c8814498 100644 --- a/core/box/tiny_route_env_box.h +++ b/core/box/tiny_route_env_box.h @@ -29,6 +29,7 @@ typedef enum { TINY_ROUTE_SMALL_HEAP_V4 = 4, // SmallObject HotHeap v4 (stub, route未使用) TINY_ROUTE_SMALL_HEAP_V5 = 5, // SmallObject HotHeap v5 (C6-only route stub, Phase v5-1) TINY_ROUTE_SMALL_HEAP_V6 = 6, // SmallObject Core v6 (C6-only route stub, Phase v6-1) + TINY_ROUTE_SMALL_HEAP_V7 = 7, // SmallObject HotHeap v7 (C6-only route stub, Phase v7-1) } tiny_route_kind_t; extern tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES]; @@ -70,11 +71,52 @@ static inline int small_heap_v6_class_enabled(uint32_t class_idx) { return (mask & (1u << class_idx)) ? 
1 : 0; } +// ============================================================================ +// Phase v7-1: SmallObject HotHeap v7 ENV gate (must be before tiny_route_snapshot_init) +// ============================================================================ + +// small_heap_v7_enabled() - グローバル v7 enable check +static inline int small_heap_v7_enabled(void) { + static int g_enabled = ENV_UNINIT; + if (__builtin_expect(g_enabled == ENV_UNINIT, 0)) { + const char* e = getenv("HAKMEM_SMALL_HEAP_V7_ENABLED"); + g_enabled = (e && *e && *e != '0') ? ENV_ENABLED : ENV_DISABLED; + } + return (g_enabled == ENV_ENABLED); +} + +// small_heap_v7_class_mask() - v7 対象クラスのビットマスク +static inline uint32_t small_heap_v7_class_mask(void) { + static int g_mask = ENV_UNINIT; + if (__builtin_expect(g_mask == ENV_UNINIT, 0)) { + const char* e = getenv("HAKMEM_SMALL_HEAP_V7_CLASSES"); + if (e && *e) { + g_mask = (int)strtoul(e, NULL, 0); + } else { + g_mask = 0x0; // default: OFF + } + } + return (uint32_t)g_mask; +} + +// small_heap_v7_class_enabled() - 指定クラスが v7 有効か +static inline int small_heap_v7_class_enabled(uint32_t class_idx) { + if (class_idx >= 8) return 0; + if (!small_heap_v7_enabled()) return 0; + uint32_t mask = small_heap_v7_class_mask(); + return (mask & (1u << class_idx)) ? 
1 : 0; +} + static inline void tiny_route_snapshot_init(void) { for (int i = 0; i < TINY_NUM_CLASSES; i++) { - // Phase v6-1: C6-only v6 route stub (highest priority) + // Phase v7-1: C6-only v7 route stub (highest priority) FREE_DISPATCH_STAT_INC(env_checks); // ENV check counter - if (small_heap_v6_class_enabled((uint32_t)i)) { + if (small_heap_v7_class_enabled((uint32_t)i)) { + g_tiny_route_class[i] = TINY_ROUTE_SMALL_HEAP_V7; + FREE_DISPATCH_STAT_INC(route_core_v7); + } else if (small_heap_v6_class_enabled((uint32_t)i)) { + // Phase v6-1: C6-only v6 route stub + FREE_DISPATCH_STAT_INC(env_checks); g_tiny_route_class[i] = TINY_ROUTE_SMALL_HEAP_V6; FREE_DISPATCH_STAT_INC(route_core_v6); } else if (i == 6 && small_heap_v5_class_enabled(6)) { @@ -119,7 +161,8 @@ static inline int tiny_route_is_heap_kind(tiny_route_kind_t route) { route == TINY_ROUTE_SMALL_HEAP_V3 || route == TINY_ROUTE_SMALL_HEAP_V4 || route == TINY_ROUTE_SMALL_HEAP_V5 || - route == TINY_ROUTE_SMALL_HEAP_V6; + route == TINY_ROUTE_SMALL_HEAP_V6 || + route == TINY_ROUTE_SMALL_HEAP_V7; } // C7 front が TinyHeap を使うか(Route snapshot 経由で判定) diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index b97d7169..239c1aea 100644 --- a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -45,6 +45,7 @@ #include "../box/smallobject_hotbox_v5_box.h" // SmallObject HotHeap v5 (C6-only route stub, Phase v5-1) #include "../box/smallobject_core_v6_box.h" // SmallObject Core v6 (Phase V6-HDR-2) #include "../box/smallobject_v6_env_box.h" // SmallObject v6 ENV control (Phase V6-HDR-2) +#include "../box/smallobject_hotbox_v7_box.h" // SmallObject HotBox v7 stub (Phase v7-1) #include "../box/tiny_c7_ultra_box.h" // C7 ULTRA stub (UF-1, delegates to v3) #include "../box/tiny_c6_ultra_free_box.h" // Phase 4-2: C6 ULTRA-free (free-only, C6-only) #include "../box/tiny_c5_ultra_free_box.h" // Phase 5-1/5-2: C5 ULTRA-free + alloc integration @@ -161,7 +162,7 @@ static inline void* 
malloc_tiny_fast(size_t size) { route != TINY_ROUTE_LEGACY && route != TINY_ROUTE_HEAP && route != TINY_ROUTE_HOTHEAP_V2 && route != TINY_ROUTE_SMALL_HEAP_V3 && route != TINY_ROUTE_SMALL_HEAP_V4 && route != TINY_ROUTE_SMALL_HEAP_V5 && - route != TINY_ROUTE_SMALL_HEAP_V6) { + route != TINY_ROUTE_SMALL_HEAP_V6 && route != TINY_ROUTE_SMALL_HEAP_V7) { // Phase ALLOC-GATE-OPT-1: カウンタ散布 (3. route_for_class 呼び出し) ALLOC_GATE_STAT_INC(route_for_class_calls); route = tiny_route_for_class((uint8_t)class_idx); @@ -223,6 +224,15 @@ static inline void* malloc_tiny_fast(size_t size) { } switch (route) { + case TINY_ROUTE_SMALL_HEAP_V7: { + // Phase v7-1: C6-only v7 stub (MID v3 fallback) + void* v7p = small_heap_alloc_fast_v7_stub(size, (uint8_t)class_idx); + if (TINY_HOT_LIKELY(v7p != NULL)) { + return v7p; + } + // v7 stub returned NULL -> fallback to legacy + break; + } case TINY_ROUTE_SMALL_HEAP_V6: { // Phase V6-HDR-2: Headerless alloc (ENV gated) if (small_v6_headerless_route_enabled((uint8_t)class_idx)) { @@ -386,6 +396,16 @@ static inline int free_tiny_fast(void* ptr) { tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx); + // Phase v7-2: v7 early-exit for C6 (v7 uses separate mmap segment, not SuperSlab) + // Must check BEFORE ss_fast_lookup since v7 pointers won't be in SuperSlab registry + if (class_idx == 6 && route == TINY_ROUTE_SMALL_HEAP_V7) { + if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) { + FREE_PATH_STAT_INC(smallheap_v7_fast); + return 1; + } + // v7 returned false (ptr not in v7 segment) -> fallback to legacy below + } + if ((class_idx == 7 || class_idx == 6) && route == TINY_ROUTE_SMALL_HEAP_V4 && tiny_ptr_fast_classify_v4_enabled() && @@ -464,6 +484,13 @@ static inline int free_tiny_fast(void* ptr) { // Same-thread + TinyHeap route → route-based free if (__builtin_expect(use_tiny_heap, 0)) { switch (route) { + case TINY_ROUTE_SMALL_HEAP_V7: { + // Phase v7-1: C6-only v7 stub (MID v3 fallback) + if 
(small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) { + return 1; + } + break; // fallthrough to legacy + } case TINY_ROUTE_SMALL_HEAP_V6: { // Phase V6-HDR-2: Headerless free (ENV gated) if (small_v6_headerless_route_enabled((uint8_t)class_idx)) { diff --git a/core/region_id_v6.c b/core/region_id_v6.c index 3c6a7178..2f280f11 100644 --- a/core/region_id_v6.c +++ b/core/region_id_v6.c @@ -374,6 +374,7 @@ const char* region_kind_to_string(region_kind_t kind) { case REGION_KIND_LARGE: return "LARGE"; case REGION_KIND_TINY_LEGACY: return "TINY_LEGACY"; case REGION_KIND_MID_V3: return "MID_V3"; + case REGION_KIND_SMALL_V7: return "SMALL_V7"; default: return "INVALID"; } } diff --git a/core/smallobject_cold_iface_v7.c b/core/smallobject_cold_iface_v7.c new file mode 100644 index 00000000..31eaebcf --- /dev/null +++ b/core/smallobject_cold_iface_v7.c @@ -0,0 +1,216 @@ +// smallobject_cold_iface_v7.c - SmallObject ColdIface v7 implementation (Phase v7-2) +// +// Purpose: +// - Page refill: acquire page from segment, carve freelist +// - Page retire: release empty page back to segment, publish stats + +#include +#include +#include +#include +#include +#include "box/smallobject_cold_iface_v7_box.h" +#include "box/smallsegment_v7_box.h" +#include "box/region_id_v6_box.h" + +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +// ============================================================================ +// V7 Stats (for debugging/OBSERVE) +// ============================================================================ + +static uint64_t g_v7_alloc_count = 0; +static uint64_t g_v7_free_count = 0; +static uint64_t g_v7_refill_count = 0; +static uint64_t g_v7_retire_count = 0; + +// Destructor to print stats at exit +__attribute__((destructor)) +static void small_v7_stats_report(void) { + if (g_v7_alloc_count > 0 || g_v7_free_count > 0) { + fprintf(stderr, "[SMALL_V7] alloc=%lu free=%lu refill=%lu 
retire=%lu\n", + (unsigned long)g_v7_alloc_count, (unsigned long)g_v7_free_count, + (unsigned long)g_v7_refill_count, (unsigned long)g_v7_retire_count); + } +} + +void small_v7_stat_alloc(void) { __sync_fetch_and_add(&g_v7_alloc_count, 1); } +void small_v7_stat_free(void) { __sync_fetch_and_add(&g_v7_free_count, 1); } +void small_v7_stat_refill(void) { __sync_fetch_and_add(&g_v7_refill_count, 1); } +void small_v7_stat_retire(void) { __sync_fetch_and_add(&g_v7_retire_count, 1); } + +// ============================================================================ +// TLS Heap Context +// ============================================================================ + +static __thread SmallHeapCtx_v7 g_small_heap_ctx_v7; +static __thread int g_small_heap_ctx_v7_init = 0; + +SmallHeapCtx_v7* small_heap_ctx_v7(void) { + if (unlikely(!g_small_heap_ctx_v7_init)) { + // Initialize context + memset(&g_small_heap_ctx_v7, 0, sizeof(g_small_heap_ctx_v7)); + + for (int i = 0; i < HAK_SMALL_NUM_CLASSES_V7; i++) { + g_small_heap_ctx_v7.cls[i].class_idx = (uint16_t)i; + g_small_heap_ctx_v7.cls[i].current = NULL; + g_small_heap_ctx_v7.cls[i].partial_head = NULL; + g_small_heap_ctx_v7.cls[i].full_head = NULL; + g_small_heap_ctx_v7.cls[i].local_freelist = NULL; + g_small_heap_ctx_v7.cls[i].local_freelist_count = 0; + g_small_heap_ctx_v7.cls[i].local_freelist_cap = 0; + g_small_heap_ctx_v7.cls[i].flags = 0; + } + + g_small_heap_ctx_v7.segment = NULL; + g_small_heap_ctx_v7_init = 1; + } + + return &g_small_heap_ctx_v7; +} + +// ============================================================================ +// Segment Acquisition (lazy) +// ============================================================================ + +static SmallSegment_v7* cold_v7_ensure_segment(SmallHeapCtx_v7* ctx) { + if (likely(ctx->segment != NULL)) { + return ctx->segment; + } + + // Acquire segment for this thread + uint32_t tid = (uint32_t)getpid(); + SmallSegment_v7* seg = small_segment_alloc_v7(tid); + + if 
(unlikely(!seg)) { + return NULL; + } + + ctx->segment = seg; + + // Register with RegionIdBox for ptr->region lookup + // This enables region_id_lookup_v6() to recognize v7 pointers + region_id_register_v6( + (void*)seg->base, + SMALL_SEGMENT_V7_SIZE, + REGION_KIND_SMALL_V7, + seg + ); + + return seg; +} + +// ============================================================================ +// Page Refill +// ============================================================================ + +SmallPageMeta_v7* small_cold_v7_refill_page(SmallHeapCtx_v7* ctx, uint32_t class_idx) { + if (unlikely(!ctx)) { + return NULL; + } + + // v7-2: Only C6 is supported + size_t block_size = small_v7_block_size(class_idx); + if (unlikely(block_size == 0)) { + return NULL; // Unsupported class + } + + // Ensure we have a segment + SmallSegment_v7* seg = cold_v7_ensure_segment(ctx); + if (unlikely(!seg)) { + return NULL; + } + + // Take a page from segment's free stack + SmallPageMeta_v7* page = small_segment_take_page_v7(seg, class_idx); + if (unlikely(!page)) { + return NULL; // No free pages + } + + // Calculate page base address + uintptr_t page_offset = (uintptr_t)page->page_idx * SMALL_PAGE_V7_SIZE; + uintptr_t page_base = seg->base + page_offset; + uint8_t* base = (uint8_t*)page_base; + + // Calculate capacity + uint32_t capacity = SMALL_PAGE_V7_SIZE / block_size; + page->capacity = capacity; + page->used = 0; + + // Build intrusive freelist (last to first for cache locality on pop) + // freelist points to BASE pointers (block start) + void* freelist = NULL; + for (int i = (int)capacity - 1; i >= 0; i--) { + uint8_t* block = base + ((size_t)i * block_size); + + // Link: block->next = freelist + void* next = freelist; + memcpy(block, &next, sizeof(void*)); + freelist = block; + } + + page->free_list = freelist; + + // Set as current page for this class + SmallClassHeap_v7* heap = &ctx->cls[class_idx]; + heap->current = page; + + return page; +} + +// 
============================================================================ +// Page Retire +// ============================================================================ + +void small_cold_v7_retire_page(SmallHeapCtx_v7* ctx, SmallPageMeta_v7* page) { + if (unlikely(!ctx || !page)) { + return; + } + + // Only retire pages that are fully empty + if (page->used != 0) { + return; + } + + SmallSegment_v7* seg = page->segment; + if (unlikely(!seg)) { + return; + } + + // Publish stats before retiring + SmallPageStatsV7 stats = { + .class_idx = (uint8_t)page->class_idx, + .reserved0 = 0, + .page_idx = page->page_idx, + .capacity = page->capacity, + .alloc_count = page->alloc_count, + .free_count = page->free_count, + .remote_free_count = page->remote_free_count, + .peak_live = page->peak_live, + .remote_burst_max = page->remote_burst_max, + .lifetime_ms = 0 // TODO: Calculate from epoch + }; + small_cold_v7_publish_stats(&stats); + + // Clear current if this was the current page + SmallClassHeap_v7* heap = &ctx->cls[page->class_idx]; + if (heap->current == page) { + heap->current = NULL; + } + + // Release page back to segment + small_segment_release_page_v7(seg, page); +} + +// ============================================================================ +// Stats Publishing (stub for now) +// ============================================================================ + +void small_cold_v7_publish_stats(const SmallPageStatsV7* stats) { + // TODO: Future integration with Learner/PolicyBox + // For now, just a no-op + (void)stats; +} diff --git a/core/smallsegment_v7.c b/core/smallsegment_v7.c new file mode 100644 index 00000000..4a230090 --- /dev/null +++ b/core/smallsegment_v7.c @@ -0,0 +1,271 @@ +// smallsegment_v7.c - SmallSegment v7 implementation (Phase v7-2: C6-only) +// +// Purpose: +// - 2MiB segment allocation with 2MiB alignment +// - Free page stack management +// - TLS segment access for fast path + +#include +#include +#include +#include +#include 
+#include "box/smallsegment_v7_box.h" +#include "box/region_id_v6_box.h" // For REGION_KIND_SMALL_V7 + +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +// Invalid page index sentinel +#define SMALL_V7_PAGE_INVALID 0xFFFFFFFFu + +// ============================================================================ +// TLS Segment Slot +// ============================================================================ + +typedef struct { + SmallSegment_v7 seg; + int in_use; + void* mmap_base; // Actual mmap base (for munmap) + size_t mmap_size; // Actual mmap size (for munmap) +} TLSSegmentSlot_v7; + +static __thread TLSSegmentSlot_v7 g_tls_segment_v7; + +// ============================================================================ +// Segment Allocation +// ============================================================================ + +SmallSegment_v7* small_segment_alloc_v7(uint32_t owner_tid) { + TLSSegmentSlot_v7* slot = &g_tls_segment_v7; + + if (slot->in_use) { + return &slot->seg; // Already allocated + } + + // Allocate 2MiB aligned segment via mmap + void* mem = mmap(NULL, SMALL_SEGMENT_V7_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED || mem == NULL) { + return NULL; + } + + uintptr_t addr = (uintptr_t)mem; + void* mmap_base = mem; + size_t mmap_size = SMALL_SEGMENT_V7_SIZE; + + // Check if we got 2MiB alignment + if ((addr & (SMALL_SEGMENT_V7_SIZE - 1)) != 0) { + // Not aligned - need to reallocate with overallocation + munmap(mem, SMALL_SEGMENT_V7_SIZE); + + // Allocate 4MiB to ensure we can find a 2MiB aligned region + size_t alloc_size = SMALL_SEGMENT_V7_SIZE * 2; + mem = mmap(NULL, alloc_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED || mem == NULL) { + return NULL; + } + + // Find the aligned address within this region + uintptr_t raw_addr = (uintptr_t)mem; + addr = (raw_addr + 
SMALL_SEGMENT_V7_SIZE - 1) & ~((uintptr_t)SMALL_SEGMENT_V7_SIZE - 1); + + // Verify the aligned address is within our mapping + if (addr < raw_addr || addr + SMALL_SEGMENT_V7_SIZE > raw_addr + alloc_size) { + munmap(mem, alloc_size); + return NULL; + } + + mmap_base = mem; + mmap_size = alloc_size; + } + + // Initialize segment structure in TLS + SmallSegment_v7* seg = &slot->seg; + slot->in_use = 1; + slot->mmap_base = mmap_base; + slot->mmap_size = mmap_size; + + seg->base = addr; + seg->num_pages = SMALL_PAGES_PER_SEG_V7; + seg->owner_tid = owner_tid; + seg->flags = 0; + seg->magic = SMALL_SEGMENT_V7_MAGIC; + seg->region_kind = REGION_KIND_SMALL_V7; + seg->segment_idx = 0; // TODO: RegionIdBox integration + + // Initialize free page stack (all pages are free) + // Build stack: page 0 -> page 1 -> ... -> page 31 -> INVALID + seg->free_page_head = 0; + seg->free_page_count = seg->num_pages; + + // Initialize all page metadata and build free list + for (uint32_t i = 0; i < seg->num_pages; i++) { + SmallPageMeta_v7* m = &seg->page_meta[i]; + + // Hot fields + m->free_list = NULL; + m->used = 0; + m->capacity = 0; // 0 = page is free/unused + m->class_idx = 0; + m->flags = 0; + m->page_idx = (uint16_t)i; + m->reserved0 = 0; + m->segment = seg; + m->segment_next_partial = NULL; + + // Cold fields + m->alloc_count = 0; + m->free_count = 0; + m->remote_free_count = 0; + m->live_current = 0; + m->peak_live = 0; + m->remote_burst_max = 0; + m->reserved1 = 0; + m->epoch_first_alloc = 0; + m->epoch_last_free = 0; + } + + // Build intrusive free page stack using reserved0 as next pointer + // (reusing reserved0 temporarily when page is free) + for (uint32_t i = 0; i < seg->num_pages - 1; i++) { + seg->page_meta[i].reserved0 = (uint16_t)(i + 1); + } + seg->page_meta[seg->num_pages - 1].reserved0 = (uint16_t)SMALL_V7_PAGE_INVALID; + + return seg; +} + +void small_segment_free_v7(SmallSegment_v7* seg) { + if (!seg) return; + if (seg->magic != SMALL_SEGMENT_V7_MAGIC) return; + + 
TLSSegmentSlot_v7* slot = &g_tls_segment_v7; + if (seg != &slot->seg) return; // Not our segment + + seg->magic = 0; // Invalidate + munmap(slot->mmap_base, slot->mmap_size); + + slot->in_use = 0; + slot->mmap_base = NULL; + slot->mmap_size = 0; +} + +// ============================================================================ +// Page Management (Free Page Stack) +// ============================================================================ + +SmallPageMeta_v7* small_segment_take_page_v7(SmallSegment_v7* seg, uint32_t class_idx) { + if (unlikely(!seg || !small_segment_v7_valid(seg))) { + return NULL; + } + + if (seg->free_page_count == 0 || seg->free_page_head >= seg->num_pages) { + return NULL; // No free pages + } + + // Pop from free page stack + uint32_t page_idx = seg->free_page_head; + SmallPageMeta_v7* page = &seg->page_meta[page_idx]; + + // Update stack head to next free page + seg->free_page_head = page->reserved0; // Next in stack + seg->free_page_count--; + + // Initialize page for use + page->class_idx = (uint16_t)class_idx; + page->flags = 0; + page->used = 0; + page->capacity = 0; // Will be set by ColdIface during carve + page->free_list = NULL; // Will be built by ColdIface + page->segment_next_partial = NULL; + page->reserved0 = 0; // Clear stack pointer + + // Reset stats for new allocation cycle + page->alloc_count = 0; + page->free_count = 0; + page->remote_free_count = 0; + page->live_current = 0; + page->peak_live = 0; + page->remote_burst_max = 0; + page->epoch_first_alloc = 0; + page->epoch_last_free = 0; + + return page; +} + +void small_segment_release_page_v7(SmallSegment_v7* seg, SmallPageMeta_v7* page) { + if (unlikely(!seg || !page)) return; + if (unlikely(!small_segment_v7_valid(seg))) return; + if (unlikely(page->segment != seg)) return; + if (unlikely(page->page_idx >= seg->num_pages)) return; + + // Reset page state + page->free_list = NULL; + page->used = 0; + page->capacity = 0; + page->class_idx = 0; + page->flags = 0; + 
page->segment_next_partial = NULL; + + // Push to free page stack + page->reserved0 = (uint16_t)seg->free_page_head; + seg->free_page_head = page->page_idx; + seg->free_page_count++; +} + +// ============================================================================ +// TLS Access +// ============================================================================ + +SmallSegment_v7* small_segment_v7_get_tls(void) { + TLSSegmentSlot_v7* slot = &g_tls_segment_v7; + + if (likely(slot->in_use)) { + return &slot->seg; + } + + return NULL; // Not initialized yet +} + +SmallPageMeta_v7* small_page_meta_v7_of(void* ptr) { + if (unlikely(!ptr)) { + return NULL; + } + + TLSSegmentSlot_v7* slot = &g_tls_segment_v7; + + if (unlikely(!slot->in_use)) { + return NULL; + } + + SmallSegment_v7* seg = &slot->seg; + + // Check if ptr is within our segment range + if (unlikely(!small_ptr_in_segment_v7(seg, ptr))) { + return NULL; + } + + // Calculate page index + uintptr_t addr = (uintptr_t)ptr; + size_t page_idx = SMALL_V7_PAGE_IDX(seg, addr); + if (unlikely(page_idx >= seg->num_pages)) { + return NULL; + } + + SmallPageMeta_v7* page = &seg->page_meta[page_idx]; + + // Validate that this page is actually in use + if (unlikely(!small_page_v7_valid(page))) { + return NULL; + } + + return page; +} diff --git a/docs/analysis/ENV_PROFILE_PRESETS.md b/docs/analysis/ENV_PROFILE_PRESETS.md index 15ec58bb..bb6c770a 100644 --- a/docs/analysis/ENV_PROFILE_PRESETS.md +++ b/docs/analysis/ENV_PROFILE_PRESETS.md @@ -36,6 +36,8 @@ HAKMEM_BENCH_MAX_SIZE=1024 - `HAKMEM_POOL_V2_ENABLED=0` - `HAKMEM_TINY_FRONT_V3_ENABLED=1` - `HAKMEM_TINY_FRONT_V3_LUT_ENABLED=1` +- `HAKMEM_MID_V3_ENABLED=1`(Phase MID-V3: 257-768B, C6 only) +- `HAKMEM_MID_V3_CLASSES=0x40`(C6 only, C7 は ULTRA に任せる) ### 任意オプション - stats を見たいとき: @@ -67,8 +69,10 @@ HAKMEM_THP=auto - C6-heavy mid/smallmid のベンチ用。 - C6 は v1 固定(C6 v3/v4/ULTRA は研究箱のみ)。Pool v2 OFF。Pool v1 flatten は bench 用に opt-in。 -### ENV(v1 基準線) +### ENV(v1 基準線 + MID v3) ```sh 
+HAKMEM_PROFILE=C6_HEAVY_LEGACY_POOLV1 +# または直接指定: HAKMEM_BENCH_MIN_SIZE=257 HAKMEM_BENCH_MAX_SIZE=768 HAKMEM_TINY_HEAP_PROFILE=C7_SAFE @@ -78,6 +82,8 @@ HAKMEM_SMALL_HEAP_V3_ENABLED=1 HAKMEM_SMALL_HEAP_V3_CLASSES=0x80 # C7-only v3, C6 v3 は OFF HAKMEM_POOL_V2_ENABLED=0 HAKMEM_POOL_V1_FLATTEN_ENABLED=0 # flatten は初回 OFF +HAKMEM_MID_V3_ENABLED=1 # Phase MID-V3: 257-768B, C6 only +HAKMEM_MID_V3_CLASSES=0x40 # C6 only (+11% on C6-heavy) ``` - mid_desc_lookup TLS キャッシュを試すときだけ: `HAKMEM_MID_DESC_CACHE_ENABLED=1` を上乗せ(デフォルトは OFF)。 diff --git a/docs/analysis/SMALLOBJECT_V7_DESIGN.md b/docs/analysis/SMALLOBJECT_V7_DESIGN.md new file mode 100644 index 00000000..c9e3aec9 --- /dev/null +++ b/docs/analysis/SMALLOBJECT_V7_DESIGN.md @@ -0,0 +1,311 @@ +## SmallObjectHeap v7 / HAKMEM v3 コア設計メモ(2025-12-11) + +このドキュメントは、ULTRA + MID v3 + V6 世代の上に新しく載せる +**SmallObjectHeap v7(= HAKMEM v3 small/mid コア)** の設計方針をまとめたものです。 +当面は設計・型スケルトンのみで、挙動は一切変更しません。 + +--- + +## 1. 位置づけと層構造(Box Theory) + +### 1-1. 既存世代のまとめ + +- L0: ULTRA lanes(現行) + - C4–C7 ULTRA。C7 は 2MiB Segment + 64KiB Page + TLS freelist(C7 ULTRA Box)。 + - Mixed / C7-only で十分な性能が出ており、**FROZEN(完成世代)** とみなす。 +- L1: HotBox v2 世代 + - Tiny front v3 + TinyHeap v1(小クラス)。 + - MID v3(257–768B の mid/smallmid を TLS heap で扱う)。 + - V6 C6-only headerless core(RegionId + Segment + TLS lane)の研究箱。 +- L2: Segment / Superslab / Warm / Remote +- L3: Policy/Learner + Stats + ENV(ACE/ELO/CAP 等) + +この世代では、各帯に特化した箱(ULTRA / MID v3 / V6)を積み上げることで +Mixed 16–1024B を ~30M → ~44M ops/s まで底上げしたが、 +small〜mid を一体で見る「共通の SmallObject コア」は存在しない。 + +### 1-2. 
v7 世代の狙い + +v7 は L1 に新しく追加する **SmallObjectHotBox_v7** として設計する: + +```text +Front (size→class→route LUT) + | + +-- L0: ULTRA lanes (C4–C7, FROZEN) + | + +-- L1: SmallObjectHotBox_v7 ← NEW small/mid コア + | + +-- L1': TinyHeap v1 / MID v3 / V6 (fallback/legacy) + | + +-- L2: SegmentBox_v7 / ColdIface_v7 + | + +-- L3: PolicyBox_v7 / RegionIdBox / PageStatsBox +``` + +目的: +- small(例: 16〜1KiB or 16〜2KiB)と mid の一部を **1 個の thread-local heap + segment** で扱う土台を作る。 +- ULTRA 世代(C4–C7)は L0 としてそのまま残す(C7 ULTRA は独立 box)。 +- headerless/v6 の実験で得た「RegionId + Segment + TLS lane + PageStats」の物理層パターンをコア側に反映する。 + +--- + +## 2. 型設計(SmallHeapCtx_v7 / SmallSegment_v7) + +v7 の基本構造は v6/V3 の経験を統合したものとする。 + +### 2-1. SmallPageMeta_v7 + +Hot path で頻繁に触るフィールドと Stats 用フィールドを分離する。 + +```c +// Hot line: alloc/free で触るフィールド +typedef struct SmallPageMeta_v7 { + // ---- hot fields (cache line 0 想定) ---- + void *free_list; // LIFO freelist: block -> next + uint32_t used; // 現在の使用スロット数 + uint32_t capacity; // この page にある block スロット数 + + uint16_t class_idx; // サイズクラス (C0..C?) + uint16_t flags; // HOT/PARTIAL/FULL/REMOTE_PENDING 等 + uint16_t page_idx; // Segment 内 index (0..N-1) + uint16_t reserved0; // アラインメント用 + + struct SmallSegment_v7 *segment; // Segment への back pointer + + // ---- cold fields (Stats/Policy, cache line 1〜) ---- + uint64_t alloc_count; // 累積 alloc 数 + uint64_t free_count; // 累積 free 数 + uint64_t remote_free_count; // 累積 remote free 数 + + uint16_t live_current; // 現在の live + uint16_t peak_live; // lifetime 最大 live + uint16_t remote_burst_max; // 一度の drain で吸い上げた remote の最大 + uint16_t reserved1; + + uint32_t epoch_first_alloc; // coarse epoch (L3 用) + uint32_t epoch_last_free; // 同上 +} SmallPageMeta_v7; +``` + +設計ポイント: +- Hot path (alloc/free) では `free_list / used / capacity / class_idx` だけを触る。 +- Stats/Learner は L2 retire 時に cold fields を `SmallPageStatsV7` にまとめて L3 に渡す。 +- `segment` を持たせて退役時の SegmentBox 更新を簡単にする(必要であれば将来削る)。 + +### 2-2. 
SmallClassHeap_v7 / SmallHeapCtx_v7 + +各クラスの現在/部分/満杯ページと、小さな TLS magazine を持つ。 + +```c +typedef struct SmallClassHeap_v7 { + SmallPageMeta_v7 *current; // いま alloc に使っている page + SmallPageMeta_v7 *partial_head; // まだ空きのあるページ + SmallPageMeta_v7 *full_head; // FULL 判定のページ(Cold 側に寄せる用) + + void *local_freelist; // オプション: mini-ULTRA (class ローカル TLS) + uint16_t local_freelist_count; + uint16_t local_freelist_cap; + + uint16_t class_idx; + uint16_t flags; // class 側のポリシーフラグ (ULTRA禁止など) +} SmallClassHeap_v7; + +#define HAK_SMALL_NUM_CLASSES_V7 /* 例: 16〜24 */ + +typedef struct SmallHeapCtx_v7 { + SmallClassHeap_v7 cls[HAK_SMALL_NUM_CLASSES_V7]; +} SmallHeapCtx_v7; +``` + +設計ポイント: +- v7 第1版では `local_freelist` は無効 (`cap=0`) にしておき、必要なクラスだけ Learner で有効化してもよい。 +- クラス数は最初は「small側(16〜1KiB or 2KiB)をカバーする程度」に抑え、 + mid を扱う `MidHeapCtx_v7` は別箱とする(後述)。 + +### 2-3. SmallSegment_v7 + +ULTRA の 2MiB/64KiB パターンをベースに、small v7 用 SegmentBox を定義する。 + +```c +#define SMALL_SEGMENT_SIZE_V7 (2u * 1024u * 1024u) // 2MiB +#define SMALL_PAGE_SIZE_V7 (64u * 1024u) // 64KiB +#define SMALL_PAGES_PER_SEG_V7 (SMALL_SEGMENT_SIZE_V7 / SMALL_PAGE_SIZE_V7) + +typedef struct SmallSegment_v7 { + uintptr_t base; // 実データ領域の先頭アドレス + uint32_t num_pages; // 実際に使うページ数 + uint32_t owner_tid; // 所有スレッド id + + uint32_t flags; // SEGMENT_IN_USE / RETIRED 等 + uint32_t region_kind; // REGION_SMALL_V7 / REGION_ULTRA / REGION_POOL 等 + uint32_t segment_idx; // RegionIdBox 上の index + uint32_t free_page_head; // free page stack head + uint32_t free_page_count; + + SmallPageMeta_v7 page_meta[SMALL_PAGES_PER_SEG_V7]; +} SmallSegment_v7; +``` + +設計ポイント: +- `ptr & ~(SEG_SIZE-1)` で Segment に直行し、 + `(ptr - base) >> PAGE_SHIFT` で `page_idx` を求めて `page_meta[page_idx]` に行ける O(1) 構造。 +- small/mid/pool で Segment geometry を変えたい場合も、API は共通で持てる(SegmentBox_v7 small / mid 用の 2 種類も可)。 + +--- + +## 3. RegionIdBox / header / class 判定方針(v7 世代) + +### 3-1. 
header の扱い + +v6 C6-only headerless 実験から: +- header 完全削除は「Region lookup コスト」で相殺されがちで、劇的な改善には繋がらないケースが多い。 +- ただし RegionId + Segment + page_meta.class_idx のパターンは + ptr→segment→page_meta→class を O(1) にする物理層として非常に有用。 + +v7 では次の方針とする: + +1. ヘッダは **薄く残す**(Fail-Fast/legacy/pool bridge/デバッグ用)。 +2. small/mid の fast path では、できるだけ header を触らない。 + - C7 ULTRA / 一部 hot クラス(将来の C6 ULTRA lane など)は完全 headerless も許可。 + - SmallHeapCtx_v7 の core は「carve/refill 時に 1 回だけ書く」程度に抑える。 +3. free 時の class 判定は: + - front/gate の hint(size→class / header) + RegionIdBox + page_meta.class_idx を併用。 + - v7 small path では、最終的には `page_meta.class_idx` を真とし、hint は OBSERVE 検証用とする。 + +### 3-2. RegionIdBox API + +small/mid/pool 共通の ptr 分類箱として RegionIdBox_v7 を定義する: + +```c +typedef enum { + REGION_UNKNOWN = 0, + REGION_SMALL_V7, + REGION_ULTRA, + REGION_MID_V7, + REGION_POOL_V3, + REGION_LARGE, +} region_kind_t; + +typedef struct RegionLookupResult_v7 { + region_kind_t kind; + union { + struct { + SmallSegment_v7 *segment; + uint16_t page_idx; + } small_v7; + struct { + void *segment; // C7 ULTRA / mid v7 / pool v3 等 + } other; + } u; +} RegionLookupResult_v7; + +static inline RegionLookupResult_v7 +region_id_lookup_v7(void *ptr); +``` + +small v7 free path(fast path): + +```c +RegionLookupResult_v7 lk = region_id_lookup_v7(ptr); +if (likely(lk.kind == REGION_SMALL_V7)) { + SmallPageMeta_v7* page = + &lk.u.small_v7.segment->page_meta[lk.u.small_v7.page_idx]; + uint16_t class_idx = page->class_idx; + small_heap_free_fast_v7(ctx, page, class_idx, ptr); +} else { + // ULTRA / MID / POOL / LEGACY へ bridge +} +``` + +移行モード: +- Phase OBSERVE: header-based class と page_meta.class_idx を比較して log/assert。挙動はまだ v2 世代のまま。 +- Phase FROZEN: small v7 管理ページでは header を見ず、RegionId + page_meta.class_idx だけで動かす。 + +--- + +## 4.
mid/pool との関係(SmallHeapCtx_v7 vs MidHeapCtx_v7) + +v7 世代では small と mid を同じ物理層(RegionIdBox + SegmentBox + PageStatsBox)に乗せつつ、 +HotBox は別箱に分けるのが現実的: + +```text +RegionIdBox / SegmentBox_v7 / PageStatsBox + | + +--> SmallHeapCtx_v7 (small: 16〜1KiB or 2KiB) + | + +--> MidHeapCtx_v7 (mid: 2〜16KiB or 2〜32KiB) + | + +--> PoolCtx_v3/v7 (さらに大きい / 特殊用途) +``` + +方針: +- **共通化するもの**: + - RegionIdBox(ptr→region_kind + segment/page_idx) + - Segment geometry API(small 用と mid 用に派生しても良い) + - PageStats 基本構造(class_idx / alloc/free/remote / live/peak など) +- **専用箱にするもの**: + - SmallHeapCtx_v7(small 特有の TLS/prefetch/クラス配置) + - MidHeapCtx_v7(mid 特有の page サイズ・クラス分割・remote ポリシー) + - PoolCtx_v3/v7(巨大オブジェクト/特殊用途) + +橋渡し: +- RegionIdBox で kind != REGION_SMALL_V7 を検出したときのみ、 + mid/pool の bridge_box(`small_mid_bridge_free()` 等)に渡す。 +- small core / mid core / pool core は互いを直接呼ばず、「bridge 1 箇所」で繋ぐ。 + +--- + +## 5. フェーズ分割(v7-0 / v7-1 / v7-2 の指針) + +いきなり small/mid 全体を v7 にするのではなく、C6-only small 帯から段階的に導入する。 + +### Phase v7-0: 型とインフラだけ追加(挙動一切変更なし) + +目的: +- struct と設計 doc だけ追加し、ビルドと Box 理論上の位置づけを固める。 + +タスク: +- `SmallPageMeta_v7` / `SmallClassHeap_v7` / `SmallHeapCtx_v7` / `SmallSegment_v7` struct をヘッダに追加。 +- RegionIdBox に `REGION_SMALL_V7` と `RegionLookupResult_v7` を追加(実装はまだダミーで OK)。 +- ENV: + - `HAKMEM_SMALL_HEAP_V7_ENABLED=0` + - `HAKMEM_SMALL_HEAP_V7_CLASSES=0x0` + - front/gate は v7 に一切 route しない。 + +### Phase v7-1: C6-only v7 stub(route だけ v7 に向ける) + +目的: +- front/gate・ENV・RegionIdBox の配線が壊れていないか確認する。 + +タスク: +- `TINY_ROUTE_SMALL_HEAP_V7` を route kind に追加。 +- プロファイル: C6-only v7 stub モード(`CLASSES=0x40` など)を追加。 +- `small_heap_alloc_fast_v7_stub(size, ci)` / `small_heap_free_fast_v7_stub(ptr, ci)` を実装し、 + 中身は即座に v2 世代(MID v3 / V6 / pool v1)にフォールバックするだけにする。 +- RegionIdBox は OBSERVE モードで `region_id_lookup_v7(ptr)` を呼んで統計取得のみ(挙動不変)。 + +### Phase v7-2: C6-only v7 本実装(small帯だけ) + +目的: +- C6-only の alloc/free を SmallHeapCtx_v7 + SmallSegment_v7 で本当に回し、 + C6-heavy / Mixed で v3/V6/v2 本線と比較する。 + +タスク: +- 
SegmentBox_v7 と ColdIface_v7 を実装し、C6 pages の refill/retire を Segment v7 経由にする。 +- `small_heap_alloc_fast_v7(size, ci)` / `small_heap_free_fast_v7(ptr, ci)` を実装: + - alloc: current→partial→cold_refill の順で page/freelist を消費。 + - free: RegionIdBox で small_v7 page を特定し、page_meta.free_list に push(必要時 retire)。 +- プロファイル: + - C6-only v7 ON(他クラスは ULTRA + MID v3 + V6 のまま)。 +- ベンチ: + - C6-heavy / Mixed で v7 vs MID v3 vs V6 vs v2 本線を測り、 + C6-only v7 の価値を評価(十分なら次の v7 拡張フェーズへ)。 + +--- + +## 6. まとめ + +- v2 世代(ULTRA + MID v3 + V6 C6-only)は、Box Theory に沿ってかなりやり切った世代とみなし、ここで一度締める。 +- v3 世代(SmallObjectHeap v7)は、「small〜mid を 1 個の SmallHeapCtx + Segment + RegionIdBox で扱う」第2章として設計する。 +- まずは C6-only small 帯から v7 を導入し、ULTRA/MID v3 を壊さない形で徐々に適用範囲を広げていく。 + diff --git a/docs/analysis/ULTRA_MID_V3_GENERATION_SUMMARY.md b/docs/analysis/ULTRA_MID_V3_GENERATION_SUMMARY.md new file mode 100644 index 00000000..b92be01d --- /dev/null +++ b/docs/analysis/ULTRA_MID_V3_GENERATION_SUMMARY.md @@ -0,0 +1,218 @@ +# ULTRA+MID v3 Generation Summary (2025-12-12) + +## 概要 + +この世代(2025-12-10~12-12)で、Tiny/ULTRA 層の完成と MID v3 の本線採用により、0-1024B 範囲の性能最適化が完了しました。 + +## 層別状態まとめ + +### 0-256B: Tiny/ULTRA(完成・凍結) + +**状態**: Production (frozen) +**性能**: Mixed 16-1024B = **43.9M ops/s** (baseline 30.6M → +43.5%) + +**完成内容**: +- C4-C7 ULTRA: 寄生型 TLS cache で legacy 経路を 49% → 4.8% に削減 +- v3 backend: alloc_current_hit=100%, free_retire=0.1% で堅牢に +- Dispatcher/gate snapshot: ENV/route を hot path から排除 +- C7 ULTRA refill: division → bit shift で +11% + +**設計原則**: +- Small object (C2-C7) = ULTRA 最適化済み(fast path/slow path 共に) +- v3 backend = ロジック完全最適化(残り 5% は header write/memcpy 等の内部コスト) +- 研究箱(v4/v5)は OFF で標準プロファイルに影響なし + +**詳細**: `PERF_EXEC_SUMMARY_ULTRA_PHASE_20251211.md` + +--- + +### 257-768B: MID v3(本線採用) + +**状態**: Production (mainline) +**性能**: C6-heavy = +11.1%, Mixed (257-768B) = **+19.8%** + +**完成内容**: +- MID-V3-0~5: 型定義、RegionIdBox 統合、alloc/free 実装 +- MID-V3-6: hakmem.c メイン経路統合(箱化モジュール化) +- Role separation: 257-768B 
専用(C7 は ULTRA に任せる) +- Profile 採用: C6_HEAVY_LEGACY_POOLV1 と MIXED_TINYV3_C7_SAFE でデフォルト ON + +**設計原則**: +- Lane = TLS cache, Page = authoritative freelist(二重管理回避) +- RegionIdBox: ptr→page_meta O(1) lookup(TLS cached binary search) +- Batch refill: 16 items/batch(デフォルト、ENV 調整可能) + +**ENV**: +```bash +HAKMEM_PROFILE=C6_HEAVY_LEGACY_POOLV1 # または MIXED_TINYV3_C7_SAFE +# Auto-enables: +# HAKMEM_MID_V3_ENABLED=1 +# HAKMEM_MID_V3_CLASSES=0x40 # C6 only +``` + +**詳細**: `MID_POOL_V3_DESIGN.md` + +--- + +### 769-1024B: C7 ULTRA(完成・凍結) + +**状態**: Production (frozen) +**性能**: Optimized for 1KB allocations (ULTRA 層に統合済み) + +**設計原則**: +- MID v3 から意図的に除外(C7 は ULTRA が最適) +- v4/v5/v6 研究箱も C7 には適用せず、ULTRA 経路を維持 + +--- + +### V6 C6-only Headerless(研究箱、V6-HDR-4 完成) + +**状態**: Research box (frozen at V6-HDR-4) +**性能**: Cost almost neutral / slight positive (~1-2%) + +**完成内容**: +- V6-HDR-0~3: 型定義、RegionIdBox 統合、alloc/free headerless 実装 +- V6-HDR-4: Cold refill/retire、sanity test、構造検証完了 +- 設計原則: 4層 Box Theory(L0 ULTRA / L1 TLS / L2 Segment / L3 Policy) + +**結論**: +- Headerless 設計の実証完了 +- コストほぼ相殺〜微プラス(1-2% 程度) +- 本線採用せず、研究箱として保持(将来の参考実装) + +**詳細**: `SMALL_V6_DESIGN.md` + +--- + +## 役割分担の完成形 + +``` +Size Range | Allocator | Status | Performance +---------------|---------------|---------------------|------------------ +0-256B | Tiny/ULTRA | Production (frozen) | +43.5% (Mixed) +257-768B | MID v3 | Production | +19.8% (257-768B) +769-1024B | C7 ULTRA | Production (frozen) | Optimized +1025B-52KB | Pool | Existing path | Stable +52KB-2MB | ACE | Existing path | Stable +2MB+ | Large mmap | Existing path | Stable +``` + +## 設計原則の統一 + +### Box Modularization Pattern + +全層で統一された箱化設計: +- **Type Box**: 型定義(`*_box.h`) +- **ENV Box**: 環境変数ゲート(`*_env_box.h`) +- **Implementation**: 実装(`*.c`) +- **Cold Iface**: L1/L2 境界インターフェース(`*_cold_iface*.h`) + +### RegionIdBox Integration + +全層で統合された ptr→meta lookup: +- Tiny/ULTRA: REGION_KIND_SMALL_V6 / REGION_KIND_C7_ULTRA +- MID v3: REGION_KIND_MID_V3 +- v6: 
REGION_KIND_SMALL_V6(headerless) +- TLS cached binary search で O(1) lookup + +### ENV-Controlled Routing + +研究箱は明示 opt-in: +- v4/v5: `HAKMEM_SMALL_HEAP_V[4|5]_ENABLED=1` + `HAKMEM_SMALL_HEAP_V[4|5]_CLASSES=0x??` +- v6: `HAKMEM_SMALL_HEAP_V6_ENABLED=1` + `HAKMEM_SMALL_HEAP_V6_CLASSES=0x40` +- MID v3: `HAKMEM_MID_V3_ENABLED=1` + `HAKMEM_MID_V3_CLASSES=0x40`(本線では Profile 経由で自動) + +--- + +## 本線プロファイル構成(2025-12-12 現在) + +### MIXED_TINYV3_C7_SAFE(標準 Mixed 16-1024B) + +```bash +HAKMEM_PROFILE=MIXED_TINYV3_C7_SAFE +# Auto-enables: +# - Tiny/ULTRA (C7_SAFE profile) +# - MID v3 (257-768B, C6 only) +# - SmallObject v3 (C7-only backend) +# - Tiny front v3 + LUT + fast classify +# - v4/v5/v6 OFF (研究箱) +``` + +**期待性能**: 42-44M ops/s (Mixed 16-1024B, 1M iter, ws=400) + +### C6_HEAVY_LEGACY_POOLV1(C6-heavy 257-768B) + +```bash +HAKMEM_PROFILE=C6_HEAVY_LEGACY_POOLV1 +# Auto-enables: +# - Tiny/ULTRA (C7_SAFE profile) +# - MID v3 (257-768B, C6 only) +# - SmallObject v3 (C7-only backend) +# - Pool v1 (flatten OFF) +# - v4/v5/v6 OFF (研究箱) +``` + +**期待性能**: 22-23M ops/s (C6-heavy 257-768B, 1M iter, ws=400) + +--- + +## 次世代テーマ候補 + +### Option A: 小さめテーマ(+数%狙い) + +**so_alloc/so_free 枝削り**: +- 現状の v3 backend で残っている条件分岐を削減 +- alloc_current_hit=100% だが、内部に header write/check が残存 +- 狙い: +1-3% 程度の微改善 + +**メリット**: 安全、既存設計の洗練 +**デメリット**: 大きなジャンプはない + +### Option B: 次世代テーマ(大きめ変更) + +**MID/Pool v3 拡張**: +- 現在 257-768B (C6 only) を C5 (128B) や C4 (64B) に拡張 +- 狙い: 128-256B 範囲の改善(現在 Tiny/ULTRA が最適だが、さらなる実験) + +**C2/C3 ULTRA 実験**: +- 16-64B 範囲に ULTRA 適用(現在 legacy 経路が主流) +- 狙い: 小サイズでも ULTRA の恩恵を受ける + +**メリット**: 新しい最適化領域の開拓 +**デメリット**: 既存の安定領域に影響する可能性 + +--- + +## まとめ + +### この世代で達成したこと + +1. **Tiny/ULTRA 完成**: 0-256B で +43.5% (Mixed)、設計凍結 +2. **MID v3 本線採用**: 257-768B で +19.8%、Profile デフォルト ON +3. **C7 ULTRA 凍結**: 769-1024B で最適化完了 +4. 
**v6 研究箱完成**: C6-only headerless 実証(V6-HDR-4) + +### 設計的な完成度 + +- **役割分担**: 各サイズ範囲に最適化された allocator が明確 +- **Box 化**: 全層で統一された箱化設計 +- **RegionIdBox**: 全層で統合された ptr→meta lookup +- **ENV 制御**: 研究箱は明示 opt-in、本線は Profile 経由で安定 + +### この世代の「形」 + +0-1024B の性能最適化が完了し、役割分担が明確になりました。この構成を「固定」として、次世代テーマは: +- 既存設計の洗練(枝削り) +- 新しい最適化領域の開拓(拡張実験) + +のいずれかから選択できます。 + +--- + +**作成日**: 2025-12-12 +**関連ドキュメント**: +- `PERF_EXEC_SUMMARY_ULTRA_PHASE_20251211.md` +- `MID_POOL_V3_DESIGN.md` +- `SMALL_V6_DESIGN.md` +- `ENV_PROFILE_PRESETS.md` +- `CURRENT_TASK.md` diff --git a/hakmem.d b/hakmem.d index f04c51b3..af412cff 100644 --- a/hakmem.d +++ b/hakmem.d @@ -27,39 +27,41 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/hakmem_batch.h core/hakmem_evo.h core/hakmem_debug.h \ core/hakmem_prof.h core/hakmem_syscall.h core/hakmem_ace_controller.h \ core/hakmem_ace_metrics.h core/hakmem_ace_ucb1.h \ - core/box/bench_fast_box.h core/ptr_trace.h core/hakmem_trace_master.h \ - core/hakmem_stats_master.h core/box/hak_kpi_util.inc.h \ - core/box/hak_core_init.inc.h core/hakmem_phase7_config.h \ - core/box/libm_reloc_guard_box.h core/box/init_bench_preset_box.h \ - core/box/init_diag_box.h core/box/init_env_box.h \ - core/box/../tiny_destructors.h core/box/../hakmem_tiny.h \ + core/box/bench_fast_box.h core/box/mid_hotbox_v3_box.h \ + core/box/tiny_geometry_box.h \ + core/box/../hakmem_tiny_superslab_internal.h \ + core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_superslab.h \ + core/box/../box/ss_hot_cold_box.h \ + core/box/../box/../superslab/superslab_types.h \ + core/box/../box/ss_allocation_box.h core/hakmem_tiny_superslab.h \ + core/box/../hakmem_debug_master.h core/box/../hakmem_tiny.h \ + core/box/../hakmem_tiny_config.h core/box/../hakmem_shared_pool.h \ + core/box/../superslab/superslab_types.h core/box/../hakmem_internal.h \ + core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \ + core/box/../box/slab_freelist_atomic.h \ + 
core/box/../superslab/superslab_inline.h \ + core/box/mid_hotbox_v3_env_box.h core/ptr_trace.h \ + core/hakmem_trace_master.h core/hakmem_stats_master.h \ + core/box/hak_kpi_util.inc.h core/box/hak_core_init.inc.h \ + core/hakmem_phase7_config.h core/box/libm_reloc_guard_box.h \ + core/box/init_bench_preset_box.h core/box/init_diag_box.h \ + core/box/init_env_box.h core/box/../tiny_destructors.h \ core/box/ss_hot_prewarm_box.h core/box/hak_alloc_api.inc.h \ core/box/../hakmem_tiny.h core/box/../hakmem_pool.h \ core/box/../hakmem_smallmid.h core/box/tiny_heap_env_box.h \ core/box/c7_hotpath_env_box.h core/box/tiny_heap_box.h \ - core/box/../hakmem_tiny_superslab.h \ - core/box/../superslab/superslab_inline.h core/box/../tiny_tls.h \ - core/box/../hakmem_tiny_superslab.h core/box/../tiny_box_geometry.h \ - core/box/tiny_stats_box.h core/box/tiny_c7_hotbox.h \ - core/box/mid_large_config_box.h core/box/../hakmem_config.h \ - core/box/../hakmem_features.h core/box/hak_free_api.inc.h \ - core/hakmem_tiny_superslab.h core/box/../hakmem_trace_master.h \ + core/box/../hakmem_tiny_superslab.h core/box/../tiny_tls.h \ + core/box/../tiny_box_geometry.h core/box/tiny_stats_box.h \ + core/box/tiny_c7_hotbox.h core/box/mid_large_config_box.h \ + core/box/../hakmem_config.h core/box/../hakmem_features.h \ + core/box/hak_free_api.inc.h core/box/../hakmem_trace_master.h \ core/box/front_gate_v2.h core/box/external_guard_box.h \ core/box/../hakmem_stats_master.h core/box/ss_slab_meta_box.h \ core/box/../superslab/superslab_types.h core/box/slab_freelist_atomic.h \ core/box/fg_tiny_gate_box.h core/box/tiny_free_gate_box.h \ core/box/ptr_type_box.h core/box/ptr_conversion_box.h \ - core/box/tiny_ptr_bridge_box.h \ - core/box/../hakmem_tiny_superslab_internal.h \ - core/box/../hakmem_build_flags.h core/box/../box/ss_hot_cold_box.h \ - core/box/../box/../superslab/superslab_types.h \ - core/box/../box/ss_allocation_box.h core/box/../hakmem_debug_master.h \ - 
core/box/../hakmem_tiny_config.h core/box/../hakmem_shared_pool.h \ - core/box/../superslab/superslab_types.h core/box/../hakmem_internal.h \ - core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \ - core/box/../box/slab_freelist_atomic.h \ - core/box/../tiny_free_fast_v2.inc.h core/box/../box/tls_sll_box.h \ - core/box/../box/../hakmem_internal.h \ + core/box/tiny_ptr_bridge_box.h core/box/../tiny_free_fast_v2.inc.h \ + core/box/../box/tls_sll_box.h core/box/../box/../hakmem_internal.h \ core/box/../box/../hakmem_tiny_config.h \ core/box/../box/../hakmem_build_flags.h \ core/box/../box/../hakmem_debug_master.h \ @@ -80,7 +82,8 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/../superslab/superslab_inline.h \ core/box/../box/ss_slab_meta_box.h core/box/../box/free_remote_box.h \ core/hakmem_tiny_integrity.h core/box/../box/ptr_conversion_box.h \ - core/box/free_dispatch_stats_box.h core/box/hak_wrappers.inc.h \ + core/box/free_dispatch_stats_box.h core/box/region_id_v6_box.h \ + core/box/smallsegment_v6_box.h core/box/hak_wrappers.inc.h \ core/box/front_gate_classifier.h core/box/../front/malloc_tiny_fast.h \ core/box/../front/../hakmem_build_flags.h \ core/box/../front/../hakmem_tiny_config.h \ @@ -100,8 +103,6 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/../front/../box/tiny_hotheap_v2_box.h \ core/box/../front/../box/smallobject_hotbox_v3_box.h \ core/box/../front/../box/tiny_geometry_box.h \ - core/box/../front/../box/../hakmem_tiny_superslab_internal.h \ - core/box/../front/../box/../superslab/superslab_inline.h \ core/box/../front/../box/smallobject_hotbox_v3_env_box.h \ core/box/../front/../box/smallobject_hotbox_v4_box.h \ core/box/../front/../box/smallobject_hotbox_v5_box.h \ @@ -111,6 +112,10 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/../front/../box/free_dispatch_stats_box.h \ core/box/../front/../box/smallobject_hotbox_v4_env_box.h \ 
core/box/../front/../box/smallobject_v5_env_box.h \ + core/box/../front/../box/smallobject_hotbox_v7_box.h \ + core/box/../front/../box/smallsegment_v7_box.h \ + core/box/../front/../box/smallobject_cold_iface_v7_box.h \ + core/box/../front/../box/region_id_v6_box.h \ core/box/../front/../box/tiny_c7_ultra_box.h \ core/box/../front/../box/tiny_c7_ultra_segment_box.h \ core/box/../front/../box/tiny_c6_ultra_free_box.h \ @@ -193,6 +198,26 @@ core/hakmem_ace_controller.h: core/hakmem_ace_metrics.h: core/hakmem_ace_ucb1.h: core/box/bench_fast_box.h: +core/box/mid_hotbox_v3_box.h: +core/box/tiny_geometry_box.h: +core/box/../hakmem_tiny_superslab_internal.h: +core/box/../hakmem_build_flags.h: +core/box/../hakmem_tiny_superslab.h: +core/box/../box/ss_hot_cold_box.h: +core/box/../box/../superslab/superslab_types.h: +core/box/../box/ss_allocation_box.h: +core/hakmem_tiny_superslab.h: +core/box/../hakmem_debug_master.h: +core/box/../hakmem_tiny.h: +core/box/../hakmem_tiny_config.h: +core/box/../hakmem_shared_pool.h: +core/box/../superslab/superslab_types.h: +core/box/../hakmem_internal.h: +core/box/../tiny_region_id.h: +core/box/../hakmem_tiny_integrity.h: +core/box/../box/slab_freelist_atomic.h: +core/box/../superslab/superslab_inline.h: +core/box/mid_hotbox_v3_env_box.h: core/ptr_trace.h: core/hakmem_trace_master.h: core/hakmem_stats_master.h: @@ -204,7 +229,6 @@ core/box/init_bench_preset_box.h: core/box/init_diag_box.h: core/box/init_env_box.h: core/box/../tiny_destructors.h: -core/box/../hakmem_tiny.h: core/box/ss_hot_prewarm_box.h: core/box/hak_alloc_api.inc.h: core/box/../hakmem_tiny.h: @@ -214,9 +238,7 @@ core/box/tiny_heap_env_box.h: core/box/c7_hotpath_env_box.h: core/box/tiny_heap_box.h: core/box/../hakmem_tiny_superslab.h: -core/box/../superslab/superslab_inline.h: core/box/../tiny_tls.h: -core/box/../hakmem_tiny_superslab.h: core/box/../tiny_box_geometry.h: core/box/tiny_stats_box.h: core/box/tiny_c7_hotbox.h: @@ -224,7 +246,6 @@ core/box/mid_large_config_box.h: 
core/box/../hakmem_config.h: core/box/../hakmem_features.h: core/box/hak_free_api.inc.h: -core/hakmem_tiny_superslab.h: core/box/../hakmem_trace_master.h: core/box/front_gate_v2.h: core/box/external_guard_box.h: @@ -237,19 +258,6 @@ core/box/tiny_free_gate_box.h: core/box/ptr_type_box.h: core/box/ptr_conversion_box.h: core/box/tiny_ptr_bridge_box.h: -core/box/../hakmem_tiny_superslab_internal.h: -core/box/../hakmem_build_flags.h: -core/box/../box/ss_hot_cold_box.h: -core/box/../box/../superslab/superslab_types.h: -core/box/../box/ss_allocation_box.h: -core/box/../hakmem_debug_master.h: -core/box/../hakmem_tiny_config.h: -core/box/../hakmem_shared_pool.h: -core/box/../superslab/superslab_types.h: -core/box/../hakmem_internal.h: -core/box/../tiny_region_id.h: -core/box/../hakmem_tiny_integrity.h: -core/box/../box/slab_freelist_atomic.h: core/box/../tiny_free_fast_v2.inc.h: core/box/../box/tls_sll_box.h: core/box/../box/../hakmem_internal.h: @@ -284,6 +292,8 @@ core/box/../box/free_remote_box.h: core/hakmem_tiny_integrity.h: core/box/../box/ptr_conversion_box.h: core/box/free_dispatch_stats_box.h: +core/box/region_id_v6_box.h: +core/box/smallsegment_v6_box.h: core/box/hak_wrappers.inc.h: core/box/front_gate_classifier.h: core/box/../front/malloc_tiny_fast.h: @@ -306,8 +316,6 @@ core/box/../front/../box/tiny_front_cold_box.h: core/box/../front/../box/tiny_hotheap_v2_box.h: core/box/../front/../box/smallobject_hotbox_v3_box.h: core/box/../front/../box/tiny_geometry_box.h: -core/box/../front/../box/../hakmem_tiny_superslab_internal.h: -core/box/../front/../box/../superslab/superslab_inline.h: core/box/../front/../box/smallobject_hotbox_v3_env_box.h: core/box/../front/../box/smallobject_hotbox_v4_box.h: core/box/../front/../box/smallobject_hotbox_v5_box.h: @@ -317,6 +325,10 @@ core/box/../front/../box/tiny_route_env_box.h: core/box/../front/../box/free_dispatch_stats_box.h: core/box/../front/../box/smallobject_hotbox_v4_env_box.h: 
core/box/../front/../box/smallobject_v5_env_box.h: +core/box/../front/../box/smallobject_hotbox_v7_box.h: +core/box/../front/../box/smallsegment_v7_box.h: +core/box/../front/../box/smallobject_cold_iface_v7_box.h: +core/box/../front/../box/region_id_v6_box.h: core/box/../front/../box/tiny_c7_ultra_box.h: core/box/../front/../box/tiny_c7_ultra_segment_box.h: core/box/../front/../box/tiny_c6_ultra_free_box.h: