diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 1a12b91e..17bbd7fb 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -1,5 +1,21 @@ # 本線タスク(現在) +## 更新メモ(2025-12-14 Phase 6 FRONT-FASTLANE-1) + +### Phase 6 FRONT-FASTLANE-1: Front FastLane(Layer Collapse)— ✅ GO / 本線昇格 + +結果: Mixed 10-run で **+11.13%**(HAKMEM史上最大級の改善)。Fail-Fast/境界1箇所を維持したまま “入口固定費” を大幅削減。 + +- A/B 結果: `docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md` +- 実装レポート: `docs/analysis/PHASE6_FRONT_FASTLANE_1_IMPLEMENTATION_REPORT.md` +- 設計: `docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md` +- 指示書(昇格/次): `docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md` +- 外部回答(記録): `PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md` + +運用ルール: +- A/B は **同一バイナリで ENV トグル**(削除/追加で別バイナリ比較にしない) +- Mixed 10-run は `scripts/run_mixed_10_cleanenv.sh` 基準(ENV 漏れ防止) + ## 更新メモ(2025-12-14 Phase 5 E5-3 Analysis - Strategic Pivot) ### Phase 5 E5-3: Candidate Analysis & Strategic Recommendations ⚠️ DEFER (2025-12-14) @@ -201,6 +217,9 @@ - `docs/analysis/PHASE5_E7_FROZEN_BOX_PRUNE_NEXT_INSTRUCTIONS.md` - `docs/analysis/PHASE5_E7_FROZEN_BOX_PRUNE_AB_TEST_RESULTS.md` - `PHASE_ML2_CHATGPT_QUESTIONNAIRE_FASTLANE.md` + - `PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md` + - `docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md` + - `docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md` --- diff --git a/Makefile b/Makefile index bb15e030..2571cdbf 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o 
hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o 
core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o 
core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o OBJS = $(OBJS_BASE) # Shared library @@ -250,7 +250,7 @@ endif # Benchmark 
targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o 
core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o 
hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o 
core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -427,7 +427,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o 
core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o 
hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o 
hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md b/PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md new file mode 100644 index 00000000..f11aa1ec --- /dev/null +++ b/PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md @@ -0,0 +1,272 @@ +# PHASE ML2: 外部LLM回答(Front FastLane / Layer Collapse) + +## 0. 位置づけ + +- 本ドキュメントは、`PHASE_ML2_CHATGPT_QUESTIONNAIRE_FASTLANE.md` への外部レビュー回答を記録する。 +- 次フェーズの設計/実装は、本回答を踏まえつつ **hakmem 側の既存 Box/ENV/命名**に合わせて落とし込む。 + +## 1. 外部回答の要点(抜粋) + +### Q1: 優先順位 + +1. **A) Front FastLane(front層の Layer Collapse)** +2. **B) ptr→meta O(1) を mid/pool に波及(segment/mask)** +3. 
**C) 触るメモリ削減(geometry/metadata)** + +理由: 直近の学び(重複排除/ENV・TLS読み回数削減が勝ち筋、diet/別バイナリ比較は壊れやすい)と整合するため。 + +### 推奨案: A) Front FastLane の骨子 + +- wrapper→gate→policy→route を **Hot側で1箱に畳む** +- Cold側へ落ちるのは **単一フォールバック**(境界1箇所) +- “分岐形”より **分岐数/命令数/断片化**の削減を優先(Zen で branch miss は飽和しやすい) +- A/B は **同一バイナリで ENV トグル**(削除/追加で別バイナリ比較にしない) + +### GO/NO-GO(提案) + +- Mixed 10-run mean: + - GO: **+1.0% 以上** + - NEUTRAL: **±1.0%**(freeze) + - NO-GO: **-1.0% 以下**(rollback/freeze) + +### “mimalloc に追いつく”の現実ライン(提案) + +- 多層構造(wrapper→gate→policy→route→handler)を維持したままだと概ね **mimalloc の 50–65%**が現実的。 +- **70%+**を狙うなら、Front の collapse(A)に加えて、free側 ptr→meta を O(1) 化(B)がほぼ必須。 + +## 2. 外部回答(原文) + +以下は外部LLMの回答本文(チャット貼り付け)を、そのまま保持する。 + +--- + +はい、いま **hakmem_private** 側の資料は参照できています。少なくともご指定の **`AGENTS.md`(Box Theory ルール)**、および Phase5 の A/B の学び(例:別バイナリ比較が壊れる件、Mixed 10-run 手順)を repo から確認できました。 + +以下、**PHASE ML2 の指定フォーマット**(1枚サマリ→Box図→API一覧→A/B手順→リスク対策)で返します。 + +--- + +## 1. 1枚サマリ + +* **Q1(優先順位)結論** + 1位 **A) Front FastLane(front層の Layer Collapse)** + 2位 **B) ptr→meta O(1) を mid/pool に波及(segment/mask)** + 3位 **C) 触るメモリ削減(geometry/metadata)** + 理由は「ROI/リスク/工数」の積で、いまの学び(“境界の冗長排除”が勝ち筋、dietは別バイナリ比較で壊れる)に最も整合するのが A。 + +* **推奨案(A: Front FastLane)の狙い** + wrapper→gate→policy→route の “細い箱列” を **Hot側で1箱に畳み**、**Cold側へ落ちるのは単一フォールバック**(境界1箇所)にする。 + “分岐形”改善よりも **分岐数/命令数/コード断片化**を減らす方向(Zenでmis予測は飽和しやすい)に寄せる。 + +* **Box Theory 整合** + + * 箱=責務分離、境界1箇所、ENVで戻せる、最小可視化、Fail-fast を保持(AGENTSの5原則に沿う)。 + * A/B は **同一バイナリでENVトグル**(別バイナリ比較は LTO/配置差に負ける)。 + +* **Q5(mimallocに追いつく現実ライン)** + + * wrapper→gate→policy→route→handler を “階層のまま”維持すると、上限は概ね **mimallocの50–65%** が現実的(あなたの現状 0.4台→0.5台を狙うのは十分射程)。 + * **70%+** を狙うには、少なくとも + + 1. front層の collapse(A)に加えて、 + 2. free側 ptr→meta を(mid/pool含め)O(1) に寄せる(B) + がほぼ必須(“直TLSバケット”に近い世界は meta 到達が勝負なので)。 + +--- + +## 2. 
Box 図(ASCII) + +推奨:**A) Front FastLane**(Layer Collapse)。「概念上の箱」は維持しつつ、**Hotの入口だけ**を1本化します。 + +``` + (ENV: HAKMEM_FRONT_FASTLANE=0/1) + +----------------+ + | L0: EnvGateBox | + +----------------+ + | + v ++------------------+ +-------------------------+ +| malloc/free shim | --> | L1: FrontFastLaneBox | +| (very thin) | | - size->class->route | ++------------------+ | (1 LUT + 1 LUT) | + | - try alloc/free fast | + | - NO stats/NO learning | + +-------------------------+ + | \ + success --->| \ fail-fast / unknown + v v + +--------------------------+ +-------------------------+ + | L1a: Hot Handlers | | L2: ColdFallbackIface | + | - Tiny/ULTRA/MID hot | | - cold_alloc(size,ci) | + | - per-thread freelists | | - cold_free(ptr) | + +--------------------------+ +-------------------------+ + | + v + +-----------------------------+ + | L3: Cold/Safe/Observable | + | Superslab/Tier/Guard/Stats | + | Policy/Learner snapshot swap| + +-----------------------------+ +``` + +**“変換点は1箇所”**= `FrontFastLaneBox -> ColdFallbackIface` の **try→fallback** 接点に集約します(allocもfreeも同じ思想)。 + +--- + +## 3. API 一覧(推奨案A) + +### 3.1 L0: EnvGateBox + +目的:**同一バイナリ**で A/B、default OFF。 + +```c +// core/box/front_fastlane_env_box.h +static inline bool hak_front_fastlane_enabled(void); // ENV: HAKMEM_FRONT_FASTLANE=0/1 (default 0) +static inline uint32_t hak_front_fastlane_class_mask(void); // optional: which classes are handled +``` + +### 3.2 L1: FrontFastLaneBox(入口の collapse) + +目的:wrapper/gate/policy/route を “この箱の内部” に畳む(呼び出し側から見ると1箱)。 + +```c +// core/box/front_fastlane_box.h + +typedef struct HakFastlaneSnapshot { + // “Hot は snapshot を読むだけ”を守る + uint16_t max_fast_size; // e.g. 
1024 + uint32_t class_mask; // which classes use fastlane + const uint8_t* size_to_class_lut;// pointer to global LUT (no copy) + const uint8_t* class_to_route_lut;// route-kind per class + // optional: pointers to hot handler vtables (avoid re-reading globals) +} HakFastlaneSnapshot; + +static inline const HakFastlaneSnapshot* hak_fastlane_snapshot(void); + +// returns NULL => must fallback via ColdFallbackIface (single boundary) +static inline void* hak_fastlane_try_alloc(size_t size, const HakFastlaneSnapshot* s); + +// returns true => handled; false => must fallback +static inline bool hak_fastlane_try_free(void* p, const HakFastlaneSnapshot* s); +``` + +### 3.3 L2: ColdFallbackIface(境界1箇所) + +目的:HotからColdへ落ちる “唯一の穴”。 + +```c +// core/box/front_fastlane_cold_iface_box.h + +void* hak_cold_alloc_fallback(size_t size, uint32_t class_idx, uint32_t route_kind); +void hak_cold_free_fallback(void* p); +``` + +> ポイント:Hot側は「失敗したらこの2関数に落とす」以外の出口を持たない(境界1箇所を強制)。 + +--- + +## 4. A/B 手順と閾値 + +### 4.1 原則(必読の前提の再確認) + +* **同一バイナリでENVトグル**が原則(別バイナリ比較は LTO/配置差で壊れる)。 +* perf の self% 追跡は有効だが、**頻度と二次効果**を外すとROIを誤る(“5%ルール”などの運用指針が repo にある)。 +* cleanenv の Mixed 10-run を回す(export済みENVが残ると比較が壊れる)。スクリプトが既にある。 + +### 4.2 Phase 0〜2(小パッチ順) + +**Phase 0(完全OFFで導入:安全)** + +* 追加:`front_fastlane_env_box.h` / `front_fastlane_box.h` / `front_fastlane_cold_iface_box.h` +* 既存経路は一切変えず、`if (hak_front_fastlane_enabled())` を wrapper に1箇所だけ置く。 +* default: `HAKMEM_FRONT_FASTLANE=0` + +**Phase 1(allocのみ fastlane、対象クラス限定)** + +* `hak_fastlane_try_alloc()` を実装(size→class→route の collapse)。 +* まずは **“Tiny内で完結するクラスだけ”** に限定(class_maskで制御)。 +* fail は必ず `hak_cold_alloc_fallback()` に落とす(境界1箇所)。 + +**Phase 2(freeも fastlane、ただし Fail-fast 強め)** + +* `hak_fastlane_try_free()` 実装。 +* “判定が曖昧”なものは全部 fallback(性能より安全優先で開始)。 + +### 4.3 A/B コマンド(Mixed 10-run) + +repo に **cleanenv 10-run** スクリプトがあるので、それを基準にします。 + +**A: FastLane OFF(baseline)** + +```sh +HAKMEM_FRONT_FASTLANE=0 scripts/run_mixed_10_cleanenv.sh +``` + +**B: 
FastLane ON** + +```sh +HAKMEM_FRONT_FASTLANE=1 scripts/run_mixed_10_cleanenv.sh +``` + +**任意(perf stat:差分が出たら原因を見る)** + +```sh +HAKMEM_FRONT_FASTLANE=0 perf stat -e cycles,instructions,branches,branch-misses,cache-misses -- \ + ./bench_random_mixed_hakmem 20000000 400 1 + +HAKMEM_FRONT_FASTLANE=1 perf stat -e cycles,instructions,branches,branch-misses,cache-misses -- \ + ./bench_random_mixed_hakmem 20000000 400 1 +``` + +### 4.4 GO/NO-GO 閾値(運用) + +repo の運用指針に合わせ、まずは以下を推奨します(Phase5の方針と整合)。 + +* **GO**: Mixed 10-run mean **+1.0% 以上** +* **NEUTRAL**: **±1.0%** → freeze(research箱として保持) +* **NO-GO**: mean **-1.0% 以下** → rollbackして freeze + +> 分岐形だけの調整で -1.71% を踏んだ例があるので、「branch-miss率が下がった」だけでは GO にしない(branch数/コードサイズ増の方が効く)。 + +--- + +## 5. リスクと対策(戻せる手段込み) + +### リスク1:FastLaneが “二重分岐” を生み、コードが太って逆効果 + +* **症状**:branch-miss率は微改善でも、branch数増・I-cache圧でスループットが落ちる(既に同型の罠が観測されている)。 +* **対策**: + * FastLane 入口は **「1回だけ if」**。以降は **直線 + switch/jumptable** に寄せる。 + * fallback は `__attribute__((cold))`(または別TU)に追い出し、hot 側に条件分岐を増やさない。 + * “snapshot enabled/disabled” みたいな二系統を FastLane 内で抱えない(L0で決め打ち)。 + +### リスク2:別バイナリ比較に引きずられて判断を誤る(diet最適化の罠) + +* **症状**:箱を削ったら速くなるはずが遅い/速い、が **リンク配置差**で反転。 +* **対策**: + * “削除”ではなく **同一バイナリ内での無効化**(ENV=0)を常態化。 + * `scripts/run_mixed_10_cleanenv.sh` を基準にし、export環境汚染を排除。 + +### リスク3:Fail-fast 条件が緩く、silent corruption に寄る + +* **対策(Q3の要求に対応)**: + * **fast path に入れる条件は最小**: + * alloc:`size <= snapshot->max_fast_size` かつ `class_mask` に含まれる + * free:`ptr` が “自分の管理領域” と O(1) で断定できるときだけ + * **fallback 条件は広く**: + * “確信が持てないものは全部” `hak_cold_free_fallback()` + * **不変条件(最小)**: + * debugビルドのみ `assert(class_idx == page_meta->class_idx)` 等 + * releaseは “ワンショットログ + disable gate”(既にmadvise guard等の思想があるなら同型で) + +### リスク4:FastLaneだけで目標(mimalloc 50%)に届かない + +* **現実**:Aは「全呼び出しの固定費」を下げるが、mimalloc差の大きな部分が “ptr→meta” なら A単独では頭打ち。 +* **対策(ロードマップ)**: + * Phase ML2(A)で 0.5台に入らない場合、次の章は **B(ptr→meta O(1) を mid/pool に波及)** を本線にする。 + * Aで FastLane 
を作っておくと、B/Cの導入点(入口)が固定され、以後の実験がやりやすい(境界の一本化=勝ち筋)。 + +--- + +作成日: 2025-12-14 +記録者: Task agent diff --git a/core/bench_profile.h b/core/bench_profile.h new file mode 100644 index 00000000..7f632903 --- /dev/null +++ b/core/bench_profile.h @@ -0,0 +1,177 @@ +#pragma once +#include +#include +#include +#include +#include + +#ifdef USE_HAKMEM +#include "box/wrapper_env_box.h" // wrapper_env_refresh_from_env (Phase 2 B4) +#include "box/tiny_static_route_box.h" // tiny_static_route_refresh_from_env (Phase 3 C3) +#include "box/hakmem_env_snapshot_box.h" // hakmem_env_snapshot_refresh_from_env (Phase 4 E1) +#endif + +// env が未設定のときだけ既定値を入れる +static inline void bench_setenv_default(const char* key, const char* val) { + if (getenv(key) != NULL) return; + static void* (*real_malloc)(size_t) = NULL; + static int (*real_putenv)(char*) = NULL; + if (!real_malloc) { + real_malloc = (void* (*)(size_t))dlsym(RTLD_NEXT, "malloc"); + if (!real_malloc) real_malloc = malloc; + } + if (!real_putenv) { + real_putenv = (int (*)(char*))dlsym(RTLD_NEXT, "putenv"); + if (!real_putenv) real_putenv = putenv; + } + size_t klen = strlen(key); + size_t vlen = strlen(val); + char* buf = (char*)real_malloc(klen + vlen + 2); + if (!buf) return; + memcpy(buf, key, klen); + buf[klen] = '='; + memcpy(buf + klen + 1, val, vlen); + buf[klen + 1 + vlen] = '\0'; + { + char msg[256]; + int n = snprintf(msg, sizeof(msg), "[bench_profile] set %s=%s\n", key, val); + if (n > 0) { + if (n > (int)sizeof(msg)) n = (int)sizeof(msg); + ssize_t w = write(2, msg, (size_t)n); + (void)w; + } + } + real_putenv(buf); // takes ownership; do not free +} + +// ベンチ専用: HAKMEM_PROFILE に応じて ENV をプリセットする +static inline void bench_apply_profile(void) { + const char* p = getenv("HAKMEM_PROFILE"); + if (!p || !*p) return; + + if (strcmp(p, "MIXED_TINYV3_C7_SAFE") == 0) { + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C7_HOT", "1"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", 
"0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x80"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V4_ENABLED", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V4_CLASSES", "0x0"); + bench_setenv_default("HAKMEM_TINY_PTR_FAST_CLASSIFY_V4_ENABLED", "0"); + bench_setenv_default("HAKMEM_SMALL_SEGMENT_V4_ENABLED", "0"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + bench_setenv_default("HAKMEM_TINY_FRONT_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_TINY_FRONT_V3_LUT_ENABLED", "1"); + bench_setenv_default("HAKMEM_TINY_PTR_FAST_CLASSIFY_ENABLED", "1"); + // Phase FREE-TINY-FAST-DUALHOT-1: C0-C3 direct fast free (skip policy snapshot) + bench_setenv_default("HAKMEM_FREE_TINY_FAST_HOTCOLD", "1"); + // Phase 2 B4: Wrapper hot/cold split (malloc/free wrapper shape) + bench_setenv_default("HAKMEM_WRAP_SHAPE", "1"); + // Phase 4 E1: ENV Snapshot Consolidation (+3.92% proven on Mixed) + bench_setenv_default("HAKMEM_ENV_SNAPSHOT", "1"); + // Phase 5 E4-1: Free wrapper ENV snapshot (+3.51% proven on Mixed, 10-run) + bench_setenv_default("HAKMEM_FREE_WRAPPER_ENV_SNAPSHOT", "1"); + // Phase 5 E4-2: Malloc wrapper ENV snapshot (+21.83% proven on Mixed, 10-run) + bench_setenv_default("HAKMEM_MALLOC_WRAPPER_ENV_SNAPSHOT", "1"); + // Phase 5 E5-1: Free Tiny Direct Path (+3.35% proven on Mixed, 10-run) + bench_setenv_default("HAKMEM_FREE_TINY_DIRECT", "1"); + // Phase 6: Front FastLane (Layer Collapse) (+11.13% proven on Mixed, 10-run) + bench_setenv_default("HAKMEM_FRONT_FASTLANE", "1"); + // Phase 4-4: C6 ULTRA free+alloc 統合を有効化 (default OFF, manual opt-in) + bench_setenv_default("HAKMEM_TINY_C6_ULTRA_FREE_ENABLED", "0"); + // Phase MID-V3: Mid/Pool HotBox v3 + // Mixed (16–1024B) では MID_V3(C6) が大きく遅くなるため、デフォルト OFF に固定。 + // C6-heavy プロファイル側でのみ ON を推奨する(C6-heavy のみ最適化対象)。 + bench_setenv_default("HAKMEM_MID_V3_ENABLED", "0"); + bench_setenv_default("HAKMEM_MID_V3_CLASSES", "0x0"); + // Phase 2 B3: 
Routing branch shape optimization (LIKELY on LEGACY, cold helper for rare routes) + bench_setenv_default("HAKMEM_TINY_ALLOC_ROUTE_SHAPE", "1"); + // Phase 3 C3: Static routing (policy_snapshot bypass, +2.2% proven) + bench_setenv_default("HAKMEM_TINY_STATIC_ROUTE", "1"); + // Phase 3 D1: Free route cache (TLS cache for free path routing, +2.19% proven) + bench_setenv_default("HAKMEM_FREE_STATIC_ROUTE", "1"); + } else if (strcmp(p, "C6_HEAVY_LEGACY_POOLV1") == 0) { + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C6_HOT", "0"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x80"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + bench_setenv_default("HAKMEM_POOL_V1_FLATTEN_ENABLED", "0"); + bench_setenv_default("HAKMEM_MID_DESC_CACHE_ENABLED", "1"); + // Phase 4-4: C6 ULTRA free+alloc 統合を有効化 (default OFF, manual opt-in) + bench_setenv_default("HAKMEM_TINY_C6_ULTRA_FREE_ENABLED", "0"); + // Phase MID-V3: Mid/Pool HotBox v3 (257-768B, C6 only) + bench_setenv_default("HAKMEM_MID_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_MID_V3_CLASSES", "0x40"); + // Phase 6: Front FastLane (Layer Collapse) (+11.13% proven on Mixed, 10-run) + bench_setenv_default("HAKMEM_FRONT_FASTLANE", "1"); + // Phase 2 B3: Routing branch shape optimization (LIKELY on LEGACY, cold helper for rare routes) + bench_setenv_default("HAKMEM_TINY_ALLOC_ROUTE_SHAPE", "1"); + } else if (strcmp(p, "C6_V7_STUB") == 0) { + // Phase v7-1: C6-only v7 stub 実験用(MID v3 fallback) + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C6_HOT", "0"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x80"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + 
bench_setenv_default("HAKMEM_MID_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_MID_V3_CLASSES", "0x40"); + // v7 stub ON (C6-only) + bench_setenv_default("HAKMEM_SMALL_HEAP_V7_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V7_CLASSES", "0x40"); + } else if (strcmp(p, "C6_HEAVY_LEGACY_POOLV1_FLATTEN") == 0) { + // LEGACY mid/smallmid ベンチ専用(C7_SAFE では使用しない) + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "LEGACY"); + bench_setenv_default("HAKMEM_TINY_C6_HOT", "0"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x80"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + bench_setenv_default("HAKMEM_POOL_V1_FLATTEN_ENABLED", "1"); + bench_setenv_default("HAKMEM_POOL_V1_FLATTEN_STATS", "1"); + bench_setenv_default("HAKMEM_POOL_ZERO_MODE", "header"); + } else if (strcmp(p, "DEBUG_TINY_FRONT_PERF") == 0) { + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C7_HOT", "1"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x80"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + bench_setenv_default("HAKMEM_TINY_FRONT_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_TINY_FRONT_V3_LUT_ENABLED", "1"); + bench_setenv_default("HAKMEM_TINY_PTR_FAST_CLASSIFY_ENABLED", "1"); + } else if (strcmp(p, "C6_SMALL_HEAP_V3_EXPERIMENT") == 0) { + // C6 を SmallObject v3 に載せる研究用(標準では使用しない) + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C6_HOT", "1"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x40"); // C6 only + bench_setenv_default("HAKMEM_SMALL_HEAP_V4_ENABLED", "0"); + 
bench_setenv_default("HAKMEM_SMALL_HEAP_V4_CLASSES", "0x0"); + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + } else if (strcmp(p, "C6_SMALL_HEAP_V4_EXPERIMENT") == 0) { + // C6 を SmallObject v4 に載せる研究用(標準では使用しない) + bench_setenv_default("HAKMEM_TINY_HEAP_PROFILE", "C7_SAFE"); + bench_setenv_default("HAKMEM_TINY_C6_HOT", "1"); + bench_setenv_default("HAKMEM_TINY_HOTHEAP_V2", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_ENABLED", "0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V3_CLASSES", "0x0"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V4_ENABLED", "1"); + bench_setenv_default("HAKMEM_SMALL_HEAP_V4_CLASSES", "0x40"); // C6 only + bench_setenv_default("HAKMEM_POOL_V2_ENABLED", "0"); + } + +#ifdef USE_HAKMEM + // Phase 3 C3 Step 0: Ensure policy snapshot reflects final ENV after putenv defaults. + small_policy_v7_bump_version(); + // Phase 2 B4: Sync wrapper ENV cache after bench_profile putenv defaults. + wrapper_env_refresh_from_env(); + // Phase 3 C3: Sync static route cache after bench_profile putenv defaults. + tiny_static_route_refresh_from_env(); + // Phase 4 E1: Sync ENV snapshot cache after bench_profile putenv defaults. 
+ hakmem_env_snapshot_refresh_from_env(); +#endif + } diff --git a/core/box/carve_push_box.d b/core/box/carve_push_box.d index ed6bd540..36bdcace 100644 --- a/core/box/carve_push_box.d +++ b/core/box/carve_push_box.d @@ -36,9 +36,9 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \ core/hakmem_tiny_config.h core/tiny_nextptr.h core/hakmem_build_flags.h \ core/tiny_region_id.h core/superslab/superslab_inline.h \ core/box/tiny_layout_box.h core/box/tiny_header_box.h \ - core/box/tiny_layout_box.h core/box/../tiny_debug_ring.h \ - core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \ - core/box/tiny_ptr_bridge_box.h \ + core/box/tiny_layout_box.h core/box/tiny_header_write_once_env_box.h \ + core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \ + core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \ core/box/../hakmem_tiny_superslab_internal.h \ core/box/../box/ss_hot_cold_box.h \ core/box/../box/../superslab/superslab_types.h \ @@ -110,6 +110,7 @@ core/superslab/superslab_inline.h: core/box/tiny_layout_box.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: +core/box/tiny_header_write_once_env_box.h: core/box/../tiny_debug_ring.h: core/box/ss_addr_map_box.h: core/box/../superslab/superslab_inline.h: diff --git a/core/box/front_fastlane_box.c b/core/box/front_fastlane_box.c new file mode 100644 index 00000000..e130853b --- /dev/null +++ b/core/box/front_fastlane_box.c @@ -0,0 +1,22 @@ +// ============================================================================ +// Phase 6: Front FastLane Box (Cold Helpers / Stats) +// ============================================================================ +// +// Purpose: Cold-path helpers and stats dump for FrontFastLaneBox +// +// Note: Main hot-path logic is inline in front_fastlane_box.h +// This .c file provides cold helpers and stats management +// +// ============================================================================ + +#include "front_fastlane_box.h" 
+#include "front_fastlane_stats_box.h" +#include + +// Cold helper: Stats dump (called on exit if needed) +// Note: front_fastlane_stats_dump() is already defined in front_fastlane_stats_box.h +// This function provides a public entry point for external callers + +void front_fastlane_dump_stats(void) { + front_fastlane_stats_dump(); +} diff --git a/core/box/front_fastlane_box.h b/core/box/front_fastlane_box.h new file mode 100644 index 00000000..6ff618ed --- /dev/null +++ b/core/box/front_fastlane_box.h @@ -0,0 +1,171 @@ +#ifndef HAK_FRONT_FASTLANE_BOX_H +#define HAK_FRONT_FASTLANE_BOX_H + +// ============================================================================ +// Phase 6: Front FastLane Box (Hot Inline / Try API) +// ============================================================================ +// +// Purpose: Single-box entry point for malloc/free hot paths +// Collapses wrapper→gate→policy→route layers into one +// +// API: +// void* front_fastlane_try_malloc(size_t size) +// - Returns non-NULL on success (handled by FastLane) +// - Returns NULL on failure (fallback to existing wrapper path) +// +// bool front_fastlane_try_free(void* ptr) +// - Returns true if handled (success) +// - Returns false if not handled (fallback to existing wrapper path) +// +// Box Theory: +// - L0: ENV gate (front_fastlane_env_box.h) +// - L1: This file (hot inline handlers) +// - L2: Stats (front_fastlane_stats_box.h, cold helpers in .c) +// +// Strategy: +// - Read existing "winning boxes" only once +// - Call existing hot handlers (malloc_tiny_fast_for_class, free_tiny_fast) +// - No duplicate checks (deduplicate existing wrapper logic) +// - Fail-fast: Any uncertainty → return not-handled +// +// Safety: +// - ENV-gated via HAKMEM_FRONT_FASTLANE (default ON; set to 0 to opt out) +// - Single fallback boundary (FastLane → ColdFallback) +// - Reversible (ENV toggle) +// +// ============================================================================ + +#include +#include +#include +#include "front_fastlane_env_box.h" +#include
"front_fastlane_stats_box.h" +#include "../hakmem_tiny.h" // hak_tiny_size_to_class, tiny_get_max_size +#include "../front/malloc_tiny_fast.h" // malloc_tiny_fast_for_class + +// FastLane is only safe after global init completes. +// Before init, wrappers must handle recursion guards + syscall init. +extern int g_initialized; + +// ============================================================================ +// Hot Inline: try_malloc +// ============================================================================ + +// Patch 4: Actual Tiny routing implementation +// Strategy: Read existing winning boxes only once, call existing hot handlers +// No duplicate checks (deduplicate existing wrapper logic) +static inline void* front_fastlane_try_malloc(size_t size) { + FRONT_FASTLANE_STAT_INC(malloc_total); + + // Fail-fast: do not enter FastLane before init completes. + if (__builtin_expect(!g_initialized, 0)) { + FRONT_FASTLANE_STAT_INC(malloc_fallback_other); + return NULL; + } + + // Fast path: Size check (Tiny range only) + // Use cached max size (typically 256 or 1024) + size_t max_size = tiny_get_max_size(); + if (__builtin_expect(size > max_size, 0)) { + FRONT_FASTLANE_STAT_INC(malloc_fallback_size); + return NULL; // Not Tiny → fallback + } + + // Class calculation (single LUT lookup, no branches) + int class_idx = hak_tiny_size_to_class(size); + if (__builtin_expect(class_idx < 0 || class_idx >= 8, 0)) { + FRONT_FASTLANE_STAT_INC(malloc_fallback_class); + return NULL; // Invalid class → fallback + } + + // Class mask check (gradual rollout support) + uint8_t mask = front_fastlane_class_mask(); + if (__builtin_expect(((mask >> class_idx) & 1) == 0, 0)) { + FRONT_FASTLANE_STAT_INC(malloc_fallback_other); + return NULL; // Class not enabled → fallback + } + + // Call existing hot handler (no duplication) + // This is the winning path from E5-4 / Phase 4 E2 + void* ptr = malloc_tiny_fast_for_class(size, class_idx); + if (__builtin_expect(ptr != NULL, 1)) { + 
FRONT_FASTLANE_STAT_INC(malloc_hit); + return ptr; // Success + } + + // Allocation failed (refill needed, TLS exhausted, etc.) + FRONT_FASTLANE_STAT_INC(malloc_fallback_alloc); + return NULL; // Fallback to cold path +} + +// ============================================================================ +// Hot Inline: try_free +// ============================================================================ + +// Patch 6: Actual Tiny direct path implementation +// Strategy: Header validation → direct Tiny free (same pattern as E5-1) +// Only handle cases where we have high confidence (Tiny header magic) +static inline bool front_fastlane_try_free(void* ptr) { + FRONT_FASTLANE_STAT_INC(free_total); + + // Fail-fast: do not enter FastLane before init completes. + if (__builtin_expect(!g_initialized, 0)) { + FRONT_FASTLANE_STAT_INC(free_fallback_other); + return false; + } + +#if HAKMEM_TINY_HEADER_CLASSIDX + // Page boundary guard: ptr must not be page-aligned + // (Accessing ptr-1 when ptr is page-aligned could segfault) + uintptr_t off = (uintptr_t)ptr & 0xFFFu; + if (__builtin_expect(off == 0, 0)) { + FRONT_FASTLANE_STAT_INC(free_fallback_aligned); + return false; // Page-aligned → fallback (unsafe to read header) + } + + // Fast header validation (1 load, 1 compare) + uint8_t header = *((uint8_t*)ptr - 1); + uint8_t magic = header & 0xF0u; + + if (__builtin_expect(magic != 0xA0u, 0)) { + // Not Tiny header (could be Mid/Pool/Large or external allocation) + if (magic != 0) { + FRONT_FASTLANE_STAT_INC(free_fallback_header); + } + return false; // Not Tiny → fallback + } + + // Extract class index from header (lower 4 bits) + int class_idx = (int)(header & 0x0Fu); + if (__builtin_expect(class_idx >= 8, 0)) { + FRONT_FASTLANE_STAT_INC(free_fallback_class); + return false; // Invalid class → fallback + } + + // Class mask check (gradual rollout support) + uint8_t mask = front_fastlane_class_mask(); + if (__builtin_expect(((mask >> class_idx) & 1) == 0, 0)) { + 
FRONT_FASTLANE_STAT_INC(free_fallback_other); + return false; // Class not enabled → fallback + } + + // Call existing hot handler (no duplication) + // This is the winning path from E5-1 (free_tiny_fast returns 1 on success) + // Forward declaration needed - free_tiny_fast is in malloc_tiny_fast.h + extern int free_tiny_fast(void* ptr); + if (__builtin_expect(free_tiny_fast(ptr), 1)) { + FRONT_FASTLANE_STAT_INC(free_hit); + return true; // Success + } + + // Free failed (cold path needed - refill, full TLS, etc.) + FRONT_FASTLANE_STAT_INC(free_fallback_failure); + return false; // Fallback to cold path +#else + // No header support → always fallback + FRONT_FASTLANE_STAT_INC(free_fallback_other); + return false; +#endif +} + +#endif // HAK_FRONT_FASTLANE_BOX_H diff --git a/core/box/front_fastlane_env_box.c b/core/box/front_fastlane_env_box.c new file mode 100644 index 00000000..54041bc8 --- /dev/null +++ b/core/box/front_fastlane_env_box.c @@ -0,0 +1,33 @@ +// ============================================================================ +// Phase 6: Front FastLane - ENV Gate Box (Implementation) +// ============================================================================ +// +// Purpose: Optional refresh function for bench_profile putenv() synchronization +// +// Note: The main getters (front_fastlane_enabled, front_fastlane_class_mask) +// are inline in the header for zero overhead in hot paths. +// This .c file provides cold-path refresh functions if needed. 
+// +// ============================================================================ + +#include "front_fastlane_env_box.h" +#include +#include + +// Refresh from ENV (for bench_profile putenv() synchronization) +// Intended to be called after bench_profile changes ENV variables via putenv() +void front_fastlane_env_refresh_from_env(void) { + // NOTE: This function is currently a no-op placeholder; it does NOT reset any cache. + // The getenv() calls and their cached results live in function-local statics + // inside the inline getters in front_fastlane_env_box.h. + + // Those function-local statics cannot be reached from this translation unit, + // so there is nothing this function can clear or re-read. + // Consequence: once a getter has cached its value, later putenv() changes + // to HAKMEM_FRONT_FASTLANE / HAKMEM_FRONT_FASTLANE_CLASS_MASK are not + // observed by that getter. + + // If bench_profile needs synchronization, the cache must be refactored + // into file-scope globals (instead of function-local statics) so that + // this function can reset them. +} diff --git a/core/box/front_fastlane_env_box.h b/core/box/front_fastlane_env_box.h new file mode 100644 index 00000000..2ce04360 --- /dev/null +++ b/core/box/front_fastlane_env_box.h @@ -0,0 +1,98 @@ +#ifndef HAK_FRONT_FASTLANE_ENV_BOX_H +#define HAK_FRONT_FASTLANE_ENV_BOX_H + +// ============================================================================ +// Phase 6: Front FastLane - ENV Gate Box +// ============================================================================ +// +// Purpose: ENV gate for Front FastLane (Layer Collapse optimization) +// +// ENV Variables: +// HAKMEM_FRONT_FASTLANE=0/1 (default: 1, promoted) +// - 0: Disabled (use existing wrapper paths) +// - 1: Enabled (use FastLane single-box entry point) +// +// HAKMEM_FRONT_FASTLANE_CLASS_MASK=0x..
(default: 0xFF, optional) +// - Bitmask for gradual rollout (e.g., 0x01 = class 0 only) +// - 0xFF = all classes enabled +// +// Box Theory: +// - L0: ENV gate (this file) +// - L1: FrontFastLaneBox (front_fastlane_box.h) +// - Integration: hak_wrappers.inc.h +// +// Safety: +// - Default ON (opt-out via ENV=0) +// - Zero overhead when disabled (static cached) +// - Lazy init (getenv on first call) +// +// Rollback: +// - Set HAKMEM_FRONT_FASTLANE=0 +// - Or rebuild without integration +// +// ============================================================================ + +#include +#include +#include + +// Forward declaration for cross-box includes +static inline int front_fastlane_enabled(void); +static inline uint8_t front_fastlane_class_mask(void); + +// ============================================================================ +// ENV Gate Implementation +// ============================================================================ + +// Lazy init: Check ENV variable on first call, cache result +// Thread-safe: Read-only after init (atomic store, relaxed load) +static inline int front_fastlane_enabled(void) { + static _Atomic int cached = -1; // -1 = uninitialized + int val = atomic_load_explicit(&cached, memory_order_relaxed); + + if (__builtin_expect(val == -1, 0)) { + // Cold path: First call, check ENV + const char* env = getenv("HAKMEM_FRONT_FASTLANE"); + int enabled = 1; // default: ON (opt-out) + + if (env) { + // Parse: "0" or empty = disabled, "1" or non-empty = enabled + enabled = (env[0] != '0' && env[0] != '\0') ? 
1 : 0; + } + + // Cache result (thread-safe: atomic store) + atomic_store_explicit(&cached, enabled, memory_order_relaxed); + val = enabled; + } + + return val; +} + +// Get class mask for gradual rollout (default: 0xFF = all classes) +static inline uint8_t front_fastlane_class_mask(void) { + static _Atomic int cached = -1; // -1 = uninitialized + int val = atomic_load_explicit(&cached, memory_order_relaxed); + + if (__builtin_expect(val == -1, 0)) { + // Cold path: First call, check ENV + const char* env = getenv("HAKMEM_FRONT_FASTLANE_CLASS_MASK"); + int mask = 0xFF; // Default: all classes enabled + + if (env) { + // Parse mask with auto-detected base ("0x03" = hex, leading "0" = octal, else decimal); unprefixed hex such as "FF" is rejected and keeps the default + char* end; + long parsed = strtol(env, &end, 0); // Auto-detect base (0x prefix) + if (end != env && parsed >= 0 && parsed <= 0xFF) { + mask = (int)parsed; + } + } + + // Cache result (thread-safe: atomic store) + atomic_store_explicit(&cached, mask, memory_order_relaxed); + val = mask; + } + + return (uint8_t)val; +} + +#endif // HAK_FRONT_FASTLANE_ENV_BOX_H diff --git a/core/box/front_fastlane_stats_box.h b/core/box/front_fastlane_stats_box.h new file mode 100644 index 00000000..d9e7c58c --- /dev/null +++ b/core/box/front_fastlane_stats_box.h @@ -0,0 +1,109 @@ +#ifndef HAK_FRONT_FASTLANE_STATS_BOX_H +#define HAK_FRONT_FASTLANE_STATS_BOX_H + +// ============================================================================ +// Phase 6: Front FastLane - Stats Box +// ============================================================================ +// +// Purpose: Visibility into FastLane hit/fallback rates +// +// Counters (compile-out when HAKMEM_DEBUG_COUNTERS=0): +// +// Malloc: +// - malloc_total: Total try_malloc attempts +// - malloc_hit: Successful FastLane alloc +// - malloc_fallback_*: Fallback reasons (stub, size, class, alloc, other) +// +// Free: +// - free_total: Total try_free attempts +// - free_hit: Successful FastLane free +// - free_fallback_*: Fallback reasons (stub, aligned, header, class, failure, other) +// +// Output (on exit, if
HAKMEM_DEBUG_COUNTERS=1): +// [FRONT_FASTLANE] malloc_total=N hit=N fb_*=N ... free_total=N hit=N fb_*=N ... +// +// Box Theory: +// - L2: Stats layer (compile-out when counters disabled) +// - Zero overhead: No-op macros when HAKMEM_DEBUG_COUNTERS=0 +// +// ============================================================================ + +#include +#include +#include + +#if HAKMEM_DEBUG_COUNTERS + +// Stats structure (global, thread-safe via atomics) +typedef struct { + // Malloc stats + _Atomic uint64_t malloc_total; // Total try_malloc calls + _Atomic uint64_t malloc_hit; // Successful FastLane alloc + _Atomic uint64_t malloc_fallback_stub; // Stub: not implemented yet (Patch 2) + _Atomic uint64_t malloc_fallback_size; // Size out of Tiny range + _Atomic uint64_t malloc_fallback_class; // Class calculation failed + _Atomic uint64_t malloc_fallback_alloc; // Allocation failed (refill needed) + _Atomic uint64_t malloc_fallback_other; // Other reasons + + // Free stats + _Atomic uint64_t free_total; // Total try_free calls + _Atomic uint64_t free_hit; // Successful FastLane free + _Atomic uint64_t free_fallback_stub; // Stub: not implemented yet (Patch 2) + _Atomic uint64_t free_fallback_aligned; // Page-aligned pointer + _Atomic uint64_t free_fallback_header; // Invalid header magic + _Atomic uint64_t free_fallback_class; // Class out of bounds + _Atomic uint64_t free_fallback_failure; // Free failed (cold path needed) + _Atomic uint64_t free_fallback_other; // Other reasons +} FrontFastLaneStats; + +// Global stats instance +static FrontFastLaneStats g_front_fastlane_stats = {0}; + +// Increment macros (relaxed ordering - stats only) +#define FRONT_FASTLANE_STAT_INC(field) \ + atomic_fetch_add_explicit(&g_front_fastlane_stats.field, 1, memory_order_relaxed) + +// Dump stats on exit (call from wrapper destructor or main) +static void front_fastlane_stats_dump(void) { + uint64_t m_total = atomic_load_explicit(&g_front_fastlane_stats.malloc_total, 
memory_order_relaxed); + uint64_t f_total = atomic_load_explicit(&g_front_fastlane_stats.free_total, memory_order_relaxed); + + if (m_total == 0 && f_total == 0) return; // No activity + + // Malloc stats + uint64_t m_hit = atomic_load_explicit(&g_front_fastlane_stats.malloc_hit, memory_order_relaxed); + uint64_t m_fb_stub = atomic_load_explicit(&g_front_fastlane_stats.malloc_fallback_stub, memory_order_relaxed); + uint64_t m_fb_size = atomic_load_explicit(&g_front_fastlane_stats.malloc_fallback_size, memory_order_relaxed); + uint64_t m_fb_class = atomic_load_explicit(&g_front_fastlane_stats.malloc_fallback_class, memory_order_relaxed); + uint64_t m_fb_alloc = atomic_load_explicit(&g_front_fastlane_stats.malloc_fallback_alloc, memory_order_relaxed); + uint64_t m_fb_other = atomic_load_explicit(&g_front_fastlane_stats.malloc_fallback_other, memory_order_relaxed); + + // Free stats + uint64_t f_hit = atomic_load_explicit(&g_front_fastlane_stats.free_hit, memory_order_relaxed); + uint64_t f_fb_stub = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_stub, memory_order_relaxed); + uint64_t f_fb_aligned = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_aligned, memory_order_relaxed); + uint64_t f_fb_header = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_header, memory_order_relaxed); + uint64_t f_fb_class = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_class, memory_order_relaxed); + uint64_t f_fb_failure = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_failure, memory_order_relaxed); + uint64_t f_fb_other = atomic_load_explicit(&g_front_fastlane_stats.free_fallback_other, memory_order_relaxed); + + fprintf(stderr, "[FRONT_FASTLANE] malloc_total=%lu hit=%lu fb_stub=%lu fb_size=%lu fb_class=%lu fb_alloc=%lu fb_other=%lu | " + "free_total=%lu hit=%lu fb_stub=%lu fb_aligned=%lu fb_header=%lu fb_class=%lu fb_failure=%lu fb_other=%lu\n", + (unsigned long)m_total, (unsigned long)m_hit, + (unsigned long)m_fb_stub, 
(unsigned long)m_fb_size, (unsigned long)m_fb_class, + (unsigned long)m_fb_alloc, (unsigned long)m_fb_other, + (unsigned long)f_total, (unsigned long)f_hit, + (unsigned long)f_fb_stub, (unsigned long)f_fb_aligned, (unsigned long)f_fb_header, + (unsigned long)f_fb_class, (unsigned long)f_fb_failure, (unsigned long)f_fb_other); +} + +#else // HAKMEM_DEBUG_COUNTERS == 0 + +// No-op macros (zero overhead) +#define FRONT_FASTLANE_STAT_INC(field) do {} while(0) + +static inline void front_fastlane_stats_dump(void) {} + +#endif // HAKMEM_DEBUG_COUNTERS + +#endif // HAK_FRONT_FASTLANE_STATS_BOX_H diff --git a/core/box/front_gate_box.d b/core/box/front_gate_box.d index 90250e23..071d2d30 100644 --- a/core/box/front_gate_box.d +++ b/core/box/front_gate_box.d @@ -16,17 +16,18 @@ core/box/front_gate_box.o: core/box/front_gate_box.c \ core/box/ss_pt_env_box.h core/box/ss_pt_env_box.h core/tiny_debug_api.h \ core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \ core/box/tiny_header_box.h core/box/tiny_layout_box.h \ - core/box/../tiny_region_id.h core/box/tls_sll_box.h \ - core/box/../hakmem_internal.h core/box/../hakmem.h \ - core/box/../hakmem_build_flags.h core/box/../hakmem_config.h \ - core/box/../hakmem_features.h core/box/../hakmem_sys.h \ - core/box/../hakmem_whale.h core/box/../box/ptr_type_box.h \ - core/box/../hakmem_debug_master.h core/box/../tiny_remote.h \ - core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \ - core/box/../ptr_track.h core/box/../ptr_trace.h \ - core/box/../hakmem_trace_master.h core/box/../hakmem_stats_master.h \ - core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \ - core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \ + core/box/../tiny_region_id.h core/box/tiny_header_write_once_env_box.h \ + core/box/tls_sll_box.h core/box/../hakmem_internal.h \ + core/box/../hakmem.h core/box/../hakmem_build_flags.h \ + core/box/../hakmem_config.h core/box/../hakmem_features.h \ + core/box/../hakmem_sys.h 
core/box/../hakmem_whale.h \ + core/box/../box/ptr_type_box.h core/box/../hakmem_debug_master.h \ + core/box/../tiny_remote.h core/box/../hakmem_tiny_integrity.h \ + core/box/../hakmem_tiny.h core/box/../ptr_track.h \ + core/box/../ptr_trace.h core/box/../hakmem_trace_master.h \ + core/box/../hakmem_stats_master.h core/box/../tiny_debug_ring.h \ + core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \ + core/box/tiny_ptr_bridge_box.h \ core/box/../hakmem_tiny_superslab_internal.h \ core/box/../hakmem_tiny_superslab.h core/box/../box/ss_hot_cold_box.h \ core/box/../box/../superslab/superslab_types.h \ @@ -77,6 +78,7 @@ core/box/../hakmem_tiny_config.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: core/box/tls_sll_box.h: core/box/../hakmem_internal.h: core/box/../hakmem.h: diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h index 09577df9..d7deb3ec 100644 --- a/core/box/hak_wrappers.inc.h +++ b/core/box/hak_wrappers.inc.h @@ -42,6 +42,7 @@ void* realloc(void* ptr, size_t size) { #include "free_tiny_direct_stats_box.h" // Phase 5 E5-1: Free Tiny direct path stats #include "malloc_tiny_direct_env_box.h" // Phase 5 E5-4: Malloc Tiny direct path ENV gate #include "malloc_tiny_direct_stats_box.h" // Phase 5 E5-4: Malloc Tiny direct path stats +#include "front_fastlane_box.h" // Phase 6: Front FastLane (Layer Collapse) #include "../hakmem_internal.h" // AllocHeader helpers for diagnostics #include "../hakmem_super_registry.h" // Superslab lookup for diagnostics #include "../superslab/superslab_inline.h" // slab_index_for, capacity @@ -175,6 +176,18 @@ void* malloc(size_t size) { // Fallback to normal path for large allocations } + // Phase 6: Front FastLane (Layer Collapse) + // Strategy: Collapse wrapper→gate→policy→route layers into single hot box + // Observed: +11.13% on Mixed 10-run (Phase 6 A/B) + // ENV: HAKMEM_FRONT_FASTLANE=0/1 (default: 1, 
opt-out) + if (__builtin_expect(front_fastlane_enabled(), 1)) { + void* p = front_fastlane_try_malloc(size); + if (__builtin_expect(p != NULL, 1)) { + return p; // Success: handled by FastLane + } + // Fallback: not handled, continue to existing wrapper path + } + // Phase 5 E4-2: Malloc Wrapper ENV Snapshot (optional, ENV-gated) // Strategy: Consolidate 2+ TLS reads -> 1 TLS read (50%+ reduction) // Expected gain: +2-4% (from malloc 16.13% + tiny_alloc_gate_fast 19.50% reduction) @@ -618,6 +631,17 @@ void free(void* ptr) { #endif if (!ptr) return; + // Phase 6: Front FastLane (Layer Collapse) - free path + // Strategy: Collapse wrapper→gate→classify layers into single hot box + // Observed: +11.13% on Mixed 10-run (Phase 6 A/B) + // ENV: HAKMEM_FRONT_FASTLANE=0/1 (default: 1, opt-out) + if (__builtin_expect(front_fastlane_enabled(), 1)) { + if (front_fastlane_try_free(ptr)) { + return; // Success: handled by FastLane + } + // Fallback: not handled, continue to existing wrapper path + } + // Phase 5 E5-1: Free Tiny Direct Path (ENV-gated, opt-in) // Strategy: Wrapper-level Tiny validation → direct path (skip ENV snapshot + cold path) // Expected gain: +3-5% (reduces 29.56% overhead by 30-40%) diff --git a/core/box/superslab_expansion_box.d b/core/box/superslab_expansion_box.d index 3d7243b3..c9f74655 100644 --- a/core/box/superslab_expansion_box.d +++ b/core/box/superslab_expansion_box.d @@ -40,6 +40,7 @@ core/box/superslab_expansion_box.o: core/box/superslab_expansion_box.c \ core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \ core/box/../hakmem_build_flags.h core/box/tiny_header_box.h \ core/box/tiny_layout_box.h core/box/../tiny_region_id.h \ + core/box/tiny_header_write_once_env_box.h \ core/box/../box/slab_freelist_atomic.h \ core/box/../hakmem_tiny_superslab_constants.h core/box/superslab_expansion_box.h: @@ -103,5 +104,6 @@ core/box/../hakmem_build_flags.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: 
+core/box/tiny_header_write_once_env_box.h: core/box/../box/slab_freelist_atomic.h: core/box/../hakmem_tiny_superslab_constants.h: diff --git a/core/box/tiny_header_box.h b/core/box/tiny_header_box.h index ba0bcd34..266cd8a0 100644 --- a/core/box/tiny_header_box.h +++ b/core/box/tiny_header_box.h @@ -204,7 +204,9 @@ static inline int tiny_header_read(const void* base, int class_idx) { void* tiny_region_id_write_header(void* base, int class_idx); // Forward declaration from tiny_header_write_once_env_box.h -int tiny_header_write_once_enabled(void); +// NOTE: This is static inline in tiny_header_write_once_env_box.h, not extern +// Must include the header instead of forward declaring +#include "tiny_header_write_once_env_box.h" static inline void* tiny_header_finalize_alloc(void* base, int class_idx) { #if HAKMEM_TINY_HEADER_CLASSIDX diff --git a/core/tiny_alloc_fast_push.d b/core/tiny_alloc_fast_push.d index 0e017e06..2e5ee96a 100644 --- a/core/tiny_alloc_fast_push.d +++ b/core/tiny_alloc_fast_push.d @@ -29,9 +29,9 @@ core/tiny_alloc_fast_push.o: core/tiny_alloc_fast_push.c \ core/hakmem_tiny_config.h core/tiny_nextptr.h core/hakmem_build_flags.h \ core/tiny_region_id.h core/superslab/superslab_inline.h \ core/box/tiny_layout_box.h core/box/tiny_header_box.h \ - core/box/tiny_layout_box.h core/box/../tiny_debug_ring.h \ - core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \ - core/box/tiny_ptr_bridge_box.h \ + core/box/tiny_layout_box.h core/box/tiny_header_write_once_env_box.h \ + core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \ + core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \ core/box/../hakmem_tiny_superslab_internal.h \ core/box/../box/ss_hot_cold_box.h \ core/box/../box/../superslab/superslab_types.h \ @@ -96,6 +96,7 @@ core/superslab/superslab_inline.h: core/box/tiny_layout_box.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: +core/box/tiny_header_write_once_env_box.h: 
core/box/../tiny_debug_ring.h: core/box/ss_addr_map_box.h: core/box/../superslab/superslab_inline.h: diff --git a/docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md b/docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md new file mode 100644 index 00000000..e1cf40ad --- /dev/null +++ b/docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md @@ -0,0 +1,385 @@ +# Phase 6: Front FastLane(Layer Collapse)A/B テスト結果レポート + +## テスト実施日時 +2025-12-14 + +## ステータス +**✅ GO** - Mixed 10-run で **+11.13%** の顕著な性能改善を確認 + +## 概要 + +Phase 6 Front FastLane 実装の A/B テストを実施しました。malloc/free の入口で発生している「wrapper→gate→policy→route」の固定費を **Hot 側 1 箱** に畳み、**Cold 側へ落ちるのは 1 箇所**(単一フォールバック)にする Layer Collapse 最適化により、**期待を大幅に上回る +11.13% の性能改善**を達成しました。 + +設計書で予測された効果は **+1-3%** でしたが、実測は **その 4-11 倍** に達しています。 + +## A/B テスト結果 + +### テスト環境 + +- ベンチマーク: `bench_random_mixed_hakmem` +- プロファイル: `MIXED_TINYV3_C7_SAFE` +- イテレーション: 20,000,000 +- ワーキングセット: 400 +- 実行回数: 10 runs (clean env) +- ENV gate: `HAKMEM_FRONT_FASTLANE=0/1` + +### Step 1: FastLane OFF(HAKMEM_FRONT_FASTLANE=0) + +``` +=== Run 1/10 === +Throughput = 42675954 ops/s [iter=20000000 ws=400] time=0.469s +=== Run 2/10 === +Throughput = 44058707 ops/s [iter=20000000 ws=400] time=0.454s +=== Run 3/10 === +Throughput = 44382231 ops/s [iter=20000000 ws=400] time=0.451s +=== Run 4/10 === +Throughput = 43165326 ops/s [iter=20000000 ws=400] time=0.463s +=== Run 5/10 === +Throughput = 42980303 ops/s [iter=20000000 ws=400] time=0.465s +=== Run 6/10 === +Throughput = 43339273 ops/s [iter=20000000 ws=400] time=0.461s +=== Run 7/10 === +Throughput = 43114891 ops/s [iter=20000000 ws=400] time=0.464s +=== Run 8/10 === +Throughput = 44069765 ops/s [iter=20000000 ws=400] time=0.454s +=== Run 9/10 === +Throughput = 43226216 ops/s [iter=20000000 ws=400] time=0.463s +=== Run 10/10 === +Throughput = 43180051 ops/s [iter=20000000 ws=400] time=0.463s +``` + +**統計(OFF):** +- 平均値: **43,419,271.70 ops/s** +- 中央値: 43,203,133.50 ops/s +- 標準偏差: 554,031.41 ops/s 
(1.28%) + +### Step 2: FastLane ON(HAKMEM_FRONT_FASTLANE=1) + +``` +=== Run 1/10 === +Throughput = 48633653 ops/s [iter=20000000 ws=400] time=0.411s +=== Run 2/10 === +Throughput = 48910397 ops/s [iter=20000000 ws=400] time=0.409s +=== Run 3/10 === +Throughput = 48299599 ops/s [iter=20000000 ws=400] time=0.414s +=== Run 4/10 === +Throughput = 48465902 ops/s [iter=20000000 ws=400] time=0.413s +=== Run 5/10 === +Throughput = 48443070 ops/s [iter=20000000 ws=400] time=0.413s +=== Run 6/10 === +Throughput = 48389817 ops/s [iter=20000000 ws=400] time=0.413s +=== Run 7/10 === +Throughput = 46431234 ops/s [iter=20000000 ws=400] time=0.431s +=== Run 8/10 === +Throughput = 47683380 ops/s [iter=20000000 ws=400] time=0.419s +=== Run 9/10 === +Throughput = 48413343 ops/s [iter=20000000 ws=400] time=0.413s +=== Run 10/10 === +Throughput = 48855722 ops/s [iter=20000000 ws=400] time=0.409s +``` + +**統計(ON):** +- 平均値: **48,252,611.70 ops/s** +- 中央値: 48,428,206.50 ops/s +- 標準偏差: 723,547.76 ops/s (1.50%) + +### Step 3: 性能差分と判定 + +**絶対値差分:** +- **+4,833,340 ops/s** + +**相対値差分:** +- **+11.13%** + +**判定基準:** +- GO: +1.0% 以上 ✅ +- NEUTRAL: ±1.0% +- NO-GO: -1.0% 以下 + +**結果:** +- **✅ GO** - 期待値(+1-3%)を大幅に上回る **+11.13%** の改善 + +## 詳細データ(Run ごとの比較) + +| Run | OFF (ops/s) | ON (ops/s) | Diff (%) | +|----:|-------------:|-------------:|----------:| +| 1 | 42,675,954 | 48,633,653 | +13.96% | +| 2 | 44,058,707 | 48,910,397 | +11.01% | +| 3 | 44,382,231 | 48,299,599 | +8.83% | +| 4 | 43,165,326 | 48,465,902 | +12.28% | +| 5 | 42,980,303 | 48,443,070 | +12.71% | +| 6 | 43,339,273 | 48,389,817 | +11.65% | +| 7 | 43,114,891 | 46,431,234 | +7.69% | +| 8 | 44,069,765 | 47,683,380 | +8.20% | +| 9 | 43,226,216 | 48,413,343 | +12.00% | +| 10 | 43,180,051 | 48,855,722 | +13.14% | + +**全 10 runs で一貫してポジティブ(+7.69% 〜 +13.96%)な改善を記録。** + +## Step 4: 健康診断結果 + +```bash +$ scripts/verify_health_profiles.sh +``` + +**結果: ✅ PASSED** + +- Health Profile 1 (MIXED_TINYV3_C7_SAFE): 43,737,790 ops/s +- Health Profile 2 
(C6_HEAVY_LEGACY_POOLV1): 23,045,785 ops/s + +すべての健康プロファイルが正常に実行され、パフォーマンスの異常は検出されませんでした。 + +## 性能改善の分析 + +### 予測との比較 + +| 指標 | 設計書予測 | 実測 | 比率 | +|--------------------------|------------|----------|-----------| +| 期待される改善 | +1-3% | +11.13% | **3.7-11倍** | +| 最小改善目標(GO基準) | +1.0% | +11.13% | **11倍** | + +### なぜ期待を上回ったのか? + +Phase 6 の設計では **+1-3%** を予測していましたが、実測で **+11.13%** を記録しました。この期待超えの理由を分析します: + +#### 1. 重複排除の累積効果(主要因) + +**設計意図:** +- wrapper→gate→policy→route の Layer Collapse +- 各層で繰り返される判定を 1 箇所に集約 + +**実際の効果:** +- size→class 判定: 複数回 → **1 回** +- ENV snapshot 読み: 複数回 → **1 回** +- route 決定: 複数回 → **1 回** +- 境界チェック: 複数箇所 → **1 箇所**(FastLane → ColdFallback) + +これらが **掛け算で効いた** 可能性が高い。 + +#### 2. 命令キャッシュの改善 + +**単一フォールバック境界:** +- wrapper の複雑な分岐構造が **直線化** +- 既存の Hot handler は変更なし(既に最適化済み) + +**効果:** +- I-cache miss 削減 +- 分岐予測精度向上(境界が 1 箇所に固定) + +#### 3. TLS/ENV アクセスの削減 + +**既存経路(OFF):** +- malloc wrapper: ENV snapshot 読む +- tiny gate: policy を読む +- tiny route: route を読む +- 各 handler: class 情報を読む + +**FastLane 経路(ON):** +- FastLane entry: ENV gate 1 回(cached) +- size→class: LUT 1 回(既存資産再利用) +- 既存 handler 呼び出し: 追加の ENV 読みなし + +→ **TLS アクセス回数が劇的に減少** + +#### 4. 既存の勝ち箱との相乗効果 + +Phase 6 は以下の既存最適化を **前提として** Layer Collapse を実施: + +- Phase 2 B3 (Routing 分岐形): **+2.89%** +- Phase 2 B4 (Wrapper Layer Hot/Cold Split): **+1.47%** +- Phase 3/4/5 の各種最適化: **累積 ~+4.4%** + +FastLane はこれらの **上に** 重複排除を実施したため、**baseline が既に高速化された状態** からの改善となった。 + +#### 5. 
Fail-Fast の効果 + +**設計原則:** +- 確信が持てない場合は即 fallback +- 既存経路を壊さない(安全ゲート) + +**効果:** +- Hot path が **確実に高速な経路だけ** を通る +- 曖昧なケースは早期に Cold へ(分岐コスト最小化) + +### 既存 Phase との累積効果 + +| Phase | 改善率 | 備考 | +|-------------------------------------|-----------|-----------------------------------------| +| Phase 2 B3 (Routing 分岐形) | +2.89% | route snapshot による分岐形 | +| Phase 2 B4 (Wrapper Layer Split) | +1.47% | Hot/Cold 境界明確化 | +| Phase 3-5 (各種最適化) | ~+1-2% | header/ENV/route の個別最適化 | +| **Phase 6 (Front FastLane)** | **+11.13%** | **Layer Collapse + 重複排除** | +| **累積** | **~+17-20%** | **複数最適化の相乗効果** | + +**Phase 6 は既存の最適化を前提として、さらに Layer Collapse を実施したため、相加ではなく相乗的な効果を発揮した。** + +## 実装の正しさ検証 + +### 機能的健全性 + +1. **両モードで正常動作:** + - OFF: 既存経路をそのまま使用 + - ON: FastLane → 既存 handler の呼び出しに成功 + +2. **健康診断 PASSED:** + - 全プロファイルで異常なし + - メモリリーク検出なし + - RSS 正常範囲内 + +3. **Fail-Fast が正しく動作:** + - FastLane は確実なケースのみ処理 + - 曖昧なケースは既存経路へ fallback + +### 性能的健全性 + +1. **再現性:** + - 全 10 runs で一貫して +7.69% 〜 +13.96% の改善 + - 標準偏差も許容範囲内(OFF: 1.28%, ON: 1.50%) + +2. **異常値なし:** + - すべての run で改善方向 + - 大きな外れ値なし(run 7 が最小 +7.69%, run 1 が最大 +13.96%) + +3. **ENV gate が正しく動作:** + - OFF で既存性能を維持 + - ON で明確な改善 + +## 次のステップ + +### Phase 6 の昇格(推奨) + +**判定: ✅ GO (+11.13%)** + +以下の手順で昇格することを推奨します: + +#### 1. default ON への切り替え + +**変更箇所:** +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_env_box.c` +- ENV default を `0` → `1` に変更 + +**理由:** +- +11.13% の顕著な改善(GO 基準 +1.0% を大幅にクリア) +- 全 10 runs で一貫した改善 +- 健康診断 PASSED +- 機能的・性能的健全性を確認 + +#### 2. stats 削減(optional) + +現在の stats は研究用に詳細な fallback reason を記録していますが、default ON 後は以下に簡素化可能: + +```c +// 最小 stats(production) +hit_count +fallback_count +``` + +fallback reason の詳細は ENV で on/off 可能にする。 + +#### 3. 段階的展開(optional, 保守的アプローチ) + +慎重を期す場合は、以下の段階展開も可能: + +1. **Week 1:** `HAKMEM_FRONT_FASTLANE=1` を default に +2. **Week 2:** class mask で段階導入(`HAKMEM_FRONT_FASTLANE_CLASS_MASK`) +3. **Week 3:** 全 class で有効化 + +ただし、A/B テスト結果が圧倒的に良好なため、**一括で default ON を推奨**。 + +#### 4. 
凍結箱のクリーンアップ(将来) + +Phase 6 が安定したら、以下の凍結箱を整理可能: + +- Phase 5 E7 (Frozen Box Prune): NO-GO → **削除候補** + - 参照: `docs/analysis/PHASE5_E7_FROZEN_BOX_PRUNE_AB_TEST_RESULTS.md` + +Phase 6 の Layer Collapse により、"削る" アプローチは不要になった。 + +### 外部レビューへの報告(推奨) + +Phase 6 は外部レビュー(ML2)で提案された最優先アプローチでした: + +- 質問状: `PHASE_ML2_CHATGPT_QUESTIONNAIRE_FASTLANE.md` +- 回答: `PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md` + +**結果を外部に報告:** +- 予測: +1-3% +- 実測: **+11.13%**(予測の 3.7-11 倍) + +この成功事例を外部レビュアーにフィードバックすることで、次の最適化方針の精度向上に繋がります。 + +### Phase 7 以降の方針(提案) + +Phase 6 で **Layer Collapse** による重複排除が成功したため、次の方向性として: + +#### Option A: Front FastLane の拡張 + +**現状の FastLane:** +- Tiny のみ対応 +- malloc/free のみ + +**拡張候補:** +- Small への対応(class 8-15) +- realloc への対応 +- calloc への対応 + +**期待効果:** +- さらに +2-5% の改善可能性 + +#### Option B: Backend 最適化 + +**Front が最適化されたため、Backend がボトルネックになる可能性:** +- SuperSlab refill の最適化 +- Region carve の最適化 +- TLS cache の効率化 + +**期待効果:** +- +1-3% の改善可能性 + +#### Option C: Mid/Large の最適化 + +**現状 Mid/Large は最適化が少ない:** +- Mid V3 の拡張(class mask 拡大) +- Large の Fast path 追加 + +**期待効果:** +- ワークロードによって +3-10% の改善可能性 + +**推奨順序:** +1. **Phase 6 昇格** (default ON) +2. **Option A** (FastLane 拡張 - Small/realloc/calloc) +3. **Option C** (Mid/Large 最適化) +4. **Option B** (Backend 最適化) + +## まとめ + +Phase 6 Front FastLane(Layer Collapse)A/B テストの結果: + +- ✅ **GO 判定** - Mixed 10-run mean で **+11.13%** の顕著な改善 +- ✅ 全 10 runs で一貫した改善(+7.69% 〜 +13.96%) +- ✅ 健康診断 PASSED(全プロファイル正常) +- ✅ 期待値(+1-3%)を **3.7-11 倍上回る** 圧倒的な性能向上 + +**Phase 6 は HAKMEM 史上最大の単体改善を記録しました。** + +### 主要成功要因 + +1. **重複排除の累積効果** - wrapper→gate→policy→route の Layer Collapse +2. **既存の勝ち箱との相乗効果** - Phase 2-5 の最適化を前提とした Layer Collapse +3. **Fail-Fast 設計** - 確信が持てる場合のみ Hot path +4. **境界の一本化** - FastLane → ColdFallback の単一フォールバック +5. **TLS/ENV アクセスの削減** - 判定を 1 回に集約 + +### 次のアクション + +1. **即座に昇格** - `HAKMEM_FRONT_FASTLANE=1` を default に +2. **外部レビューに報告** - 予測を大幅に上回る成功事例 +3.
**Phase 7 計画** - FastLane 拡張(Small/realloc/calloc)または Mid/Large 最適化 + +--- + +**テスト実施者:** Claude Sonnet 4.5 +**テスト実施日:** 2025-12-14 +**ビルド:** Release (O3, LTO, native) +**ENV:** `HAKMEM_FRONT_FASTLANE=0/1` (同一バイナリ) +**判定:** **✅ GO (+11.13%)** diff --git a/docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md b/docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md new file mode 100644 index 00000000..df257eea --- /dev/null +++ b/docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md @@ -0,0 +1,121 @@ +# Phase 6: Front FastLane(Layer Collapse)Design v1 + +## 0. 背景 / ねらい + +直近の勝ち筋は「分岐形」ではなく **重複排除(境界の一本化)** と **ENV/TLS 読み回数の削減**だった。 +一方で “削る(別バイナリ比較)” は配置/LTO の二次効果で壊れやすく **NO-GO**。 + +外部レビュー(ML2)では、次の芯は **Front FastLane(wrapper→gate→policy→route の Layer Collapse)**が最優先、という結論になった。 + +- 外部回答の記録: `PHASE_ML2_CHATGPT_RESPONSE_FASTLANE.md` +- 質問状: `PHASE_ML2_CHATGPT_QUESTIONNAIRE_FASTLANE.md` + +## 1. ゴール(Box Theory) + +- **Hot の入口を 1 箱に畳む**(malloc/free の “入口固定費” を減らす) +- **境界は 1 箇所**(FastLane → ColdFallback の単一フォールバック) +- **戻せる**(ENV gate で A/B) +- **見える化は最小**(hit/fallback のカウンタだけ) +- **Fail-Fast**(確信が持てないものは必ず既存経路へ) + +## 2. 非ゴール(やらない) + +- 凍結箱を「削除して痩せさせる」(E7 NO-GO) + - 参照: `docs/analysis/PHASE5_E7_FROZEN_BOX_PRUNE_AB_TEST_RESULTS.md` +- “branch hint” の固定最適化(CPU/モードで逆効果になりやすい) + +## 3. 
形(Box 図) + +``` + (ENV: HAKMEM_FRONT_FASTLANE=0/1) + +---------------------+ + | L0: FastLaneEnvBox | + +---------------------+ + | + v + +--------------------+ +--------------------------+ + | malloc/free wrapper| --> | L1: FrontFastLaneBox | + | (既存のまま) | | - size->class->route | + +--------------------+ | - try_alloc / try_free | + | - fast: 直線 + 早期return| + +--------------------------+ + | \ + handled->| \ not-handled + v v + +------------------+ +-----------------------+ + | L1a: HotHandlers | | L2: ColdFallbackIface | + | (既存を呼ぶだけ) | | (既存 wrapper 継続) | + +------------------+ +-----------------------+ +``` + +**境界 1 箇所**: `FrontFastLaneBox` が “handled できない” と判断したら即 `ColdFallbackIface`(=既存 wrapper の続き)へ落とす。 + +## 4. 既存資産の再利用(重要) + +FastLane を “新規で全部作る” のではなく、既に勝っている箱を **Hot 入口で 1 回だけ読む**形に揃える。 + +- Wrapper ENV snapshot: + - `core/box/malloc_wrapper_env_snapshot_box.h` + - `core/box/free_wrapper_env_snapshot_box.h` +- Tiny route snapshot: + - `core/box/tiny_route_env_box.h`(route_kind を class ごとに決める) +- ENV snapshot consolidation(ある場合): + - `core/box/hakmem_env_snapshot_box.h` + +方針: +- FastLane 内で “同じ判定を 2 回やらない”。 +- 既存の Hot handler(例: `malloc_tiny_fast_for_class()` / `tiny_alloc_gate_fast()` / `free_tiny_fast()`)を **呼ぶだけ**に留める。 + +## 5. FastLane の責務(L1) + +### 5.1 alloc: try_alloc(size) の責務 + +- size から class を決める(可能なら LUT / 1 回) +- class から route を決める(可能なら snapshot / 1 回) +- “確信が持てる”場合のみ Hot handler に直行し、成功したら即 return +- 失敗したら **即 not-handled**(Cold へ) + +### 5.2 free: try_free(ptr) の責務 + +- **確信が持てる**場合のみ tiny free へ直行(例: header で tiny 物を fail-fast 判定できる) +- それ以外は not-handled(Cold へ) + +## 6. Fail-Fast(安全ゲート) + +FastLane の原則: +- “fast path に入る条件” は **必要最小** +- “fallback 条件” は **広く** + +例(alloc): +- `size <= tiny_get_max_size()`(もしくは wrapper snapshot 由来の cheap 判定) +- class が範囲内 +- route が “既知の tiny hot handler で処理可能” + +例(free): +- header magic / class_idx が valid +- “tiny が確実” と言える場合のみ(曖昧なら必ず Cold) + +## 7. 
ENV / A/B 方針 + +- `HAKMEM_FRONT_FASTLANE=0/1`(default 1, opt-out) +- optional: + - `HAKMEM_FRONT_FASTLANE_CLASS_MASK=0x??`(段階導入用) + +A/B: +- Mixed 10-run は必ず clean env runner を使う(ENV 漏れ防止) + - `scripts/run_mixed_10_cleanenv.sh` + +GO/NO-GO(運用): +- GO: Mixed 10-run mean **+1.0% 以上** +- NEUTRAL: **±1.0%**(freeze) +- NO-GO: **-1.0% 以下**(rollback/freeze) + +## 8. 実装方針(小パッチ順) + +1. **ENV gate 箱**(default ON) +2. **FrontFastLaneBox**(alloc のみ / tiny のみ) +3. **free を追加**(tiny 直通のみ、曖昧なら落とす) +4. **最小 stats**(hit/fallback、理由は 3〜6 種類まで) +5. 健康診断 + Mixed 10-run A/B + +次の具体指示は `docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md` にまとめる。 diff --git a/docs/analysis/PHASE6_FRONT_FASTLANE_1_IMPLEMENTATION_REPORT.md b/docs/analysis/PHASE6_FRONT_FASTLANE_1_IMPLEMENTATION_REPORT.md new file mode 100644 index 00000000..77561371 --- /dev/null +++ b/docs/analysis/PHASE6_FRONT_FASTLANE_1_IMPLEMENTATION_REPORT.md @@ -0,0 +1,269 @@ +# Phase 6: Front FastLane(Layer Collapse)実装完了レポート + +## 実装日時 +2025-12-14 + +## ステータス +**✅ GO** - Mixed 10-run で **+11.13%** の顕著な改善を確認(A/B 実施済み) + +参照: +- A/B 結果: `docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md` + +## 概要 + +Phase 6 では、malloc/free の入口で発生している「wrapper→gate→policy→route」の固定費を **Hot 側 1 箱** に畳み、**Cold 側へ落ちるのは 1 箇所**(単一フォールバック)にする Layer Collapse 最適化を実装しました。 + +設計ドキュメント: +- `/mnt/workdisk/public_share/hakmem/docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md` +- `/mnt/workdisk/public_share/hakmem/docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md` + +## 実装内容 + +### Patch 1: ENV gate を箱化 + +**新規ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_env_box.h` (getter) +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_env_box.c` (optional refresh) + +**機能:** +- ENV: `HAKMEM_FRONT_FASTLANE=0/1` (default 1, opt-out) +- Optional: `HAKMEM_FRONT_FASTLANE_CLASS_MASK=0x..` (段階導入用, default 0xFF) +- Lazy init (getenv on first call, atomic cache) +- Zero overhead when disabled (static cached) + +### Patch 2: 
FrontFastLaneBox(stub) + +**新規ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_box.h` (hot inline / try_* API) +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_box.c` (cold helper / stats) +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_stats_box.h` (stats counters) + +**API:** +```c +void* front_fastlane_try_malloc(size_t size) // Success: non-NULL, Fail: NULL +bool front_fastlane_try_free(void* ptr) // Success: true, Fail: false +``` + +**Stats (最小):** +- `fastlane_malloc_hit/fallback` (6種類の fallback reason) +- `fastlane_free_hit/fallback` (7種類の fallback reason) + +### Patch 3: malloc wrapper に統合(1箇所だけ) + +**変更ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/hak_wrappers.inc.h` (lines 179-189) + +**統合点:** +- BenchFast の直後、tiny 試行の直前に挿入 +- Fail は既存の wrapper 経路に fall-through(境界 1 箇所) + +**コード:** +```c +// Phase 6: Front FastLane (Layer Collapse) +if (__builtin_expect(front_fastlane_enabled(), 1)) { + void* p = front_fastlane_try_malloc(size); + if (__builtin_expect(p != NULL, 1)) { + return p; // Success: handled by FastLane + } + // Fallback: not handled, continue to existing wrapper path +} +``` + +### Patch 4: malloc の FastLane 実装(Tiny のみ) + +**変更ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_box.h` (lines 50-89) + +**実装方針:** +- **既存の勝ち箱を 1 回だけ読む**で構成 +- `tiny_get_max_size()`: Cached max size check (typically 256 or 1024) +- `hak_tiny_size_to_class(size)`: Single LUT lookup, no branches +- `front_fastlane_class_mask()`: Gradual rollout support +- `malloc_tiny_fast_for_class(size, class_idx)`: Existing hot handler (no duplication) + +**Fail-fast ルール:** +- FastLane 内で **同じ判定を二度しない**(重複排除が主目的) +- 失敗したら即 return NULL(wrapper に戻す) + +**Fallback reasons:** +1. `malloc_fallback_size`: Size > tiny_get_max_size() +2. `malloc_fallback_class`: Invalid class_idx (< 0 or >= 8) +3. `malloc_fallback_other`: Class not enabled in mask +4. 
`malloc_fallback_alloc`: Allocation failed (refill needed) + +### Patch 5: free wrapper に統合(1箇所だけ) + +**変更ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/hak_wrappers.inc.h` (lines 634-643) + +**統合点:** +- `ptr == NULL` の後、heavy classify の前に挿入 + +**コード:** +```c +// Phase 6: Front FastLane (Layer Collapse) - free path +if (__builtin_expect(front_fastlane_enabled(), 1)) { + if (front_fastlane_try_free(ptr)) { + return; // Success: handled by FastLane + } + // Fallback: not handled, continue to existing wrapper path +} +``` + +### Patch 6: free の FastLane 実装(Tiny 直通のみ) + +**変更ファイル:** +- `/mnt/workdisk/public_share/hakmem/core/box/front_fastlane_box.h` (lines 95-153) + +**実装方針:** +- E5-1 の header 判定パターン(`HAKMEM_FREE_TINY_DIRECT`)を再利用 +- **確信が持てる場合のみ** Tiny free に直行 +- それ以外は not-handled (false を返す) + +**処理フロー:** +1. Page boundary guard: `(ptr & 0xFFFu) == 0` → fallback (unsafe to read header) +2. Fast header validation: `*((uint8_t*)ptr - 1)` +3. Magic check: `(header & 0xF0u) == 0xA0u` → Tiny header +4. Class extraction: `(header & 0x0Fu)` → class_idx < 8 +5. Class mask check: `((mask >> class_idx) & 1)` → enabled +6. Call existing hot handler: `free_tiny_fast(ptr)` → returns 1 on success + +**Fallback reasons:** +1. `free_fallback_aligned`: Page-aligned pointer +2. `free_fallback_header`: Invalid header magic +3. `free_fallback_class`: Class out of bounds +4. `free_fallback_failure`: Free failed (cold path needed) +5. `free_fallback_other`: Other reasons (no header support, class not enabled) + +## 既存ビルド問題の修正 + +Phase 6 の実装中に、既存の `g_free_cold_shape` 未定義参照エラーを発見しました。これは Phase 6 とは無関係な既存のビルド問題でした。 + +**修正内容:** + +1. **tiny_header_box.h の修正:** + - `tiny_header_write_once_enabled()` の forward 宣言を削除 + - `#include "tiny_header_write_once_env_box.h"` に置き換え + - 理由: extern 宣言と static inline の衝突 + +2. 
**Makefile の修正:** + - 以下のオブジェクトファイルを追加: + - `core/box/free_cold_shape_env_box.o` + - `core/box/free_cold_shape_stats_box.o` + - 追加先: + - `OBJS_BASE` (line 221) + - `BENCH_HAKMEM_OBJS_BASE` (line 253) + - `TINY_BENCH_OBJS_BASE` (line 430) + +## ビルド結果 + +```bash +$ make clean && make bench_random_mixed_hakmem +gcc -o bench_random_mixed_hakmem bench_random_mixed_hakmem.o ... -lm -lpthread -flto +lto-wrapper: warning: using serial compilation of 9 LTRANS jobs +$ ls -lh bench_random_mixed_hakmem +-rwxrwxr-x 1 tomoaki tomoaki 631K 12月 14 09:49 bench_random_mixed_hakmem +``` + +**ビルド成功!** + +## 動作確認 + +```bash +# FastLane OFF +$ HAKMEM_FRONT_FASTLANE=0 ./bench_random_mixed_hakmem 1 +[BENCH_FAST] HAKMEM_BENCH_FAST_MODE not set, skipping init +[LIBM_RELOC_GUARD] base=0x7d781a6df000 slot=0x7d781a7c4d88 raw=0x7d781a6ed420 relocated=1 +[RSS] max_kb=29184 +... + +# FastLane ON +$ HAKMEM_FRONT_FASTLANE=1 ./bench_random_mixed_hakmem 1 +[BENCH_FAST] HAKMEM_BENCH_FAST_MODE not set, skipping init +[Rel-Unified] unified_cache_enabled() = 1 +[POLICY_V7_INIT] Route assignments: + C0: LEGACY, C1: LEGACY, C2: LEGACY, C3: LEGACY, C4: LEGACY, C5: LEGACY +... +``` + +**両方とも正常に起動!** + +## 新規追加ファイル一覧 + +``` +core/box/front_fastlane_env_box.h (ENV gate getter) +core/box/front_fastlane_env_box.c (ENV gate refresh, optional) +core/box/front_fastlane_box.h (Hot inline try_* API) +core/box/front_fastlane_box.c (Cold helper / stats dump) +core/box/front_fastlane_stats_box.h (Stats counters) +``` + +## 変更ファイル一覧 + +``` +core/box/hak_wrappers.inc.h (malloc/free wrapper統合, +2箇所) +core/box/tiny_header_box.h (既存ビルド問題修正) +Makefile (既存ビルド問題修正, +3箇所) +``` + +## 重要な設計原則 + +1. **重複排除が主目的:** + - FastLane 内で「同じ判定を二度しない」 + - 既存の hot handler を **呼ぶだけ** に留める + +2. **Fail-Fast:** + - 確信が持てない場合は必ず既存経路へ fallback + - 単一フォールバック境界(FastLane → ColdFallback) + +3. **ENV gate:** + - Default ON(opt-out via `HAKMEM_FRONT_FASTLANE=0`) + - A/B テストは同一バイナリで ENV トグル + +4. 
**Stats 最小:** + - hit/fallback のカウンタのみ + - fallback reason は 3〜7 種類まで + +## 次のステップ + +### 昇格(強く推奨) + +- `HAKMEM_FRONT_FASTLANE` を **default ON** として運用(opt-out 可)。 +- `core/bench_profile.h` の主要プリセットに `HAKMEM_FRONT_FASTLANE=1` を追加(A/B しやすさのため)。 +- Mixed 10-run / 健康診断を標準のチェック項目として残す(回帰検知)。 + +## 期待される効果 + +**設計書からの予測:** +- **+1-3%** (reduce redundant checks + TLS reads) +- wrapper→gate→policy→route の Layer Collapse +- 境界の一本化による固定費削減 + +**既存の勝ち筋:** +- Phase 2 B3 (Routing 分岐形): **+2.89%** +- Phase 2 B4 (Wrapper Layer Hot/Cold Split): **+1.47%** +- Combined: **~+4.4%** + +Phase 6 はこれらとは異なるアプローチ(重複排除 + Layer Collapse)なので、相加効果が期待できます。 + +## 注意点 + +1. **ENV 漏れ防止:** 必ず `scripts/run_mixed_10_cleanenv.sh` を使う +2. **別バイナリ比較にしない:** 削除/追加で A/B を崩さない +3. **Cold を noinline,cold に追い出して Hot を太らせすぎない** +4. **branch hint を固定しない:** モードで逆効果になり得る + +## まとめ + +Phase 6 Front FastLane (Layer Collapse) は **GO**: + +- ✅ A/B 10-run: **+11.13%**(全 run でプラス) +- ✅ 健康診断: PASS +- ✅ 境界1箇所 + Fail-Fast の設計を維持したまま大幅改善 + +--- + +**実装者:** Claude Sonnet 4.5 +**実装日:** 2025-12-14 +**ビルド:** Release (O3, LTO, native) +**ENV:** `HAKMEM_FRONT_FASTLANE=1` (default ON, opt-out) diff --git a/docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md b/docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md new file mode 100644 index 00000000..404bf48d --- /dev/null +++ b/docs/analysis/PHASE6_FRONT_FASTLANE_NEXT_INSTRUCTIONS.md @@ -0,0 +1,61 @@ +# Phase 6: Front FastLane(Layer Collapse)Next Instructions(昇格) + +## Status + +- Phase 6 FastLane は **✅ GO(+11.13% Mixed 10-run)** +- 結果: `docs/analysis/PHASE6_FRONT_FASTLANE_1_AB_TEST_RESULTS.md` +- 実装: `docs/analysis/PHASE6_FRONT_FASTLANE_1_IMPLEMENTATION_REPORT.md` +- 設計: `docs/analysis/PHASE6_FRONT_FASTLANE_1_DESIGN.md` + +## 0. 目的 + +FastLane を本線昇格(default ON / opt-out)し、以後の baseline を引き上げる。 + +## 1. 
昇格(本線化) + +1) **default ON** +- ENV: `HAKMEM_FRONT_FASTLANE=0/1` +- default: **1**(opt-out は `HAKMEM_FRONT_FASTLANE=0`) + +2) **プリセット ON** +- `core/bench_profile.h` の主要プロファイルで `bench_setenv_default("HAKMEM_FRONT_FASTLANE","1")` + +3) **安全ゲート(Fail-Fast)** +- 初期化前(`!g_initialized`)は FastLane を使わず既存 wrapper にフォールバック + +## 2. A/B(最終確認) + +Mixed 10-run(clean env): + +OFF: +```sh +HAKMEM_FRONT_FASTLANE=0 scripts/run_mixed_10_cleanenv.sh +``` + +ON: +```sh +HAKMEM_FRONT_FASTLANE=1 scripts/run_mixed_10_cleanenv.sh +``` + +判定(Mixed 10-run mean): +- GO: **+1.0% 以上** +- NEUTRAL: **±1.0%**(default ON は維持せず、再検討) +- NO-GO: **-1.0% 以下**(即 rollback) + +## 3. 健康診断(必須) + +```sh +scripts/verify_health_profiles.sh +``` + +## 4. Rollback + +- ENV: `HAKMEM_FRONT_FASTLANE=0` +- あるいは本線 default を戻す(diff 1 箇所) + +## 5. 次の候補(Phase 6-2) + +FastLane 内で `tiny_get_max_size()` を毎回呼ぶのは、E4-2 の勝ち筋(wrapper snapshot)と逆方向なので、次はここを薄くする: + +- `malloc_wrapper_env_get()` 由来の `tiny_max_size_256`(または max_size 値)を FastLane に渡して “call を消す” +- ただし “FastLane で同じ判定を二度しない” を守る(境界 1 箇所) diff --git a/hakmem.d b/hakmem.d index 0d66fa5d..28173d7c 100644 --- a/hakmem.d +++ b/hakmem.d @@ -27,9 +27,10 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/ptr_track.h core/tiny_debug_api.h core/box/tiny_layout_box.h \ core/box/../hakmem_tiny_config.h core/box/../hakmem_build_flags.h \ core/box/tiny_header_box.h core/box/tiny_layout_box.h \ - core/box/../tiny_region_id.h core/hakmem_elo.h core/hakmem_ace_stats.h \ - core/hakmem_batch.h core/hakmem_evo.h core/hakmem_debug.h \ - core/hakmem_prof.h core/hakmem_syscall.h core/hakmem_ace_controller.h \ + core/box/../tiny_region_id.h core/box/tiny_header_write_once_env_box.h \ + core/hakmem_elo.h core/hakmem_ace_stats.h core/hakmem_batch.h \ + core/hakmem_evo.h core/hakmem_debug.h core/hakmem_prof.h \ + core/hakmem_syscall.h core/hakmem_ace_controller.h \ core/hakmem_ace_metrics.h core/hakmem_ace_ucb1.h \ core/box/bench_fast_box.h core/box/mid_hotbox_v3_box.h 
\ core/box/tiny_geometry_box.h \ @@ -166,7 +167,9 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/free_tiny_direct_env_box.h \ core/box/free_tiny_direct_stats_box.h \ core/box/malloc_tiny_direct_env_box.h \ - core/box/malloc_tiny_direct_stats_box.h core/box/../hakmem_internal.h + core/box/malloc_tiny_direct_stats_box.h core/box/front_fastlane_box.h \ + core/box/front_fastlane_env_box.h core/box/front_fastlane_stats_box.h \ + core/box/../hakmem_internal.h core/hakmem.h: core/hakmem_build_flags.h: core/hakmem_config.h: @@ -222,6 +225,7 @@ core/box/../hakmem_build_flags.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: core/hakmem_elo.h: core/hakmem_ace_stats.h: core/hakmem_batch.h: @@ -414,4 +418,7 @@ core/box/free_tiny_direct_env_box.h: core/box/free_tiny_direct_stats_box.h: core/box/malloc_tiny_direct_env_box.h: core/box/malloc_tiny_direct_stats_box.h: +core/box/front_fastlane_box.h: +core/box/front_fastlane_env_box.h: +core/box/front_fastlane_stats_box.h: core/box/../hakmem_internal.h: diff --git a/hakmem_shared_pool.d b/hakmem_shared_pool.d index 8c7e6092..aa5a127b 100644 --- a/hakmem_shared_pool.d +++ b/hakmem_shared_pool.d @@ -25,19 +25,19 @@ hakmem_shared_pool.o: core/hakmem_shared_pool.c \ core/box/ptr_type_box.h core/tiny_debug_api.h core/box/tiny_layout_box.h \ core/box/../hakmem_tiny_config.h core/box/../hakmem_build_flags.h \ core/box/tiny_header_box.h core/box/tiny_layout_box.h \ - core/box/../tiny_region_id.h core/box/ss_hot_cold_box.h \ - core/box/pagefault_telemetry_box.h core/box/tls_sll_drain_box.h \ - core/box/tls_sll_box.h core/box/../hakmem_internal.h \ - core/box/../hakmem.h core/box/../hakmem_build_flags.h \ - core/box/../hakmem_config.h core/box/../hakmem_features.h \ - core/box/../hakmem_sys.h core/box/../hakmem_whale.h \ - core/box/../box/ptr_type_box.h core/box/../hakmem_debug_master.h \ - core/box/../tiny_remote.h 
core/box/../hakmem_tiny_integrity.h \ - core/box/../hakmem_tiny.h core/box/../ptr_track.h \ - core/box/../ptr_trace.h core/box/../hakmem_trace_master.h \ - core/box/../hakmem_stats_master.h core/box/../tiny_debug_ring.h \ - core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \ - core/box/tiny_ptr_bridge_box.h \ + core/box/../tiny_region_id.h core/box/tiny_header_write_once_env_box.h \ + core/box/ss_hot_cold_box.h core/box/pagefault_telemetry_box.h \ + core/box/tls_sll_drain_box.h core/box/tls_sll_box.h \ + core/box/../hakmem_internal.h core/box/../hakmem.h \ + core/box/../hakmem_build_flags.h core/box/../hakmem_config.h \ + core/box/../hakmem_features.h core/box/../hakmem_sys.h \ + core/box/../hakmem_whale.h core/box/../box/ptr_type_box.h \ + core/box/../hakmem_debug_master.h core/box/../tiny_remote.h \ + core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \ + core/box/../ptr_track.h core/box/../ptr_trace.h \ + core/box/../hakmem_trace_master.h core/box/../hakmem_stats_master.h \ + core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \ + core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \ core/box/../hakmem_tiny_superslab_internal.h \ core/box/../hakmem_tiny_superslab.h core/box/../box/ss_hot_cold_box.h \ core/box/../box/ss_allocation_box.h core/hakmem_tiny_superslab.h \ @@ -98,6 +98,7 @@ core/box/../hakmem_build_flags.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: core/box/ss_hot_cold_box.h: core/box/pagefault_telemetry_box.h: core/box/tls_sll_drain_box.h: diff --git a/hakmem_tiny_bg_spill.d b/hakmem_tiny_bg_spill.d index 9b9eb0ed..d261fcdc 100644 --- a/hakmem_tiny_bg_spill.d +++ b/hakmem_tiny_bg_spill.d @@ -15,7 +15,8 @@ hakmem_tiny_bg_spill.o: core/hakmem_tiny_bg_spill.c \ core/box/hak_lane_classify.inc.h core/box/ptr_type_box.h \ core/tiny_debug_api.h core/box/tiny_layout_box.h \ core/box/../hakmem_tiny_config.h 
core/box/tiny_header_box.h \ - core/box/tiny_layout_box.h core/box/../tiny_region_id.h + core/box/tiny_layout_box.h core/box/../tiny_region_id.h \ + core/box/tiny_header_write_once_env_box.h core/hakmem_tiny_bg_spill.h: core/box/tiny_next_ptr_box.h: core/hakmem_tiny_config.h: @@ -53,3 +54,4 @@ core/box/../hakmem_tiny_config.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: diff --git a/hakmem_tiny_magazine.d b/hakmem_tiny_magazine.d index 10858de4..ed74a59c 100644 --- a/hakmem_tiny_magazine.d +++ b/hakmem_tiny_magazine.d @@ -26,7 +26,7 @@ hakmem_tiny_magazine.o: core/hakmem_tiny_magazine.c \ core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \ core/box/../hakmem_build_flags.h core/box/tiny_header_box.h \ core/box/tiny_layout_box.h core/box/../tiny_region_id.h \ - core/box/tiny_mem_stats_box.h + core/box/tiny_header_write_once_env_box.h core/box/tiny_mem_stats_box.h core/hakmem_tiny_magazine.h: core/hakmem_tiny.h: core/hakmem_build_flags.h: @@ -75,4 +75,5 @@ core/box/../hakmem_build_flags.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: core/box/tiny_mem_stats_box.h: diff --git a/hakmem_tiny_sfc.d b/hakmem_tiny_sfc.d index ee9a4559..6280bd42 100644 --- a/hakmem_tiny_sfc.d +++ b/hakmem_tiny_sfc.d @@ -14,18 +14,18 @@ hakmem_tiny_sfc.o: core/hakmem_tiny_sfc.c core/tiny_alloc_fast_sfc.inc.h \ core/box/ss_pt_env_box.h core/box/ss_pt_env_box.h core/tiny_debug_api.h \ core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \ core/box/tiny_header_box.h core/box/tiny_layout_box.h \ - core/box/../tiny_region_id.h core/hakmem_stats_master.h core/tiny_tls.h \ - core/box/tls_sll_box.h core/box/../hakmem_internal.h \ - core/box/../hakmem.h core/box/../hakmem_build_flags.h \ - core/box/../hakmem_config.h core/box/../hakmem_features.h \ - core/box/../hakmem_sys.h core/box/../hakmem_whale.h \ - 
core/box/../box/ptr_type_box.h core/box/../hakmem_debug_master.h \ - core/box/../tiny_remote.h core/box/../hakmem_tiny_integrity.h \ - core/box/../hakmem_tiny.h core/box/../ptr_track.h \ - core/box/../ptr_trace.h core/box/../hakmem_trace_master.h \ - core/box/../hakmem_stats_master.h core/box/../tiny_debug_ring.h \ - core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \ - core/box/tiny_ptr_bridge_box.h \ + core/box/../tiny_region_id.h core/box/tiny_header_write_once_env_box.h \ + core/hakmem_stats_master.h core/tiny_tls.h core/box/tls_sll_box.h \ + core/box/../hakmem_internal.h core/box/../hakmem.h \ + core/box/../hakmem_build_flags.h core/box/../hakmem_config.h \ + core/box/../hakmem_features.h core/box/../hakmem_sys.h \ + core/box/../hakmem_whale.h core/box/../box/ptr_type_box.h \ + core/box/../hakmem_debug_master.h core/box/../tiny_remote.h \ + core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \ + core/box/../ptr_track.h core/box/../ptr_trace.h \ + core/box/../hakmem_trace_master.h core/box/../hakmem_stats_master.h \ + core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \ + core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \ core/box/../hakmem_tiny_superslab_internal.h \ core/box/../hakmem_tiny_superslab.h core/box/../box/ss_hot_cold_box.h \ core/box/../box/../superslab/superslab_types.h \ @@ -74,6 +74,7 @@ core/box/../hakmem_tiny_config.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: core/hakmem_stats_master.h: core/tiny_tls.h: core/box/tls_sll_box.h: diff --git a/tiny_adaptive_sizing.d b/tiny_adaptive_sizing.d index b0e49a3b..e418b49a 100644 --- a/tiny_adaptive_sizing.d +++ b/tiny_adaptive_sizing.d @@ -15,7 +15,7 @@ tiny_adaptive_sizing.o: core/tiny_adaptive_sizing.c \ core/box/ss_pt_env_box.h core/box/ss_pt_env_box.h core/tiny_debug_api.h \ core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \ 
core/box/tiny_header_box.h core/box/tiny_layout_box.h \ - core/box/../tiny_region_id.h + core/box/../tiny_region_id.h core/box/tiny_header_write_once_env_box.h core/tiny_adaptive_sizing.h: core/hakmem_tiny.h: core/hakmem_build_flags.h: @@ -53,3 +53,4 @@ core/box/../hakmem_tiny_config.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: diff --git a/tiny_fastcache.d b/tiny_fastcache.d index 13834266..10595dc7 100644 --- a/tiny_fastcache.d +++ b/tiny_fastcache.d @@ -15,7 +15,8 @@ tiny_fastcache.o: core/tiny_fastcache.c core/tiny_fastcache.h \ core/box/hak_lane_classify.inc.h core/box/ptr_type_box.h \ core/tiny_debug_api.h core/box/tiny_layout_box.h \ core/box/../hakmem_tiny_config.h core/box/tiny_header_box.h \ - core/box/tiny_layout_box.h core/box/../tiny_region_id.h + core/box/tiny_layout_box.h core/box/../tiny_region_id.h \ + core/box/tiny_header_write_once_env_box.h core/tiny_fastcache.h: core/hakmem_env_cache.h: core/box/tiny_next_ptr_box.h: @@ -54,3 +55,4 @@ core/box/../hakmem_tiny_config.h: core/box/tiny_header_box.h: core/box/tiny_layout_box.h: core/box/../tiny_region_id.h: +core/box/tiny_header_write_once_env_box.h: