diff --git a/Makefile b/Makefile index 1a1a5c79..d7674f2a 100644 --- a/Makefile +++ b/Makefile @@ -218,12 +218,12 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o 
core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o 
core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o OBJS = $(OBJS_BASE) # Shared library SHARED_LIB = libhakmem.so -SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o core/box/ss_allocation_box_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o core/superslab_head_stub_shared.o hakmem_smallmid_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/box/tiny_env_box_shared.o core/box/tiny_route_box_shared.o core/box/tiny_page_box_shared.o core/box/tiny_class_policy_box_shared.o 
core/box/tiny_class_stats_box_shared.o core/box/tiny_policy_learner_box_shared.o core/box/ss_budget_box_shared.o core/box/tiny_mem_stats_box_shared.o core/box/wrapper_env_box_shared.o core/box/madvise_guard_box_shared.o core/box/libm_reloc_guard_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/tiny_c7_ultra_segment_shared.o core/tiny_c7_ultra_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o core/box/super_reg_box_shared.o core/box/shared_pool_box_shared.o core/box/remote_side_box_shared.o core/tiny_destructors_shared.o +SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o core/box/ss_allocation_box_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o core/superslab_head_stub_shared.o hakmem_smallmid_shared.o 
core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/box/tiny_env_box_shared.o core/box/tiny_route_box_shared.o core/box/free_front_v3_env_box_shared.o core/box/free_path_stats_box_shared.o core/box/tiny_c6_ultra_free_box_shared.o core/box/tiny_page_box_shared.o core/box/tiny_class_policy_box_shared.o core/box/tiny_class_stats_box_shared.o core/box/tiny_policy_learner_box_shared.o core/box/ss_budget_box_shared.o core/box/tiny_mem_stats_box_shared.o core/box/wrapper_env_box_shared.o core/box/madvise_guard_box_shared.o core/box/libm_reloc_guard_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/tiny_c7_ultra_segment_shared.o core/tiny_c7_ultra_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o 
hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o core/box/super_reg_box_shared.o core/box/shared_pool_box_shared.o core/box/remote_side_box_shared.o core/tiny_destructors_shared.o # Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1) ifeq ($(POOL_TLS_PHASE1),1) @@ -250,7 +250,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o 
core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o 
core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -427,7 +427,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o 
core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o 
core/smallobject_core_v6.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o 
hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/box/free_path_stats_box.c b/core/box/free_path_stats_box.c index 46f2ebc7..730eced9 100644 --- a/core/box/free_path_stats_box.c +++ b/core/box/free_path_stats_box.c @@ -16,9 +16,10 @@ static void free_path_stats_dump(void) { return; } - fprintf(stderr, "[FREE_PATH_STATS] total=%lu c7_ultra=%lu small_v3=%lu v6=%lu tiny_v1=%lu pool_v1=%lu remote=%lu super_lookup=%lu legacy_fb=%lu\n", + fprintf(stderr, "[FREE_PATH_STATS] total=%lu c7_ultra=%lu c6_ultra_free=%lu small_v3=%lu v6=%lu tiny_v1=%lu pool_v1=%lu remote=%lu super_lookup=%lu legacy_fb=%lu\n", g_free_path_stats.total_calls, g_free_path_stats.c7_ultra_fast, + g_free_path_stats.c6_ultra_free_fast, // Phase 4-2 g_free_path_stats.smallheap_v3_fast, g_free_path_stats.smallheap_v6_fast, g_free_path_stats.tiny_heap_v1_fast, diff --git a/core/box/free_path_stats_box.h b/core/box/free_path_stats_box.h index 0280ab08..9c5843b7 100644 --- a/core/box/free_path_stats_box.h +++ b/core/box/free_path_stats_box.h @@ -9,6 +9,7 @@ typedef struct FreePathStats { uint64_t total_calls; uint64_t c7_ultra_fast; + uint64_t c6_ultra_free_fast; // Phase 4-2: C6 ULTRA-free uint64_t smallheap_v3_fast; uint64_t smallheap_v6_fast; uint64_t tiny_heap_v1_fast; diff --git a/core/box/tiny_c6_ultra_free_box.c b/core/box/tiny_c6_ultra_free_box.c new 
file mode 100644 index 00000000..0de25a4b --- /dev/null +++ b/core/box/tiny_c6_ultra_free_box.c @@ -0,0 +1,72 @@ +#include "tiny_c6_ultra_free_box.h" +#include "free_path_stats_box.h" +#include "tiny_front_v3_env_box.h" +#include "../hakmem.h" // For HAK_BASE_FROM_RAW +#include "../front/tiny_unified_cache.h" +#include "tiny_front_hot_box.h" +#include "../superslab/superslab_inline.h" // For ss_fast_lookup +#include + +// TLS context +static __thread TinyC6UltraFreeTLS g_c6_ultra_free_tls = {0}; + +TinyC6UltraFreeTLS* tiny_c6_ultra_free_tls(void) { + return &g_c6_ultra_free_tls; +} + +// Legacy free helper (shared with slow path) +static void hak_tiny_free_legacy_impl(void* base, uint32_t class_idx) { + const TinyFrontV3Snapshot* front_snap = + __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; + + // Legacy fallback - Unified Cache push + if (!front_snap || front_snap->unified_cache_on) { + if (unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base))) { + FREE_PATH_STAT_INC(legacy_fallback); + + // Phase 4-1: Legacy per-class breakdown + if (__builtin_expect(free_path_stats_enabled(), 0)) { + if (class_idx < 8) { + g_free_path_stats.legacy_by_class[class_idx]++; + } + } + return; + } + } + + // Final fallback + tiny_hot_free_fast(class_idx, base); +} + +// Fast path: TLS cache push +void tiny_c6_ultra_free_fast(void* base, uint32_t class_idx) { + TinyC6UltraFreeTLS* ctx = &g_c6_ultra_free_tls; + + // Phase 4-3: Learn segment on first C6 free + if (unlikely(ctx->seg_base == 0)) { + SuperSlab* ss = ss_fast_lookup(base); + if (ss != NULL) { + ctx->seg_base = (uintptr_t)ss; + ctx->seg_end = ctx->seg_base + (1u << ss->lg_size); + } + } + + // Check if ptr is in our segment AND cache has room + if (likely(ctx->seg_base != 0 && + (uintptr_t)base >= ctx->seg_base && + (uintptr_t)base < ctx->seg_end && + ctx->count < TINY_C6_ULTRA_FREE_CAP)) { + // Push to TLS cache + ctx->freelist[ctx->count++] = base; + 
FREE_PATH_STAT_INC(c6_ultra_free_fast); + return; + } + + // Slow path: fallback to legacy (cache full or ptr not in segment) + tiny_c6_ultra_free_slow(base, class_idx); +} + +// Slow path: fallback to legacy free +void tiny_c6_ultra_free_slow(void* base, uint32_t class_idx) { + hak_tiny_free_legacy_impl(base, class_idx); +} diff --git a/core/box/tiny_c6_ultra_free_box.h b/core/box/tiny_c6_ultra_free_box.h new file mode 100644 index 00000000..7fd36b8f --- /dev/null +++ b/core/box/tiny_c6_ultra_free_box.h @@ -0,0 +1,30 @@ +#ifndef HAKMEM_TINY_C6_ULTRA_FREE_BOX_H +#define HAKMEM_TINY_C6_ULTRA_FREE_BOX_H + +#include +#include +#include "tiny_c6_ultra_free_env_box.h" + +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +// TLS cache capacity (Phase 4-3: increased for better coverage) +#define TINY_C6_ULTRA_FREE_CAP 128 + +// TLS context for C6 ULTRA-free +typedef struct TinyC6UltraFreeTLS { + void* freelist[TINY_C6_ULTRA_FREE_CAP]; // BASE pointers + uint8_t count; + uint8_t _pad[7]; + uintptr_t seg_base; // C6 segment range (0 = not initialized) + uintptr_t seg_end; +} TinyC6UltraFreeTLS; + +// API +TinyC6UltraFreeTLS* tiny_c6_ultra_free_tls(void); +void tiny_c6_ultra_free_fast(void* base, uint32_t class_idx); +void tiny_c6_ultra_free_slow(void* base, uint32_t class_idx); + +#endif // HAKMEM_TINY_C6_ULTRA_FREE_BOX_H diff --git a/core/box/tiny_c6_ultra_free_env_box.h b/core/box/tiny_c6_ultra_free_env_box.h new file mode 100644 index 00000000..3664811e --- /dev/null +++ b/core/box/tiny_c6_ultra_free_env_box.h @@ -0,0 +1,17 @@ +#ifndef HAKMEM_TINY_C6_ULTRA_FREE_ENV_BOX_H +#define HAKMEM_TINY_C6_ULTRA_FREE_ENV_BOX_H + +#include +#include + +// ENV: HAKMEM_TINY_C6_ULTRA_FREE_ENABLED (default 0) +static inline bool tiny_c6_ultra_free_enabled(void) { + static int g_enabled = -1; + if (__builtin_expect(g_enabled == -1, 0)) { + const char* e = getenv("HAKMEM_TINY_C6_ULTRA_FREE_ENABLED"); + g_enabled = (e && 
*e && *e != '0') ? 1 : 0; + } + return g_enabled; +} + +#endif // HAKMEM_TINY_C6_ULTRA_FREE_ENV_BOX_H diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index c6b75ea1..dbcc38af 100644 --- a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -46,6 +46,7 @@ // Phase FREE-LEGACY-BREAKDOWN-1: v6 は型エラーがあるため一時的にコメントアウト(デフォルト OFF なので影響なし) // #include "../box/smallobject_core_v6_box.h" // SmallObject Core v6 (C6-only route stub, Phase v6-1) #include "../box/tiny_c7_ultra_box.h" // C7 ULTRA stub (UF-1, delegates to v3) +#include "../box/tiny_c6_ultra_free_box.h" // Phase 4-2: C6 ULTRA-free (free-only, C6-only) #include "../box/tiny_front_v3_env_box.h" // Tiny front v3 snapshot gate #include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B) #include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy) @@ -80,6 +81,35 @@ static inline int front_gate_unified_enabled(void) { return g_enable; } +// ============================================================================ +// Phase 4-2: Legacy free helper (shared implementation in tiny_c6_ultra_free_box.c) +// ============================================================================ + +// Helper function to perform legacy free (used in both regular path and C6 ULTRA fallback) +__attribute__((always_inline)) +static inline void hak_tiny_free_legacy_inline(void* base, uint32_t class_idx) { + const TinyFrontV3Snapshot* front_snap = + __builtin_expect(tiny_front_v3_enabled(), 0) ? 
tiny_front_v3_snapshot_get() : NULL; + + // Legacy fallback - Unified Cache push + if (!front_snap || front_snap->unified_cache_on) { + if (unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base))) { + FREE_PATH_STAT_INC(legacy_fallback); + + // Phase 4-1: Legacy per-class breakdown + if (__builtin_expect(free_path_stats_enabled(), 0)) { + if (class_idx < 8) { + g_free_path_stats.legacy_by_class[class_idx]++; + } + } + return; + } + } + + // Final fallback + tiny_hot_free_fast(class_idx, base); +} + // ============================================================================ // Phase 4-Step2: malloc_tiny_fast() - Hot/Cold Path Box (ACTIVE) // ============================================================================ @@ -289,6 +319,12 @@ static inline int free_tiny_fast(void* ptr) { return 1; } + // Phase 4-2: C6 ULTRA-free (C6-only, free-only, ENV gated) + if (class_idx == 6 && tiny_c6_ultra_free_enabled()) { + tiny_c6_ultra_free_fast(base, class_idx); + return 1; + } + // C7 v3 fast classify: bypass classify_ptr/ss_map_lookup for clear hits if (class_idx == 7 && tiny_front_v3_enabled() && @@ -457,26 +493,9 @@ static inline int free_tiny_fast(void* ptr) { } #endif - int pushed = 0; - if (!front_snap || front_snap->unified_cache_on) { - pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base)); - } - if (__builtin_expect(pushed, 1)) { - // Phase FREE-LEGACY-BREAKDOWN-1: カウンタ散布 (10. 
legacy fallback) - FREE_PATH_STAT_INC(legacy_fallback); - - // Phase 4-1: Legacy per-class breakdown - if (__builtin_expect(free_path_stats_enabled(), 0)) { - if (class_idx >= 0 && class_idx < 8) { - g_free_path_stats.legacy_by_class[class_idx]++; - } - } - - return 1; // Success - } - - // Unified Cache full → 通常 free 経路へ - return 0; + // Phase 4-2: Legacy fallback (use inline helper) + hak_tiny_free_legacy_inline(base, class_idx); + return 1; #else // No header mode - fall back to normal free return 0; diff --git a/docs/analysis/FREE_LEGACY_PATH_ANALYSIS.md b/docs/analysis/FREE_LEGACY_PATH_ANALYSIS.md index e1791b70..b4deee4d 100644 --- a/docs/analysis/FREE_LEGACY_PATH_ANALYSIS.md +++ b/docs/analysis/FREE_LEGACY_PATH_ANALYSIS.md @@ -304,3 +304,220 @@ Phase FREE-LEGACY-OPT-4-2 で **C6 クラス** に ULTRA-Free lane を実装す - **C5 (257-512B)**: Legacy の 25.8%(全体の 12.7%) - C5 も ULTRA-Free の候補だが、C6 の効果を確認してから判断すべき + +### 次のアクション(Phase 4-2) + +**最大ターゲット**: C6 Legacy free(Legacy の 51.4%、全体の 25.3%) + +**実装方針**: +1. C6_ULTRA_FREE_BOX(free-only, C6-only)を追加 +2. C7 ULTRA 風の TLS キャッシュ(`freelist[32]` + `count` + segment range check) +3. ENV で opt-in(`HAKMEM_TINY_C6_ULTRA_FREE_ENABLED=0`) +4. 
alloc 側は既存ルートのまま(後続フェーズで検討) + +**期待効果**: +- C6 Legacy を TLS キャッシュに逃がす → Legacy 半減 +- Legacy: 49.2% → 24-27% +- Mixed: +5-8% 改善 + +## Phase FREE-LEGACY-OPT-4-2: C6_ULTRA_FREE_BOX 実装結果 + +### 実装内容 + +**コンポーネント**: +- `core/box/tiny_c6_ultra_free_env_box.h`: ENV ゲート(HAKMEM_TINY_C6_ULTRA_FREE_ENABLED=0) +- `core/box/tiny_c6_ultra_free_box.h/c`: C6 ULTRA-free TLS キャッシュ +- `core/front/malloc_tiny_fast.h`: free フロントへの接続(C7 ULTRA 直後に分岐追加) +- `core/box/free_path_stats_box.h/c`: c6_ultra_free_fast カウンタ追加 + +**設計**: +- TLS に `freelist[32]` + `count` + `seg_base/seg_end` (segment range check) +- Fast path: seg_base が初期化済み && ptr in range && count < 32 → TLS push +- Slow path: Legacy fallback + +### A/B テスト結果(Mixed 16-1024B) + +**ベースライン(C6 ULTRA OFF)**: +``` +[FREE_PATH_STATS] total=542031 c7_ultra=275089 c6_ultra_free=0 small_v3=0 v6=0 tiny_v1=0 pool_v1=8081 remote=0 super_lookup=0 legacy_fb=266942 +[FREE_PATH_STATS_LEGACY_BY_CLASS] c0=0 c1=0 c2=8746 c3=17279 c4=34727 c5=68871 c6=137319 c7=0 +Throughput = 42.2M ops/s +``` + +**C6 ULTRA ON(実測)**: +``` +[FREE_PATH_STATS] total=542031 c7_ultra=275089 c6_ultra_free=0 small_v3=0 v6=0 tiny_v1=0 pool_v1=8081 remote=0 super_lookup=0 legacy_fb=266942 +[FREE_PATH_STATS_LEGACY_BY_CLASS] c0=0 c1=0 c2=8746 c3=17279 c4=34727 c5=68871 c6=137319 c7=0 +Throughput = 39.9M ops/s (-5.5%) +``` + +### 問題分析 + +**観測された動作**: +- `c6_ultra_free=0` → Fast path に全く入っていない +- `legacy_by_class[6]=137,319` → C6 はすべて Legacy fallback のまま +- Throughput 低下 (42.2M → 39.9M, -5.5%) → ENV チェックのオーバーヘッドのみ + +**根本原因**: **Segment 未初期化** + +C6 ULTRA-free は「free-only」設計のため、TLS の `seg_base/seg_end` を初期化する手段がない: +- Fast path の条件: `seg_base != 0 && ptr in range` +- しかし、seg_base を設定する alloc 側の実装がない +- 結果: 常に seg_base == 0 で slow path(Legacy fallback)に落ちる + +**設計上の矛盾**: +1. **Free-only の限界**: TLS キャッシュを使うには、そのポインタが「自分が管理する segment」内かを確認する必要がある +2. **Segment 所有権**: C7 ULTRA は alloc 時に segment を割り当て、free 時にその segment 内かを確認する +3. 
**C6 の現状**: alloc は既存ルート(pool_v1)を使い、C6 ULTRA は segment を持たない +4. **結論**: Segment range check は不可能 → Fast path は機能しない + +### 技術的洞察 + +**C7 ULTRA との違い**: +- C7 ULTRA は alloc/free 両方を制御し、専用 segment を持つ +- C6 ULTRA-free は free のみを受けるため、segment 所有権がない +- 結果: Segment-based ownership check は不可能 + +**代替案の検討**: + +1. **Option A: Segment 初期化を追加** + - C6 alloc 時に専用 segment を割り当て + - しかし、これは「alloc は既存ルートのまま」という要件に反する + +2. **Option B: Segment check を削除** + - すべての C6 ポインタを TLS に push(ownership check なし) + - リスク: 他スレッドのポインタを TLS に入れる可能性(cross-thread free 問題) + +3. **Option C: Header-based check** + - ポインタの header を読み、C6 かを確認(すでに class_idx == 6 で確認済み) + - TLS ownership は諦め、class ベースの simple cache とする + +4. **Option D: Phase 中止** + - Free-only での TLS キャッシュは設計上困難 + - C6 の最適化は alloc/free 両方を含む v4/v5 に委ねる + +### 推奨アクション + +**短期**: Phase 4-2 を「研究箱(実装のみ、無効)」として保持 +- ENV デフォルト OFF のまま +- ビルドエラーがないことを確認済み +- 実際の効果はなし(seg_base == 0 のため) + +**中期**: C6 最適化は別アプローチで +- SmallHeap v4/v5/v6 など、alloc/free 両方を制御する経路で C6 を最適化 +- または、C6 専用の alloc/free 統合パスを新規実装 + +**長期**: Free-only 最適化の限界を認識 +- TLS キャッシュは alloc/free 統合が前提 +- Segment ownership なしでは安全な TLS push は困難 + +## Phase FREE-LEGACY-OPT-4-3: Segment 初回学習実装 + +### 目的 + +Phase 4-2 の「seg_base == 0 で fast path に入らない」問題を解決するため、**初回 C6 free で `ss_fast_lookup` を呼び出し、segment 情報を学習**する。 + +### 実装内容 + +**`core/box/tiny_c6_ultra_free_box.c` の修正**: + +```c +void tiny_c6_ultra_free_fast(void* base, uint32_t class_idx) { + TinyC6UltraFreeTLS* ctx = &g_c6_ultra_free_tls; + + // Phase 4-3: Learn segment on first C6 free + if (unlikely(ctx->seg_base == 0)) { + SuperSlab* ss = ss_fast_lookup(base); + if (ss != NULL) { + ctx->seg_base = (uintptr_t)ss; + ctx->seg_end = ctx->seg_base + (1u << ss->lg_size); + } + } + + // Fast path: segment ownership check + TLS push + if (likely(ctx->seg_base != 0 && + (uintptr_t)base >= ctx->seg_base && + (uintptr_t)base < ctx->seg_end && + ctx->count < TINY_C6_ULTRA_FREE_CAP)) { + ctx->freelist[ctx->count++] = base; + 
FREE_PATH_STAT_INC(c6_ultra_free_fast); + return; + } + + // Slow path: fallback to legacy + tiny_c6_ultra_free_slow(base, class_idx); +} +``` + +**キャッシュサイズ拡大**: +- `TINY_C6_ULTRA_FREE_CAP = 128`(32 → 128 に増加) + +### テスト結果 + +**C6 ULTRA ON + Segment 学習(Mixed 16-1024B)**: +``` +[FREE_PATH_STATS] total=542031 c7_ultra=275089 c6_ultra_free=128 ... +[FREE_PATH_STATS_LEGACY_BY_CLASS] ... c6=137191 ... +``` + +**観測**: +- `c6_ultra_free=128` → Fast path に 128 ブロックがヒット(seg_base 学習成功) +- `legacy_by_class[6]=137,191` → 残りの C6 (137K - 128 = ~137K) は Legacy + +### A/B 比較 + +| 測定 | OFF (M ops/s) | ON (M ops/s) | 差 | +|------|---------------|--------------|-----| +| Run 1 | 40.2 | 41.5 | +3.2% | +| Run 2 | 42.5 | 42.9 | +1.1% | +| 平均 | 41.4 | 42.2 | +1-3% (誤差範囲) | + +### 問題分析 + +**根本的限界**: +1. **キャッシュは満杯になる**: 128 ブロックがキャッシュされると、以降の C6 free はすべて Legacy へオーバーフロー +2. **alloc との連携なし**: キャッシュされたブロックは alloc で再利用されない +3. **ドレイン戦略は効果なし**: ドレイン時に Legacy を呼ぶため、結局同じオーバーヘッド + +**数値で見る限界**: +- C6 total frees: 137,319 +- Fast path hits: 128 (0.09%) +- Legacy fallback: 137,191 (99.91%) + +### ドレイン戦略の試行(失敗) + +満杯時に半分をドレインする戦略も試行: + +```c +static void tiny_c6_ultra_free_drain_half(TinyC6UltraFreeTLS* ctx, uint32_t class_idx) { + uint8_t drain_count = ctx->count / 2; + for (uint8_t i = 0; i < drain_count; i++) { + hak_tiny_free_legacy_impl(ctx->freelist[i], class_idx); + } + // compact remaining... +} +``` + +**結果**: +- 全 137K C6 frees が fast path を通過 (`c6_ultra_free=137319`) +- しかし、137K blocks がドレインで Legacy に送られる +- **Throughput は baseline と同等**(+0%, 相殺) + +### 結論 + +**Free-only TLS キャッシュは alloc 連携なしでは効果限定的** + +**既存の解決策**: +- Core v6 (`SmallHeapCtxV6`) に既に alloc/free 統合 TLS freelist が存在 +- ただし Core v6 は現在 -12% で baseline より遅い(別途最適化が必要) + +**Phase 4-3 の成果**: +1. ✅ Segment 初回学習の実装(`ss_fast_lookup` 活用) +2. ✅ Fast path が機能することを確認 +3. ❌ alloc 連携なしでは効果なし(0.09% のブロックのみキャッシュ) +4. 
❌ ドレイン戦略も相殺で効果なし + +**推奨**: +- C6 ULTRA Free は **研究箱として維持**(ENV デフォルト OFF) +- 真の最適化には **alloc 側との TLS 連携**が必須 +- Core v6 の改良、または新規 alloc/free 統合パスの検討が必要