diff --git a/Makefile b/Makefile index bc388392..5388140b 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o 
core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o 
hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o 
core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o OBJS = $(OBJS_BASE) # Shared library @@ -427,7 +427,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o 
core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o 
core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/hakmem_env_snapshot_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o 
core/tiny_c7_ultra.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/box/hakmem_env_snapshot_box.c b/core/box/hakmem_env_snapshot_box.c new file mode 100644 index 00000000..cb298936 --- /dev/null +++ b/core/box/hakmem_env_snapshot_box.c @@ -0,0 +1,78 @@ +// hakmem_env_snapshot_box.c - Phase 4 E1: ENV Snapshot Consolidation (implementation) + +#include "hakmem_env_snapshot_box.h" +#include +#include +#include +#include "../hakmem_build_flags.h" + +// Forward declare learner check (to avoid circular deps) +extern bool small_learner_v2_enabled(void); + +// Global snapshot state: plain file-scope globals with external linkage (no __thread/_Thread_local qualifier, so not TLS); NOTE(review): accesses are unsynchronized - confirm first init cannot race across threads +HakmemEnvSnapshot g_hakmem_env_snapshot = {0}; +int g_hakmem_env_snapshot_ready = 0; + +// Internal helper: reads the three gate ENV vars with getenv() and fills *snap, including the derived tiny_metadata_cache_eff field +static void hakmem_env_snapshot_load(HakmemEnvSnapshot* snap) { + // Read HAKMEM_TINY_C7_ULTRA_ENABLED (default: ON; any non-empty value whose first char is not '0' enables). NOTE(review): confirm this key matches the one read by tiny_c7_ultra_enabled_env() + const char* c7_env = getenv("HAKMEM_TINY_C7_ULTRA_ENABLED"); + if (c7_env && *c7_env) { + snap->tiny_c7_ultra_enabled = (*c7_env != '0'); + } else { + snap->tiny_c7_ultra_enabled = true; // default: ON + } + + // Read HAKMEM_TINY_FRONT_V3_ENABLED (default: ON; same rule: first char other than '0' enables) + const char* v3_env = getenv("HAKMEM_TINY_FRONT_V3_ENABLED"); + if (v3_env && *v3_env) { + snap->tiny_front_v3_enabled = (*v3_env != '0'); + } 
else { + snap->tiny_front_v3_enabled = true; // default: ON + } + + // Read HAKMEM_TINY_METADATA_CACHE (default: OFF; enabled only when the first char is '1') + const char* cache_env = getenv("HAKMEM_TINY_METADATA_CACHE"); + if (cache_env && *cache_env) { + snap->tiny_metadata_cache = (*cache_env == '1'); + } else { + snap->tiny_metadata_cache = false; // default: OFF + } + + // Compute effective metadata cache (cache && !learner) + // Safety: disable while small_learner_v2_enabled() reports an active learner (learner updates route_kind dynamically); NOTE(review): this was described as learner v7 - confirm the v2 check is the intended one + bool learner_active = small_learner_v2_enabled(); + snap->tiny_metadata_cache_eff = snap->tiny_metadata_cache && !learner_active; + +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[HAKMEM_ENV_SNAPSHOT] Initialized:\n"); + fprintf(stderr, " tiny_c7_ultra_enabled: %d\n", snap->tiny_c7_ultra_enabled); + fprintf(stderr, " tiny_front_v3_enabled: %d\n", snap->tiny_front_v3_enabled); + fprintf(stderr, " tiny_metadata_cache: %d\n", snap->tiny_metadata_cache); + fprintf(stderr, " tiny_metadata_cache_eff: %d (learner_active=%d)\n", + snap->tiny_metadata_cache_eff, learner_active); + fflush(stderr); +#endif +} + +// Initialize snapshot (lazy init on first access): returns immediately when the ready flag is set, otherwise loads from ENV and sets the flag +void hakmem_env_snapshot_init(void) { + if (g_hakmem_env_snapshot_ready) { + return; // already initialized + } + + hakmem_env_snapshot_load(&g_hakmem_env_snapshot); + g_hakmem_env_snapshot_ready = 1; +} + +// Refresh snapshot from ENV (for bench_profile putenv sync): reloads unconditionally, regardless of the ready flag +// This ensures that after bench_setenv_default() runs, the snapshot is refreshed +void hakmem_env_snapshot_refresh_from_env(void) { + hakmem_env_snapshot_load(&g_hakmem_env_snapshot); + g_hakmem_env_snapshot_ready = 1; + +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[HAKMEM_ENV_SNAPSHOT] Refreshed from ENV (bench_profile sync)\n"); + fflush(stderr); +#endif +} diff --git a/core/box/hakmem_env_snapshot_box.h b/core/box/hakmem_env_snapshot_box.h new file mode 100644 index 00000000..070aef3d --- /dev/null +++ b/core/box/hakmem_env_snapshot_box.h @@ -0,0 +1,64 @@ +// 
hakmem_env_snapshot_box.h - Phase 4 E1: ENV Snapshot Consolidation +// +// Purpose: Consolidate 3 hot ENV gate calls into 1 snapshot read (the snapshot declared below is a plain extern global, not TLS) +// Target: tiny_c7_ultra_enabled_env (1.28%) + tiny_front_v3_enabled (1.01%) + +// tiny_metadata_cache_enabled (0.97%) = 3.26% combined ENV overhead +// +// Design: +// - ENV: HAKMEM_ENV_SNAPSHOT=0/1 (default 0, research box) +// - Single snapshot struct containing all hot toggles (one global instance, declared extern in this header) +// - Lazy init on the first hakmem_env_snapshot() call; explicit reload via hakmem_env_snapshot_refresh_from_env() (no version counter is declared in this header) +// - Learner interlock: tiny_metadata_cache_eff = cache && !learner +// +// Benefits: +// - 3 gate reads → 1 snapshot read (66% reduction) +// - 3 lazy init checks → 1 lazy init check +// - Expected gain: +1-3% (conservative from 3.26% overhead) + +#ifndef HAK_ENV_SNAPSHOT_BOX_H +#define HAK_ENV_SNAPSHOT_BOX_H + +#include +#include + +// ENV snapshot struct: consolidates all hot ENV gates +typedef struct HakmemEnvSnapshot { + bool tiny_c7_ultra_enabled; // ENV: HAKMEM_TINY_C7_ULTRA_ENABLED is the key the loader passes to getenv() (default 1); NOTE(review): confirm against the key used by tiny_c7_ultra_enabled_env() + bool tiny_front_v3_enabled; // ENV: HAKMEM_TINY_FRONT_V3_ENABLED (default 1) + bool tiny_metadata_cache; // ENV: HAKMEM_TINY_METADATA_CACHE (default 0) + bool tiny_metadata_cache_eff; // Effective: cache && !learner (for hot path) +} HakmemEnvSnapshot; + +// Global snapshot state (defined in hakmem_env_snapshot_box.c; declared here without any thread-local qualifier) +extern HakmemEnvSnapshot g_hakmem_env_snapshot; +extern int g_hakmem_env_snapshot_ready; + +// Snapshot initializer (implemented in hakmem_env_snapshot_box.c) +void hakmem_env_snapshot_init(void); + +// Refresh from ENV (for bench_profile putenv sync) +void hakmem_env_snapshot_refresh_from_env(void); + +// Fast snapshot getter: calls hakmem_env_snapshot_init() when the ready flag is clear, then returns a pointer to the global snapshot +static inline const HakmemEnvSnapshot* hakmem_env_snapshot(void) { + if (__builtin_expect(!g_hakmem_env_snapshot_ready, 0)) { + hakmem_env_snapshot_init(); + } + return &g_hakmem_env_snapshot; +} + +// ENV gate: default OFF (research box, set =1 to enable); reads HAKMEM_ENV_SNAPSHOT once and caches the result in a function-local static, only a first char of '1' enables +static inline bool hakmem_env_snapshot_enabled(void) 
{ + static int g = -1; + if (__builtin_expect(g == -1, 0)) { + const char* e = getenv("HAKMEM_ENV_SNAPSHOT"); + if (e && *e) { + g = (*e == '1') ? 1 : 0; + } else { + g = 0; // default: OFF (research box) + } + } + return g != 0; +} + +#endif // HAK_ENV_SNAPSHOT_BOX_H diff --git a/core/box/tiny_legacy_fallback_box.h b/core/box/tiny_legacy_fallback_box.h index 17d5c697..19c2886c 100644 --- a/core/box/tiny_legacy_fallback_box.h +++ b/core/box/tiny_legacy_fallback_box.h @@ -10,6 +10,7 @@ #include "free_path_stats_box.h" #include "tiny_front_hot_box.h" #include "tiny_metadata_cache_env_box.h" // Phase 3 C2: Metadata cache ENV gate +#include "hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation // Purpose: Encapsulate legacy free logic (shared by multiple paths) // Called by: malloc_tiny_fast.h (free path) + tiny_c6_ultra_free_box.c (C6 fallback) @@ -21,12 +22,21 @@ // __attribute__((always_inline)) static inline void tiny_legacy_fallback_free_base(void* base, uint32_t class_idx) { - const TinyFrontV3Snapshot* front_snap = - __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; + // Phase 4 E1: when hakmem_env_snapshot_enabled() is true, take both gates (front v3, metadata cache) from the consolidated ENV snapshot; otherwise call the individual gate functions + const TinyFrontV3Snapshot* front_snap; + bool metadata_cache_on; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL; + metadata_cache_on = env->tiny_metadata_cache_eff; // Uses effective (cache && !learner); NOTE(review): the non-snapshot branch uses tiny_metadata_cache_enabled() with no learner term - confirm the difference is intended + } else { + front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? 
tiny_front_v3_snapshot_get() : NULL; + metadata_cache_on = tiny_metadata_cache_enabled(); + } // Phase 3 C2 Patch 2: First page cache hint (optional fast-path) // Check if pointer is in cached page (avoids metadata lookup in future optimizations) - if (__builtin_expect(tiny_metadata_cache_enabled(), 0)) { + if (__builtin_expect(metadata_cache_on, 0)) { // Note: This is a hint-only check. Even if it hits, we still use the standard path. // The cache will be populated during refill operations for future use. // Currently this just validates the cache state; actual optimization TBD. diff --git a/core/box/tiny_metadata_cache_hot_box.h b/core/box/tiny_metadata_cache_hot_box.h index 254a2e35..be57a774 100644 --- a/core/box/tiny_metadata_cache_hot_box.h +++ b/core/box/tiny_metadata_cache_hot_box.h @@ -18,6 +18,7 @@ #include #include "smallobject_policy_v7_box.h" #include "tiny_metadata_cache_env_box.h" +#include "hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation // ============================================================================ // Policy Hot Cache Structure @@ -58,7 +59,16 @@ static inline void tiny_policy_hot_refresh(void) { /// @return: Route kind for this class __attribute__((always_inline)) static inline SmallRouteKind tiny_policy_hot_get_route(uint32_t class_idx) { - if (__builtin_expect(tiny_metadata_cache_enabled() && !g_policy_hot.learner_v7_enabled, 0)) { + // Phase 4 E1: when hakmem_env_snapshot_enabled() is true, read the precomputed effective metadata-cache flag from the ENV snapshot; otherwise evaluate the gate and the learner flag directly + bool metadata_cache_eff; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + metadata_cache_eff = env->tiny_metadata_cache_eff; // Already includes learner check, evaluated via small_learner_v2_enabled() when the snapshot was loaded; the else branch instead re-reads g_policy_hot.learner_v7_enabled on every call - NOTE(review): confirm both agree + } else { + metadata_cache_eff = tiny_metadata_cache_enabled() && !g_policy_hot.learner_v7_enabled; + } + + if (__builtin_expect(metadata_cache_eff, 0)) { // Fast path: use cached route_kind if (class_idx < 8) { return (SmallRouteKind)g_policy_hot.route_kind[class_idx]; diff 
--git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index 50bef2ae..e55ba4ca 100644 --- a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -69,6 +69,7 @@ #include "../box/free_tiny_fast_hotcold_stats_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Stats #include "../box/tiny_metadata_cache_hot_box.h" // Phase 3 C2: Policy hot cache (metadata cache optimization) #include "../box/tiny_free_route_cache_env_box.h" // Phase 3 D1: Free path route cache +#include "../box/hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation // Helper: current thread id (low 32 bits) for owner check #ifndef TINY_SELF_U32_LOCAL_DEFINED @@ -226,7 +227,16 @@ static inline void* malloc_tiny_fast_for_class(size_t size, int class_idx) { // Phase v11a-5b: C7 ULTRA early-exit (skip policy snapshot for common case) // This is the most common hot path - avoids TLS policy overhead - if (class_idx == 7 && tiny_c7_ultra_enabled_env()) { + // Phase 4 E1: take the C7 ULTRA gate from the ENV snapshot when hakmem_env_snapshot_enabled() is true, else from tiny_c7_ultra_enabled_env(). NOTE(review): the gate is now looked up for every class_idx, whereas the replaced condition short-circuited and consulted it only when class_idx == 7 + bool c7_ultra_on; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + c7_ultra_on = env->tiny_c7_ultra_enabled; + } else { + c7_ultra_on = tiny_c7_ultra_enabled_env(); + } + + if (class_idx == 7 && c7_ultra_on) { void* ultra_p = tiny_c7_ultra_alloc(size); if (TINY_HOT_LIKELY(ultra_p != NULL)) { return ultra_p; @@ -384,8 +394,14 @@ static int free_tiny_fast_cold(void* ptr, void* base, int class_idx) route = tiny_route_for_class((uint8_t)class_idx); } const int use_tiny_heap = tiny_route_is_heap_kind(route); - const TinyFrontV3Snapshot* front_snap = - __builtin_expect(tiny_front_v3_enabled(), 0) ? 
tiny_front_v3_snapshot_get() : NULL; + // Phase 4 E1: take the front-v3 gate from the ENV snapshot when hakmem_env_snapshot_enabled() is true, else from tiny_front_v3_enabled(); note the snapshot branch tests the flag without the __builtin_expect(..., 0) hint used in the else branch + const TinyFrontV3Snapshot* front_snap; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL; + } else { + front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; + } // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast. // In Release builds, we trust header magic (0xA0) as sufficient validation. @@ -576,7 +592,16 @@ static inline int free_tiny_fast_hot(void* ptr) { FREE_PATH_STAT_INC(total_calls); // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case) - if (class_idx == 7 && tiny_c7_ultra_enabled_env()) { + // Phase 4 E1: take the C7 ULTRA gate from the ENV snapshot when hakmem_env_snapshot_enabled() is true, else from tiny_c7_ultra_enabled_env(). NOTE(review): the gate is now looked up for every class_idx, whereas the replaced condition short-circuited and consulted it only when class_idx == 7 + bool c7_ultra_free; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + c7_ultra_free = env->tiny_c7_ultra_enabled; + } else { + c7_ultra_free = tiny_c7_ultra_enabled_env(); + } + + if (class_idx == 7 && c7_ultra_free) { FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_c7_ultra); tiny_c7_ultra_free(ptr); FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit); @@ -719,7 +744,16 @@ static inline int free_tiny_fast(void* ptr) { FREE_PATH_STAT_INC(total_calls); // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case) - if (class_idx == 7 && tiny_c7_ultra_enabled_env()) { + // Phase 4 E1: take the C7 ULTRA gate from the ENV snapshot when hakmem_env_snapshot_enabled() is true, else from tiny_c7_ultra_enabled_env(). NOTE(review): the gate is now looked up for every class_idx, whereas the replaced condition short-circuited and consulted it only when class_idx == 7 + bool c7_ultra_free; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + c7_ultra_free = env->tiny_c7_ultra_enabled; + } else { + c7_ultra_free = tiny_c7_ultra_enabled_env(); + } + + if (class_idx == 7 && c7_ultra_free) { tiny_c7_ultra_free(ptr); return 1; } @@ -790,8 
+824,14 @@ legacy_fallback: route = tiny_route_for_class((uint8_t)class_idx); } const int use_tiny_heap = tiny_route_is_heap_kind(route); - const TinyFrontV3Snapshot* front_snap = - __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; + // Phase 4 E1: take the front-v3 gate from the ENV snapshot when hakmem_env_snapshot_enabled() is true, else from tiny_front_v3_enabled(); note the snapshot branch tests the flag without the __builtin_expect(..., 0) hint used in the else branch + const TinyFrontV3Snapshot* front_snap; + if (__builtin_expect(hakmem_env_snapshot_enabled(), 0)) { + const HakmemEnvSnapshot* env = hakmem_env_snapshot(); + front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL; + } else { + front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL; + } // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast. // In Release builds, we trust header magic (0xA0) as sufficient validation.