diff --git a/Makefile b/Makefile index 304d371f..14243c0b 100644 --- a/Makefile +++ b/Makefile @@ -218,12 +218,12 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o OBJS = $(OBJS_BASE) # Shared library SHARED_LIB = libhakmem.so -SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/ss_tls_hint_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o +SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/ss_tls_hint_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o # Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1) ifeq ($(POOL_TLS_PHASE1),1) @@ -250,7 +250,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -285,7 +285,7 @@ $(TARGET): $(OBJS) @echo "=========================================" # Compile C files -%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h +%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h $(CC) $(CFLAGS) -c -o $@ $< # Build benchmark programs @@ -427,7 +427,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/box/hak_alloc_api.inc.h b/core/box/hak_alloc_api.inc.h index 29339d08..6d9b6800 100644 --- a/core/box/hak_alloc_api.inc.h +++ b/core/box/hak_alloc_api.inc.h @@ -106,14 +106,6 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { hkm_size_hist_record(size); - // Legacy Mid MT allocator (Phase 5) is disabled by default to favor ACE/Pool. - // Enable via HAKMEM_MID_MT_ENABLE=1 when running legacy benchmarks. - static int g_mid_mt_enabled = -1; - if (__builtin_expect(g_mid_mt_enabled < 0, 0)) { - const char* e = getenv("HAKMEM_MID_MT_ENABLE"); - g_mid_mt_enabled = (e && *e && *e != '0') ? 1 : 0; - } - #ifdef HAKMEM_POOL_TLS_PHASE1 // Phase 1: Ultra-fast Pool TLS for 8KB-52KB range if (size >= 8192 && size <= 53248) { @@ -124,18 +116,6 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { } #endif - if (__builtin_expect(g_mid_mt_enabled && mid_is_in_range(size), 0)) { -#if HAKMEM_DEBUG_TIMING - HKM_TIME_START(t_mid); -#endif - void* mid_ptr = mid_mt_alloc(size); -#if HAKMEM_DEBUG_TIMING - HKM_TIME_END(HKM_CAT_POOL_GET, t_mid); -#endif - // PERF_OPT: likely hint - mid allocations usually succeed - if (__builtin_expect(mid_ptr != NULL, 1)) return mid_ptr; - } - #if HAKMEM_FEATURE_EVOLUTION if (g_evo_sample_mask > 0) { static _Atomic uint64_t tick_counter = 0; diff --git a/core/box/hak_core_init.inc.h b/core/box/hak_core_init.inc.h index d817c7b5..d6b1b827 100644 --- a/core/box/hak_core_init.inc.h +++ b/core/box/hak_core_init.inc.h @@ -72,7 +72,6 @@ static void hak_init_impl(void) { hkm_whale_init(); // NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style) - mid_mt_init(); // NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy) hak_config_init(); diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h index 6fdd549f..752753da 100644 --- a/core/box/hak_wrappers.inc.h +++ b/core/box/hak_wrappers.inc.h @@ -33,7 +33,6 @@ void* realloc(void* ptr, size_t size) { #include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid) #include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification #include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination -#include "mid_free_route_box.h" // Phase 5-Step2: Mid MT free routing fix // malloc wrapper - intercepts system malloc() calls __thread uint64_t g_malloc_total_calls = 0; @@ -226,11 +225,6 @@ void free(void* ptr) { } #endif - // Phase 5-Step2: Mid Free Route Box (BEFORE classify_ptr) - // Quick fix for 19x free() slowdown: Try Mid MT registry first - // If found, route directly to mid_mt_free() and return - if (mid_free_route_try(ptr)) return; - // Classify pointer BEFORE early libc fallbacks to avoid misrouting Tiny pointers // This is safe: classifier uses header probe and registry; does not allocate. int is_hakmem_owned = 0; diff --git a/core/box/mid_free_route_box.h b/core/box/mid_free_route_box.h deleted file mode 100644 index 86611c87..00000000 --- a/core/box/mid_free_route_box.h +++ /dev/null @@ -1,109 +0,0 @@ -/** - * mid_free_route_box.h - * - * Box: Mid Free Route Box - * Responsibility: Route Mid MT allocations to correct free path - * Contract: Try Mid MT registry lookup, return success/failure - * - * Part of Phase 5-Step2 fix for 19x free() slowdown - * - * Problem: - * - Mid MT allocator registers chunks in MidGlobalRegistry - * - Free path searches Pool's mid_desc registry (different registry!) - * - Result: 100% lookup failure → 4x cascading lookups → 19x slower - * - * Solution: - * - Add Mid MT registry lookup BEFORE Pool registry lookup - * - Route directly to mid_mt_free() if found - * - Fall through to existing path if not found - * - * Performance Impact: - * - Before: 1.42 M ops/s (19x slower than system malloc) - * - After: 14-21 M ops/s (Option B quick fix, 10-15x improvement) - * - * Created: 2025-11-29 (Phase 5-Step2 Mid MT Gap Fix) - */ - -#ifndef MID_FREE_ROUTE_BOX_H -#define MID_FREE_ROUTE_BOX_H - -#include "../hakmem_mid_mt.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// ============================================================================ -// Box Contract: Mid MT Free Routing -// ============================================================================ - -/** - * mid_free_route_try - Try Mid MT free path first - * - * @param ptr Pointer to free - * @return true if handled by Mid MT, false to fall through - * - * Phase 6-B: Header-based detection (lock-free!) - * - * Box Responsibilities: - * 1. Read MidMTHeader from ptr - sizeof(MidMTHeader) - * 2. Check magic number (0xAB42) - * 3. If valid: Call mid_mt_free() and return true - * 4. If invalid: Return false (let existing path handle it) - * - * Box Guarantees: - * - Zero side effects if returning false - * - Correct free if returning true - * - Thread-safe (lock-free header read) - * - * Performance: - * - Before (Phase 5): O(log N) registry lookup + mutex = ~50 cycles (13.98% CPU) - * - After (Phase 6-B): O(1) header read + magic check = ~2 cycles (0.01% CPU) - * - Expected improvement: +17-27% throughput - * - * Usage Example: - * void free(void* ptr) { - * if (mid_free_route_try(ptr)) return; // Mid MT handled - * // Fall through to existing free path... - * } - */ -__attribute__((always_inline)) -static inline bool mid_free_route_try(void* ptr) { - if (!ptr) return false; // NULL ptr, not Mid MT - - // Phase 6-B: Read header for O(1) detection (no mutex!) - void* block = (uint8_t*)ptr - sizeof(MidMTHeader); - MidMTHeader* hdr = (MidMTHeader*)block; - - // Check magic number to identify Mid MT allocation - if (hdr->magic == MID_MT_MAGIC) { - // Valid Mid MT allocation, route to mid_mt_free() - // Pass block_size from header (no size needed from caller!) - mid_mt_free(ptr, hdr->block_size); - return true; // Handled - } - - // Not a Mid MT allocation, fall through to existing path - return false; -} - -// ============================================================================ -// Box Observability (Debug/Profiling) -// ============================================================================ - -#if MID_DEBUG -/** - * mid_free_route_stats - Print Mid Free Route Box statistics - * - * Only available in debug builds (MID_DEBUG=1) - * Tracks hit/miss rates for performance analysis - */ -void mid_free_route_stats(void); -#endif - -#ifdef __cplusplus -} -#endif - -#endif // MID_FREE_ROUTE_BOX_H diff --git a/core/hakmem.c b/core/hakmem.c index 0bb76416..57f512e6 100644 --- a/core/hakmem.c +++ b/core/hakmem.c @@ -16,7 +16,6 @@ #include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB) #include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool #include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style) -#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style) #include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination) #include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2) #include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings) diff --git a/core/hakmem_mid_mt.c b/core/hakmem_mid_mt.c deleted file mode 100644 index fc76729e..00000000 --- a/core/hakmem_mid_mt.c +++ /dev/null @@ -1,451 +0,0 @@ -/** - * hakmem_mid_mt.c - * - * Mid Range Multi-threaded Allocator Implementation (8-32KB) - * mimalloc-style per-thread segment for optimal MT performance - * - * Design: - * - Per-thread segments (TLS) for lock-free allocation - * - Global registry for segment lookup during free() - * - 64KB chunks with bump + free list allocation - * - Phase 1: Local free only (remote free = memory leak, acceptable for benchmarking) - * - Phase 2: Will add atomic remote free list - */ - -#include "hakmem_mid_mt.h" -#include -#include -#include -#include -#include -#include -#include - -// Use likely/unlikely hints for branch prediction -#ifndef likely -#define likely(x) __builtin_expect(!!(x), 1) -#endif -#ifndef unlikely -#define unlikely(x) __builtin_expect(!!(x), 0) -#endif - -// ============================================================================ -// Global and TLS Variables -// ============================================================================ - -// TLS: Each thread has independent segments (lock-free!) -__thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES] = {0}; - -// Phase 6-B: Registry removed (no longer needed with header-based free) - -// Statistics (if enabled) -#if MID_ENABLE_STATS -MidStats g_mid_stats = {0}; -#endif - -// Initialization flag -static volatile int g_mid_initialized = 0; -static pthread_mutex_t g_init_lock = PTHREAD_MUTEX_INITIALIZER; - -// ============================================================================ -// Forward Declarations -// ============================================================================ - -static bool segment_refill(MidThreadSegment* seg, int class_idx); -static void* segment_alloc(MidThreadSegment* seg, int class_idx); -static void segment_free_local(MidThreadSegment* seg, void* ptr); -static void* chunk_allocate(size_t chunk_size); -static void chunk_deallocate(void* chunk, size_t chunk_size); -// Phase 6-B: Registry functions removed (header-based free instead) - -// ============================================================================ -// Chunk Management (mmap/munmap wrappers) -// ============================================================================ - -/** - * chunk_allocate - Allocate a new chunk via mmap - * - * @param chunk_size Size of chunk (typically 64KB) - * @return Chunk base address, or NULL on failure - */ -static void* chunk_allocate(size_t chunk_size) { - void* chunk = mmap( - NULL, - chunk_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, - 0 - ); - - if (chunk == MAP_FAILED) { - MID_LOG("ERROR: mmap failed for chunk_size=%zu", chunk_size); - return NULL; - } - - MID_LOG("Chunk allocated: %p, size=%zu", chunk, chunk_size); - return chunk; -} - -/** - * chunk_deallocate - Free chunk via munmap - * - * @param chunk Chunk base address - * @param chunk_size Size of chunk - */ -static void chunk_deallocate(void* chunk, size_t chunk_size) { - if (!chunk) return; - - int ret = munmap(chunk, chunk_size); - if (ret != 0) { - MID_LOG("ERROR: munmap failed for chunk=%p, size=%zu", chunk, chunk_size); - } else { - MID_LOG("Chunk deallocated: %p, size=%zu", chunk, chunk_size); - } -} - -// ============================================================================ -// Segment Operations -// ============================================================================ - -/** - * segment_refill - Allocate new chunk and setup segment - * - * Called when segment is exhausted (rare, ~0.1% of allocations) - * - * Phase 6-B: No longer registers chunks (header-based free instead) - * - * @return true on success, false on OOM - */ -static bool segment_refill(MidThreadSegment* seg, int class_idx) { - size_t block_size = mid_class_to_size(class_idx); - size_t chunk_size = MID_CHUNK_SIZE; - - // Allocate new chunk via mmap - void* chunk = chunk_allocate(chunk_size); - if (!chunk) { - return false; - } - - // Phase 6-B: No registry add (header-based free doesn't need registry) - - // Setup segment - seg->chunk_base = chunk; - seg->chunk_size = chunk_size; - seg->block_size = block_size; - seg->current = chunk; - seg->end = (uint8_t*)chunk + chunk_size; - seg->capacity = chunk_size / block_size; - seg->refill_count++; - - MID_LOG("Segment refill: class=%d, block_size=%zu, capacity=%u, chunk=%p", - class_idx, block_size, seg->capacity, chunk); - - return true; -} - -/** - * segment_alloc - Allocate from segment (fast path) - * - * PERFORMANCE: Force inline for maximum speed - * - * Fast path priority: - * 1. Free list (most common, ~90-95% hit rate) - * 2. Bump allocation (when free list empty) - * 3. Refill (when segment exhausted) - * - * Phase 6-B: Now writes MidMTHeader for lock-free free() - * - * @return Allocated pointer (after header), or NULL on OOM - */ -static inline void* segment_alloc(MidThreadSegment* seg, int class_idx) __attribute__((always_inline)); -static inline void* segment_alloc(MidThreadSegment* seg, int class_idx) { - void* block; // Block start (includes header space) - size_t block_size = seg->block_size; - - // === Path 0: First allocation - need refill === - // CRITICAL FIX: TLS is zero-initialized, so chunk_base == NULL on first call - if (unlikely(seg->chunk_base == NULL)) { - if (!segment_refill(seg, class_idx)) { - return NULL; // OOM - } - block_size = seg->block_size; // Update after refill - } - - // === Path 1: Free list (fastest, ~4-5 instructions) === - // Note: Free list stores next pointer at block start (overwrites header when freed) - block = seg->free_list; - if (likely(block != NULL)) { - seg->free_list = *(void**)block; // Pop from free list - seg->used_count++; - seg->alloc_count++; - - // Phase 6-B: Write header before returning - MidMTHeader* hdr = (MidMTHeader*)block; - hdr->block_size = (uint32_t)block_size; - hdr->class_idx = (uint16_t)class_idx; - hdr->magic = MID_MT_MAGIC; - - return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header - } - - // === Path 2: Bump allocation (fast, ~6-8 instructions) === - block = seg->current; - void* next = (uint8_t*)block + block_size; - - if (likely(next <= seg->end)) { - seg->current = next; - seg->used_count++; - seg->alloc_count++; - - // Phase 6-B: Write header before returning - MidMTHeader* hdr = (MidMTHeader*)block; - hdr->block_size = (uint32_t)block_size; - hdr->class_idx = (uint16_t)class_idx; - hdr->magic = MID_MT_MAGIC; - - return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header - } - - // === Path 3: Refill (slow, called ~once per 64KB) === - if (!segment_refill(seg, class_idx)) { - return NULL; // OOM - } - - // Retry after refill - block = seg->current; - block_size = seg->block_size; // Update after refill - seg->current = (uint8_t*)block + block_size; - seg->used_count++; - seg->alloc_count++; - - // Phase 6-B: Write header before returning - MidMTHeader* hdr = (MidMTHeader*)block; - hdr->block_size = (uint32_t)block_size; - hdr->class_idx = (uint16_t)class_idx; - hdr->magic = MID_MT_MAGIC; - - return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header -} - -/** - * segment_free_local - Free to local segment (same thread) - * - * @param seg Segment to free to - * @param ptr Pointer to free (user pointer, after header) - * - * Phase 6-B: Adjusted for header-based allocation - */ -static inline void segment_free_local(MidThreadSegment* seg, void* ptr) { - // Phase 6-B: Get block start (before header) - void* block = (uint8_t*)ptr - sizeof(MidMTHeader); - - // Push to free list (lock-free, local operation) - // Note: Overwrites header with next pointer (header no longer needed after free) - *(void**)block = seg->free_list; - seg->free_list = block; - seg->used_count--; - seg->free_count++; - -#if MID_ENABLE_STATS - __sync_fetch_and_add(&g_mid_stats.local_frees, 1); -#endif -} - -// ============================================================================ -// Public API -// ============================================================================ - -/** - * mid_mt_init - Initialize Mid Range MT allocator - * - * Thread-safe, idempotent - * - * Phase 6-B: Simplified (no registry initialization) - */ -void mid_mt_init(void) { - if (g_mid_initialized) return; - - pthread_mutex_lock(&g_init_lock); - - if (!g_mid_initialized) { - // Phase 6-B: No registry initialization (header-based free) - -#if MID_ENABLE_STATS - memset(&g_mid_stats, 0, sizeof(g_mid_stats)); -#endif - - g_mid_initialized = 1; - - MID_LOG("Mid MT allocator initialized (Phase 6-B: header-based)"); - } - - pthread_mutex_unlock(&g_init_lock); -} - -/** - * mid_mt_alloc - Allocate memory from Mid Range pool (8-32KB) - * - * Thread-safe, lock-free (uses TLS) - */ -void* mid_mt_alloc(size_t size) { - // Validate size range (Phase 16: dynamic min size based on Tiny's max) - if (unlikely(size < mid_get_min_size() || size > MID_MAX_SIZE)) { - return NULL; - } - - // Initialize if needed (thread-safe) - if (unlikely(!g_mid_initialized)) { - mid_mt_init(); - } - - // Get size class - int class_idx = mid_size_to_class(size); - if (unlikely(class_idx < 0)) { - return NULL; - } - - // Get thread-local segment - MidThreadSegment* seg = &g_mid_segments[class_idx]; - - // Allocate from segment (fast path) - void* p = segment_alloc(seg, class_idx); - -#if MID_ENABLE_STATS - if (p) { - __sync_fetch_and_add(&g_mid_stats.total_allocs, 1); - } -#endif - - return p; -} - -/** - * mid_mt_free - Free memory allocated by mid_mt_alloc - * - * Phase 6-B: Header-based free (lock-free, no registry lookup!) - * - Reads MidMTHeader to get block metadata (O(1), ~2 cycles) - * - Eliminates pthread_mutex_lock/unlock (13.98% CPU overhead) - * - Expected: +17-27% throughput improvement - * - * Local free (same thread): Ultra-fast, lock-free - * Remote free (cross-thread): NOT IMPLEMENTED (memory leak, Phase 2 will add atomic remote free list) - */ -void mid_mt_free(void* ptr, size_t size) { - if (unlikely(!ptr)) return; - -#if MID_ENABLE_STATS - __sync_fetch_and_add(&g_mid_stats.total_frees, 1); -#endif - - // Phase 6-B: Read header for O(1) metadata lookup (no mutex!) - void* block = (uint8_t*)ptr - sizeof(MidMTHeader); - MidMTHeader* hdr = (MidMTHeader*)block; - - // Validate header magic (sanity check) - if (unlikely(hdr->magic != MID_MT_MAGIC)) { - MID_LOG("ERROR: Invalid Mid MT magic 0x%X (expected 0x%X) for ptr %p", - hdr->magic, MID_MT_MAGIC, ptr); - return; - } - - // Get metadata from header (no registry lookup!) - int class_idx = hdr->class_idx; - - // Validate class_idx - if (unlikely(class_idx < 0 || class_idx >= MID_NUM_CLASSES)) { - MID_LOG("ERROR: Invalid class_idx %d in header for ptr %p", class_idx, ptr); - return; - } - - // Get thread-local segment for this size class - MidThreadSegment* seg = &g_mid_segments[class_idx]; - - // === Fast path: Check if block belongs to current segment === - // Note: Check block (not ptr), since segment tracks block addresses - if (likely(seg->chunk_base != NULL && - block >= seg->chunk_base && - block < seg->end)) { - // Local free (same thread, lock-free) - segment_free_local(seg, ptr); - return; - } - - // === Slow path: Remote free (cross-thread) === - // Phase 1: NOT IMPLEMENTED - // We would need to find the owning segment and push to its remote free list. - // - // For Phase 1 (benchmarking), we accept this memory leak. - // bench_mid_mt_gap uses single-threaded workload, so remote frees never happen. - - MID_LOG("WARNING: Remote free not implemented, leaking %p (block_size=%u, class=%d)", - ptr, hdr->block_size, class_idx); - -#if MID_ENABLE_STATS - __sync_fetch_and_add(&g_mid_stats.remote_frees, 1); -#endif - - // TODO Phase 2: Implement remote free - // segment_free_remote(ptr, hdr->block_size, class_idx); -} - -/** - * mid_mt_thread_exit - Cleanup thread-local segments - * - * Called on thread exit to release resources - * - * Phase 6-B: No registry cleanup needed (header-based free) - */ -void mid_mt_thread_exit(void) { - MID_LOG("Thread exit cleanup"); - - // Free all chunks from this thread's segments - for (int class_idx = 0; class_idx < MID_NUM_CLASSES; class_idx++) { - MidThreadSegment* seg = &g_mid_segments[class_idx]; - - if (seg->chunk_base) { - // Phase 6-B: No registry remove (no registry exists) - - // Deallocate chunk - chunk_deallocate(seg->chunk_base, seg->chunk_size); - - // Clear segment - memset(seg, 0, sizeof(MidThreadSegment)); - } - } -} - -// ============================================================================ -// Statistics (Debug/Profiling) -// ============================================================================ - -#if MID_ENABLE_STATS - -void mid_mt_print_stats(void) { - printf("\n=== Mid Range MT Statistics ===\n"); - printf("Total allocations: %lu\n", g_mid_stats.total_allocs); - printf("Total frees: %lu\n", g_mid_stats.total_frees); - printf("Local frees: %lu (%.1f%%)\n", - g_mid_stats.local_frees, - 100.0 * g_mid_stats.local_frees / (g_mid_stats.total_frees + 1)); - printf("Remote frees: %lu (%.1f%%)\n", - g_mid_stats.remote_frees, - 100.0 * g_mid_stats.remote_frees / (g_mid_stats.total_frees + 1)); - printf("Registry lookups: %lu\n", g_mid_stats.registry_lookups); - printf("\n"); - - // Per-segment stats - for (int class_idx = 0; class_idx < MID_NUM_CLASSES; class_idx++) { - MidThreadSegment* seg = &g_mid_segments[class_idx]; - if (seg->alloc_count > 0) { - printf("Class %d (%zu bytes):\n", class_idx, mid_class_to_size(class_idx)); - printf(" Allocations: %lu\n", seg->alloc_count); - printf(" Frees: %lu\n", seg->free_count); - printf(" Refills: %u\n", seg->refill_count); - printf(" Used count: %u / %u\n", seg->used_count, seg->capacity); - } - } - printf("\n"); -} - -#endif // MID_ENABLE_STATS diff --git a/core/hakmem_mid_mt.h b/core/hakmem_mid_mt.h deleted file mode 100644 index 055301ef..00000000 --- a/core/hakmem_mid_mt.h +++ /dev/null @@ -1,287 +0,0 @@ -/** - * hakmem_mid_mt.h - * - * Mid Range Multi-threaded Allocator (1-32KB) - * mimalloc-style per-thread segment design for optimal MT performance - * - * Part of Hybrid Approach: - * - ≤1023B: Tiny Pool (header-based, C7 usable size) - * - 1-32KB: Mid MT (this module, mimalloc-style per-thread) - * - ≥64KB: Large Pool (learning-based, ELO strategies) - * - * Created: 2025-11-01 - * Goal: 46M → 100-120M ops/s (2.2-2.6x improvement) - */ - -#ifndef HAKMEM_MID_MT_H -#define HAKMEM_MID_MT_H - -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// ============================================================================ -// Size Classes -// ============================================================================ - -#define MID_SIZE_CLASS_8K 0 // 8KB blocks -#define MID_SIZE_CLASS_16K 1 // 16KB blocks -#define MID_SIZE_CLASS_32K 2 // 32KB blocks -#define MID_NUM_CLASSES 3 // Total number of size classes - -// ============================================================================ -// Phase 6-B: Header-based Allocation (Lock-free Free) -// ============================================================================ - -/** - * MidMTHeader - Per-allocation header for lock-free free() - * - * Prepended to each Mid MT allocation for O(1) metadata lookup. - * Eliminates need for global registry + mutex (13.98% CPU overhead). - * - * Memory Layout: - * [MidMTHeader: 8 bytes][User data: block_size - 8 bytes] - * ^ ^ - * block returned to user - * - * Performance: - * - Before: pthread_mutex_lock (8.12%) + unlock (5.86%) = 13.98% CPU - * - After: Simple header read (~2 cycles) = 0.01% CPU - * - Expected: +17-27% throughput improvement - */ -typedef struct MidMTHeader { - uint32_t block_size; // Block size (8192/16384/32768) - uint16_t class_idx; // Size class index (0-2) - uint16_t magic; // Magic number for validation -} MidMTHeader; - -#define MID_MT_MAGIC 0xAB42 // Mid MT allocation marker - -// Phase 13: Close Tiny/Mid gap. -// Phase 16: Dynamic Mid min size - must start where Tiny ends -// Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS: -// - HAKMEM_TINY_MAX_CLASS=7 (default) → Tiny up to 1023B → Mid starts at 1024B -// - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B -#include "hakmem_tiny.h" // For tiny_get_max_size() - -#define MID_MIN_SIZE_STATIC (1024) // Static fallback (C7 default) -#define MID_MAX_SIZE (32 * 1024) // 32KB - -static inline size_t mid_get_min_size(void) { - // Phase 5-Step2 FIX: Use static 1024 instead of tiny_get_max_size() + 1 - // Bug: tiny_get_max_size() returns 2047 (C7 usable), making min = 2048 - // This caused 1KB-2KB allocations to fall through to mmap() (100-1000x slower!) - // Fix: Use MID_MIN_SIZE_STATIC (1024) to align with actual Tiny/Mid boundary - return MID_MIN_SIZE_STATIC; // 1024 = TINY_MAX_SIZE -} -#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments) - -// ============================================================================ -// Data Structures -// ============================================================================ - -/** - * MidThreadSegment - Per-thread segment for lock-free allocation - * - * Memory layout optimized for cache line alignment (64 bytes) - * - Cache line 0: Fast path fields (free_list, current, end, used_count) - * - Cache line 1: Metadata (chunk_base, sizes, capacity) - * - Cache line 2: Statistics (optional, for debugging) - */ -typedef struct MidThreadSegment { - // === Fast Path (Cache line 0) === - void* free_list; // Free objects linked list (NULL if empty) - void* current; // Bump allocation pointer - void* end; // End of current chunk - uint32_t used_count; // Number of allocated blocks - uint32_t padding0; // Alignment padding - - // === Metadata (Cache line 1) === - void* chunk_base; // Base address of current chunk - size_t chunk_size; // Size of chunk (typically 64KB) - size_t block_size; // Size of each block (8KB/16KB/32KB) - uint32_t capacity; // Total blocks in chunk - uint32_t padding1; // Alignment padding - - // === Statistics (Cache line 2) === - uint64_t alloc_count; // Total allocations - uint64_t free_count; // Total frees - uint32_t refill_count; // Number of chunk refills - uint32_t padding2; // Alignment padding - -} __attribute__((aligned(64))) MidThreadSegment; - -// Phase 6-B: Registry structures removed (header-based free instead) - -// ============================================================================ -// Global Variables -// ============================================================================ - -// TLS: Each thread has its own segments (lock-free!) -extern __thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES]; - -// ============================================================================ -// API Functions -// ============================================================================ - -/** - * mid_mt_init - Initialize Mid Range MT allocator - * - * Call once at startup (thread-safe, idempotent) - */ -void mid_mt_init(void); - -/** - * mid_mt_alloc - Allocate memory from Mid Range pool - * - * @param size Allocation size (must be mid_get_min_size() ≤ size ≤ MID_MAX_SIZE) - * Phase 16: Range adjusts dynamically based on Tiny's max size - * Default: 1024B-32KB, can expand to 256B-32KB if Tiny reduced to C0-C5 - * @return Allocated pointer (aligned to block_size), or NULL on failure - * - * Thread-safety: Lock-free (uses TLS) - * Performance: O(1) fast path, O(1) amortized - * - * Fast path: - * 1. Check free_list (most common, ~4-5 instructions) - * 2. Bump allocation if free_list empty (~6-8 instructions) - * 3. Refill chunk if segment exhausted (rare, ~0.1%) - */ -void* mid_mt_alloc(size_t size); - -/** - * mid_mt_free - Free memory allocated by mid_mt_alloc - * - * @param ptr Pointer to free (must be from mid_mt_alloc) - * @param size Original allocation size (for size class lookup) - * - * Thread-safety: Lock-free if freeing to own thread's segment - * Requires registry lock if remote free (cross-thread) - * Performance: O(1) local free, O(log N) remote free (registry lookup) - * - * Note: Phase 1 implementation does not handle remote free (memory leak) - * Phase 2 will implement per-segment atomic remote free list - */ -void mid_mt_free(void* ptr, size_t size); - -/** - * mid_mt_thread_exit - Cleanup thread-local segments - * - * Called on thread exit to release resources - * Should be registered via pthread_key_create or __attribute__((destructor)) - */ -void mid_mt_thread_exit(void); - -// Phase 6-B: mid_registry_lookup() removed (header-based free instead) - -// ============================================================================ -// Inline Helper Functions -// ============================================================================ - -/** - * mid_size_to_class - Convert size to size class index - * - * @param size Allocation size - * @return Size class index (0-2), or -1 if out of range - */ -static inline int mid_size_to_class(size_t size) { - if (size <= 8192) return MID_SIZE_CLASS_8K; - if (size <= 16384) return MID_SIZE_CLASS_16K; - if (size <= 32768) return MID_SIZE_CLASS_32K; - return -1; // Out of range -} - -/** - * mid_class_to_size - Convert size class to block size - * - * @param class_idx Size class index (0-2) - * @return Block size in bytes - */ -static inline size_t mid_class_to_size(int class_idx) { - static const size_t sizes[MID_NUM_CLASSES] = { - 8192, // 8KB - 16384, // 16KB - 32768 // 32KB - }; - return (class_idx >= 0 && class_idx < MID_NUM_CLASSES) ? sizes[class_idx] : 0; -} - -/** - * mid_is_in_range - Check if size is in Mid Range pool range - * - * @param size Allocation size - * @return true if (tiny_max+1) ≤ size ≤ 32KB - * - * Phase 16: Dynamic range - adjusts based on Tiny's max size - * PERF_OPT: Force inline to eliminate function call overhead in hot path - */ -__attribute__((always_inline)) -static inline bool mid_is_in_range(size_t size) { - return (size >= mid_get_min_size() && size <= MID_MAX_SIZE); -} - -// ============================================================================ -// Configuration (can be overridden via environment variables) -// ============================================================================ - -// Default chunk size (64KB) -#ifndef MID_DEFAULT_CHUNK_SIZE -#define MID_DEFAULT_CHUNK_SIZE (64 * 1024) -#endif - -// Initial registry capacity -#ifndef MID_REGISTRY_INITIAL_CAPACITY -#define MID_REGISTRY_INITIAL_CAPACITY 64 -#endif - -// Enable/disable statistics collection -#ifndef MID_ENABLE_STATS -#define MID_ENABLE_STATS 0 // DISABLED for performance -#endif - -// Enable/disable debug logging -#ifndef MID_DEBUG -#define MID_DEBUG 0 // DISABLE for performance testing -#endif - -#if MID_DEBUG -#include -#define MID_LOG(fmt, ...) fprintf(stderr, "[MID_MT] " fmt "\n", ##__VA_ARGS__) -#else -#define MID_LOG(fmt, ...) ((void)0) -#endif - -// ============================================================================ -// Statistics (Debug/Profiling) -// ============================================================================ - -#if MID_ENABLE_STATS - -/** - * MidStats - Global statistics for profiling - */ -typedef struct MidStats { - uint64_t total_allocs; // Total allocations - uint64_t total_frees; // Total frees - uint64_t total_refills; // Total chunk refills - uint64_t local_frees; // Local frees (same thread) - uint64_t remote_frees; // Remote frees (cross-thread) - uint64_t registry_lookups; // Registry lookups -} MidStats; - -extern MidStats g_mid_stats; - -void mid_mt_print_stats(void); - -#endif // MID_ENABLE_STATS - -#ifdef __cplusplus -} -#endif - -#endif // HAKMEM_MID_MT_H