Remove unused Mid MT layer

commit f1b7964ef9
parent 195c74756c
Author: Moe Charm (CI)
Date:   2025-12-01 23:43:44 +09:00

8 changed files with 5 additions and 880 deletions

View File

@@ -218,12 +218,12 @@ LDFLAGS += $(EXTRA_LDFLAGS)
# Targets
TARGET = test_hakmem
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o
OBJS = $(OBJS_BASE)
# Shared library
SHARED_LIB = libhakmem.so
SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/ss_tls_hint_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o
SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/ss_addr_map_box_shared.o core/box/ss_tls_hint_box_shared.o core/box/slab_recycling_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_shared_pool_acquire_shared.o hakmem_shared_pool_release_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o
# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1)
ifeq ($(POOL_TLS_PHASE1),1)
@@ -250,7 +250,7 @@ endif
# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
@@ -285,7 +285,7 @@ $(TARGET): $(OBJS)
@echo "========================================="
# Compile C files
%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
$(CC) $(CFLAGS) -c -o $@ $<
# Build benchmark programs
@@ -427,7 +427,7 @@ test-box-refactor: box-refactor
./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o

View File

@@ -106,14 +106,6 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
hkm_size_hist_record(size);
// Legacy Mid MT allocator (Phase 5) is disabled by default to favor ACE/Pool.
// Enable via HAKMEM_MID_MT_ENABLE=1 when running legacy benchmarks.
static int g_mid_mt_enabled = -1;
if (__builtin_expect(g_mid_mt_enabled < 0, 0)) {
const char* e = getenv("HAKMEM_MID_MT_ENABLE");
g_mid_mt_enabled = (e && *e && *e != '0') ? 1 : 0;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
// Phase 1: Ultra-fast Pool TLS for 8KB-52KB range
if (size >= 8192 && size <= 53248) {
@@ -124,18 +116,6 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
}
#endif
if (__builtin_expect(g_mid_mt_enabled && mid_is_in_range(size), 0)) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_mid);
#endif
void* mid_ptr = mid_mt_alloc(size);
#if HAKMEM_DEBUG_TIMING
HKM_TIME_END(HKM_CAT_POOL_GET, t_mid);
#endif
// PERF_OPT: likely hint - mid allocations usually succeed
if (__builtin_expect(mid_ptr != NULL, 1)) return mid_ptr;
}
#if HAKMEM_FEATURE_EVOLUTION
if (g_evo_sample_mask > 0) {
static _Atomic uint64_t tick_counter = 0;

View File

@@ -72,7 +72,6 @@ static void hak_init_impl(void) {
hkm_whale_init();
// NEW Phase Hybrid: Initialize Mid Range MT allocator (8-32KB, mimalloc-style)
mid_mt_init();
// NEW Phase 6.8: Initialize configuration system (replaces init_free_policy + init_thp_policy)
hak_config_init();

View File

@@ -33,7 +33,6 @@ void* realloc(void* ptr, size_t size) {
#include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid)
#include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification
#include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination
#include "mid_free_route_box.h" // Phase 5-Step2: Mid MT free routing fix
// malloc wrapper - intercepts system malloc() calls
__thread uint64_t g_malloc_total_calls = 0;
@@ -226,11 +225,6 @@
}
#endif
// Phase 5-Step2: Mid Free Route Box (BEFORE classify_ptr)
// Quick fix for 19x free() slowdown: Try Mid MT registry first
// If found, route directly to mid_mt_free() and return
if (mid_free_route_try(ptr)) return;
// Classify pointer BEFORE early libc fallbacks to avoid misrouting Tiny pointers
// This is safe: classifier uses header probe and registry; does not allocate.
int is_hakmem_owned = 0;

View File

@@ -1,109 +0,0 @@
/**
* mid_free_route_box.h
*
* Box: Mid Free Route Box
* Responsibility: Route Mid MT allocations to correct free path
* Contract: Try Mid MT registry lookup, return success/failure
*
* Part of Phase 5-Step2 fix for 19x free() slowdown
*
* Problem:
* - Mid MT allocator registers chunks in MidGlobalRegistry
* - Free path searches Pool's mid_desc registry (different registry!)
* - Result: 100% lookup failure → 4x cascading lookups → 19x slower
*
* Solution:
* - Add Mid MT registry lookup BEFORE Pool registry lookup
* - Route directly to mid_mt_free() if found
* - Fall through to existing path if not found
*
* Performance Impact:
* - Before: 1.42 M ops/s (19x slower than system malloc)
* - After: 14-21 M ops/s (Option B quick fix, 10-15x improvement)
*
* Created: 2025-11-29 (Phase 5-Step2 Mid MT Gap Fix)
*/
#ifndef MID_FREE_ROUTE_BOX_H
#define MID_FREE_ROUTE_BOX_H
#include "../hakmem_mid_mt.h"
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Box Contract: Mid MT Free Routing
// ============================================================================
/**
* mid_free_route_try - Try Mid MT free path first
*
* @param ptr Pointer to free
* @return true if handled by Mid MT, false to fall through
*
* Phase 6-B: Header-based detection (lock-free!)
*
* Box Responsibilities:
* 1. Read MidMTHeader at ptr - sizeof(MidMTHeader)
* 2. Check magic number (0xAB42)
* 3. If valid: Call mid_mt_free() and return true
* 4. If invalid: Return false (let existing path handle it)
*
* Box Guarantees:
* - Zero side effects if returning false
* - Correct free if returning true
* - Thread-safe (lock-free header read)
*
* Performance:
* - Before (Phase 5): O(log N) registry lookup + mutex = ~50 cycles (13.98% CPU)
* - After (Phase 6-B): O(1) header read + magic check = ~2 cycles (0.01% CPU)
* - Expected improvement: +17-27% throughput
*
* Usage Example:
* void free(void* ptr) {
* if (mid_free_route_try(ptr)) return; // Mid MT handled
* // Fall through to existing free path...
* }
*/
__attribute__((always_inline))
static inline bool mid_free_route_try(void* ptr) {
if (!ptr) return false; // NULL ptr, not Mid MT
// Phase 6-B: Read header for O(1) detection (no mutex!)
void* block = (uint8_t*)ptr - sizeof(MidMTHeader);
MidMTHeader* hdr = (MidMTHeader*)block;
// Check magic number to identify Mid MT allocation
if (hdr->magic == MID_MT_MAGIC) {
// Valid Mid MT allocation, route to mid_mt_free()
// Pass block_size from header (no size needed from caller!)
mid_mt_free(ptr, hdr->block_size);
return true; // Handled
}
// Not a Mid MT allocation, fall through to existing path
return false;
}
// ============================================================================
// Box Observability (Debug/Profiling)
// ============================================================================
#if MID_DEBUG
/**
* mid_free_route_stats - Print Mid Free Route Box statistics
*
* Only available in debug builds (MID_DEBUG=1)
* Tracks hit/miss rates for performance analysis
*/
void mid_free_route_stats(void);
#endif
#ifdef __cplusplus
}
#endif
#endif // MID_FREE_ROUTE_BOX_H

View File

@@ -16,7 +16,6 @@
#include "hakmem_tiny.h" // NEW Phase 6.12: Tiny Pool (≤1KB)
#include "hakmem_tiny_superslab.h" // NEW Phase 7.6: SuperSlab for Tiny Pool
#include "tiny_fastcache.h" // NEW Phase 6-3: Tiny Fast Path (System tcache style)
#include "hakmem_mid_mt.h" // NEW Phase Hybrid: Mid Range MT (8-32KB, mimalloc-style)
#include "hakmem_super_registry.h" // NEW Phase 1: SuperSlab Registry (mincore elimination)
#include "hakmem_elo.h" // NEW: ELO Strategy Selection (Phase 6.2)
#include "hakmem_ace_stats.h" // NEW: ACE lightweight stats (avoid implicit decl warnings)

View File

@@ -1,451 +0,0 @@
/**
* hakmem_mid_mt.c
*
* Mid Range Multi-threaded Allocator Implementation (8-32KB)
* mimalloc-style per-thread segment for optimal MT performance
*
* Design:
* - Per-thread segments (TLS) for lock-free allocation
* - Global registry for segment lookup during free()
* - 4MB chunks (MID_CHUNK_SIZE) with bump + free list allocation
* - Phase 1: Local free only (remote free = memory leak, acceptable for benchmarking)
* - Phase 2: Will add atomic remote free list
*/
#include "hakmem_mid_mt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <assert.h>
#include <stdatomic.h>
// Use likely/unlikely hints for branch prediction
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// ============================================================================
// Global and TLS Variables
// ============================================================================
// TLS: Each thread has independent segments (lock-free!)
__thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES] = {0};
// Phase 6-B: Registry removed (no longer needed with header-based free)
// Statistics (if enabled)
#if MID_ENABLE_STATS
MidStats g_mid_stats = {0};
#endif
// Initialization flag
static volatile int g_mid_initialized = 0;
static pthread_mutex_t g_init_lock = PTHREAD_MUTEX_INITIALIZER;
// ============================================================================
// Forward Declarations
// ============================================================================
static bool segment_refill(MidThreadSegment* seg, int class_idx);
static void* segment_alloc(MidThreadSegment* seg, int class_idx);
static void segment_free_local(MidThreadSegment* seg, void* ptr);
static void* chunk_allocate(size_t chunk_size);
static void chunk_deallocate(void* chunk, size_t chunk_size);
// Phase 6-B: Registry functions removed (header-based free instead)
// ============================================================================
// Chunk Management (mmap/munmap wrappers)
// ============================================================================
/**
* chunk_allocate - Allocate a new chunk via mmap
*
* @param chunk_size Size of chunk (typically 4MB, MID_CHUNK_SIZE)
* @return Chunk base address, or NULL on failure
*/
static void* chunk_allocate(size_t chunk_size) {
void* chunk = mmap(
NULL,
chunk_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
);
if (chunk == MAP_FAILED) {
MID_LOG("ERROR: mmap failed for chunk_size=%zu", chunk_size);
return NULL;
}
MID_LOG("Chunk allocated: %p, size=%zu", chunk, chunk_size);
return chunk;
}
/**
* chunk_deallocate - Free chunk via munmap
*
* @param chunk Chunk base address
* @param chunk_size Size of chunk
*/
static void chunk_deallocate(void* chunk, size_t chunk_size) {
if (!chunk) return;
int ret = munmap(chunk, chunk_size);
if (ret != 0) {
MID_LOG("ERROR: munmap failed for chunk=%p, size=%zu", chunk, chunk_size);
} else {
MID_LOG("Chunk deallocated: %p, size=%zu", chunk, chunk_size);
}
}
// ============================================================================
// Segment Operations
// ============================================================================
/**
* segment_refill - Allocate new chunk and setup segment
*
* Called when segment is exhausted (rare, ~0.1% of allocations)
*
* Phase 6-B: No longer registers chunks (header-based free instead)
*
* @return true on success, false on OOM
*/
static bool segment_refill(MidThreadSegment* seg, int class_idx) {
size_t block_size = mid_class_to_size(class_idx);
size_t chunk_size = MID_CHUNK_SIZE;
// Allocate new chunk via mmap
void* chunk = chunk_allocate(chunk_size);
if (!chunk) {
return false;
}
// Phase 6-B: No registry add (header-based free doesn't need registry)
// Setup segment
seg->chunk_base = chunk;
seg->chunk_size = chunk_size;
seg->block_size = block_size;
seg->current = chunk;
seg->end = (uint8_t*)chunk + chunk_size;
seg->capacity = chunk_size / block_size;
seg->refill_count++;
MID_LOG("Segment refill: class=%d, block_size=%zu, capacity=%u, chunk=%p",
class_idx, block_size, seg->capacity, chunk);
return true;
}
/**
* segment_alloc - Allocate from segment (fast path)
*
* PERFORMANCE: Force inline for maximum speed
*
* Fast path priority:
* 1. Free list (most common, ~90-95% hit rate)
* 2. Bump allocation (when free list empty)
* 3. Refill (when segment exhausted)
*
* Phase 6-B: Now writes MidMTHeader for lock-free free()
*
* @return Allocated pointer (after header), or NULL on OOM
*/
static inline void* segment_alloc(MidThreadSegment* seg, int class_idx) __attribute__((always_inline));
static inline void* segment_alloc(MidThreadSegment* seg, int class_idx) {
void* block; // Block start (includes header space)
size_t block_size = seg->block_size;
// === Path 0: First allocation - need refill ===
// CRITICAL FIX: TLS is zero-initialized, so chunk_base == NULL on first call
if (unlikely(seg->chunk_base == NULL)) {
if (!segment_refill(seg, class_idx)) {
return NULL; // OOM
}
block_size = seg->block_size; // Update after refill
}
// === Path 1: Free list (fastest, ~4-5 instructions) ===
// Note: Free list stores next pointer at block start (overwrites header when freed)
block = seg->free_list;
if (likely(block != NULL)) {
seg->free_list = *(void**)block; // Pop from free list
seg->used_count++;
seg->alloc_count++;
// Phase 6-B: Write header before returning
MidMTHeader* hdr = (MidMTHeader*)block;
hdr->block_size = (uint32_t)block_size;
hdr->class_idx = (uint16_t)class_idx;
hdr->magic = MID_MT_MAGIC;
return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header
}
// === Path 2: Bump allocation (fast, ~6-8 instructions) ===
block = seg->current;
void* next = (uint8_t*)block + block_size;
if (likely(next <= seg->end)) {
seg->current = next;
seg->used_count++;
seg->alloc_count++;
// Phase 6-B: Write header before returning
MidMTHeader* hdr = (MidMTHeader*)block;
hdr->block_size = (uint32_t)block_size;
hdr->class_idx = (uint16_t)class_idx;
hdr->magic = MID_MT_MAGIC;
return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header
}
// === Path 3: Refill (slow, called ~once per 4MB chunk) ===
if (!segment_refill(seg, class_idx)) {
return NULL; // OOM
}
// Retry after refill
block = seg->current;
block_size = seg->block_size; // Update after refill
seg->current = (uint8_t*)block + block_size;
seg->used_count++;
seg->alloc_count++;
// Phase 6-B: Write header before returning
MidMTHeader* hdr = (MidMTHeader*)block;
hdr->block_size = (uint32_t)block_size;
hdr->class_idx = (uint16_t)class_idx;
hdr->magic = MID_MT_MAGIC;
return (uint8_t*)block + sizeof(MidMTHeader); // Return user pointer after header
}
/**
* segment_free_local - Free to local segment (same thread)
*
* @param seg Segment to free to
* @param ptr Pointer to free (user pointer, after header)
*
* Phase 6-B: Adjusted for header-based allocation
*/
static inline void segment_free_local(MidThreadSegment* seg, void* ptr) {
// Phase 6-B: Get block start (before header)
void* block = (uint8_t*)ptr - sizeof(MidMTHeader);
// Push to free list (lock-free, local operation)
// Note: Overwrites header with next pointer (header no longer needed after free)
*(void**)block = seg->free_list;
seg->free_list = block;
seg->used_count--;
seg->free_count++;
#if MID_ENABLE_STATS
__sync_fetch_and_add(&g_mid_stats.local_frees, 1);
#endif
}
// ============================================================================
// Public API
// ============================================================================
/**
* mid_mt_init - Initialize Mid Range MT allocator
*
* Thread-safe, idempotent
*
* Phase 6-B: Simplified (no registry initialization)
*/
void mid_mt_init(void) {
if (g_mid_initialized) return;
pthread_mutex_lock(&g_init_lock);
if (!g_mid_initialized) {
// Phase 6-B: No registry initialization (header-based free)
#if MID_ENABLE_STATS
memset(&g_mid_stats, 0, sizeof(g_mid_stats));
#endif
g_mid_initialized = 1;
MID_LOG("Mid MT allocator initialized (Phase 6-B: header-based)");
}
pthread_mutex_unlock(&g_init_lock);
}
/**
* mid_mt_alloc - Allocate memory from Mid Range pool (8-32KB)
*
* Thread-safe, lock-free (uses TLS)
*/
void* mid_mt_alloc(size_t size) {
// Validate size range (Phase 16: dynamic min size based on Tiny's max)
if (unlikely(size < mid_get_min_size() || size > MID_MAX_SIZE)) {
return NULL;
}
// Initialize if needed (thread-safe)
if (unlikely(!g_mid_initialized)) {
mid_mt_init();
}
// Get size class
int class_idx = mid_size_to_class(size);
if (unlikely(class_idx < 0)) {
return NULL;
}
// Get thread-local segment
MidThreadSegment* seg = &g_mid_segments[class_idx];
// Allocate from segment (fast path)
void* p = segment_alloc(seg, class_idx);
#if MID_ENABLE_STATS
if (p) {
__sync_fetch_and_add(&g_mid_stats.total_allocs, 1);
}
#endif
return p;
}
/**
* mid_mt_free - Free memory allocated by mid_mt_alloc
*
* Phase 6-B: Header-based free (lock-free, no registry lookup!)
* - Reads MidMTHeader to get block metadata (O(1), ~2 cycles)
* - Eliminates pthread_mutex_lock/unlock (13.98% CPU overhead)
* - Expected: +17-27% throughput improvement
*
* Local free (same thread): Ultra-fast, lock-free
* Remote free (cross-thread): NOT IMPLEMENTED (memory leak, Phase 2 will add atomic remote free list)
*/
void mid_mt_free(void* ptr, size_t size) {
if (unlikely(!ptr)) return;
#if MID_ENABLE_STATS
__sync_fetch_and_add(&g_mid_stats.total_frees, 1);
#endif
// Phase 6-B: Read header for O(1) metadata lookup (no mutex!)
void* block = (uint8_t*)ptr - sizeof(MidMTHeader);
MidMTHeader* hdr = (MidMTHeader*)block;
// Validate header magic (sanity check)
if (unlikely(hdr->magic != MID_MT_MAGIC)) {
MID_LOG("ERROR: Invalid Mid MT magic 0x%X (expected 0x%X) for ptr %p",
hdr->magic, MID_MT_MAGIC, ptr);
return;
}
// Get metadata from header (no registry lookup!)
int class_idx = hdr->class_idx;
// Validate class_idx
if (unlikely(class_idx < 0 || class_idx >= MID_NUM_CLASSES)) {
MID_LOG("ERROR: Invalid class_idx %d in header for ptr %p", class_idx, ptr);
return;
}
// Get thread-local segment for this size class
MidThreadSegment* seg = &g_mid_segments[class_idx];
// === Fast path: Check if block belongs to current segment ===
// Note: Check block (not ptr), since segment tracks block addresses
if (likely(seg->chunk_base != NULL &&
block >= seg->chunk_base &&
block < seg->end)) {
// Local free (same thread, lock-free)
segment_free_local(seg, ptr);
return;
}
// === Slow path: Remote free (cross-thread) ===
// Phase 1: NOT IMPLEMENTED
// We would need to find the owning segment and push to its remote free list.
//
// For Phase 1 (benchmarking), we accept this memory leak.
// bench_mid_mt_gap uses single-threaded workload, so remote frees never happen.
MID_LOG("WARNING: Remote free not implemented, leaking %p (block_size=%u, class=%d)",
ptr, hdr->block_size, class_idx);
#if MID_ENABLE_STATS
__sync_fetch_and_add(&g_mid_stats.remote_frees, 1);
#endif
// TODO Phase 2: Implement remote free
// segment_free_remote(ptr, hdr->block_size, class_idx);
}
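/*
 * Hypothetical Phase 2 sketch (not part of this codebase): remote free as a
 * lock-free Treiber stack. Assumes a `_Atomic(void*) remote_free_list` field
 * added to MidThreadSegment; the owning thread would drain it on its next
 * refill. All names below are illustrative only.
 *
 *   static void segment_free_remote(_Atomic(void*)* list, void* block) {
 *       void* head = atomic_load_explicit(list, memory_order_acquire);
 *       do {
 *           *(void**)block = head;  // link block to current remote head
 *       } while (!atomic_compare_exchange_weak_explicit(
 *                    list, &head, block,
 *                    memory_order_release, memory_order_acquire));
 *   }
 */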
/**
* mid_mt_thread_exit - Cleanup thread-local segments
*
* Called on thread exit to release resources
*
* Phase 6-B: No registry cleanup needed (header-based free)
*/
void mid_mt_thread_exit(void) {
MID_LOG("Thread exit cleanup");
// Free all chunks from this thread's segments
for (int class_idx = 0; class_idx < MID_NUM_CLASSES; class_idx++) {
MidThreadSegment* seg = &g_mid_segments[class_idx];
if (seg->chunk_base) {
// Phase 6-B: No registry remove (no registry exists)
// Deallocate chunk
chunk_deallocate(seg->chunk_base, seg->chunk_size);
// Clear segment
memset(seg, 0, sizeof(MidThreadSegment));
}
}
}
// ============================================================================
// Statistics (Debug/Profiling)
// ============================================================================
#if MID_ENABLE_STATS
void mid_mt_print_stats(void) {
printf("\n=== Mid Range MT Statistics ===\n");
printf("Total allocations: %lu\n", g_mid_stats.total_allocs);
printf("Total frees: %lu\n", g_mid_stats.total_frees);
printf("Local frees: %lu (%.1f%%)\n",
g_mid_stats.local_frees,
100.0 * g_mid_stats.local_frees / (g_mid_stats.total_frees + 1));
printf("Remote frees: %lu (%.1f%%)\n",
g_mid_stats.remote_frees,
100.0 * g_mid_stats.remote_frees / (g_mid_stats.total_frees + 1));
printf("Registry lookups: %lu\n", g_mid_stats.registry_lookups);
printf("\n");
// Per-segment stats
for (int class_idx = 0; class_idx < MID_NUM_CLASSES; class_idx++) {
MidThreadSegment* seg = &g_mid_segments[class_idx];
if (seg->alloc_count > 0) {
printf("Class %d (%zu bytes):\n", class_idx, mid_class_to_size(class_idx));
printf(" Allocations: %lu\n", seg->alloc_count);
printf(" Frees: %lu\n", seg->free_count);
printf(" Refills: %u\n", seg->refill_count);
printf(" Used count: %u / %u\n", seg->used_count, seg->capacity);
}
}
printf("\n");
}
#endif // MID_ENABLE_STATS

View File

@@ -1,287 +0,0 @@
/**
* hakmem_mid_mt.h
*
* Mid Range Multi-threaded Allocator (1-32KB)
* mimalloc-style per-thread segment design for optimal MT performance
*
* Part of Hybrid Approach:
* - ≤1023B: Tiny Pool (header-based, C7 usable size)
* - 1-32KB: Mid MT (this module, mimalloc-style per-thread)
* - ≥64KB: Large Pool (learning-based, ELO strategies)
*
* Created: 2025-11-01
* Goal: 46M → 100-120M ops/s (2.2-2.6x improvement)
*/
#ifndef HAKMEM_MID_MT_H
#define HAKMEM_MID_MT_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Size Classes
// ============================================================================
#define MID_SIZE_CLASS_8K 0 // 8KB blocks
#define MID_SIZE_CLASS_16K 1 // 16KB blocks
#define MID_SIZE_CLASS_32K 2 // 32KB blocks
#define MID_NUM_CLASSES 3 // Total number of size classes
// ============================================================================
// Phase 6-B: Header-based Allocation (Lock-free Free)
// ============================================================================
/**
* MidMTHeader - Per-allocation header for lock-free free()
*
* Prepended to each Mid MT allocation for O(1) metadata lookup.
* Eliminates need for global registry + mutex (13.98% CPU overhead).
*
* Memory Layout:
* [MidMTHeader: 8 bytes][User data: block_size - 8 bytes]
* ^                     ^
* block                 returned to user
*
* Performance:
* - Before: pthread_mutex_lock (8.12%) + unlock (5.86%) = 13.98% CPU
* - After: Simple header read (~2 cycles) = 0.01% CPU
* - Expected: +17-27% throughput improvement
*/
typedef struct MidMTHeader {
uint32_t block_size; // Block size (8192/16384/32768)
uint16_t class_idx; // Size class index (0-2)
uint16_t magic; // Magic number for validation
} MidMTHeader;
#define MID_MT_MAGIC 0xAB42 // Mid MT allocation marker
// Phase 13: Close Tiny/Mid gap.
// Phase 16: Dynamic Mid min size - must start where Tiny ends
// Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS:
// - HAKMEM_TINY_MAX_CLASS=7 (default) → Tiny up to 1023B → Mid starts at 1024B
// - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B
#include "hakmem_tiny.h" // For tiny_get_max_size()
#define MID_MIN_SIZE_STATIC (1024) // Static fallback (C7 default)
#define MID_MAX_SIZE (32 * 1024) // 32KB
static inline size_t mid_get_min_size(void) {
// Phase 5-Step2 FIX: Use static 1024 instead of tiny_get_max_size() + 1
// Bug: tiny_get_max_size() returns 2047 (C7 usable), making min = 2048
// This caused 1KB-2KB allocations to fall through to mmap() (100-1000x slower!)
// Fix: Use MID_MIN_SIZE_STATIC (1024) to align with actual Tiny/Mid boundary
return MID_MIN_SIZE_STATIC; // 1024 = TINY_MAX_SIZE
}
#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
// ============================================================================
// Data Structures
// ============================================================================
/**
* MidThreadSegment - Per-thread segment for lock-free allocation
*
* Memory layout optimized for cache line alignment (64 bytes)
* - Cache line 0: Fast path fields (free_list, current, end, used_count)
* - Cache line 1: Metadata (chunk_base, sizes, capacity)
* - Cache line 2: Statistics (optional, for debugging)
*/
typedef struct MidThreadSegment {
// === Fast Path (Cache line 0) ===
void* free_list; // Free objects linked list (NULL if empty)
void* current; // Bump allocation pointer
void* end; // End of current chunk
uint32_t used_count; // Number of allocated blocks
uint32_t padding0; // Alignment padding
// === Metadata (Cache line 1) ===
void* chunk_base; // Base address of current chunk
size_t chunk_size; // Size of chunk (typically 4MB, MID_CHUNK_SIZE)
size_t block_size; // Size of each block (8KB/16KB/32KB)
uint32_t capacity; // Total blocks in chunk
uint32_t padding1; // Alignment padding
// === Statistics (Cache line 2) ===
uint64_t alloc_count; // Total allocations
uint64_t free_count; // Total frees
uint32_t refill_count; // Number of chunk refills
uint32_t padding2; // Alignment padding
} __attribute__((aligned(64))) MidThreadSegment;
// Phase 6-B: Registry structures removed (header-based free instead)
// ============================================================================
// Global Variables
// ============================================================================
// TLS: Each thread has its own segments (lock-free!)
extern __thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES];
// ============================================================================
// API Functions
// ============================================================================
/**
* mid_mt_init - Initialize Mid Range MT allocator
*
* Call once at startup (thread-safe, idempotent)
*/
void mid_mt_init(void);
/**
* mid_mt_alloc - Allocate memory from Mid Range pool
*
* @param size Allocation size (must be mid_get_min_size() ≤ size ≤ MID_MAX_SIZE)
* Phase 16: Range adjusts dynamically based on Tiny's max size
* Default: 1024B-32KB, can expand to 256B-32KB if Tiny reduced to C0-C5
* @return Allocated pointer (aligned to block_size), or NULL on failure
*
* Thread-safety: Lock-free (uses TLS)
* Performance: O(1) fast path, O(1) amortized
*
* Fast path:
* 1. Check free_list (most common, ~4-5 instructions)
* 2. Bump allocation if free_list empty (~6-8 instructions)
* 3. Refill chunk if segment exhausted (rare, ~0.1%)
*/
void* mid_mt_alloc(size_t size);
/**
* mid_mt_free - Free memory allocated by mid_mt_alloc
*
* @param ptr Pointer to free (must be from mid_mt_alloc)
* @param size Original allocation size (for size class lookup)
*
* Thread-safety: Lock-free if freeing to own thread's segment
* Remote free (cross-thread) is not handled in Phase 1 (see note below)
* Performance: O(1) local free (header read + free-list push; no registry, no mutex)
*
* Note: Phase 1 implementation does not handle remote free (memory leak)
* Phase 2 will implement per-segment atomic remote free list
*/
void mid_mt_free(void* ptr, size_t size);
/**
* mid_mt_thread_exit - Cleanup thread-local segments
*
* Called on thread exit to release resources
* Should be registered via pthread_key_create or __attribute__((destructor))
*/
void mid_mt_thread_exit(void);
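/*
 * Minimal registration sketch via pthread_key_create (an assumption for
 * illustration; none of these names exist in this codebase):
 *
 *   static pthread_key_t g_mid_exit_key;
 *   static pthread_once_t g_mid_exit_once = PTHREAD_ONCE_INIT;
 *   static void mid_exit_dtor(void* v) { (void)v; mid_mt_thread_exit(); }
 *   static void mid_exit_key_init(void) {
 *       pthread_key_create(&g_mid_exit_key, mid_exit_dtor);
 *   }
 *   // Per thread, after its first Mid MT allocation (the destructor only
 *   // fires for threads that stored a non-NULL value):
 *   //   pthread_once(&g_mid_exit_once, mid_exit_key_init);
 *   //   pthread_setspecific(g_mid_exit_key, (void*)1);
 */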
// Phase 6-B: mid_registry_lookup() removed (header-based free instead)
// ============================================================================
// Inline Helper Functions
// ============================================================================
/**
* mid_size_to_class - Convert size to size class index
*
* @param size Allocation size
* @return Size class index (0-2), or -1 if out of range
*/
static inline int mid_size_to_class(size_t size) {
if (size <= 8192) return MID_SIZE_CLASS_8K;
if (size <= 16384) return MID_SIZE_CLASS_16K;
if (size <= 32768) return MID_SIZE_CLASS_32K;
return -1; // Out of range
}
/**
* mid_class_to_size - Convert size class to block size
*
* @param class_idx Size class index (0-2)
* @return Block size in bytes
*/
static inline size_t mid_class_to_size(int class_idx) {
static const size_t sizes[MID_NUM_CLASSES] = {
8192, // 8KB
16384, // 16KB
32768 // 32KB
};
return (class_idx >= 0 && class_idx < MID_NUM_CLASSES) ? sizes[class_idx] : 0;
}
/**
* mid_is_in_range - Check if size is in Mid Range pool range
*
* @param size Allocation size
* @return true if mid_get_min_size() ≤ size ≤ 32KB
*
* Phase 16: Dynamic range - adjusts based on Tiny's max size
* PERF_OPT: Force inline to eliminate function call overhead in hot path
*/
__attribute__((always_inline))
static inline bool mid_is_in_range(size_t size) {
return (size >= mid_get_min_size() && size <= MID_MAX_SIZE);
}
// ============================================================================
// Configuration (can be overridden via environment variables)
// ============================================================================
// Default chunk size (64KB)
#ifndef MID_DEFAULT_CHUNK_SIZE
#define MID_DEFAULT_CHUNK_SIZE (64 * 1024)
#endif
// Initial registry capacity
#ifndef MID_REGISTRY_INITIAL_CAPACITY
#define MID_REGISTRY_INITIAL_CAPACITY 64
#endif
// Enable/disable statistics collection
#ifndef MID_ENABLE_STATS
#define MID_ENABLE_STATS 0 // DISABLED for performance
#endif
// Enable/disable debug logging
#ifndef MID_DEBUG
#define MID_DEBUG 0 // DISABLE for performance testing
#endif
#if MID_DEBUG
#include <stdio.h>
#define MID_LOG(fmt, ...) fprintf(stderr, "[MID_MT] " fmt "\n", ##__VA_ARGS__)
#else
#define MID_LOG(fmt, ...) ((void)0)
#endif
// ============================================================================
// Statistics (Debug/Profiling)
// ============================================================================
#if MID_ENABLE_STATS
/**
* MidStats - Global statistics for profiling
*/
typedef struct MidStats {
uint64_t total_allocs; // Total allocations
uint64_t total_frees; // Total frees
uint64_t total_refills; // Total chunk refills
uint64_t local_frees; // Local frees (same thread)
uint64_t remote_frees; // Remote frees (cross-thread)
uint64_t registry_lookups; // Registry lookups
} MidStats;
extern MidStats g_mid_stats;
void mid_mt_print_stats(void);
#endif // MID_ENABLE_STATS
#ifdef __cplusplus
}
#endif
#endif // HAKMEM_MID_MT_H