diff --git a/Makefile b/Makefile index b38a826a..97e7bd67 100644 --- a/Makefile +++ b/Makefile @@ -191,12 +191,12 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o OBJS = $(OBJS_BASE) # Shared library SHARED_LIB = libhakmem.so -SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o 
hakmem_tiny_superslab_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o +SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o 
hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o # Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1) ifeq ($(POOL_TLS_PHASE1),1) @@ -223,7 +223,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o 
core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -400,7 +400,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS = 
$(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/box/ss_legacy_backend_box.c b/core/box/ss_legacy_backend_box.c index 24f42915..f8e6783a 100644 --- a/core/box/ss_legacy_backend_box.c +++ b/core/box/ss_legacy_backend_box.c @@ -5,6 +5,7 @@ #include "ss_allocation_box.h" #include "hakmem_tiny_config.h" #include "hakmem_tiny.h" // For tiny_self_u32 +#include "../tiny_region_id.h" // For tiny_region_id_write_header #include #include #include @@ -88,7 +89,11 @@ void* hak_tiny_alloc_superslab_backend_hint(int class_idx) g_ss_legacy_hint_ss[class_idx] = NULL; } +#if HAKMEM_TINY_HEADER_CLASSIDX + return tiny_region_id_write_header(base, class_idx); +#else return (void*)base; +#endif } // ============================================================================ @@ -156,7 +161,11 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx) hak_tiny_ss_hint_record(class_idx, chunk, slab_idx); meta->used++; atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed); +#if HAKMEM_TINY_HEADER_CLASSIDX + return tiny_region_id_write_header(base, class_idx); +#else return (void*)base; +#endif } } chunk = chunk->next_chunk; @@ -197,7 +206,11 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx) hak_tiny_ss_hint_record(class_idx, new_chunk, slab_idx); meta->used++; atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed); +#if HAKMEM_TINY_HEADER_CLASSIDX + return tiny_region_id_write_header(base, class_idx); +#else return (void*)base; +#endif } } diff --git a/core/box/ss_unified_backend_box.c b/core/box/ss_unified_backend_box.c index 7151c874..2bde19e7 100644 --- a/core/box/ss_unified_backend_box.c +++ b/core/box/ss_unified_backend_box.c @@ -7,6 +7,7 @@ #include "hakmem_shared_pool.h" #include "hakmem_tiny_config.h" #include "ss_allocation_box.h" +#include "../tiny_region_id.h" // For tiny_region_id_write_header #include #include @@ -109,7 +110,11 @@ void* hak_tiny_alloc_superslab_backend_shared(int class_idx) hak_tiny_ss_hint_record(class_idx, ss, slab_idx); +#if HAKMEM_TINY_HEADER_CLASSIDX + return tiny_region_id_write_header(base, class_idx); +#else return (void*)base; +#endif } // ============================================================================ diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c deleted file mode 100644 index 43f05342..00000000 --- a/core/hakmem_tiny_superslab.c +++ /dev/null @@ -1,1521 +0,0 @@ -// hakmem_tiny_superslab.c - SuperSlab allocator implementation (Phase 6.22) -// Purpose: 2MB aligned slab allocation with fast pointer→slab lookup -// License: MIT -// Date: 2025-10-24 - -#include "hakmem_tiny_superslab.h" -#include "box/ss_hot_cold_box.h" // Phase 3d-C: Hot/Cold Split -#include "hakmem_super_registry.h" // Phase 1: Registry integration -#include "hakmem_tiny.h" // For tiny_self_u32 -#include "hakmem_tiny_config.h" // For extern g_tiny_class_sizes -#include "hakmem_shared_pool.h" // Phase 12: Shared SuperSlab pool backend (skeleton) -#include -#include -#include -#include -#include -#include // getenv, atoi -#include -#include -#include // getrlimit for OOM diagnostics -#include -#include "hakmem_internal.h" // HAKMEM_LOG for release-silent logging -#include "tiny_region_id.h" // For HEADER_MAGIC / HEADER_CLASS_MASK (restore header on remote-drain) -#include "hakmem_tiny_integrity.h" // HAK_CHECK_CLASS_IDX -#include "box/tiny_next_ptr_box.h" // For 
tiny_next_write -#include "box/slab_freelist_atomic.h" // Phase 1: Atomic freelist accessor - -static int g_ss_force_lg = -1; -static _Atomic int g_ss_populate_once = 0; - -// Forward: decide next SuperSlab lg for a class (ACE-aware, clamped) -static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) -{ - if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { - return SUPERSLAB_LG_DEFAULT; - } - // Prefer ACE target if within allowed range - uint8_t t = atomic_load_explicit((_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg, - memory_order_relaxed); - if (t < SUPERSLAB_LG_MIN || t > SUPERSLAB_LG_MAX) { - return SUPERSLAB_LG_DEFAULT; - } - return t; -} - -// ============================================================================ -// Global Statistics -// ============================================================================ - -static pthread_mutex_t g_superslab_lock = PTHREAD_MUTEX_INITIALIZER; -uint64_t g_superslabs_allocated = 0; // Non-static for debugging -uint64_t g_superslabs_freed = 0; // Phase 7.6: Non-static for test access -uint64_t g_bytes_allocated = 0; // Non-static for debugging - -// ============================================================================ -// Phase 2a: Dynamic Expansion - Global per-class SuperSlabHeads -// ============================================================================ - -SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS] = {NULL}; - -// Debug counters -_Atomic uint64_t g_ss_active_dec_calls = 0; -_Atomic uint64_t g_hak_tiny_free_calls = 0; -_Atomic uint64_t g_ss_remote_push_calls = 0; -// Free path instrumentation (lightweight, for OOM/route diagnosis) -_Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entries -_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes -_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes -// Per-class counters for gating/metrics (Tiny classes = 8) -uint64_t g_ss_alloc_by_class[8] = {0}; -uint64_t g_ss_freed_by_class[8] = {0}; - -typedef struct SuperslabCacheEntry { - struct SuperslabCacheEntry* next; -} SuperslabCacheEntry; - -static SuperslabCacheEntry* g_ss_cache_head[8] = {0}; -static size_t g_ss_cache_count[8] = {0}; -static size_t g_ss_cache_cap[8] = {0}; -static size_t g_ss_precharge_target[8] = {0}; -static _Atomic int g_ss_precharge_done[8] = {0}; -static int g_ss_cache_enabled = 0; - -static pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT; -static pthread_mutex_t g_ss_cache_lock[8]; - -uint64_t g_ss_cache_hits[8] = {0}; -uint64_t g_ss_cache_misses[8] = {0}; -uint64_t g_ss_cache_puts[8] = {0}; -uint64_t g_ss_cache_drops[8] = {0}; -uint64_t g_ss_cache_precharged[8] = {0}; - -uint64_t g_superslabs_reused = 0; -uint64_t g_superslabs_cached = 0; - -static void ss_cache_global_init(void) { - for (int i = 0; i < 8; i++) { - pthread_mutex_init(&g_ss_cache_lock[i], NULL); - } -} - -static inline void ss_cache_ensure_init(void) { - pthread_once(&g_ss_cache_once, ss_cache_global_init); -} - -static void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate); -static void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask); -static SuperslabCacheEntry* ss_cache_pop(uint8_t size_class); -static int ss_cache_push(uint8_t size_class, SuperSlab* ss); - -// Drain remote MPSC stack into freelist (ownership already verified by caller) -void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) -{ - if (!ss || slab_idx < 0 || slab_idx >= 
ss_slabs_capacity(ss) || !meta) return; - - static _Atomic uint32_t g_remote_drain_diag_once = 0; - static int g_remote_drain_diag_en = -1; - - // Atomically take the whole remote list - uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0, - memory_order_acq_rel); - if (head == 0) return; - - // Convert remote stack (offset 0 next) into freelist encoding via Box API - // and splice in front of current freelist preserving relative order. - void* prev = meta->freelist; - int cls = (int)meta->class_idx; - HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe"); - if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) { - static _Atomic int g_remote_drain_cls_oob = 0; - if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) { - fprintf(stderr, - "[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n", - (void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head); - } - return; - } - uintptr_t cur = head; - while (cur != 0) { - uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0 -#if !HAKMEM_BUILD_RELEASE - if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) { - const char* e = getenv("HAKMEM_TINY_SLL_DIAG"); - g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0; - } -#else - if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) { - g_remote_drain_diag_en = 0; - } -#endif - if (__builtin_expect(g_remote_drain_diag_en, 0)) { - uintptr_t addr = (uintptr_t)next; - if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) { - uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed); - if (shot < 8) { - fprintf(stderr, - "[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n", - cls, - slab_idx, - (void*)cur, - (void*)next, - (unsigned long)head, - prev, - (unsigned)meta->used); - } - } -#if HAKMEM_TINY_HEADER_CLASSIDX - int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1); - if (hdr_cls >= 0 && hdr_cls != cls) { - uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed); - if (shot < 8) { - fprintf(stderr, - "[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n", - cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head); - } - } -#endif - } -#if HAKMEM_TINY_HEADER_CLASSIDX - // Cross-check header vs meta before writing next (even if diag is off) - { - int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1); - if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) { - static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0; - uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed); - if (n < 16) { - fprintf(stderr, - "[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n", - cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx); - } - } - } -#endif - // Restore header for header-classes (class 1-6) which were clobbered by remote push -#if HAKMEM_TINY_HEADER_CLASSIDX - if (cls != 0) { - uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK)); - *(uint8_t*)(uintptr_t)cur = expected; - } -#endif - // Rewrite next pointer to Box representation for this class - tiny_next_write(cls, (void*)cur, prev); - prev = (void*)cur; - cur = next; - } - meta->freelist = prev; - // Reset remote count after full drain - atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release); - - // Update freelist/nonempty visibility bits - uint32_t bit = (1u << 
slab_idx); - atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release); - atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release); -} - -static inline void ss_stats_os_alloc(uint8_t size_class, size_t ss_size) { - pthread_mutex_lock(&g_superslab_lock); - g_superslabs_allocated++; - if (size_class < 8) { - g_ss_alloc_by_class[size_class]++; - } - g_bytes_allocated += ss_size; - pthread_mutex_unlock(&g_superslab_lock); -} - -static inline void ss_stats_cache_reuse(void) { - pthread_mutex_lock(&g_superslab_lock); - g_superslabs_reused++; - pthread_mutex_unlock(&g_superslab_lock); -} - -static inline void ss_stats_cache_store(void) { - pthread_mutex_lock(&g_superslab_lock); - g_superslabs_cached++; - pthread_mutex_unlock(&g_superslab_lock); -} - -// ============================================================================ -// Phase 8.3: ACE (Adaptive Cache Engine) State -// ============================================================================ - -SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}}; - -// Phase 8.3: hak_now_ns() is now defined in hakmem_tiny_superslab.h as static inline - -// ============================================================================ -// Diagnostics -// ============================================================================ - -static void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err) { - static int logged = 0; - if (logged) return; - logged = 1; - - // CRITICAL FIX: Increment lock depth FIRST before any LIBC calls - // fopen/fclose/getrlimit/fprintf all may call malloc internally - // Must bypass HAKMEM wrapper to avoid header mismatch crash - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; // Force wrapper to use __libc_malloc - - struct rlimit rl = {0}; - if (getrlimit(RLIMIT_AS, &rl) != 0) { - rl.rlim_cur = RLIM_INFINITY; - rl.rlim_max = RLIM_INFINITY; - } - - unsigned long vm_size_kb = 0; - unsigned long vm_rss_kb = 0; - FILE* status = fopen("/proc/self/status", "r"); - if (status) { - char line[256]; - while (fgets(line, sizeof(line), status)) { - if (strncmp(line, "VmSize:", 7) == 0) { - (void)sscanf(line + 7, "%lu", &vm_size_kb); - } else if (strncmp(line, "VmRSS:", 6) == 0) { - (void)sscanf(line + 6, "%lu", &vm_rss_kb); - } - } - fclose(status); - } - // CRITICAL FIX: Do NOT decrement lock_depth yet! 
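The lock-depth bracketing in log_superslab_oom_once() above is the allocator's re-entrancy guard: while g_hakmem_lock_depth is non-zero, the malloc wrapper sends nested allocations from libc (fopen, getrlimit, fprintf stream buffering) to __libc_malloc instead of back into hakmem. A minimal sketch of the pattern, with an illustrative helper name; only g_hakmem_lock_depth and the bracketing rule are taken from the code above:

#include <stdio.h>

extern __thread int g_hakmem_lock_depth;   // hakmem's TLS re-entrancy counter

// Hypothetical helper: any libc call that may allocate is bracketed so the
// nested malloc bypasses hakmem and cannot recurse into the OOM path.
static void diag_log_line(const char* msg) {
    g_hakmem_lock_depth++;                 // nested malloc -> __libc_malloc
    fprintf(stderr, "%s\n", msg);          // may allocate for stderr buffering
    g_hakmem_lock_depth--;                 // restore only after libc returns
}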
- // fprintf() below may call malloc for buffering - - char rl_cur_buf[32]; - char rl_max_buf[32]; - if (rl.rlim_cur == RLIM_INFINITY) { - strcpy(rl_cur_buf, "inf"); - } else { - snprintf(rl_cur_buf, sizeof(rl_cur_buf), "%llu", (unsigned long long)rl.rlim_cur); - } - if (rl.rlim_max == RLIM_INFINITY) { - strcpy(rl_max_buf, "inf"); - } else { - snprintf(rl_max_buf, sizeof(rl_max_buf), "%llu", (unsigned long long)rl.rlim_max); - } - -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, - "[SS OOM] mmap failed: err=%d ss_size=%zu alloc_size=%zu " - "alloc=%llu freed=%llu bytes=%llu " - "RLIMIT_AS(cur=%s max=%s) VmSize=%lu kB VmRSS=%lu kB\n", - err, - ss_size, - alloc_size, - (unsigned long long)g_superslabs_allocated, - (unsigned long long)g_superslabs_freed, - (unsigned long long)g_bytes_allocated, - rl_cur_buf, - rl_max_buf, - vm_size_kb, - vm_rss_kb); -#endif - - g_hakmem_lock_depth--; // Now safe to restore (all libc calls complete) -} - -// Global counters for debugging (non-static for external access) -_Atomic uint64_t g_ss_mmap_count = 0; -_Atomic uint64_t g_final_fallback_mmap_count = 0; - -static void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) { - void* ptr = NULL; - static int log_count = 0; - -#ifdef MAP_ALIGNED_SUPER - int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER; -#ifdef MAP_POPULATE - if (populate) { - map_flags |= MAP_POPULATE; - } -#endif - ptr = mmap(NULL, ss_size, - PROT_READ | PROT_WRITE, - map_flags, - -1, 0); - if (ptr != MAP_FAILED) { - atomic_fetch_add(&g_ss_mmap_count, 1); - if (((uintptr_t)ptr & ss_mask) == 0) { - ss_stats_os_alloc(size_class, ss_size); - return ptr; - } - munmap(ptr, ss_size); - ptr = NULL; - } else { - log_superslab_oom_once(ss_size, ss_size, errno); - } -#endif - - size_t alloc_size = ss_size * 2; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; -#ifdef MAP_POPULATE - if (populate) { - flags |= MAP_POPULATE; - } -#endif - void* raw = mmap(NULL, alloc_size, - PROT_READ | PROT_WRITE, - flags, - -1, 0); - if (raw != MAP_FAILED) { - uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1; - #if !HAKMEM_BUILD_RELEASE - if (log_count < 10) { - fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n", - (unsigned long)count, size_class, ss_size); - log_count++; - } - #endif - } - if (raw == MAP_FAILED) { - log_superslab_oom_once(ss_size, alloc_size, errno); - return NULL; - } - - uintptr_t raw_addr = (uintptr_t)raw; - uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask; - ptr = (void*)aligned_addr; - - size_t prefix_size = aligned_addr - raw_addr; - if (prefix_size > 0) { - munmap(raw, prefix_size); - } - size_t suffix_size = alloc_size - prefix_size - ss_size; - if (suffix_size > 0) { - if (populate) { -#ifdef MADV_DONTNEED - madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED); -#endif - } else { - munmap((char*)ptr + ss_size, suffix_size); - } - } - - ss_stats_os_alloc(size_class, ss_size); - return ptr; -} - -static void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) { - if (!g_ss_cache_enabled) return; - if (size_class >= 8) return; - if (g_ss_precharge_target[size_class] == 0) return; - if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return; - - ss_cache_ensure_init(); - pthread_mutex_lock(&g_ss_cache_lock[size_class]); - size_t target = g_ss_precharge_target[size_class]; - size_t cap = g_ss_cache_cap[size_class]; - size_t desired = target; - if (cap != 0 && desired > cap) { - desired = cap; - } - 
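The fallback path in ss_os_acquire() above obtains a naturally aligned SuperSlab without MAP_ALIGNED_SUPER by over-allocating and trimming. A self-contained sketch of that technique, assuming size is a power of two that doubles as the alignment (the helper name is illustrative, not part of hakmem):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

// Map 2*size bytes, round the start up to the next size-aligned boundary,
// then return the unused prefix and suffix to the kernel.
static void* aligned_mmap_pow2(size_t size) {
    uintptr_t mask = (uintptr_t)size - 1;          // valid because size is 2^k
    size_t alloc_size = size * 2;
    void* raw = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (raw == MAP_FAILED) return NULL;

    uintptr_t aligned = ((uintptr_t)raw + mask) & ~mask;
    size_t prefix = aligned - (uintptr_t)raw;
    size_t suffix = alloc_size - prefix - size;

    if (prefix) munmap(raw, prefix);                      // drop the head
    if (suffix) munmap((void*)(aligned + size), suffix);  // drop the tail
    return (void*)aligned;
}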
while (g_ss_cache_count[size_class] < desired) { - void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1); - if (!raw) { - break; - } - SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw; - entry->next = g_ss_cache_head[size_class]; - g_ss_cache_head[size_class] = entry; - g_ss_cache_count[size_class]++; - g_ss_cache_precharged[size_class]++; - } - atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release); - pthread_mutex_unlock(&g_ss_cache_lock[size_class]); -} - -static SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) { - if (!g_ss_cache_enabled) return NULL; - if (size_class >= 8) return NULL; - - ss_cache_ensure_init(); - - pthread_mutex_lock(&g_ss_cache_lock[size_class]); - SuperslabCacheEntry* entry = g_ss_cache_head[size_class]; - if (entry) { - g_ss_cache_head[size_class] = entry->next; - if (g_ss_cache_count[size_class] > 0) { - g_ss_cache_count[size_class]--; - } - entry->next = NULL; - g_ss_cache_hits[size_class]++; - } else { - g_ss_cache_misses[size_class]++; - } - pthread_mutex_unlock(&g_ss_cache_lock[size_class]); - return entry; -} - -static int ss_cache_push(uint8_t size_class, SuperSlab* ss) { - if (!g_ss_cache_enabled) return 0; - if (size_class >= 8) return 0; - - ss_cache_ensure_init(); - pthread_mutex_lock(&g_ss_cache_lock[size_class]); - size_t cap = g_ss_cache_cap[size_class]; - if (cap != 0 && g_ss_cache_count[size_class] >= cap) { - g_ss_cache_drops[size_class]++; - pthread_mutex_unlock(&g_ss_cache_lock[size_class]); - return 0; - } - SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss; - entry->next = g_ss_cache_head[size_class]; - g_ss_cache_head[size_class] = entry; - g_ss_cache_count[size_class]++; - g_ss_cache_puts[size_class]++; - pthread_mutex_unlock(&g_ss_cache_lock[size_class]); - return 1; -} - -/* - * Legacy backend for hak_tiny_alloc_superslab_box(). - * - * Phase 12 Stage A/B: - * - Uses per-class SuperSlabHead (g_superslab_heads) as the implementation. - * - Callers MUST use hak_tiny_alloc_superslab_box() and never touch this directly. - * - Later Stage C: this function will be replaced by a shared_pool backend. - */ -static SuperSlabHead* init_superslab_head(int class_idx); -static int expand_superslab_head(SuperSlabHead* head); - -static void* hak_tiny_alloc_superslab_backend_legacy(int class_idx) -{ - if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { - return NULL; - } - - SuperSlabHead* head = g_superslab_heads[class_idx]; - if (!head) { - head = init_superslab_head(class_idx); - if (!head) { - return NULL; - } - g_superslab_heads[class_idx] = head; - } - - SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk; - - while (chunk) { - int cap = ss_slabs_capacity(chunk); - for (int slab_idx = 0; slab_idx < cap; slab_idx++) { - TinySlabMeta* meta = &chunk->slabs[slab_idx]; - - // Skip slabs that belong to a different class (or are uninitialized). 
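The block address computed a few lines below follows a fixed layout: data starts at a constant offset from the SuperSlab base and each slab occupies a fixed usable stride, so a block is located purely by (slab_idx, used, class stride). A worked sketch of that geometry; the EX_* constants are assumptions standing in for SUPERSLAB_SLAB0_DATA_OFFSET and SUPERSLAB_SLAB_USABLE_SIZE, whose real values live in hakmem_tiny_superslab.h:

#include <stddef.h>

#define EX_SLAB0_DATA_OFFSET  4096u    // assumed metadata/header area size
#define EX_SLAB_USABLE_SIZE   65536u   // assumed usable bytes per slab

// Base of the next bump-allocated block for a class whose stride is 'stride'.
static inline void* ex_block_addr(void* ss_base, int slab_idx,
                                  size_t stride, unsigned used) {
    return (unsigned char*)ss_base
         + EX_SLAB0_DATA_OFFSET
         + (size_t)slab_idx * EX_SLAB_USABLE_SIZE
         + (size_t)used * stride;
}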
- if (meta->class_idx != (uint8_t)class_idx && meta->class_idx != 255) { - continue; - } - - // P1.2 FIX: Initialize slab on first use (like shared backend does) - // This ensures class_map is populated for all slabs, not just slab 0 - if (meta->capacity == 0) { - size_t block_size = g_tiny_class_sizes[class_idx]; - uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); - superslab_init_slab(chunk, slab_idx, block_size, owner_tid); - meta = &chunk->slabs[slab_idx]; // Refresh pointer after init - meta->class_idx = (uint8_t)class_idx; - // P1.2: Update class_map for dynamic slab initialization - chunk->class_map[slab_idx] = (uint8_t)class_idx; - } - - if (meta->used < meta->capacity) { - size_t stride = tiny_block_stride_for_class(class_idx); - size_t offset = (size_t)meta->used * stride; - uint8_t* base = (uint8_t*)chunk - + SUPERSLAB_SLAB0_DATA_OFFSET - + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE - + offset; - - meta->used++; - atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed); - return (void*)base; - } - } - chunk = chunk->next_chunk; - } - - if (expand_superslab_head(head) < 0) { - return NULL; - } - - SuperSlab* new_chunk = head->current_chunk; - if (!new_chunk) { - return NULL; - } - - int cap2 = ss_slabs_capacity(new_chunk); - for (int slab_idx = 0; slab_idx < cap2; slab_idx++) { - TinySlabMeta* meta = &new_chunk->slabs[slab_idx]; - - // P1.2 FIX: Initialize slab on first use (like shared backend does) - if (meta->capacity == 0) { - size_t block_size = g_tiny_class_sizes[class_idx]; - uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); - superslab_init_slab(new_chunk, slab_idx, block_size, owner_tid); - meta = &new_chunk->slabs[slab_idx]; // Refresh pointer after init - meta->class_idx = (uint8_t)class_idx; - // P1.2: Update class_map for dynamic slab initialization - new_chunk->class_map[slab_idx] = (uint8_t)class_idx; - } - - if (meta->used < meta->capacity) { - size_t stride = tiny_block_stride_for_class(class_idx); - size_t offset = (size_t)meta->used * stride; - uint8_t* base = (uint8_t*)new_chunk - + SUPERSLAB_SLAB0_DATA_OFFSET - + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE - + offset; - - meta->used++; - atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed); - return (void*)base; - } - } - - return NULL; -} - -/* - * Shared pool backend for hak_tiny_alloc_superslab_box(). - * - * Phase 12-2: - * - Uses SharedSuperSlabPool (g_shared_pool) to obtain a SuperSlab/slab - * for the requested class_idx. - * - This backend EXPRESSLY owns only: - * - choosing (ss, slab_idx) via shared_pool_acquire_slab() - * - initializing that slab's TinySlabMeta via superslab_init_slab() - * and nothing else; all callers must go through hak_tiny_alloc_superslab_box(). - * - * - For now this is a minimal, conservative implementation: - * - One linear bump-run is carved from the acquired slab using tiny_block_stride_for_class(). - * - No complex per-slab freelist or refill policy yet (Phase 12-3+). - * - If shared_pool_acquire_slab() fails, we fall back to legacy backend. - */ -static void* hak_tiny_alloc_superslab_backend_shared(int class_idx) -{ - if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { - return NULL; - } - - SuperSlab* ss = NULL; - int slab_idx = -1; - - if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss) { - // Shared pool could not provide a slab; caller may choose to fall back. 
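Elsewhere in this patch every successful backend return is routed through tiny_region_id_write_header() when HAKMEM_TINY_HEADER_CLASSIDX is enabled, and the remote-drain code above restores the same one-byte tag with HEADER_MAGIC | class. The real encoding lives in tiny_region_id.h, which is not part of this diff, so the following is only a plausible sketch under the assumption of a one-byte tag stored just before the user pointer; constants and names are illustrative:

#include <stdint.h>

#define EX_HEADER_MAGIC       0xA0u    // assumed tag pattern
#define EX_HEADER_CLASS_MASK  0x0Fu    // assumed low bits carrying class_idx

// Write the tag at the block base and hand the caller the byte after it.
static inline void* ex_write_header(void* base, int class_idx) {
    uint8_t* p = (uint8_t*)base;
    p[0] = (uint8_t)(EX_HEADER_MAGIC | ((unsigned)class_idx & EX_HEADER_CLASS_MASK));
    return p + 1;
}

// Recover the class from a user pointer, or -1 if the tag does not match.
static inline int ex_read_header(const void* user_ptr) {
    uint8_t tag = ((const uint8_t*)user_ptr)[-1];
    if ((tag & (uint8_t)~EX_HEADER_CLASS_MASK) != EX_HEADER_MAGIC) return -1;
    return (int)(tag & EX_HEADER_CLASS_MASK);
}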
- return NULL; - } - - TinySlabMeta* meta = &ss->slabs[slab_idx]; - - // Defensive: shared_pool must either hand us an UNASSIGNED slab or one - // already bound to this class. Anything else is a hard bug. - if (meta->class_idx != 255 && meta->class_idx != (uint8_t)class_idx) { -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, - "[HAKMEM][SS_SHARED] BUG: acquire_slab mismatch: cls=%d meta->class_idx=%u slab_idx=%d ss=%p\n", - class_idx, (unsigned)meta->class_idx, slab_idx, (void*)ss); -#endif - return NULL; - } - - // Initialize slab geometry once for this class. - if (meta->capacity == 0) { - size_t block_size = g_tiny_class_sizes[class_idx]; - // LARSON FIX: Pass actual thread ID for cross-thread free detection - uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self(); - superslab_init_slab(ss, slab_idx, block_size, my_tid); - meta = &ss->slabs[slab_idx]; - - // CRITICAL FIX: Always set class_idx after init to avoid C0/C7 confusion. - // New SuperSlabs start with meta->class_idx=0 (mmap zero-init). - // Must explicitly set to requested class, not just when class_idx==255. - meta->class_idx = (uint8_t)class_idx; - // P1.1: Update class_map in shared acquire path - ss->class_map[slab_idx] = (uint8_t)class_idx; - } - - // Final contract check before computing addresses. - if (meta->class_idx != (uint8_t)class_idx || - meta->capacity == 0 || - meta->used > meta->capacity) { -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, - "[HAKMEM][SS_SHARED] BUG: invalid slab meta before alloc: " - "cls=%d slab_idx=%d meta_cls=%u used=%u cap=%u ss=%p\n", - class_idx, slab_idx, - (unsigned)meta->class_idx, - (unsigned)meta->used, - (unsigned)meta->capacity, - (void*)ss); -#endif - return NULL; - } - - // Simple bump allocation within this slab. - if (meta->used >= meta->capacity) { - // Slab exhausted: in minimal Phase12-2 backend we do not loop; - // caller or future logic must acquire another slab. - return NULL; - } - - size_t stride = tiny_block_stride_for_class(class_idx); - size_t offset = (size_t)meta->used * stride; - - // Phase 12-2 minimal geometry: - // - slab 0 data offset via SUPERSLAB_SLAB0_DATA_OFFSET - // - subsequent slabs at fixed SUPERSLAB_SLAB_USABLE_SIZE strides. - size_t slab_base_off = SUPERSLAB_SLAB0_DATA_OFFSET - + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE; - uint8_t* base = (uint8_t*)ss + slab_base_off + offset; - - meta->used++; - atomic_fetch_add_explicit(&ss->total_active_blocks, 1, memory_order_relaxed); - - return (void*)base; -} - -/* - * Box API entry: - * - Single front-door for tiny-side SuperSlab allocations. - * - * Phase 12 policy: - * - HAKMEM_TINY_SS_SHARED=0 → legacy backend only (for regression checks) - * - HAKMEM_TINY_SS_SHARED=1 → prefer the shared backend, fall back to legacy only on failure - */ -void* hak_tiny_alloc_superslab_box(int class_idx) -{ - static int g_ss_shared_mode = -1; - static _Atomic uint32_t g_ss_backend_log = 0; - if (__builtin_expect(g_ss_shared_mode == -1, 0)) { - const char* e = getenv("HAKMEM_TINY_SS_SHARED"); - if (!e || !*e) { - g_ss_shared_mode = 1; // default: shared enabled - } else { - int v = atoi(e); - g_ss_shared_mode = (v != 0) ?
1 : 0; - } - } - - if (g_ss_shared_mode == 1) { - void* p = hak_tiny_alloc_superslab_backend_shared(class_idx); - if (p != NULL) { - uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); - if (n < 4) { - fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p); - } - return p; - } - // If the shared backend fails, fall back to the legacy backend on the safe side - uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); - if (n < 4) { - fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx); - } - return hak_tiny_alloc_superslab_backend_legacy(class_idx); - } - - // When shared is OFF, use the legacy backend only - uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); - if (n < 4) { - fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx); - } - return hak_tiny_alloc_superslab_backend_legacy(class_idx); -} - -// ============================================================================ -// SuperSlab Allocation (2MB aligned) -// ============================================================================ - -SuperSlab* superslab_allocate(uint8_t size_class) { - // Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → fail 1 in N allocations - static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate - static __thread unsigned long fault_tick = 0; - if (__builtin_expect(fault_rate == -1, 0)) { - const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE"); - if (e && *e) { - int v = atoi(e); if (v < 0) v = 0; fault_rate = v; - } else { - fault_rate = 0; - } - } - if (fault_rate > 0) { - unsigned long t = ++fault_tick; - if ((t % (unsigned long)fault_rate) == 0ul) { - return NULL; // simulate OOM - } - } - // Optional env clamp for SuperSlab size - static int env_parsed = 0; - static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT; // Start with default (2MB) - static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX; - if (!env_parsed) { - char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB"); - if (maxmb) { - int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21; - } - char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB"); - if (minmb) { - int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21; - } - if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env; - const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG"); - if (force_lg_env && *force_lg_env) { - int v = atoi(force_lg_env); - if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) { - g_ss_force_lg = v; - g_ss_min_lg_env = g_ss_max_lg_env = v; - } - } - size_t precharge_default = 0; - const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE"); - if (precharge_env && *precharge_env) { - long v = atol(precharge_env); - if (v < 0) v = 0; - precharge_default = (size_t)v; - if (v > 0) { - atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); - } - } - size_t cache_default = 0; - const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE"); - if (cache_env && *cache_env) { - long v = atol(cache_env); - if (v < 0) v = 0; - cache_default = (size_t)v; - } - for (int i = 0; i < 8; i++) { - g_ss_cache_cap[i] = cache_default; - g_ss_precharge_target[i] = precharge_default; - } - for (int i = 0; i < 8; i++) { - char name[64]; - snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i); - char* cap_env = getenv(name); - if (cap_env && *cap_env) { - long v = atol(cap_env); - if (v < 0) v = 0; - g_ss_cache_cap[i] = (size_t)v; - } - snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i); - char*
pre_env = getenv(name); - if (pre_env && *pre_env) { - long v = atol(pre_env); - if (v < 0) v = 0; - g_ss_precharge_target[i] = (size_t)v; - if (v > 0) { - atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); - } - } - if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) { - g_ss_cache_enabled = 1; - } - } - const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE"); - if (populate_env && atoi(populate_env) != 0) { - atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); - } - env_parsed = 1; - } - - uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class); - if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env; - if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env; - size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB - uintptr_t ss_mask = ss_size - 1; - int from_cache = 0; - void* ptr = NULL; - - // Debug logging flag (lazy init) - static __thread int dbg = -1; -#if HAKMEM_BUILD_RELEASE - dbg = 0; -#else - if (__builtin_expect(dbg == -1, 0)) { - const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG"); - dbg = (e && *e && *e != '0') ? 1 : 0; - } -#endif - - // Phase 9: Try LRU cache first (lazy deallocation) - SuperSlab* cached_ss = hak_ss_lru_pop(size_class); - if (cached_ss) { - ptr = (void*)cached_ss; - from_cache = 1; - // Debug logging for REFILL from LRU - if (dbg == 1) { - fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n", - size_class, (void*)cached_ss); - } - // Skip old cache path - LRU cache takes priority - } else if (g_ss_cache_enabled && size_class < 8) { - // Fallback to old cache (will be deprecated) - ss_cache_precharge(size_class, ss_size, ss_mask); - SuperslabCacheEntry* old_cached = ss_cache_pop(size_class); - if (old_cached) { - ptr = (void*)old_cached; - from_cache = 1; - // Debug logging for REFILL from prewarm (old cache is essentially prewarm) - if (dbg == 1) { - fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n", - size_class, (void*)old_cached); - } - } - } - - if (!ptr) { - int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel); - ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate); - if (!ptr) { - return NULL; - } - // Debug logging for REFILL with new allocation - if (dbg == 1) { - fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n", - size_class, (void*)ptr); - } - } - - // Initialize SuperSlab header (Phase 12: no global size_class field) - SuperSlab* ss = (SuperSlab*)ptr; - ss->magic = SUPERSLAB_MAGIC; - ss->active_slabs = 0; - ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB) - ss->slab_bitmap = 0; - ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask - ss->partial_epoch = 0; - ss->publish_hint = 0xFF; - - // Initialize atomics explicitly - atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed); - atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed); - atomic_store_explicit(&ss->listed, 0, memory_order_relaxed); - ss->partial_next = NULL; - - // Phase 9: Initialize LRU fields - ss->last_used_ns = 0; - ss->generation = 0; - ss->lru_prev = NULL; - ss->lru_next = NULL; - - // Phase 3d-C: Initialize Hot/Cold Split fields - ss->hot_count = 0; - ss->cold_count = 0; - for (int i = 0; i < 16; i++) { - ss->hot_indices[i] = 0; - ss->cold_indices[i] = 0; - } - - // Initialize all slab metadata (only up to max slabs for this size) - int max_slabs = (int)(ss_size / SLAB_SIZE); - - // PERF_OPT: memset removed - mmap() already returns zero-initialized pages - // 
Previous memset calls consumed 23.83% CPU time (perf analysis 2025-11-28) - // Measured improvement: +1.3% throughput (71.86M → 72.78M ops/s) - // Note: ASan/debug builds may need these, but production mmap guarantees zero pages - // memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta)); - // memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t)); - // memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t)); - // memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t)); - - for (int i = 0; i < max_slabs; i++) { - // Phase 1: Atomic initialization (freelist + used are now _Atomic) - slab_freelist_store_relaxed(&ss->slabs[i], NULL); // Explicit NULL (redundant after memset, but clear intent) - atomic_store_explicit(&ss->slabs[i].used, 0, memory_order_relaxed); - ss->slabs[i].capacity = 0; - ss->slabs[i].owner_tid_low = 0; - - // Initialize remote queue atomics (memset already zeroed, but use proper atomic init) - atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed); - atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed); - atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed); - } - - if (from_cache) { - ss_stats_cache_reuse(); - } - - // Phase 8.3: Update ACE current_lg to match allocated size - g_ss_ace[size_class].current_lg = lg; - - // Phase 1: Register SuperSlab in global registry for fast lookup - // CRITICAL: Register AFTER full initialization (ss structure is ready) - uintptr_t base = (uintptr_t)ss; - if (!hak_super_register(base, ss)) { - // Registry full - this is a fatal error - fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss); - // Still return ss to avoid memory leak, but lookups may fail - } - - return ss; -} - -// ============================================================================ -// Phase 2a: Dynamic Expansion - Chunk Management Functions -// ============================================================================ - -// Initialize SuperSlabHead for a class -SuperSlabHead* init_superslab_head(int class_idx) { - if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { - return NULL; - } - - // Allocate SuperSlabHead structure - SuperSlabHead* head = (SuperSlabHead*)calloc(1, sizeof(SuperSlabHead)); - if (!head) { - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; - fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate SuperSlabHead for class %d\n", class_idx); - g_hakmem_lock_depth--; - return NULL; - } - - head->class_idx = (uint8_t)class_idx; - atomic_store_explicit(&head->total_chunks, 0, memory_order_relaxed); - head->first_chunk = NULL; - head->current_chunk = NULL; - pthread_mutex_init(&head->expansion_lock, NULL); - - // Allocate initial chunk(s) - // Hot classes (1, 4, 6) get 2 initial chunks to reduce contention - int initial_chunks = 1; - - // Phase 2a: Start with 1 chunk for all classes (expansion will handle growth) - // This reduces startup memory overhead while still allowing unlimited growth - initial_chunks = 1; - - for (int i = 0; i < initial_chunks; i++) { - if (expand_superslab_head(head) < 0) { - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; - fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate initial chunk %d for class %d\n", - i, class_idx); - g_hakmem_lock_depth--; - - // Cleanup on failure - SuperSlab* chunk = head->first_chunk; - while (chunk) { - SuperSlab* next = chunk->next_chunk; - superslab_free(chunk); - chunk = next; - } - pthread_mutex_destroy(&head->expansion_lock); - free(head); - return NULL; - } - 
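The PERF_OPT note above removes the metadata memset on the strength of a kernel guarantee: anonymous private mappings are zero-filled on first touch. A tiny standalone check of that assumption (purely illustrative, not part of the allocator; debug/ASan builds may still want the explicit memset):

#include <assert.h>
#include <stddef.h>
#include <sys/mman.h>

int main(void) {
    size_t len = (size_t)1 << 21;   // one 2MB SuperSlab-sized region
    unsigned char* p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(p != MAP_FAILED);
    for (size_t i = 0; i < len; i += 4096)
        assert(p[i] == 0);          // every page reads back as zero
    munmap(p, len);
    return 0;
}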
} - - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, "[HAKMEM] Initialized SuperSlabHead for class %d: %zu initial chunks\n", - class_idx, atomic_load_explicit(&head->total_chunks, memory_order_relaxed)); -#endif - g_hakmem_lock_depth--; - - return head; -} - -// Expand SuperSlabHead by allocating and linking a new chunk -int expand_superslab_head(SuperSlabHead* head) { - if (!head) { - return -1; - } - - // Allocate new chunk via existing superslab_allocate - SuperSlab* new_chunk = superslab_allocate(head->class_idx); - if (!new_chunk) { -#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE) - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; - fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate new chunk for class %d (system OOM)\n", - head->class_idx); - g_hakmem_lock_depth--; -#endif - return -1; // True OOM (system out of memory) - } - - // CRITICAL FIX: Initialize slab 0 so bitmap != 0x00000000 - // Phase 2a chunks must have at least one usable slab after allocation - size_t block_size = g_tiny_class_sizes[head->class_idx]; - // Use pthread_self() directly since tiny_self_u32() is static inline in hakmem_tiny.c - uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); - - superslab_init_slab(new_chunk, 0, block_size, owner_tid); - - // Initialize the next_chunk link to NULL - new_chunk->next_chunk = NULL; - - // Thread-safe linking - pthread_mutex_lock(&head->expansion_lock); - - if (head->current_chunk) { - // Find the tail of the list (optimization: could cache tail pointer) - SuperSlab* tail = head->current_chunk; - while (tail->next_chunk) { - tail = tail->next_chunk; - } - tail->next_chunk = new_chunk; - } else { - // First chunk - head->first_chunk = new_chunk; - } - - // Update current chunk to new chunk (for fast allocation) - head->current_chunk = new_chunk; - - // Increment total chunks atomically - size_t old_count = atomic_fetch_add_explicit(&head->total_chunks, 1, memory_order_relaxed); - size_t new_count = old_count + 1; - - pthread_mutex_unlock(&head->expansion_lock); - -#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE) - extern __thread int g_hakmem_lock_depth; - g_hakmem_lock_depth++; - fprintf(stderr, "[HAKMEM] Expanded SuperSlabHead for class %d: %zu chunks now (bitmap=0x%08x)\n", - head->class_idx, new_count, new_chunk->slab_bitmap); - g_hakmem_lock_depth--; -#endif - - return 0; -} - -// Find which chunk a pointer belongs to -SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx) { - if (!ptr || class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { - return NULL; - } - - SuperSlabHead* head = g_superslab_heads[class_idx]; - if (!head) { - return NULL; - } - - uintptr_t ptr_addr = (uintptr_t)ptr; - - // Walk the chunk list - SuperSlab* chunk = head->first_chunk; - while (chunk) { - // Check if ptr is within this chunk's memory range - // Each chunk is aligned to SUPERSLAB_SIZE (1MB or 2MB) - uintptr_t chunk_start = (uintptr_t)chunk; - size_t chunk_size = (size_t)1 << chunk->lg_size; // Use actual chunk size - uintptr_t chunk_end = chunk_start + chunk_size; - - if (ptr_addr >= chunk_start && ptr_addr < chunk_end) { - // Found the chunk - return chunk; - } - - chunk = chunk->next_chunk; - } - - return NULL; // Not found in any chunk -} - -// ============================================================================ -// SuperSlab Deallocation -// ============================================================================ - -void superslab_free(SuperSlab* ss) { - if 
(!ss || ss->magic != SUPERSLAB_MAGIC) { - return; // Invalid SuperSlab - } - - // ADD DEBUG LOGGING - static __thread int dbg = -1; -#if HAKMEM_BUILD_RELEASE - dbg = 0; -#else - if (__builtin_expect(dbg == -1, 0)) { - const char* e = getenv("HAKMEM_SS_FREE_DEBUG"); - dbg = (e && *e && *e != '0') ? 1 : 0; - } -#endif - if (dbg == 1) { - fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n", - (void*)ss, ss->lg_size, ss->active_slabs); - } - - // Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap - size_t ss_size = (size_t)1 << ss->lg_size; - - // Phase 1: Unregister SuperSlab from registry FIRST - // CRITICAL: Must unregister BEFORE adding to LRU cache - // Reason: Cached SuperSlabs should NOT be found by lookups - uintptr_t base = (uintptr_t)ss; - hak_super_unregister(base); - - // Memory fence to ensure unregister is visible - atomic_thread_fence(memory_order_release); - - // Phase 9: Try LRU cache first (lazy deallocation) - // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation - // Magic will be cleared on eviction or reuse - int lru_cached = hak_ss_lru_push(ss); - if (dbg == 1) { - fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached); - } - if (lru_cached) { - // Successfully cached in LRU - defer munmap - return; - } - - // LRU cache full or disabled - try old cache using head class_idx (if known) - int old_cached = ss_cache_push(0, ss); - if (old_cached) { - ss_stats_cache_store(); - return; - } - - // Both caches full - immediately free to OS (eager deallocation) - // Clear magic to prevent use-after-free - ss->magic = 0; - -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n", - (void*)ss, ss_size, - atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed)); -#endif - - munmap(ss, ss_size); - - // Update statistics for actual release to OS - pthread_mutex_lock(&g_superslab_lock); - g_superslabs_freed++; - // Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here - g_bytes_allocated -= ss_size; - pthread_mutex_unlock(&g_superslab_lock); - -#if !HAKMEM_BUILD_RELEASE - fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n", - (unsigned long long)g_superslabs_freed); -#endif -} - -// ============================================================================ -// Slab Initialization within SuperSlab -// ============================================================================ - -void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid) -{ - if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { - return; - } - - // Phase E1-CORRECT unified geometry: - // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls]) - // - usable bytes are determined by slab index (slab0 vs others) - // - capacity = usable / stride for ALL classes (including former C7) - size_t usable_size = (slab_idx == 0) - ? 
SUPERSLAB_SLAB0_USABLE_SIZE - : SUPERSLAB_SLAB_USABLE_SIZE; - size_t stride = block_size; - uint16_t capacity = (uint16_t)(usable_size / stride); - -#if !HAKMEM_BUILD_RELEASE - if (slab_idx == 0) { - fprintf(stderr, - "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n", - usable_size, stride, (unsigned)capacity); - } -#endif - - TinySlabMeta* meta = &ss->slabs[slab_idx]; - meta->freelist = NULL; // NULL = linear allocation mode - meta->used = 0; - meta->active = 0; // P1.3: blocks in use by user (starts at 0) - meta->tls_cached = 0; // P2.2: blocks cached in TLS SLL (starts at 0) - meta->capacity = capacity; - meta->carved = 0; - // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes - meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu); - // Fail-safe: stamp class_idx from geometry (stride → class). - // This ensures legacy/shared/legacy-refill paths all end with a correct class. - for (int i = 0; i < TINY_NUM_CLASSES; i++) { - if (g_tiny_class_sizes[i] == stride) { - meta->class_idx = (uint8_t)i; - // P1.1: Update class_map for out-of-band lookup on free path - ss->class_map[slab_idx] = (uint8_t)i; - break; - } - } - - superslab_activate_slab(ss, slab_idx); -} - -// ============================================================================ -// Slab Bitmap Management -// ============================================================================ - -void superslab_activate_slab(SuperSlab* ss, int slab_idx) { - if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { - return; - } - uint32_t mask = 1u << slab_idx; - if ((ss->slab_bitmap & mask) == 0) { - ss->slab_bitmap |= mask; - ss->active_slabs++; - - // Phase 3d-C: Update hot/cold indices after activating new slab - ss_update_hot_cold_indices(ss); - } -} - -void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) { - if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { - return; - } - uint32_t mask = 1u << slab_idx; - if (ss->slab_bitmap & mask) { - ss->slab_bitmap &= ~mask; - ss->active_slabs--; - } -} - -int superslab_find_free_slab(SuperSlab* ss) { - if (!ss) return -1; - if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) { - return -1; // No free slabs - } - // Find first 0 bit in bitmap - int cap = ss_slabs_capacity(ss); - for (int i = 0; i < cap; i++) { - if ((ss->slab_bitmap & (1u << i)) == 0) { - return i; - } - } - return -1; -} - -// ============================================================================ -// Statistics / Debugging -// ============================================================================ - -void superslab_print_stats(SuperSlab* ss) { - if (!ss || ss->magic != SUPERSLAB_MAGIC) { - printf("Invalid SuperSlab\n"); - return; - } - - printf("=== SuperSlab Stats ===\n"); - printf("Address: %p\n", (void*)ss); - // Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx. 
- printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss)); - printf("Bitmap: 0x%08X\n", ss->slab_bitmap); - printf("\nPer-slab details:\n"); - for (int i = 0; i < ss_slabs_capacity(ss); i++) { - if (ss->slab_bitmap & (1u << i)) { - TinySlabMeta* meta = &ss->slabs[i]; - printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n", - i, meta->used, meta->capacity, meta->freelist, - (unsigned)meta->class_idx, (unsigned)meta->owner_tid_low); - } - } - printf("\n"); -} - -// Global statistics -void superslab_print_global_stats(void) { - pthread_mutex_lock(&g_superslab_lock); - printf("=== Global SuperSlab Stats ===\n"); - printf("SuperSlabs allocated: %lu\n", g_superslabs_allocated); - printf("SuperSlabs freed: %lu\n", g_superslabs_freed); - printf("SuperSlabs active: %lu\n", g_superslabs_allocated - g_superslabs_freed); - printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024)); - pthread_mutex_unlock(&g_superslab_lock); -} - -// ============================================================================ -// Phase 8.3: ACE Statistics / Debugging -// ============================================================================ - -void superslab_ace_print_stats(void) { - printf("=== ACE (Adaptive Cache Engine) Stats ===\n"); - const char* class_names[8] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"}; - - printf("Class Curr Targ Hot Allocs Refills Spills LiveBlks\n"); - printf("--------------------------------------------------------------\n"); - - for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) { - SuperSlabACEState* c = &g_ss_ace[i]; - printf("%-6s %2uMB %2uMB %4u %7u %8u %7u %9u\n", - class_names[i], - (1u << c->current_lg) / (1024 * 1024), - (1u << c->target_lg) / (1024 * 1024), - c->hot_score, - c->alloc_count, - c->refill_count, - c->spill_count, - c->live_blocks); - } - printf("\n"); -} - -// ============================================================================ -// Phase 8.3: ACE Tick Function (Promotion/Demotion Logic) -// ============================================================================ - -#define ACE_TICK_NS (150ULL * 1000 * 1000) // 150ms tick interval -#define ACE_COOLDOWN_NS (800ULL * 1000 * 1000) // 0.8s cooldown (anti-oscillation) - -// Simplified thresholds for refill activity -#define HI_REFILL(k) (g_ss_ace[k].refill_count > 64) // High refill rate -#define MID_REFILL(k) (g_ss_ace[k].refill_count > 16) // Medium refill rate - -// Object sizes per class (for capacity calculation) -// Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes -static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64}; - -void hak_tiny_superslab_ace_tick(int k, uint64_t now) { - if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; - - SuperSlabACEState* c = &g_ss_ace[k]; - - // Rate limiting: only tick every ACE_TICK_NS (~150ms) - if (now - c->last_tick_ns < ACE_TICK_NS) return; - - // Calculate capacity for 1MB and 2MB SuperSlabs - int obj_size = g_tiny_obj_sizes[k]; - double cap1MB = (double)((1U << 20) / obj_size); // 1MB capacity - double cap2MB = (double)((1U << 21) / obj_size); // 2MB capacity - - // Calculate hotness score (weighted: 60% live blocks, 40% refill rate) - double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count; - if (hot < 0) hot = 0; - if (hot > 1000) hot = 1000; - c->hot_score = (uint16_t)hot; - - // Cooldown mechanism: prevent size changes within 0.8s of last change - static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0}; - - if (now - 
last_switch_ns[k] >= ACE_COOLDOWN_NS) { - if (c->current_lg <= 20) { - // Promotion condition: 1MB → 2MB - // High demand (live > 75% capacity) AND high refill rate - if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) { - c->target_lg = 21; // Promote to 2MB - last_switch_ns[k] = now; - } - } else { - // Demotion condition: 2MB → 1MB - // Low demand (live < 35% capacity) AND low refill rate - if (c->live_blocks < 0.35 * cap2MB && !MID_REFILL(k)) { - c->target_lg = 20; // Demote to 1MB - last_switch_ns[k] = now; - } - } - } - - // EMA-style decay for counters (reduce by 75% each tick) - c->alloc_count = c->alloc_count / 4; - c->refill_count = c->refill_count / 4; - c->spill_count = c->spill_count / 4; - // live_blocks is updated incrementally by alloc/free, not decayed here - - c->last_tick_ns = now; -} - -// ============================================================================ -// Phase 8.4: ACE Observer (Registry-based, zero hot-path overhead) -// ============================================================================ - -// Global debug flag (set once at initialization) -static int g_ace_debug = 0; - -// Registry-based observation: scan all SuperSlabs for usage stats -static void ace_observe_and_decide(int k) { - if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; - - SuperSlabACEState* c = &g_ss_ace[k]; - - // Scan Registry to count SuperSlabs and total live blocks - int ss_count = 0; - uint32_t total_live = 0; - - for (int i = 0; i < SUPER_REG_SIZE; i++) { - SuperRegEntry* e = &g_super_reg[i]; - - // Atomic read (thread-safe) - uintptr_t base = atomic_load_explicit( - (_Atomic uintptr_t*)&e->base, - memory_order_acquire); - - if (base == 0) continue; // Empty slot - - // Phase 8.4: Safety check - skip if ss pointer is invalid - if (!e->ss) continue; - // Phase 12: per-SS size_class removed; registry entries are per-class by construction. - - ss_count++; - // Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead) - uint32_t ss_live = 0; - int cap_scan = ss_slabs_capacity(e->ss); - for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) { - TinySlabMeta* meta = &e->ss->slabs[slab_idx]; - // Relaxed read is OK (stats only, no hot-path impact) - ss_live += meta->used; - } - total_live += ss_live; - } - - // Calculate utilization - int obj_size = g_tiny_obj_sizes[k]; - uint8_t current_lg = atomic_load_explicit( - (_Atomic uint8_t*)&c->current_lg, - memory_order_relaxed); - - uint32_t capacity = (ss_count > 0) ? 
ss_count * ((1U << current_lg) / obj_size) : 1; - double util = (double)total_live / capacity; - - // Update hot_score (for debugging/visualization) - c->hot_score = (uint16_t)(util * 1000); - if (c->hot_score > 1000) c->hot_score = 1000; - - // Promotion/Demotion decision - uint8_t new_target = current_lg; - - if (current_lg <= 20) { - // Promotion: 1MB → 2MB - if (util > 0.75) { - new_target = 21; - } - } else { - // Demotion: 2MB → 1MB - if (util < 0.35) { - new_target = 20; - } - } - - // Debug output (if enabled) - if (g_ace_debug && ss_count > 0) { - fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n", - k, obj_size, ss_count, total_live, capacity, util * 100.0, - current_lg, new_target, c->hot_score); - } - - // Atomic write (thread-safe) - if (new_target != current_lg) { - atomic_store_explicit( - (_Atomic uint8_t*)&c->target_lg, - new_target, - memory_order_release); - if (g_ace_debug) { - fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n", - k, 1 << (current_lg - 20), 1 << (new_target - 20), util * 100.0); - } - } -} - -// Called from Learner thread (background observation) -void hak_tiny_superslab_ace_observe_all(void) { - // Initialize debug flag once - static int initialized = 0; - if (!initialized) { - const char* ace_debug = getenv("HAKMEM_ACE_DEBUG"); - g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0; - initialized = 1; - } - - for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) { - ace_observe_and_decide(k); - } -} diff --git a/core/hakmem_tiny_superslab_internal.h b/core/hakmem_tiny_superslab_internal.h new file mode 100644 index 00000000..1e0d307e --- /dev/null +++ b/core/hakmem_tiny_superslab_internal.h @@ -0,0 +1,128 @@ +// hakmem_tiny_superslab_internal.h - Internal declarations for superslab refactor +// Purpose: Shared declarations between superslab implementation files +// License: MIT +// Date: 2025-11-28 + +#ifndef HAKMEM_TINY_SUPERSLAB_INTERNAL_H +#define HAKMEM_TINY_SUPERSLAB_INTERNAL_H + +#include "hakmem_tiny_superslab.h" +#include "box/ss_hot_cold_box.h" +#include "hakmem_super_registry.h" +#include "hakmem_tiny.h" +#include "hakmem_tiny_config.h" +#include "hakmem_shared_pool.h" +#include "hakmem_internal.h" +#include "tiny_region_id.h" +#include "hakmem_tiny_integrity.h" +#include "box/tiny_next_ptr_box.h" +#include "box/slab_freelist_atomic.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// ============================================================================ +// Global Variables (defined in superslab_stats.c) +// ============================================================================ + +extern pthread_mutex_t g_superslab_lock; +extern uint64_t g_superslabs_allocated; +extern uint64_t g_superslabs_freed; +extern uint64_t g_bytes_allocated; +extern _Atomic uint64_t g_ss_active_dec_calls; +extern _Atomic uint64_t g_hak_tiny_free_calls; +extern _Atomic uint64_t g_ss_remote_push_calls; +extern _Atomic uint64_t g_free_ss_enter; +extern _Atomic uint64_t g_free_local_box_calls; +extern _Atomic uint64_t g_free_remote_box_calls; +extern uint64_t g_ss_alloc_by_class[8]; +extern uint64_t g_ss_freed_by_class[8]; +extern _Atomic uint64_t g_ss_mmap_count; +extern _Atomic uint64_t g_final_fallback_mmap_count; + +// ============================================================================ +// SuperSlabHead Management (defined in superslab_head.c) +// ============================================================================ + +extern 
SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS]; + +// ============================================================================ +// Cache System (defined in superslab_cache.c) +// ============================================================================ + +typedef struct SuperslabCacheEntry { + struct SuperslabCacheEntry* next; +} SuperslabCacheEntry; + +extern SuperslabCacheEntry* g_ss_cache_head[8]; +extern size_t g_ss_cache_count[8]; +extern size_t g_ss_cache_cap[8]; +extern size_t g_ss_precharge_target[8]; +extern _Atomic int g_ss_precharge_done[8]; +extern int g_ss_cache_enabled; +extern pthread_once_t g_ss_cache_once; +extern pthread_mutex_t g_ss_cache_lock[8]; +extern uint64_t g_ss_cache_hits[8]; +extern uint64_t g_ss_cache_misses[8]; +extern uint64_t g_ss_cache_puts[8]; +extern uint64_t g_ss_cache_drops[8]; +extern uint64_t g_ss_cache_precharged[8]; +extern uint64_t g_superslabs_reused; +extern uint64_t g_superslabs_cached; + +// Cache functions (defined in superslab_cache.c) +void ss_cache_global_init(void); +void ss_cache_ensure_init(void); +void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate); +void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask); +SuperslabCacheEntry* ss_cache_pop(uint8_t size_class); +int ss_cache_push(uint8_t size_class, SuperSlab* ss); + +// ============================================================================ +// ACE (Adaptive Cache Engine) - defined in superslab_ace.c +// ============================================================================ + +extern SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS]; +extern int g_ss_force_lg; +extern _Atomic int g_ss_populate_once; + +uint8_t hak_tiny_superslab_next_lg(int class_idx); +void ace_observe_and_decide(int k); + +// ============================================================================ +// Statistics (defined in superslab_stats.c) +// ============================================================================ + +void ss_stats_os_alloc(uint8_t size_class, size_t ss_size); +void ss_stats_cache_reuse(void); +void ss_stats_cache_store(void); +void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err); + +// ============================================================================ +// Slab Management (defined in superslab_slab.c) +// ============================================================================ + +// Drain remote MPSC stack into freelist (ownership already verified by caller) +void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta); + +// ============================================================================ +// Backend Allocation (defined in superslab_backend.c) +// ============================================================================ + +void* hak_tiny_alloc_superslab_backend_legacy(int class_idx); +void* hak_tiny_alloc_superslab_backend_shared(int class_idx); + +// ============================================================================ +// SuperSlabHead Management (defined in superslab_head.c) +// ============================================================================ + +SuperSlabHead* init_superslab_head(int class_idx); +int expand_superslab_head(SuperSlabHead* head); +SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx); + +#endif // HAKMEM_TINY_SUPERSLAB_INTERNAL_H diff --git a/core/superslab_ace.c b/core/superslab_ace.c new file mode 100644 index 00000000..65583150 --- /dev/null +++ b/core/superslab_ace.c @@ -0,0 +1,230 @@ +// 
superslab_ace.c - ACE (Adaptive Cache Engine) for SuperSlab allocator +// Purpose: Dynamic SuperSlab size adaptation based on usage patterns +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// ACE (Adaptive Cache Engine) State +// ============================================================================ + +SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}}; + +int g_ss_force_lg = -1; +_Atomic int g_ss_populate_once = 0; + +// ============================================================================ +// ACE Helper Functions +// ============================================================================ + +// Forward: decide next SuperSlab lg for a class (ACE-aware, clamped) +uint8_t hak_tiny_superslab_next_lg(int class_idx) +{ + if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { + return SUPERSLAB_LG_DEFAULT; + } + // Prefer ACE target if within allowed range + uint8_t t = atomic_load_explicit((_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg, + memory_order_relaxed); + if (t < SUPERSLAB_LG_MIN || t > SUPERSLAB_LG_MAX) { + return SUPERSLAB_LG_DEFAULT; + } + return t; +} + +// ============================================================================ +// ACE Statistics / Debugging +// ============================================================================ + +void superslab_ace_print_stats(void) { + printf("=== ACE (Adaptive Cache Engine) Stats ===\n"); + const char* class_names[8] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"}; + + printf("Class Curr Targ Hot Allocs Refills Spills LiveBlks\n"); + printf("--------------------------------------------------------------\n"); + + for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) { + SuperSlabACEState* c = &g_ss_ace[i]; + printf("%-6s %2uMB %2uMB %4u %7u %8u %7u %9u\n", + class_names[i], + (1u << c->current_lg) / (1024 * 1024), + (1u << c->target_lg) / (1024 * 1024), + c->hot_score, + c->alloc_count, + c->refill_count, + c->spill_count, + c->live_blocks); + } + printf("\n"); +} + +// ============================================================================ +// ACE Tick Function (Promotion/Demotion Logic) +// ============================================================================ + +#define ACE_TICK_NS (150ULL * 1000 * 1000) // 150ms tick interval +#define ACE_COOLDOWN_NS (800ULL * 1000 * 1000) // 0.8s cooldown (anti-oscillation) + +// Simplified thresholds for refill activity +#define HI_REFILL(k) (g_ss_ace[k].refill_count > 64) // High refill rate +#define MID_REFILL(k) (g_ss_ace[k].refill_count > 16) // Medium refill rate + +// Object sizes per class (for capacity calculation) +// Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes +static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64}; + +void hak_tiny_superslab_ace_tick(int k, uint64_t now) { + if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; + + SuperSlabACEState* c = &g_ss_ace[k]; + + // Rate limiting: only tick every ACE_TICK_NS (~150ms) + if (now - c->last_tick_ns < ACE_TICK_NS) return; + + // Calculate capacity for 1MB and 2MB SuperSlabs + int obj_size = g_tiny_obj_sizes[k]; + double cap1MB = (double)((1U << 20) / obj_size); // 1MB capacity + double cap2MB = (double)((1U << 21) / obj_size); // 2MB capacity + + // Calculate hotness score (weighted: 60% live blocks, 40% refill rate) + double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count; + if (hot < 0) hot = 0; + if 
(hot > 1000) hot = 1000; + c->hot_score = (uint16_t)hot; + + // Cooldown mechanism: prevent size changes within 0.8s of last change + static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0}; + + if (now - last_switch_ns[k] >= ACE_COOLDOWN_NS) { + if (c->current_lg <= 20) { + // Promotion condition: 1MB → 2MB + // High demand (live > 75% capacity) AND high refill rate + if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) { + c->target_lg = 21; // Promote to 2MB + last_switch_ns[k] = now; + } + } else { + // Demotion condition: 2MB → 1MB + // Low demand (live < 35% capacity) AND low refill rate + if (c->live_blocks < 0.35 * cap2MB && !MID_REFILL(k)) { + c->target_lg = 20; // Demote to 1MB + last_switch_ns[k] = now; + } + } + } + + // EMA-style decay for counters (reduce by 75% each tick) + c->alloc_count = c->alloc_count / 4; + c->refill_count = c->refill_count / 4; + c->spill_count = c->spill_count / 4; + // live_blocks is updated incrementally by alloc/free, not decayed here + + c->last_tick_ns = now; +} + +// ============================================================================ +// ACE Observer (Registry-based, zero hot-path overhead) +// ============================================================================ + +// Global debug flag (set once at initialization) +static int g_ace_debug = 0; + +// Registry-based observation: scan all SuperSlabs for usage stats +void ace_observe_and_decide(int k) { + if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; + + SuperSlabACEState* c = &g_ss_ace[k]; + + // Scan Registry to count SuperSlabs and total live blocks + int ss_count = 0; + uint32_t total_live = 0; + + for (int i = 0; i < SUPER_REG_SIZE; i++) { + SuperRegEntry* e = &g_super_reg[i]; + + // Atomic read (thread-safe) + uintptr_t base = atomic_load_explicit( + (_Atomic uintptr_t*)&e->base, + memory_order_acquire); + + if (base == 0) continue; // Empty slot + + // Phase 8.4: Safety check - skip if ss pointer is invalid + if (!e->ss) continue; + // Phase 12: per-SS size_class removed; registry entries are per-class by construction. + + ss_count++; + // Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead) + uint32_t ss_live = 0; + int cap_scan = ss_slabs_capacity(e->ss); + for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) { + TinySlabMeta* meta = &e->ss->slabs[slab_idx]; + // Relaxed read is OK (stats only, no hot-path impact) + ss_live += meta->used; + } + total_live += ss_live; + } + + // Calculate utilization + int obj_size = g_tiny_obj_sizes[k]; + uint8_t current_lg = atomic_load_explicit( + (_Atomic uint8_t*)&c->current_lg, + memory_order_relaxed); + + uint32_t capacity = (ss_count > 0) ? 
ss_count * ((1U << current_lg) / obj_size) : 1; + double util = (double)total_live / capacity; + + // Update hot_score (for debugging/visualization) + c->hot_score = (uint16_t)(util * 1000); + if (c->hot_score > 1000) c->hot_score = 1000; + + // Promotion/Demotion decision + uint8_t new_target = current_lg; + + if (current_lg <= 20) { + // Promotion: 1MB → 2MB + if (util > 0.75) { + new_target = 21; + } + } else { + // Demotion: 2MB → 1MB + if (util < 0.35) { + new_target = 20; + } + } + + // Debug output (if enabled) + if (g_ace_debug && ss_count > 0) { + fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n", + k, obj_size, ss_count, total_live, capacity, util * 100.0, + current_lg, new_target, c->hot_score); + } + + // Atomic write (thread-safe) + if (new_target != current_lg) { + atomic_store_explicit( + (_Atomic uint8_t*)&c->target_lg, + new_target, + memory_order_release); + if (g_ace_debug) { + fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n", + k, 1 << (current_lg - 20), 1 << (new_target - 20), util * 100.0); + } + } +} + +// Called from Learner thread (background observation) +void hak_tiny_superslab_ace_observe_all(void) { + // Initialize debug flag once + static int initialized = 0; + if (!initialized) { + const char* ace_debug = getenv("HAKMEM_ACE_DEBUG"); + g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0; + initialized = 1; + } + + for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) { + ace_observe_and_decide(k); + } +} diff --git a/core/superslab_allocate.c b/core/superslab_allocate.c new file mode 100644 index 00000000..f4f03b11 --- /dev/null +++ b/core/superslab_allocate.c @@ -0,0 +1,313 @@ +// superslab_allocate.c - SuperSlab allocation and deallocation +// Purpose: Main allocation/free entry points for SuperSlabs +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// SuperSlab Allocation (2MB aligned) +// ============================================================================ + +SuperSlab* superslab_allocate(uint8_t size_class) { + // Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗 + static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate + static __thread unsigned long fault_tick = 0; + if (__builtin_expect(fault_rate == -1, 0)) { + const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE"); + if (e && *e) { + int v = atoi(e); if (v < 0) v = 0; fault_rate = v; + } else { + fault_rate = 0; + } + } + if (fault_rate > 0) { + unsigned long t = ++fault_tick; + if ((t % (unsigned long)fault_rate) == 0ul) { + return NULL; // simulate OOM + } + } + // Optional env clamp for SuperSlab size + static int env_parsed = 0; + static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT; // Start with default (2MB) + static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX; + if (!env_parsed) { + char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB"); + if (maxmb) { + int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21; + } + char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB"); + if (minmb) { + int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21; + } + if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env; + const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG"); + if (force_lg_env && *force_lg_env) { + int v = atoi(force_lg_env); + if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) { + 
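+                // In-range values pin every new SuperSlab to 2^v bytes (e.g. setting
+                // HAKMEM_TINY_SS_FORCE_LG=21 forces 2MB chunks and overrides the ACE
+                // target below); out-of-range values are silently ignored.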
g_ss_force_lg = v; + g_ss_min_lg_env = g_ss_max_lg_env = v; + } + } + size_t precharge_default = 0; + const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE"); + if (precharge_env && *precharge_env) { + long v = atol(precharge_env); + if (v < 0) v = 0; + precharge_default = (size_t)v; + if (v > 0) { + atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); + } + } + size_t cache_default = 0; + const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE"); + if (cache_env && *cache_env) { + long v = atol(cache_env); + if (v < 0) v = 0; + cache_default = (size_t)v; + } + for (int i = 0; i < 8; i++) { + g_ss_cache_cap[i] = cache_default; + g_ss_precharge_target[i] = precharge_default; + } + for (int i = 0; i < 8; i++) { + char name[64]; + snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i); + char* cap_env = getenv(name); + if (cap_env && *cap_env) { + long v = atol(cap_env); + if (v < 0) v = 0; + g_ss_cache_cap[i] = (size_t)v; + } + snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i); + char* pre_env = getenv(name); + if (pre_env && *pre_env) { + long v = atol(pre_env); + if (v < 0) v = 0; + g_ss_precharge_target[i] = (size_t)v; + if (v > 0) { + atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); + } + } + if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) { + g_ss_cache_enabled = 1; + } + } + const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE"); + if (populate_env && atoi(populate_env) != 0) { + atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed); + } + env_parsed = 1; + } + + uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class); + if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env; + if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env; + size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB + uintptr_t ss_mask = ss_size - 1; + int from_cache = 0; + void* ptr = NULL; + + // Debug logging flag (lazy init) + static __thread int dbg = -1; +#if HAKMEM_BUILD_RELEASE + dbg = 0; +#else + if (__builtin_expect(dbg == -1, 0)) { + const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG"); + dbg = (e && *e && *e != '0') ? 
1 : 0; + } +#endif + + // Phase 9: Try LRU cache first (lazy deallocation) + SuperSlab* cached_ss = hak_ss_lru_pop(size_class); + if (cached_ss) { + ptr = (void*)cached_ss; + from_cache = 1; + // Debug logging for REFILL from LRU + if (dbg == 1) { + fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n", + size_class, (void*)cached_ss); + } + // Skip old cache path - LRU cache takes priority + } else if (g_ss_cache_enabled && size_class < 8) { + // Fallback to old cache (will be deprecated) + ss_cache_precharge(size_class, ss_size, ss_mask); + SuperslabCacheEntry* old_cached = ss_cache_pop(size_class); + if (old_cached) { + ptr = (void*)old_cached; + from_cache = 1; + // Debug logging for REFILL from prewarm (old cache is essentially prewarm) + if (dbg == 1) { + fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n", + size_class, (void*)old_cached); + } + } + } + + if (!ptr) { + int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel); + ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate); + if (!ptr) { + return NULL; + } + // Debug logging for REFILL with new allocation + if (dbg == 1) { + fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n", + size_class, (void*)ptr); + } + } + + // Initialize SuperSlab header (Phase 12: no global size_class field) + SuperSlab* ss = (SuperSlab*)ptr; + ss->magic = SUPERSLAB_MAGIC; + ss->active_slabs = 0; + ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB) + ss->slab_bitmap = 0; + ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask + ss->partial_epoch = 0; + ss->publish_hint = 0xFF; + + // Initialize atomics explicitly + atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed); + atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed); + atomic_store_explicit(&ss->listed, 0, memory_order_relaxed); + ss->partial_next = NULL; + + // Phase 9: Initialize LRU fields + ss->last_used_ns = 0; + ss->generation = 0; + ss->lru_prev = NULL; + ss->lru_next = NULL; + + // Phase 3d-C: Initialize Hot/Cold Split fields + ss->hot_count = 0; + ss->cold_count = 0; + for (int i = 0; i < 16; i++) { + ss->hot_indices[i] = 0; + ss->cold_indices[i] = 0; + } + + // Initialize all slab metadata (only up to max slabs for this size) + int max_slabs = (int)(ss_size / SLAB_SIZE); + + // PERF_OPT: memset removed - mmap() already returns zero-initialized pages + // Previous memset calls consumed 23.83% CPU time (perf analysis 2025-11-28) + // Measured improvement: +1.3% throughput (71.86M → 72.78M ops/s) + // Note: ASan/debug builds may need these, but production mmap guarantees zero pages + // memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta)); + // memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t)); + // memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t)); + // memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t)); + + for (int i = 0; i < max_slabs; i++) { + // Phase 1: Atomic initialization (freelist + used are now _Atomic) + slab_freelist_store_relaxed(&ss->slabs[i], NULL); // Explicit NULL (redundant after memset, but clear intent) + atomic_store_explicit(&ss->slabs[i].used, 0, memory_order_relaxed); + ss->slabs[i].capacity = 0; + ss->slabs[i].owner_tid_low = 0; + + // Initialize remote queue atomics (memset already zeroed, but use proper atomic init) + atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed); + atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed); + atomic_store_explicit(&ss->slab_listed[i], 0, 
memory_order_relaxed); + } + + if (from_cache) { + ss_stats_cache_reuse(); + } + + // Phase 8.3: Update ACE current_lg to match allocated size + g_ss_ace[size_class].current_lg = lg; + + // Phase 1: Register SuperSlab in global registry for fast lookup + // CRITICAL: Register AFTER full initialization (ss structure is ready) + uintptr_t base = (uintptr_t)ss; + if (!hak_super_register(base, ss)) { + // Registry full - this is a fatal error + fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss); + // Still return ss to avoid memory leak, but lookups may fail + } + + return ss; +} + +// ============================================================================ +// SuperSlab Deallocation +// ============================================================================ + +void superslab_free(SuperSlab* ss) { + if (!ss || ss->magic != SUPERSLAB_MAGIC) { + return; // Invalid SuperSlab + } + + // ADD DEBUG LOGGING + static __thread int dbg = -1; +#if HAKMEM_BUILD_RELEASE + dbg = 0; +#else + if (__builtin_expect(dbg == -1, 0)) { + const char* e = getenv("HAKMEM_SS_FREE_DEBUG"); + dbg = (e && *e && *e != '0') ? 1 : 0; + } +#endif + if (dbg == 1) { + fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n", + (void*)ss, ss->lg_size, ss->active_slabs); + } + + // Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap + size_t ss_size = (size_t)1 << ss->lg_size; + + // Phase 1: Unregister SuperSlab from registry FIRST + // CRITICAL: Must unregister BEFORE adding to LRU cache + // Reason: Cached SuperSlabs should NOT be found by lookups + uintptr_t base = (uintptr_t)ss; + hak_super_unregister(base); + + // Memory fence to ensure unregister is visible + atomic_thread_fence(memory_order_release); + + // Phase 9: Try LRU cache first (lazy deallocation) + // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation + // Magic will be cleared on eviction or reuse + int lru_cached = hak_ss_lru_push(ss); + if (dbg == 1) { + fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached); + } + if (lru_cached) { + // Successfully cached in LRU - defer munmap + return; + } + + // LRU cache full or disabled - try old cache using head class_idx (if known) + int old_cached = ss_cache_push(0, ss); + if (old_cached) { + ss_stats_cache_store(); + return; + } + + // Both caches full - immediately free to OS (eager deallocation) + // Clear magic to prevent use-after-free + ss->magic = 0; + +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n", + (void*)ss, ss_size, + atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed)); +#endif + + munmap(ss, ss_size); + + // Update statistics for actual release to OS + pthread_mutex_lock(&g_superslab_lock); + g_superslabs_freed++; + // Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here + g_bytes_allocated -= ss_size; + pthread_mutex_unlock(&g_superslab_lock); + +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n", + (unsigned long long)g_superslabs_freed); +#endif +} diff --git a/core/superslab_backend.c b/core/superslab_backend.c new file mode 100644 index 00000000..b7593f07 --- /dev/null +++ b/core/superslab_backend.c @@ -0,0 +1,281 @@ +// superslab_backend.c - Backend allocation paths for SuperSlab allocator +// Purpose: Legacy and shared pool backend implementations +// License: MIT +// Date: 2025-11-28 + +#include 
"hakmem_tiny_superslab_internal.h" + +/* + * superslab_return_block() - Single exit point for all SuperSlab allocations + * + * Purpose: Ensures consistent header writing across all allocation paths. + * This prevents bugs where headers are written in some paths but not others. + * + * Parameters: + * base - Block start address from SuperSlab geometry + * class_idx - Tiny class index (0-7) + * + * Returns: + * User pointer (base + 1 if headers enabled, base otherwise) + * + * Header writing behavior: + * - If HAKMEM_TINY_HEADER_CLASSIDX=1: Writes header via tiny_region_id_write_header() + * - If HAKMEM_TINY_HEADER_CLASSIDX=0: Returns base directly (no header) + */ +static inline void* superslab_return_block(void* base, int class_idx) { +#if HAKMEM_TINY_HEADER_CLASSIDX + return tiny_region_id_write_header(base, class_idx); +#else + return (void*)base; +#endif +} + +/* + * Legacy backend for hak_tiny_alloc_superslab_box(). + * + * Phase 12 Stage A/B: + * - Uses per-class SuperSlabHead (g_superslab_heads) as the implementation. + * - Callers MUST use hak_tiny_alloc_superslab_box() and never touch this directly. + * - Later Stage C: this function will be replaced by a shared_pool backend. + */ +void* hak_tiny_alloc_superslab_backend_legacy(int class_idx) +{ + if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { + return NULL; + } + + SuperSlabHead* head = g_superslab_heads[class_idx]; + if (!head) { + head = init_superslab_head(class_idx); + if (!head) { + return NULL; + } + g_superslab_heads[class_idx] = head; + } + + SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk; + + while (chunk) { + int cap = ss_slabs_capacity(chunk); + for (int slab_idx = 0; slab_idx < cap; slab_idx++) { + TinySlabMeta* meta = &chunk->slabs[slab_idx]; + + // Skip slabs that belong to a different class (or are uninitialized). 
+ if (meta->class_idx != (uint8_t)class_idx && meta->class_idx != 255) { + continue; + } + + // P1.2 FIX: Initialize slab on first use (like shared backend does) + // This ensures class_map is populated for all slabs, not just slab 0 + if (meta->capacity == 0) { + size_t block_size = g_tiny_class_sizes[class_idx]; + uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); + superslab_init_slab(chunk, slab_idx, block_size, owner_tid); + meta = &chunk->slabs[slab_idx]; // Refresh pointer after init + meta->class_idx = (uint8_t)class_idx; + // P1.2: Update class_map for dynamic slab initialization + chunk->class_map[slab_idx] = (uint8_t)class_idx; + } + + if (meta->used < meta->capacity) { + size_t stride = tiny_block_stride_for_class(class_idx); + size_t offset = (size_t)meta->used * stride; + uint8_t* base = (uint8_t*)chunk + + SUPERSLAB_SLAB0_DATA_OFFSET + + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE + + offset; + + meta->used++; + atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed); + return superslab_return_block(base, class_idx); + } + } + chunk = chunk->next_chunk; + } + + if (expand_superslab_head(head) < 0) { + return NULL; + } + + SuperSlab* new_chunk = head->current_chunk; + if (!new_chunk) { + return NULL; + } + + int cap2 = ss_slabs_capacity(new_chunk); + for (int slab_idx = 0; slab_idx < cap2; slab_idx++) { + TinySlabMeta* meta = &new_chunk->slabs[slab_idx]; + + // P1.2 FIX: Initialize slab on first use (like shared backend does) + if (meta->capacity == 0) { + size_t block_size = g_tiny_class_sizes[class_idx]; + uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); + superslab_init_slab(new_chunk, slab_idx, block_size, owner_tid); + meta = &new_chunk->slabs[slab_idx]; // Refresh pointer after init + meta->class_idx = (uint8_t)class_idx; + // P1.2: Update class_map for dynamic slab initialization + new_chunk->class_map[slab_idx] = (uint8_t)class_idx; + } + + if (meta->used < meta->capacity) { + size_t stride = tiny_block_stride_for_class(class_idx); + size_t offset = (size_t)meta->used * stride; + uint8_t* base = (uint8_t*)new_chunk + + SUPERSLAB_SLAB0_DATA_OFFSET + + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE + + offset; + + meta->used++; + atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed); + return superslab_return_block(base, class_idx); + } + } + + return NULL; +} + +/* + * Shared pool backend for hak_tiny_alloc_superslab_box(). + * + * Phase 12-2: + * - Uses SharedSuperSlabPool (g_shared_pool) to obtain a SuperSlab/slab + * for the requested class_idx. + * - This backend EXPRESSLY owns only: + * - choosing (ss, slab_idx) via shared_pool_acquire_slab() + * - initializing that slab's TinySlabMeta via superslab_init_slab() + * and nothing else; all callers must go through hak_tiny_alloc_superslab_box(). + * + * - For now this is a minimal, conservative implementation: + * - One linear bump-run is carved from the acquired slab using tiny_block_stride_for_class(). + * - No complex per-slab freelist or refill policy yet (Phase 12-3+). + * - If shared_pool_acquire_slab() fails, we fall back to legacy backend. + */ +void* hak_tiny_alloc_superslab_backend_shared(int class_idx) +{ + if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { + return NULL; + } + + SuperSlab* ss = NULL; + int slab_idx = -1; + + if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss) { + // Shared pool could not provide a slab; caller may choose to fall back. 
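+        // (A NULL return here is not fatal: hak_tiny_alloc_superslab_box() falls
+        // back to hak_tiny_alloc_superslab_backend_legacy() when this backend fails.)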
+ return NULL; + } + + TinySlabMeta* meta = &ss->slabs[slab_idx]; + + // Defensive: shared_pool must either hand us an UNASSIGNED slab or one + // already bound to this class. Anything else is a hard bug. + if (meta->class_idx != 255 && meta->class_idx != (uint8_t)class_idx) { +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, + "[HAKMEM][SS_SHARED] BUG: acquire_slab mismatch: cls=%d meta->class_idx=%u slab_idx=%d ss=%p\n", + class_idx, (unsigned)meta->class_idx, slab_idx, (void*)ss); +#endif + return NULL; + } + + // Initialize slab geometry once for this class. + if (meta->capacity == 0) { + size_t block_size = g_tiny_class_sizes[class_idx]; + // LARSON FIX: Pass actual thread ID for cross-thread free detection + uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self(); + superslab_init_slab(ss, slab_idx, block_size, my_tid); + meta = &ss->slabs[slab_idx]; + + // CRITICAL FIX: Always set class_idx after init to avoid C0/C7 confusion. + // New SuperSlabs start with meta->class_idx=0 (mmap zero-init). + // Must explicitly set to requested class, not just when class_idx==255. + meta->class_idx = (uint8_t)class_idx; + // P1.1: Update class_map in shared acquire path + ss->class_map[slab_idx] = (uint8_t)class_idx; + } + + // Final contract check before computing addresses. + if (meta->class_idx != (uint8_t)class_idx || + meta->capacity == 0 || + meta->used > meta->capacity) { +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, + "[HAKMEM][SS_SHARED] BUG: invalid slab meta before alloc: " + "cls=%d slab_idx=%d meta_cls=%u used=%u cap=%u ss=%p\n", + class_idx, slab_idx, + (unsigned)meta->class_idx, + (unsigned)meta->used, + (unsigned)meta->capacity, + (void*)ss); +#endif + return NULL; + } + + // Simple bump allocation within this slab. + if (meta->used >= meta->capacity) { + // Slab exhausted: in minimal Phase12-2 backend we do not loop; + // caller or future logic must acquire another slab. + return NULL; + } + + size_t stride = tiny_block_stride_for_class(class_idx); + size_t offset = (size_t)meta->used * stride; + + // Phase 12-2 minimal geometry: + // - slab 0 data offset via SUPERSLAB_SLAB0_DATA_OFFSET + // - subsequent slabs at fixed SUPERSLAB_SLAB_USABLE_SIZE strides. + size_t slab_base_off = SUPERSLAB_SLAB0_DATA_OFFSET + + (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE; + uint8_t* base = (uint8_t*)ss + slab_base_off + offset; + + meta->used++; + atomic_fetch_add_explicit(&ss->total_active_blocks, 1, memory_order_relaxed); + + return superslab_return_block(base, class_idx); +} + +/* + * Box API entry: + * - Single front-door for tiny-side Superslab allocations. + * + * Phase 12 policy: + * - HAKMEM_TINY_SS_SHARED=0 → legacy backendのみ(回帰確認用) + * - HAKMEM_TINY_SS_SHARED=1 → shared backendを優先し、失敗時のみ legacy にフォールバック + */ +void* hak_tiny_alloc_superslab_box(int class_idx) +{ + static int g_ss_shared_mode = -1; + static _Atomic uint32_t g_ss_backend_log = 0; + if (__builtin_expect(g_ss_shared_mode == -1, 0)) { + const char* e = getenv("HAKMEM_TINY_SS_SHARED"); + if (!e || !*e) { + g_ss_shared_mode = 1; // デフォルト: shared 有効 + } else { + int v = atoi(e); + g_ss_shared_mode = (v != 0) ? 
1 : 0; + } + } + + if (g_ss_shared_mode == 1) { + void* p = hak_tiny_alloc_superslab_backend_shared(class_idx); + if (p != NULL) { + uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); + if (n < 4) { + fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p); + } + return p; + } + // shared backend が失敗した場合は安全側で legacy にフォールバック + uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); + if (n < 4) { + fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx); + } + return hak_tiny_alloc_superslab_backend_legacy(class_idx); + } + + // shared OFF 時は legacy のみ + uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed); + if (n < 4) { + fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx); + } + return hak_tiny_alloc_superslab_backend_legacy(class_idx); +} diff --git a/core/superslab_cache.c b/core/superslab_cache.c new file mode 100644 index 00000000..d26bef6d --- /dev/null +++ b/core/superslab_cache.c @@ -0,0 +1,204 @@ +// superslab_cache.c - Cache management for SuperSlab allocator +// Purpose: LRU cache and old cache (prewarm) for SuperSlabs +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// Cache System - Global Variables +// ============================================================================ + +SuperslabCacheEntry* g_ss_cache_head[8] = {0}; +size_t g_ss_cache_count[8] = {0}; +size_t g_ss_cache_cap[8] = {0}; +size_t g_ss_precharge_target[8] = {0}; +_Atomic int g_ss_precharge_done[8] = {0}; +int g_ss_cache_enabled = 0; + +pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT; +pthread_mutex_t g_ss_cache_lock[8]; + +uint64_t g_ss_cache_hits[8] = {0}; +uint64_t g_ss_cache_misses[8] = {0}; +uint64_t g_ss_cache_puts[8] = {0}; +uint64_t g_ss_cache_drops[8] = {0}; +uint64_t g_ss_cache_precharged[8] = {0}; + +uint64_t g_superslabs_reused = 0; +uint64_t g_superslabs_cached = 0; + +// ============================================================================ +// Cache Initialization +// ============================================================================ + +void ss_cache_global_init(void) { + for (int i = 0; i < 8; i++) { + pthread_mutex_init(&g_ss_cache_lock[i], NULL); + } +} + +void ss_cache_ensure_init(void) { + pthread_once(&g_ss_cache_once, ss_cache_global_init); +} + +// ============================================================================ +// OS Acquisition (mmap with alignment) +// ============================================================================ + +void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) { + void* ptr = NULL; + static int log_count = 0; + +#ifdef MAP_ALIGNED_SUPER + int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER; +#ifdef MAP_POPULATE + if (populate) { + map_flags |= MAP_POPULATE; + } +#endif + ptr = mmap(NULL, ss_size, + PROT_READ | PROT_WRITE, + map_flags, + -1, 0); + if (ptr != MAP_FAILED) { + atomic_fetch_add(&g_ss_mmap_count, 1); + if (((uintptr_t)ptr & ss_mask) == 0) { + ss_stats_os_alloc(size_class, ss_size); + return ptr; + } + munmap(ptr, ss_size); + ptr = NULL; + } else { + log_superslab_oom_once(ss_size, ss_size, errno); + } +#endif + + size_t alloc_size = ss_size * 2; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#ifdef MAP_POPULATE + if (populate) { + flags |= MAP_POPULATE; + } +#endif + void* raw = mmap(NULL, alloc_size, + PROT_READ | PROT_WRITE, + 
flags, + -1, 0); + if (raw != MAP_FAILED) { + uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1; + #if !HAKMEM_BUILD_RELEASE + if (log_count < 10) { + fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n", + (unsigned long)count, size_class, ss_size); + log_count++; + } + #endif + } + if (raw == MAP_FAILED) { + log_superslab_oom_once(ss_size, alloc_size, errno); + return NULL; + } + + uintptr_t raw_addr = (uintptr_t)raw; + uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask; + ptr = (void*)aligned_addr; + + size_t prefix_size = aligned_addr - raw_addr; + if (prefix_size > 0) { + munmap(raw, prefix_size); + } + size_t suffix_size = alloc_size - prefix_size - ss_size; + if (suffix_size > 0) { + if (populate) { +#ifdef MADV_DONTNEED + madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED); +#endif + } else { + munmap((char*)ptr + ss_size, suffix_size); + } + } + + ss_stats_os_alloc(size_class, ss_size); + return ptr; +} + +// ============================================================================ +// Cache Precharge (prewarm) +// ============================================================================ + +void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) { + if (!g_ss_cache_enabled) return; + if (size_class >= 8) return; + if (g_ss_precharge_target[size_class] == 0) return; + if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return; + + ss_cache_ensure_init(); + pthread_mutex_lock(&g_ss_cache_lock[size_class]); + size_t target = g_ss_precharge_target[size_class]; + size_t cap = g_ss_cache_cap[size_class]; + size_t desired = target; + if (cap != 0 && desired > cap) { + desired = cap; + } + while (g_ss_cache_count[size_class] < desired) { + void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1); + if (!raw) { + break; + } + SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw; + entry->next = g_ss_cache_head[size_class]; + g_ss_cache_head[size_class] = entry; + g_ss_cache_count[size_class]++; + g_ss_cache_precharged[size_class]++; + } + atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release); + pthread_mutex_unlock(&g_ss_cache_lock[size_class]); +} + +// ============================================================================ +// Cache Pop/Push Operations +// ============================================================================ + +SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) { + if (!g_ss_cache_enabled) return NULL; + if (size_class >= 8) return NULL; + + ss_cache_ensure_init(); + + pthread_mutex_lock(&g_ss_cache_lock[size_class]); + SuperslabCacheEntry* entry = g_ss_cache_head[size_class]; + if (entry) { + g_ss_cache_head[size_class] = entry->next; + if (g_ss_cache_count[size_class] > 0) { + g_ss_cache_count[size_class]--; + } + entry->next = NULL; + g_ss_cache_hits[size_class]++; + } else { + g_ss_cache_misses[size_class]++; + } + pthread_mutex_unlock(&g_ss_cache_lock[size_class]); + return entry; +} + +int ss_cache_push(uint8_t size_class, SuperSlab* ss) { + if (!g_ss_cache_enabled) return 0; + if (size_class >= 8) return 0; + + ss_cache_ensure_init(); + pthread_mutex_lock(&g_ss_cache_lock[size_class]); + size_t cap = g_ss_cache_cap[size_class]; + if (cap != 0 && g_ss_cache_count[size_class] >= cap) { + g_ss_cache_drops[size_class]++; + pthread_mutex_unlock(&g_ss_cache_lock[size_class]); + return 0; + } + SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss; + entry->next = g_ss_cache_head[size_class]; + 
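+    // Publish at the head (LIFO): ss_cache_pop() hands this chunk back first,
+    // while it is most likely still TLB/cache-warm.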
g_ss_cache_head[size_class] = entry; + g_ss_cache_count[size_class]++; + g_ss_cache_puts[size_class]++; + pthread_mutex_unlock(&g_ss_cache_lock[size_class]); + return 1; +} diff --git a/core/superslab_head.c b/core/superslab_head.c new file mode 100644 index 00000000..e9841181 --- /dev/null +++ b/core/superslab_head.c @@ -0,0 +1,176 @@ +// superslab_head.c - SuperSlabHead management for dynamic expansion +// Purpose: Per-class chunk lists and expansion logic +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// Phase 2a: Dynamic Expansion - Global per-class SuperSlabHeads +// ============================================================================ + +SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS] = {NULL}; + +// ============================================================================ +// SuperSlabHead Management Functions +// ============================================================================ + +// Initialize SuperSlabHead for a class +SuperSlabHead* init_superslab_head(int class_idx) { + if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { + return NULL; + } + + // Allocate SuperSlabHead structure + SuperSlabHead* head = (SuperSlabHead*)calloc(1, sizeof(SuperSlabHead)); + if (!head) { + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; + fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate SuperSlabHead for class %d\n", class_idx); + g_hakmem_lock_depth--; + return NULL; + } + + head->class_idx = (uint8_t)class_idx; + atomic_store_explicit(&head->total_chunks, 0, memory_order_relaxed); + head->first_chunk = NULL; + head->current_chunk = NULL; + pthread_mutex_init(&head->expansion_lock, NULL); + + // Allocate initial chunk(s) + // Hot classes (1, 4, 6) get 2 initial chunks to reduce contention + int initial_chunks = 1; + + // Phase 2a: Start with 1 chunk for all classes (expansion will handle growth) + // This reduces startup memory overhead while still allowing unlimited growth + initial_chunks = 1; + + for (int i = 0; i < initial_chunks; i++) { + if (expand_superslab_head(head) < 0) { + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; + fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate initial chunk %d for class %d\n", + i, class_idx); + g_hakmem_lock_depth--; + + // Cleanup on failure + SuperSlab* chunk = head->first_chunk; + while (chunk) { + SuperSlab* next = chunk->next_chunk; + superslab_free(chunk); + chunk = next; + } + pthread_mutex_destroy(&head->expansion_lock); + free(head); + return NULL; + } + } + + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[HAKMEM] Initialized SuperSlabHead for class %d: %zu initial chunks\n", + class_idx, atomic_load_explicit(&head->total_chunks, memory_order_relaxed)); +#endif + g_hakmem_lock_depth--; + + return head; +} + +// Expand SuperSlabHead by allocating and linking a new chunk +int expand_superslab_head(SuperSlabHead* head) { + if (!head) { + return -1; + } + + // Allocate new chunk via existing superslab_allocate + SuperSlab* new_chunk = superslab_allocate(head->class_idx); + if (!new_chunk) { +#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE) + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; + fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate new chunk for class %d (system OOM)\n", + head->class_idx); + g_hakmem_lock_depth--; +#endif + return -1; // True OOM 
(system out of memory) + } + + // CRITICAL FIX: Initialize slab 0 so bitmap != 0x00000000 + // Phase 2a chunks must have at least one usable slab after allocation + size_t block_size = g_tiny_class_sizes[head->class_idx]; + // Use pthread_self() directly since tiny_self_u32() is static inline in hakmem_tiny.c + uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self(); + + superslab_init_slab(new_chunk, 0, block_size, owner_tid); + + // Initialize the next_chunk link to NULL + new_chunk->next_chunk = NULL; + + // Thread-safe linking + pthread_mutex_lock(&head->expansion_lock); + + if (head->current_chunk) { + // Find the tail of the list (optimization: could cache tail pointer) + SuperSlab* tail = head->current_chunk; + while (tail->next_chunk) { + tail = tail->next_chunk; + } + tail->next_chunk = new_chunk; + } else { + // First chunk + head->first_chunk = new_chunk; + } + + // Update current chunk to new chunk (for fast allocation) + head->current_chunk = new_chunk; + + // Increment total chunks atomically + size_t old_count = atomic_fetch_add_explicit(&head->total_chunks, 1, memory_order_relaxed); + size_t new_count = old_count + 1; + + pthread_mutex_unlock(&head->expansion_lock); + +#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE) + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; + fprintf(stderr, "[HAKMEM] Expanded SuperSlabHead for class %d: %zu chunks now (bitmap=0x%08x)\n", + head->class_idx, new_count, new_chunk->slab_bitmap); + g_hakmem_lock_depth--; +#endif + + return 0; +} + +// Find which chunk a pointer belongs to +SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx) { + if (!ptr || class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { + return NULL; + } + + SuperSlabHead* head = g_superslab_heads[class_idx]; + if (!head) { + return NULL; + } + + uintptr_t ptr_addr = (uintptr_t)ptr; + + // Walk the chunk list + SuperSlab* chunk = head->first_chunk; + while (chunk) { + // Check if ptr is within this chunk's memory range + // Each chunk is aligned to SUPERSLAB_SIZE (1MB or 2MB) + uintptr_t chunk_start = (uintptr_t)chunk; + size_t chunk_size = (size_t)1 << chunk->lg_size; // Use actual chunk size + uintptr_t chunk_end = chunk_start + chunk_size; + + if (ptr_addr >= chunk_start && ptr_addr < chunk_end) { + // Found the chunk + return chunk; + } + + chunk = chunk->next_chunk; + } + + return NULL; // Not found in any chunk +} diff --git a/core/superslab_slab.c b/core/superslab_slab.c new file mode 100644 index 00000000..4219f943 --- /dev/null +++ b/core/superslab_slab.c @@ -0,0 +1,210 @@ +// superslab_slab.c - Slab initialization and management +// Purpose: Slab lifecycle and bitmap management within SuperSlabs +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// Remote Drain (MPSC queue to freelist conversion) +// ============================================================================ + +// Drain remote MPSC stack into freelist (ownership already verified by caller) +void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) +{ + if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return; + + static _Atomic uint32_t g_remote_drain_diag_once = 0; + static int g_remote_drain_diag_en = -1; + + // Atomically take the whole remote list + uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0, + memory_order_acq_rel); + if (head == 0) return; + + // Convert remote stack 
(offset 0 next) into freelist encoding via Box API + // and splice in front of current freelist preserving relative order. + void* prev = meta->freelist; + int cls = (int)meta->class_idx; + HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe"); + if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) { + static _Atomic int g_remote_drain_cls_oob = 0; + if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) { + fprintf(stderr, + "[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n", + (void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head); + } + return; + } + uintptr_t cur = head; + while (cur != 0) { + uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0 +#if !HAKMEM_BUILD_RELEASE + if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) { + const char* e = getenv("HAKMEM_TINY_SLL_DIAG"); + g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0; + } +#else + if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) { + g_remote_drain_diag_en = 0; + } +#endif + if (__builtin_expect(g_remote_drain_diag_en, 0)) { + uintptr_t addr = (uintptr_t)next; + if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) { + uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed); + if (shot < 8) { + fprintf(stderr, + "[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n", + cls, + slab_idx, + (void*)cur, + (void*)next, + (unsigned long)head, + prev, + (unsigned)meta->used); + } + } +#if HAKMEM_TINY_HEADER_CLASSIDX + int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1); + if (hdr_cls >= 0 && hdr_cls != cls) { + uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed); + if (shot < 8) { + fprintf(stderr, + "[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n", + cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head); + } + } +#endif + } +#if HAKMEM_TINY_HEADER_CLASSIDX + // Cross-check header vs meta before writing next (even if diag is off) + { + int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1); + if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) { + static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0; + uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed); + if (n < 16) { + fprintf(stderr, + "[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n", + cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx); + } + } + } +#endif + // Restore header for header-classes (class 1-6) which were clobbered by remote push +#if HAKMEM_TINY_HEADER_CLASSIDX + if (cls != 0) { + uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK)); + *(uint8_t*)(uintptr_t)cur = expected; + } +#endif + // Rewrite next pointer to Box representation for this class + tiny_next_write(cls, (void*)cur, prev); + prev = (void*)cur; + cur = next; + } + meta->freelist = prev; + // Reset remote count after full drain + atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release); + + // Update freelist/nonempty visibility bits + uint32_t bit = (1u << slab_idx); + atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release); + atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release); +} + +// ============================================================================ +// Slab Initialization within SuperSlab +// 
============================================================================ + +void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid) +{ + if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { + return; + } + + // Phase E1-CORRECT unified geometry: + // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls]) + // - usable bytes are determined by slab index (slab0 vs others) + // - capacity = usable / stride for ALL classes (including former C7) + size_t usable_size = (slab_idx == 0) + ? SUPERSLAB_SLAB0_USABLE_SIZE + : SUPERSLAB_SLAB_USABLE_SIZE; + size_t stride = block_size; + uint16_t capacity = (uint16_t)(usable_size / stride); + +#if !HAKMEM_BUILD_RELEASE + if (slab_idx == 0) { + fprintf(stderr, + "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n", + usable_size, stride, (unsigned)capacity); + } +#endif + + TinySlabMeta* meta = &ss->slabs[slab_idx]; + meta->freelist = NULL; // NULL = linear allocation mode + meta->used = 0; + meta->active = 0; // P1.3: blocks in use by user (starts at 0) + meta->tls_cached = 0; // P2.2: blocks cached in TLS SLL (starts at 0) + meta->capacity = capacity; + meta->carved = 0; + // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes + meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu); + // Fail-safe: stamp class_idx from geometry (stride → class). + // This ensures legacy/shared/legacy-refill paths all end with a correct class. + for (int i = 0; i < TINY_NUM_CLASSES; i++) { + if (g_tiny_class_sizes[i] == stride) { + meta->class_idx = (uint8_t)i; + // P1.1: Update class_map for out-of-band lookup on free path + ss->class_map[slab_idx] = (uint8_t)i; + break; + } + } + + superslab_activate_slab(ss, slab_idx); +} + +// ============================================================================ +// Slab Bitmap Management +// ============================================================================ + +void superslab_activate_slab(SuperSlab* ss, int slab_idx) { + if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { + return; + } + uint32_t mask = 1u << slab_idx; + if ((ss->slab_bitmap & mask) == 0) { + ss->slab_bitmap |= mask; + ss->active_slabs++; + + // Phase 3d-C: Update hot/cold indices after activating new slab + ss_update_hot_cold_indices(ss); + } +} + +void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) { + if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) { + return; + } + uint32_t mask = 1u << slab_idx; + if (ss->slab_bitmap & mask) { + ss->slab_bitmap &= ~mask; + ss->active_slabs--; + } +} + +int superslab_find_free_slab(SuperSlab* ss) { + if (!ss) return -1; + if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) { + return -1; // No free slabs + } + // Find first 0 bit in bitmap + int cap = ss_slabs_capacity(ss); + for (int i = 0; i < cap; i++) { + if ((ss->slab_bitmap & (1u << i)) == 0) { + return i; + } + } + return -1; +} diff --git a/core/superslab_stats.c b/core/superslab_stats.c new file mode 100644 index 00000000..c7a56135 --- /dev/null +++ b/core/superslab_stats.c @@ -0,0 +1,166 @@ +// superslab_stats.c - Statistics and debugging for SuperSlab allocator +// Purpose: Tracking and reporting allocation statistics +// License: MIT +// Date: 2025-11-28 + +#include "hakmem_tiny_superslab_internal.h" + +// ============================================================================ +// Global Statistics +// ============================================================================ + 
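Before the statistics plumbing below, the bitmap bookkeeping from superslab_slab.c is worth seeing in isolation. The following standalone sketch is not part of the patch: DemoSuperSlab and the demo_* helpers are illustrative stand-ins, the capacity parameter stands in for ss_slabs_capacity(), and at most 32 slabs are assumed (as the uint32_t slab_bitmap implies). The ctz variant is shown only for comparison with the linear scan in superslab_find_free_slab().

#include <stdint.h>
#include <stdio.h>

// Minimal model of the slab bitmap used by superslab_activate_slab(),
// superslab_deactivate_slab() and superslab_find_free_slab().
// Field names mirror the real SuperSlab, but the struct is illustrative.
typedef struct {
    uint32_t slab_bitmap;   // bit i set => slab i is active
    uint32_t active_slabs;  // number of set bits
} DemoSuperSlab;

// Same shape as superslab_find_free_slab(): linear scan for the first 0 bit.
static int demo_find_free_slab(const DemoSuperSlab* ss, int capacity) {
    if ((int)ss->active_slabs >= capacity) return -1;
    for (int i = 0; i < capacity; i++) {
        if ((ss->slab_bitmap & (1u << i)) == 0) return i;
    }
    return -1;
}

// Possible constant-time variant: first zero bit via ctz of the inverted mask.
// Not used by the patch; shown only to make the "first free slab" intent explicit.
static int demo_find_free_slab_ctz(const DemoSuperSlab* ss, int capacity) {
    uint32_t free_bits = ~ss->slab_bitmap;
    if (capacity < 32) free_bits &= (1u << capacity) - 1u;
    return free_bits ? __builtin_ctz(free_bits) : -1;
}

int main(void) {
    DemoSuperSlab ss = { .slab_bitmap = 0x0000000Fu, .active_slabs = 4 };
    printf("scan=%d ctz=%d\n",
           demo_find_free_slab(&ss, 32),
           demo_find_free_slab_ctz(&ss, 32)); // both print 4
    return 0;
}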
+pthread_mutex_t g_superslab_lock = PTHREAD_MUTEX_INITIALIZER; +uint64_t g_superslabs_allocated = 0; // Non-static for debugging +uint64_t g_superslabs_freed = 0; // Phase 7.6: Non-static for test access +uint64_t g_bytes_allocated = 0; // Non-static for debugging + +// Debug counters +_Atomic uint64_t g_ss_active_dec_calls = 0; +_Atomic uint64_t g_hak_tiny_free_calls = 0; +_Atomic uint64_t g_ss_remote_push_calls = 0; +// Free path instrumentation (lightweight, for OOM/route diagnosis) +_Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entries +_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes +_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes +// Per-class counters for gating/metrics (Tiny classes = 8) +uint64_t g_ss_alloc_by_class[8] = {0}; +uint64_t g_ss_freed_by_class[8] = {0}; + +// Global counters for debugging (non-static for external access) +_Atomic uint64_t g_ss_mmap_count = 0; +_Atomic uint64_t g_final_fallback_mmap_count = 0; + +// ============================================================================ +// Statistics Functions +// ============================================================================ + +void ss_stats_os_alloc(uint8_t size_class, size_t ss_size) { + pthread_mutex_lock(&g_superslab_lock); + g_superslabs_allocated++; + if (size_class < 8) { + g_ss_alloc_by_class[size_class]++; + } + g_bytes_allocated += ss_size; + pthread_mutex_unlock(&g_superslab_lock); +} + +void ss_stats_cache_reuse(void) { + pthread_mutex_lock(&g_superslab_lock); + g_superslabs_reused++; + pthread_mutex_unlock(&g_superslab_lock); +} + +void ss_stats_cache_store(void) { + pthread_mutex_lock(&g_superslab_lock); + g_superslabs_cached++; + pthread_mutex_unlock(&g_superslab_lock); +} + +// ============================================================================ +// Diagnostics +// ============================================================================ + +void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err) { + static int logged = 0; + if (logged) return; + logged = 1; + + // CRITICAL FIX: Increment lock depth FIRST before any LIBC calls + // fopen/fclose/getrlimit/fprintf all may call malloc internally + // Must bypass HAKMEM wrapper to avoid header mismatch crash + extern __thread int g_hakmem_lock_depth; + g_hakmem_lock_depth++; // Force wrapper to use __libc_malloc + + struct rlimit rl = {0}; + if (getrlimit(RLIMIT_AS, &rl) != 0) { + rl.rlim_cur = RLIM_INFINITY; + rl.rlim_max = RLIM_INFINITY; + } + + unsigned long vm_size_kb = 0; + unsigned long vm_rss_kb = 0; + FILE* status = fopen("/proc/self/status", "r"); + if (status) { + char line[256]; + while (fgets(line, sizeof(line), status)) { + if (strncmp(line, "VmSize:", 7) == 0) { + (void)sscanf(line + 7, "%lu", &vm_size_kb); + } else if (strncmp(line, "VmRSS:", 6) == 0) { + (void)sscanf(line + 6, "%lu", &vm_rss_kb); + } + } + fclose(status); + } + // CRITICAL FIX: Do NOT decrement lock_depth yet! 
+ // fprintf() below may call malloc for buffering + + char rl_cur_buf[32]; + char rl_max_buf[32]; + if (rl.rlim_cur == RLIM_INFINITY) { + strcpy(rl_cur_buf, "inf"); + } else { + snprintf(rl_cur_buf, sizeof(rl_cur_buf), "%llu", (unsigned long long)rl.rlim_cur); + } + if (rl.rlim_max == RLIM_INFINITY) { + strcpy(rl_max_buf, "inf"); + } else { + snprintf(rl_max_buf, sizeof(rl_max_buf), "%llu", (unsigned long long)rl.rlim_max); + } + +#if !HAKMEM_BUILD_RELEASE + fprintf(stderr, + "[SS OOM] mmap failed: err=%d ss_size=%zu alloc_size=%zu " + "alloc=%llu freed=%llu bytes=%llu " + "RLIMIT_AS(cur=%s max=%s) VmSize=%lu kB VmRSS=%lu kB\n", + err, + ss_size, + alloc_size, + (unsigned long long)g_superslabs_allocated, + (unsigned long long)g_superslabs_freed, + (unsigned long long)g_bytes_allocated, + rl_cur_buf, + rl_max_buf, + vm_size_kb, + vm_rss_kb); +#endif + + g_hakmem_lock_depth--; // Now safe to restore (all libc calls complete) +} + +// ============================================================================ +// Statistics / Debugging +// ============================================================================ + +void superslab_print_stats(SuperSlab* ss) { + if (!ss || ss->magic != SUPERSLAB_MAGIC) { + printf("Invalid SuperSlab\n"); + return; + } + + printf("=== SuperSlab Stats ===\n"); + printf("Address: %p\n", (void*)ss); + // Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx. + printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss)); + printf("Bitmap: 0x%08X\n", ss->slab_bitmap); + printf("\nPer-slab details:\n"); + for (int i = 0; i < ss_slabs_capacity(ss); i++) { + if (ss->slab_bitmap & (1u << i)) { + TinySlabMeta* meta = &ss->slabs[i]; + printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n", + i, meta->used, meta->capacity, meta->freelist, + (unsigned)meta->class_idx, (unsigned)meta->owner_tid_low); + } + } + printf("\n"); +} + +// Global statistics +void superslab_print_global_stats(void) { + pthread_mutex_lock(&g_superslab_lock); + printf("=== Global SuperSlab Stats ===\n"); + printf("SuperSlabs allocated: %lu\n", g_superslabs_allocated); + printf("SuperSlabs freed: %lu\n", g_superslabs_freed); + printf("SuperSlabs active: %lu\n", g_superslabs_allocated - g_superslabs_freed); + printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024)); + pthread_mutex_unlock(&g_superslab_lock); +} diff --git a/core/tiny_region_id.h b/core/tiny_region_id.h index f7e4e18a..8770577e 100644 --- a/core/tiny_region_id.h +++ b/core/tiny_region_id.h @@ -290,25 +290,18 @@ static inline int tiny_region_id_read_header(void* ptr) { // CRITICAL FIX (Pool TLS Phase 1): ALWAYS validate magic when Pool TLS is enabled // Reason: Pool TLS uses different magic (0xb0 vs 0xa0), MUST distinguish them! // Without this, Pool TLS allocations are wrongly routed to Tiny freelist → corruption -#if !HAKMEM_BUILD_RELEASE || defined(HAKMEM_POOL_TLS_PHASE1) - // Debug/Development OR Pool TLS: Validate magic byte to catch non-header allocations - // Reason: Mid/Large allocations don't have headers, must detect and reject them + // Always validate magic byte to catch non-header allocations (release included). + // Reason: mmap-zero or mid/large frees can otherwise be misrouted as class 0. 
uint8_t magic = header & 0xF0; - #if HAKMEM_DEBUG_VERBOSE +#if HAKMEM_DEBUG_VERBOSE static int debug_count = 0; if (debug_count < 5) { fprintf(stderr, "[TINY_READ_HEADER] ptr=%p header=0x%02x magic=0x%02x expected=0x%02x\n", ptr, header, magic, HEADER_MAGIC); debug_count++; } - #endif +#endif if (magic != HEADER_MAGIC) { - // Invalid header - likely non-header allocation (Mid/Large/Pool TLS) - #if HAKMEM_DEBUG_VERBOSE - if (debug_count < 6) { // One more after the 5 above - fprintf(stderr, "[TINY_READ_HEADER] REJECTING ptr=%p (magic mismatch)\n", ptr); - } - #endif #if !HAKMEM_BUILD_RELEASE static int invalid_count = 0; if (invalid_count < 5) { @@ -322,12 +315,6 @@ static inline int tiny_region_id_read_header(void* ptr) { if (tiny_guard_is_enabled()) tiny_guard_on_invalid(ptr, header); return -1; } -#else - // Release (without Pool TLS): Skip magic validation (save 2-3 cycles) - // Safety: Bounds check below still prevents out-of-bounds array access - // Trade-off: Mid/Large frees may corrupt TLS freelist (rare, ~0.1% of frees) - // NOTE: This optimization is DISABLED when Pool TLS is enabled (different magic bytes!) -#endif int class_idx = (int)(header & HEADER_CLASS_MASK);
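The tiny_region_id.h hunk makes the magic check unconditional: a header byte is only accepted as a Tiny header when its top nibble matches HEADER_MAGIC; anything else returns -1 instead of being misread as class 0. A minimal standalone model of that decode step follows; the 0xA0 magic, the 0x07 class mask, and the 8-class bound are assumptions inferred from the surrounding comments ("0xb0 vs 0xa0", "Tiny classes = 8"), not values copied from tiny_region_id.h.

#include <stdint.h>
#include <stdio.h>

// Assumed encoding, mirroring "HEADER_MAGIC | (cls & HEADER_CLASS_MASK)"
// from the remote-drain path. All three constants are illustrative.
#define DEMO_HEADER_MAGIC      0xA0u  // top nibble marks a Tiny header
#define DEMO_HEADER_CLASS_MASK 0x07u  // low bits carry the size-class index
#define DEMO_NUM_CLASSES       8

// Returns the class index, or -1 for anything that is not a Tiny header
// (zero-filled mmap memory, mid/large blocks, foreign magic such as Pool TLS).
static int demo_read_header(uint8_t header) {
    if ((header & 0xF0u) != DEMO_HEADER_MAGIC) return -1; // magic mismatch
    int class_idx = (int)(header & DEMO_HEADER_CLASS_MASK);
    if (class_idx >= DEMO_NUM_CLASSES) return -1;         // defensive bound
    return class_idx;
}

int main(void) {
    printf("%d\n", demo_read_header(0xA3u)); // 3  -> valid header, class 3
    printf("%d\n", demo_read_header(0x00u)); // -1 -> zero page rejected
    printf("%d\n", demo_read_header(0xB2u)); // -1 -> foreign magic (Pool TLS style)
    return 0;
}

Keeping the check on in release builds gives up the couple of cycles the deleted #else branch was saving, but it closes the window that branch documented, where a mid/large free could land on the Tiny TLS freelist.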