# Makefile for hakmem PoC

# Toolchain. Both can be overridden on the command line (make CC=clang ...).
CC = gcc
CXX = g++

# Default target: Show help
.DEFAULT_GOAL := help

# help: print an overview of the most commonly used targets.
# Pure console output; nothing is built.
.PHONY: help
help:
	@echo "========================================="
	@echo "HAKMEM Build Targets"
	@echo "========================================="
	@echo ""
	@echo "Development (Fast builds):"
	@echo " make bench_random_mixed_hakmem - Quick build (~1-2 min)"
	@echo " make bench_tiny_hot_hakmem - Quick build"
	@echo " make test_hakmem - Quick test build"
	@echo ""
	@echo "Benchmarking (PGO-optimized, +6% faster):"
	@echo " make pgo-tiny-full - Full PGO workflow (~5-10 min)"
	@echo " = Profile + Optimize + Test"
	@echo " make pgo-tiny-profile - Step 1: Build profile binaries"
	@echo " make pgo-tiny-collect - Step 2: Collect profile data"
	@echo " make pgo-tiny-build - Step 3: Build optimized"
	@echo ""
	@echo "Comparison:"
	@echo " make bench-comparison - Compare hakmem vs system vs mimalloc"
	@echo " make bench-pool-tls - Pool TLS benchmark"
	@echo ""
	@echo "Cleanup:"
	@echo " make clean - Clean build artifacts"
	@echo ""
	@echo "Phase 4 Performance:"
	@echo " Baseline: 57.0 M ops/s"
	@echo " PGO-optimized: 60.6 M ops/s (+6.25%)"
	@echo ""
	@echo "TIP: For best performance, use 'make pgo-tiny-full'"
	@echo "========================================="

# Directory structure (2025-11-01 reorganization)
SRC_DIR := core
BENCH_SRC := benchmarks/src
TEST_SRC := tests
BUILD_DIR := build
BENCH_BIN_DIR := benchmarks/bin

# Search paths for source files: prerequisites such as foo.c are looked up in
# each of these directories when they are not found in the current directory.
VPATH := $(SRC_DIR):$(SRC_DIR)/box:$(BENCH_SRC)/tiny:$(BENCH_SRC)/mid:$(BENCH_SRC)/comprehensive:$(BENCH_SRC)/stress:$(TEST_SRC)/unit:$(TEST_SRC)/integration:$(TEST_SRC)/stress

# Timing: default OFF for performance. Set HAKMEM_TIMING=1 to enable.
HAKMEM_TIMING ?= 0

# Phase 6.25: Aggressive optimization flags (default ON, overridable)
OPT_LEVEL ?= 3
USE_LTO ?= 1
NATIVE ?= 1

# Flags common to the executable build (CFLAGS) and the shared-library build
# (CFLAGS_SHARED).
# Deliberately a recursive (=) variable: $(HAKMEM_TIMING) has to be expanded
# when a recipe runs, so that a target-specific override such as
# `debug: HAKMEM_TIMING=1` actually reaches -DHAKMEM_DEBUG_TIMING. With an
# immediate (:=) assignment the value is frozen at parse time and such an
# override is silently ignored.
BASE_CFLAGS = -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L \
  -D_GLIBC_USE_ISOC2X=0 -D__isoc23_strtol=strtol -D__isoc23_strtoll=strtoll \
  -D__isoc23_strtoul=strtoul -D__isoc23_strtoull=strtoull -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) \
  -ffast-math -funroll-loops -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
  -fno-semantic-interposition -I core -I include

CFLAGS = -O$(OPT_LEVEL) $(BASE_CFLAGS)
ifeq ($(NATIVE),1)
CFLAGS += -march=native -mtune=native -fno-plt
endif
ifeq ($(USE_LTO),1)
CFLAGS += -flto
endif

# Allow overriding TLS ring capacity at build time: make shared RING_CAP=32
RING_CAP ?= 32

# Phase 6.25: Aggressive optimization + TLS Ring extension
# Same flag set as CFLAGS plus -fPIC and the ring capacity define.
CFLAGS_SHARED = -O$(OPT_LEVEL) $(BASE_CFLAGS) -fPIC -DPOOL_TLS_RING_CAP=$(RING_CAP)
ifeq ($(NATIVE),1)
CFLAGS_SHARED += -march=native -mtune=native -fno-plt
endif
ifeq ($(USE_LTO),1)
CFLAGS_SHARED += -flto
endif

LDFLAGS = -lm -lpthread
ifeq ($(USE_LTO),1)
LDFLAGS += -flto
endif

# ------------------------------------------------------------
# Build hygiene: dependency tracking + flag consistency checks
# ------------------------------------------------------------
# Track header dependencies for explicit compile rules as well
CFLAGS += -MMD -MP

# If someone injects -DHAKMEM_POOL_TLS_PHASE1=1 directly into CFLAGS
# but forgets POOL_TLS_PHASE1=1, object lists will miss pool_tls*.o.
# Fail fast to avoid confusing link/runtime errors.
ifneq ($(filter -DHAKMEM_POOL_TLS_PHASE1=1,$(CFLAGS)),)
ifneq ($(POOL_TLS_PHASE1),1)
$(error Detected -DHAKMEM_POOL_TLS_PHASE1=1 in CFLAGS but POOL_TLS_PHASE1!=1. Please invoke: make POOL_TLS_PHASE1=1 ...)
endif
endif

# Include generated .d files if present (safe even if none yet)
# Filter to only files (not directories like glibc-2.38/build/iconvdata/gconv-modules.d)
# Also exclude glibc and mimalloc-bench subdirectories
-include $(shell find . -name '*.d' -type f -not -path './glibc*' -not -path './mimalloc-bench*' 2>/dev/null)

# ------------------------------------------------------------
# Build flavor: release/debug (controls HAKMEM_BUILD_* and NDEBUG)
# ------------------------------------------------------------
BUILD_FLAVOR ?= release
ifeq ($(BUILD_FLAVOR),release)
CFLAGS += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
CFLAGS_SHARED += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
else ifeq ($(BUILD_FLAVOR),debug)
CFLAGS += -DHAKMEM_BUILD_DEBUG=1
CFLAGS_SHARED += -DHAKMEM_BUILD_DEBUG=1
else
# A typo (e.g. BUILD_FLAVOR=Release) would otherwise silently produce a build
# with neither HAKMEM_BUILD_* macro nor NDEBUG. Warn, but keep building.
$(warning Unknown BUILD_FLAVOR '$(BUILD_FLAVOR)' (expected release or debug); no HAKMEM_BUILD_* macro is defined)
endif

# Default: enable Box Theory refactor for Tiny (Phase 6-1.7)
# This is the best performing option currently (4.19M ops/s)
# The macro is always defined, as 1 or 0, so sources can test its value.
# To opt-out for legacy path: make BOX_REFACTOR_DEFAULT=0
BOX_REFACTOR_DEFAULT ?= 1
ifeq ($(BOX_REFACTOR_DEFAULT),1)
CFLAGS += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
else
CFLAGS += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=0
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=0
endif

# (Removed) legacy BUILD_RELEASE_DEFAULT in favor of BUILD_FLAVOR

# Phase 6-2: Ultra-Simple with SFC integration
# Original Ultra-Simple (without SFC): 3.56M ops/s vs BOX_REFACTOR: 4.19M ops/s
# Experimental variant with SFC (128-slot cache) integration - expecting >5M ops/s
# Default: OFF. To enable: make ULTRA_SIMPLE_DEFAULT=1
ULTRA_SIMPLE_DEFAULT ?= 0
ifeq ($(ULTRA_SIMPLE_DEFAULT),1)
CFLAGS += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
endif

# Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path)
# Target: 70-80% of System tcache (95-108 M ops/s)
# Default: ON. To disable: make TINY_FAST_PATH_DEFAULT=0
TINY_FAST_PATH_DEFAULT ?= 1
ifeq ($(TINY_FAST_PATH_DEFAULT),1)
CFLAGS += -DHAKMEM_TINY_FAST_PATH=1
CFLAGS_SHARED += -DHAKMEM_TINY_FAST_PATH=1
endif

# Phase 6-1.8: New 3-Layer Tiny front (A/B)
# To enable by default: make NEW_3LAYER_DEFAULT=1
NEW_3LAYER_DEFAULT ?= 0
ifeq ($(NEW_3LAYER_DEFAULT),1)
CFLAGS += -DHAKMEM_TINY_USE_NEW_3LAYER=1
CFLAGS_SHARED += -DHAKMEM_TINY_USE_NEW_3LAYER=1
endif

# Phase 7: Region-ID Direct Lookup (Header-based class_idx)
# Ultra-fast free: 3-5 instructions, 5-10 cycles (vs 500+ cycles current)
# Target: 40-80M ops/s (70-140% of System malloc)
# Default: ON (Phase 7 validated, Fix #16 stable, mimalloc strategy Phase 1)
# To disable: make HEADER_CLASSIDX=0
HEADER_CLASSIDX ?= 1
ifeq ($(HEADER_CLASSIDX),1)
CFLAGS += -DHAKMEM_TINY_HEADER_CLASSIDX=1
CFLAGS_SHARED += -DHAKMEM_TINY_HEADER_CLASSIDX=1
endif

# Phase 7 Task 2: Aggressive inline TLS cache access
# Enable: make HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1
# Expected: +10-15% performance (save 5-10 cycles per alloc)
# Default: ON (mimalloc strategy Phase 1)
AGGRESSIVE_INLINE ?= 1
ifeq ($(AGGRESSIVE_INLINE),1)
CFLAGS += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
CFLAGS_SHARED += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
endif

# Phase 7 Task 3: Pre-warm TLS cache
# Enable: make PREWARM_TLS=1
# Expected: Reduce first-allocation miss penalty
# Default: ON (mimalloc strategy Phase 1)
PREWARM_TLS ?= 1
ifeq ($(PREWARM_TLS),1)
CFLAGS += -DHAKMEM_TINY_PREWARM_TLS=1
CFLAGS_SHARED += -DHAKMEM_TINY_PREWARM_TLS=1
endif

# Performance Optimization: Fixed refill for class5 (256B)
# ChatGPT-sensei recommendation: Eliminate branches by fixing want=256
# Enable: make CLASS5_FIXED_REFILL=1
# Expected: Reduce branch mispredictions and instruction count
CLASS5_FIXED_REFILL ?= 0
ifeq ($(CLASS5_FIXED_REFILL),1)
CFLAGS += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
CFLAGS_SHARED += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
endif

# Phase 3 (2025-11-29): mincore removed entirely
# - mincore() syscall overhead eliminated (was +10.3% with DISABLE flag)
# - Phase 1b/2 registry-based validation provides sufficient safety
# - Dead code cleanup: DISABLE_MINCORE flag no longer needed

# Profile-guided optimization switches. Only the executable flags (CFLAGS)
# and the link flags are affected; CFLAGS_SHARED is left untouched.
ifdef PROFILE_GEN
CFLAGS += -fprofile-generate
LDFLAGS += -fprofile-generate
endif

ifdef PROFILE_USE
CFLAGS += -fprofile-use -Wno-error=coverage-mismatch
LDFLAGS += -fprofile-use
endif

# User hooks for appending extra flags without replacing the defaults above.
CFLAGS += $(EXTRA_CFLAGS)
LDFLAGS += $(EXTRA_LDFLAGS)

# Targets
TARGET = test_hakmem
# Objects linked into the test program. Names without a directory are built in
# the current directory from sources located through VPATH.
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o
OBJS = $(OBJS_BASE)

# Shared library
SHARED_LIB = libhakmem.so
# Objects for libhakmem.so. Each foo_shared.o is compiled from foo.c with
# CFLAGS_SHARED by the %_shared.o pattern rule further down.
SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o

# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1)
ifeq ($(POOL_TLS_PHASE1),1)
OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
SHARED_OBJS += pool_tls_shared.o pool_refill_shared.o pool_tls_arena_shared.o pool_tls_registry_shared.o pool_tls_remote_shared.o
CFLAGS += -DHAKMEM_POOL_TLS_PHASE1=1
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PHASE1=1
endif

# Pool TLS Phase 1.5b - Pre-warm optimization
ifeq ($(POOL_TLS_PREWARM),1)
CFLAGS += -DHAKMEM_POOL_TLS_PREWARM=1
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PREWARM=1
endif

# Pool TLS Bind Box - Registry lookup short-circuit (Phase 1.6)
ifeq ($(POOL_TLS_BIND_BOX),1)
OBJS += pool_tls_bind.o
SHARED_OBJS += pool_tls_bind_shared.o
CFLAGS += -DHAKMEM_POOL_TLS_BIND_BOX=1
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_BIND_BOX=1
endif

# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
endif
# Keep this list in step with OBJS / SHARED_OBJS / TINY_BENCH_OBJS: when the
# bind box is enabled, -DHAKMEM_POOL_TLS_BIND_BOX=1 is added to CFLAGS, so the
# matching object has to be linked here as well.
ifeq ($(POOL_TLS_BIND_BOX),1)
BENCH_HAKMEM_OBJS += pool_tls_bind.o
endif
BENCH_SYSTEM_OBJS = bench_allocators_system.o

# Command-style targets defined in this section; none of them names a file.
.PHONY: all bench run shared debug box-refactor test-box-refactor

# Conventional `all` target: builds the test program.
# (The goal used by a bare `make` is `help`, selected through .DEFAULT_GOAL.)
all: $(TARGET)

# Show key build-time switches for troubleshooting
.PHONY: print-flags
print-flags:
	@echo "==== Build Switches ===="
	@echo "FLAVOR = $(BUILD_FLAVOR)"
	@echo "POOL_TLS_PHASE1 = $(POOL_TLS_PHASE1)"
	@echo "POOL_TLS_PREWARM = $(POOL_TLS_PREWARM)"
	@echo "HEADER_CLASSIDX = $(HEADER_CLASSIDX)"
	@echo "AGGRESSIVE_INLINE = $(AGGRESSIVE_INLINE)"
	@echo "PREWARM_TLS = $(PREWARM_TLS)"
	@echo "USE_LTO = $(USE_LTO)"
	@echo "OPT_LEVEL = $(OPT_LEVEL)"
	@echo "NATIVE = $(NATIVE)"
	@echo "CFLAGS contains = $(filter -DHAKMEM_BUILD_%,$(CFLAGS))"

# Build test program
$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Build successful! Run with:"
	@echo " ./$(TARGET)"
	@echo "========================================="

# Compile C files
# Generic rule for executable objects. The listed headers force a rebuild of
# every object when any of them changes.
%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS) -c -o $@ $<

# Build benchmark programs
# The target-specific CFLAGS addition is inherited by every prerequisite that
# is built as part of `make bench`.
bench: CFLAGS += -DHAKMEM_PROF_STATIC=1
bench: $(BENCH_HAKMEM) $(BENCH_SYSTEM)
	@echo ""
	@echo "========================================="
	@echo "Benchmark programs built successfully!"
	@echo " $(BENCH_HAKMEM) - hakmem versions"
	@echo " $(BENCH_SYSTEM) - system/jemalloc/mimalloc"
	@echo ""
	@echo "Run benchmarks with:"
	@echo " bash bench_runner.sh --runs 10"
	@echo "========================================="

# hakmem version (with hakmem linked)
bench_allocators_hakmem.o: bench_allocators.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

$(BENCH_HAKMEM): $(BENCH_HAKMEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system version (without hakmem, for LD_PRELOAD testing)
bench_allocators_system.o: bench_allocators.c
	$(CC) $(CFLAGS) -c -o $@ $<

$(BENCH_SYSTEM): $(BENCH_SYSTEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# Tiny hot microbench (direct link vs system)
bench_tiny_hot_hakmem.o: bench_tiny_hot.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_tiny_hot_system.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -c -o $@ $<

# Reuses the allocator object list, minus the bench_allocators drivers.
bench_tiny_hot_hakmem: $(filter-out bench_allocators_hakmem.o bench_allocators_system.o,$(BENCH_HAKMEM_OBJS)) bench_tiny_hot_hakmem.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_tiny_hot_system: bench_tiny_hot_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc variant for tiny hot bench (direct link)
bench_tiny_hot_mi.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_mi_force.o: bench_mi_force.c
	$(CC) $(CFLAGS) -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_tiny_hot_mi: bench_tiny_hot_mi.o bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# hakmi variant for tiny hot bench (direct link via front API)
# malloc/free/realloc are redirected to the hakmi_* entry points by -D macros.
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

HAKMI_FRONT_OBJS = adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o

# ===== Convenience perf targets =====
.PHONY: pgo-gen-tinyhot pgo-use-tinyhot perf-help

# Generate PGO profile for Tiny Hot (32/100/60000) with SLL-first fast path
# The training run is best-effort: `|| true` keeps a failing run from failing
# the target.
pgo-gen-tinyhot:
	$(MAKE) PROFILE_GEN=1 bench_tiny_hot_hakmem
	HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0 \
	HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_SLL_MULTIPLIER=1 \
	./bench_tiny_hot_hakmem 32 100 60000 || true

# Use generated PGO profile for Tiny Hot binary
pgo-use-tinyhot:
	$(MAKE) PROFILE_USE=1 bench_tiny_hot_hakmem

# Show recommended runtime envs for bench reproducibility
perf-help:
	@echo "Recommended runtime envs (Tiny Hot / Larson):"
	@echo " export HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0"
	@echo " export HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1"
	@echo " export HAKMEM_SLL_MULTIPLIER=1"
	@echo "Build flags (overridable): OPT_LEVEL=$(OPT_LEVEL) USE_LTO=$(USE_LTO) NATIVE=$(NATIVE)"

# Explicit compile rules for hakmi front objects (require mimalloc headers)
adapters/hakmi_front/hakmi_front.o: adapters/hakmi_front/hakmi_front.c adapters/hakmi_front/hakmi_front.h include/hakmi/hakmi_api.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<

adapters/hakmi_front/hakmi_env.o: adapters/hakmi_front/hakmi_env.c adapters/hakmi_front/hakmi_env.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

adapters/hakmi_front/hakmi_tls_front.o: adapters/hakmi_front/hakmi_tls_front.c adapters/hakmi_front/hakmi_tls_front.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_tiny_hot_hakmi: bench_tiny_hot_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# Run test
run: $(TARGET)
	@echo ""
	@echo "========================================="
	@echo "Running hakmem PoC test..."
	@echo "========================================="
	@./$(TARGET)

# Shared library target (for LD_PRELOAD with mimalloc-bench)
# foo_shared.o is built from foo.c with CFLAGS_SHARED (adds -fPIC).
%_shared.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS_SHARED) -c -o $@ $<

$(SHARED_LIB): $(SHARED_OBJS)
	$(CC) -shared -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Shared library built successfully!"
	@echo " $(SHARED_LIB)"
	@echo ""
	@echo "Use with LD_PRELOAD:"
	@echo " LD_PRELOAD=./$(SHARED_LIB) "
	@echo "========================================="

shared: $(SHARED_LIB)

# Phase 6.15: Debug build target (verbose logging)
# -O0 is appended after the default -O level, so it is the one that wins.
# NOTE(review): the HAKMEM_TIMING override only reaches the compiler if the
# flag variable that references $(HAKMEM_TIMING) is expanded lazily - confirm.
debug: CFLAGS += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1
debug: CFLAGS_SHARED += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1
debug: HAKMEM_TIMING=1
debug: shared

# Phase 6-1.7: Box Theory Refactoring
# Clean rebuild of larson_hakmem with the box-refactor macro forced on.
box-refactor:
	$(MAKE) clean
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1" larson_hakmem
	@echo ""
	@echo "========================================="
	@echo "Built with Box Refactor (Phase 6-1.7)"
	@echo " larson_hakmem (with Box 1/5/6)"
	@echo "========================================="

# Convenience target: build and test box-refactor
test-box-refactor: box-refactor
	@echo ""
	@echo "========================================="
	@echo "Running Box Refactor Test..."
	@echo "========================================="
	./larson_hakmem 10 8 128 1024 1 12345 4

# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
# Allocator objects without any benchmark driver; each bench adds its own main.
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
endif
ifeq ($(POOL_TLS_BIND_BOX),1)
TINY_BENCH_OBJS += pool_tls_bind.o
endif
# Tiny benchmarks linked directly against the hakmem objects.
bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny built with hakmem"

bench_tiny_mt: bench_tiny_mt.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_mt built with hakmem"

# Burst+Pause bench (mimalloc stress pattern)
# One source, three objects: the allocator is selected with -DUSE_* macros.
bench_burst_pause_hakmem.o: bench_burst_pause.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_system.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mi.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_burst_pause_hakmem: bench_burst_pause_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_hakmem built"

bench_burst_pause_system: bench_burst_pause_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_system built"

bench_burst_pause_mi: bench_burst_pause_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mi built"

# Multi-threaded Burst+Pause variants
bench_burst_pause_mt_hakmem.o: bench_burst_pause_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_mt_system.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mt_mi.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_burst_pause_mt_hakmem: bench_burst_pause_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_hakmem built"

bench_burst_pause_mt_system: bench_burst_pause_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_system built"

bench_burst_pause_mt_mi: bench_burst_pause_mt_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_mi built"

# ----------------------------------------------------------------------------
# Hako FFI stub (optional; for front-end integration smoke)
# ----------------------------------------------------------------------------
# `hako_ffi_stub` is a convenience alias for the archive and never exists as a
# file, so it is declared phony.
.PHONY: hako_ffi_stub
hako_ffi_stub: libhako_ffi_stub.a
	@echo "✓ libhako_ffi_stub.a built"

hako_ffi_stub.o: src/hako/ffi_stub.c include/hako/ffi.h include/hako/types.h
	$(CC) $(CFLAGS) -c -o $@ $<

libhako_ffi_stub.a: hako_ffi_stub.o
	$(AR) rcs $@ $^

# Smoke test for Hako FFI stubs
# Depends on the real archive rather than the `hako_ffi_stub` alias: a
# prerequisite that is never created as a file forces a relink on every run.
hako_smoke: tests/hako_smoke.c libhako_ffi_stub.a
	$(CC) $(CFLAGS) -o $@ $^
	@echo "✓ hako_smoke built"

# ----------------------------------------------------------------------------
# Larson benchmarks (Google/mimalloc-bench style)
# ----------------------------------------------------------------------------
LARSON_SRC := mimalloc-bench/bench/larson/larson.cpp

# System variant (uses system malloc/free)
larson_system.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -c -o $@ $<

larson_system: larson_system.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# mimalloc variant (direct link to prebuilt mimalloc)
larson_mi.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

larson_mi: larson_mi.o bench_mi_force.o
	$(CXX) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
larson_hakmem.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -I core -c -o $@ $<

larson_hakmem: larson_hakmem.o $(TINY_BENCH_OBJS)
	$(CXX) -o $@ $^ $(LDFLAGS)

test_mf2: test_mf2.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ test_mf2 built with hakmem"

# bench_comprehensive.o with USE_HAKMEM flag
bench_comprehensive.o: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_comprehensive_hakmem: bench_comprehensive.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmem built with hakmem"

bench_comprehensive_system: bench_comprehensive.c
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	@echo "✓ bench_comprehensive_system built (system malloc)"

# mimalloc direct-link variant (no LD_PRELOAD dependency)
# $< is used instead of the literal file name so that a source located through
# VPATH is passed to the compiler with its resolved path.
bench_comprehensive_mi: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include \
	  $< -o $@ \
	  -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_mi built (direct link to mimalloc)"

# hakx (new hybrid) front API stubs
HAKX_OBJS = engines/hakx/hakx_api_stub.o engines/hakx/hakx_front_tiny.o engines/hakx/hakx_l25_tuner.o

engines/hakx/hakx_api_stub.o: engines/hakx/hakx_api_stub.c include/hakx/hakx_api.h engines/hakx/hakx_front_tiny.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

# hakx variant for tiny hot bench (direct link via hakx API)
# malloc/free/realloc are redirected to the hakx_*_fast entry points by -D macros.
bench_tiny_hot_hakx.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_tiny_hot_hakx: bench_tiny_hot_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx built (hakx API stub)"

# P0 variant with batch refill optimization
bench_tiny_hot_hakx_p0.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -DHAKMEM_TINY_P0_BATCH_REFILL=1 -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_tiny_hot_hakx_p0: bench_tiny_hot_hakx_p0.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx_p0 built (with P0 batch refill)"

# Comparison benchmark that calls hak_tiny_alloc/free directly
bench_tiny_hot_direct.o: bench_tiny_hot_direct.c core/hakmem_tiny.h
	$(CC) $(CFLAGS) -c -o $@ $<

bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"

# hakmi variant for comprehensive bench (front + mimalloc backend)
# The hakmi front objects are linked by the recipe, so they are declared as
# prerequisites; otherwise a fresh tree fails at link time and stale objects
# are never rebuilt. $< carries the (possibly VPATH-resolved) source path.
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h $(HAKMI_FRONT_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
	  $< -o $@ \
	  $(HAKMI_FRONT_OBJS) \
	  -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"

# hakx variant for comprehensive bench
bench_comprehensive_hakx: bench_comprehensive.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
	  $< -o $@ $(HAKX_OBJS) $(TINY_BENCH_OBJS) $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakx built (hakx API stub)"

# Random mixed bench (direct link variants)
bench_random_mixed_hakmem.o: bench_random_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_random_mixed_system.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_random_mixed_mi.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_random_mixed_system: bench_random_mixed_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# Fixed-size microbench (direct link variants)
bench_fixed_size_hakmem.o: benchmarks/src/fixed/bench_fixed_size.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_fixed_size_system.o: benchmarks/src/fixed/bench_fixed_size.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_fixed_size_hakmem: bench_fixed_size_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_fixed_size_system: bench_fixed_size_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_random_mixed_mi: bench_random_mixed_mi.o bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# hakmi variant for random mixed bench
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS) bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# hakx variant for random mixed bench
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_random_mixed_hakx: bench_random_mixed_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# VM-mixed bench around L2.5 (512KB–<2MB)
bench_vm_mixed_hakmem.o: bench_vm_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_vm_mixed_system.o: bench_vm_mixed.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_vm_mixed_hakmem: bench_vm_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_vm_mixed_system: bench_vm_mixed_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# Ultra-fast build for benchmarks: trims unwinding/PLT overhead and
# improves code locality. Use: `make bench_fast` then run the binary.
# bench_fast and perf_main are command targets, not files: declare them phony
# so a stray file with the same name cannot suppress the build.
.PHONY: bench_fast perf_main

# bench_fast: clean rebuild of the comprehensive and tiny-hot benches.
# The target-specific CFLAGS/LDFLAGS additions below also apply to every
# prerequisite built on behalf of this goal.
# NOTE(review): `clean` is an ordinary prerequisite here, so make does not
# order it against the sibling build prerequisites under `make -j`.
bench_fast: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fast: LDFLAGS += -Wl,-O2
bench_fast: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakx
	@echo "✓ bench_fast build complete"

# Perf-Main (safe) bench build: no bench-only macros; same O flags
perf_main: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
perf_main: LDFLAGS += -Wl,-O2
perf_main: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi bench_comprehensive_hakx bench_tiny_hot_hakx bench_random_mixed_hakx
	@echo "✓ perf_main build complete (no bench-only macros)"

# Mid/Large (8–32KiB) bench.
# One object per allocator flavour, all compiled from bench_mid_large.c;
# the -DUSE_* macro selects the flavour.
bench_mid_large_hakmem.o: bench_mid_large.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_system.o: bench_mid_large.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_mi.o: bench_mid_large.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_system: bench_mid_large_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc flavour: --no-as-needed keeps libmimalloc linked even though the
# linker would otherwise consider it unused.
bench_mid_large_mi: bench_mid_large_mi.o bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# hakx variant for mid/large (1T)
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_hakx: bench_mid_large_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# Mid/Large MT (8–32KiB) bench
bench_mid_large_mt_hakmem.o: bench_mid_large_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_mt_system.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_mt_mi.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_mt_system: bench_mid_large_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_mt_mi: bench_mid_large_mt_mi.o bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# hakx variant for mid/large MT
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_mt_hakx: bench_mid_large_mt_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# Fragmentation stress bench
bench_fragment_stress_hakmem.o: bench_fragment_stress.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_fragment_stress_system.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_fragment_stress_mi.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_fragment_stress_system: bench_fragment_stress_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_fragment_stress_mi: bench_fragment_stress_mi.o bench_mi_force.o
	$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)

# ----------------------------------------------------------------------------
# Bench build variants
# Each variant appends flags to CFLAGS/LDFLAGS through target-specific
# variables (which also apply to every prerequisite built for that goal) and
# lists `clean` as a prerequisite to force a rebuild.
# NOTE(review): `clean` is an ordinary prerequisite, so make does not order it
# against the sibling build prerequisites under `make -j`.
# ----------------------------------------------------------------------------
.PHONY: bench_tiny_front bench_front_strict bench_ultra_strict bench_ultra \
        bench_fastpath bench_sll_only \
        bench_fastpath_r8 bench_fastpath_r12 bench_fastpath_r16 bench_debug

# Code-generation flags shared by every bench variant below.
BENCH_LEAN_CFLAGS := -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables

# Bench build with Minimal Tiny Front (physically excludes optional front tiers)
bench_tiny_front: CFLAGS += $(BENCH_LEAN_CFLAGS) -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
bench_tiny_front: LDFLAGS += -Wl,-O2
bench_tiny_front: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_tiny_front build complete (HAKMEM_TINY_MINIMAL_FRONT=1)"

# Bench build with Strict Front (compile-out optional front tiers, baseline structure)
bench_front_strict: CFLAGS += $(BENCH_LEAN_CFLAGS) -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1
bench_front_strict: LDFLAGS += -Wl,-O2
bench_front_strict: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_front_strict build complete (HAKMEM_TINY_STRICT_FRONT=1)"

# Bench build with Ultra (SLL-only front) for Tiny-Hot microbench
# - Compiles hakmem bench with SLL-first/strict front, without Quick/FrontCache, stats off
# - Only affects bench binaries; normal builds unchanged
bench_ultra_strict: CFLAGS += $(BENCH_LEAN_CFLAGS) \
    -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra_strict: LDFLAGS += -Wl,-O2
bench_ultra_strict: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra_strict build complete (ULTRA+STRICT front)"

# Bench build with Ultra (SLL-only) but without STRICT/MINIMAL, Quick/FrontCache compiled out
bench_ultra: CFLAGS += $(BENCH_LEAN_CFLAGS) \
    -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra: LDFLAGS += -Wl,-O2
bench_ultra: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra build complete (ULTRA SLL-only, Quick/FrontCache OFF)"

# Bench build with explicit bench fast path (SLL→Mag→tiny refill), stats/quick/front off
bench_fastpath: CFLAGS += $(BENCH_LEAN_CFLAGS) \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_fastpath: LDFLAGS += -Wl,-O2
bench_fastpath: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath build complete (bench-only fast path)"

# Bench build: SLL-only (≤64B), with warmup
bench_sll_only: CFLAGS += $(BENCH_LEAN_CFLAGS) \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_WARMUP32=160 -DHAKMEM_TINY_BENCH_WARMUP64=192 -DHAKMEM_TINY_BENCH_WARMUP8=64 -DHAKMEM_TINY_BENCH_WARMUP16=96 \
    -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_sll_only: LDFLAGS += -Wl,-O2
bench_sll_only: clean bench_tiny_hot_hakmem
	@echo "✓ bench_sll_only build complete (bench-only SLL-only + warmup)"

# Bench-fastpath with explicit refill sizes (A/B)
bench_fastpath_r8: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=8 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 $(BENCH_LEAN_CFLAGS)
bench_fastpath_r8: LDFLAGS += -Wl,-O2
bench_fastpath_r8: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r8 build complete"

bench_fastpath_r12: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=12 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 $(BENCH_LEAN_CFLAGS)
bench_fastpath_r12: LDFLAGS += -Wl,-O2
bench_fastpath_r12: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r12 build complete"

bench_fastpath_r16: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=16 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 $(BENCH_LEAN_CFLAGS)
bench_fastpath_r16: LDFLAGS += -Wl,-O2
bench_fastpath_r16: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r16 build complete"

# PGO for bench-fastpath.
# The sub-make receives the parent's fully expanded CFLAGS/LDFLAGS plus the
# profiling flags on its command line.
.PHONY: pgo-benchfast-profile pgo-benchfast-build
pgo-benchfast-profile:
	@echo "========================================="
	@echo "PGO Profile (bench-fastpath)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench-fastpath profile data collected (*.gcda)"

pgo-benchfast-build:
	@echo "========================================="
	@echo "PGO Build (bench-fastpath)"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench-fastpath PGO build complete"

# Debug bench (with counters/prints)
bench_debug: CFLAGS += -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
bench_debug: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_debug build complete (debug counters enabled)"

# Debug build for random_mixed (enable counters for SFC stats).
# CFLAGS is passed as "$(CFLAGS) ..." rather than CFLAGS+=...: a command-line
# assignment overrides every CFLAGS assignment inside the makefile, so the
# base flags have to be carried over explicitly.
.PHONY: bench_random_mixed_debug
bench_random_mixed_debug:
	@echo "[debug] Rebuilding bench_random_mixed_hakmem with HAKMEM_DEBUG_COUNTERS=1"
	$(MAKE) clean >/dev/null
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_DEBUG_COUNTERS=1 -O2 -g" bench_random_mixed_hakmem >/dev/null
	@echo "✓ bench_random_mixed_debug built"

# ========================================
# Phase 7 convenience targets (the important constants are set by default)
# ========================================
# Phase 7: enable all optimizations (Task 1+2+3)
# Usage: make phase7
#    or: make phase7-bench for an automated benchmark
.PHONY: phase7 phase7-bench phase7-test
phase7:
	@echo "========================================="
	@echo "Phase 7: Building with all optimizations"
	@echo "========================================="
	@echo "Flags:"
	@echo " HEADER_CLASSIDX=1 (Task 1: Skip magic validation)"
	@echo " AGGRESSIVE_INLINE=1 (Task 2: Inline TLS macros)"
	@echo " PREWARM_TLS=1 (Task 3: Pre-warm cache)"
	@echo ""
	$(MAKE) clean
	$(MAKE) HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 \
	        bench_random_mixed_hakmem larson_hakmem
	@echo ""
	@echo "✓ Phase 7 build complete!"
	@echo " Run: make phase7-bench (quick benchmark)"
	@echo " Run: make phase7-test (sanity test)"

phase7-bench: phase7
	@echo ""
	@echo "========================================="
	@echo "Phase 7 Quick Benchmark"
	@echo "========================================="
	@echo "Larson 1T:"
	@./larson_hakmem 1 1 128 1024 1 12345 1 2>&1 | grep "Throughput ="
	@echo ""
	@echo "Random Mixed (128B, 256B, 1024B):"
	@./bench_random_mixed_hakmem 100000 128 1234567 2>&1 | tail -1
	@./bench_random_mixed_hakmem 100000 256 1234567 2>&1 | tail -1
	@./bench_random_mixed_hakmem 100000 1024 1234567 2>&1 | tail -1

# Runs all three checks, reports each one, and exits non-zero if any of them
# failed so callers and CI can detect a broken build.
phase7-test: phase7
	@echo ""
	@echo "========================================="
	@echo "Phase 7 Sanity Test"
	@echo "========================================="
	@status=0; \
	./larson_hakmem 1 1 128 1024 1 12345 1 >/dev/null 2>&1 && echo "✓ Larson 1T OK" || { echo "✗ Larson 1T FAILED"; status=1; }; \
	./bench_random_mixed_hakmem 10000 128 1234567 >/dev/null 2>&1 && echo "✓ Random Mixed 128B OK" || { echo "✗ Random Mixed 128B FAILED"; status=1; }; \
	./bench_random_mixed_hakmem 10000 1024 1234567 >/dev/null 2>&1 && echo "✓ Random Mixed 1024B OK" || { echo "✗ Random Mixed 1024B FAILED"; status=1; }; \
	exit $$status

# Clean
clean:
	rm -f $(OBJS) $(TARGET) $(BENCH_HAKMEM_OBJS) $(BENCH_SYSTEM_OBJS) $(BENCH_HAKMEM) $(BENCH_SYSTEM) $(SHARED_OBJS) $(SHARED_LIB) *.csv libhako_ffi_stub.a hako_ffi_stub.o
	rm -f bench_comprehensive.o bench_comprehensive_hakmem bench_comprehensive_system
	rm -f bench_tiny bench_tiny.o bench_tiny_mt bench_tiny_mt.o test_mf2 test_mf2.o bench_tiny_hakmem

# Legacy help text.
# The `help` target is defined at the top of this makefile and is the default
# goal; a second `help:` recipe here would override it, so this older listing
# lives under its own name.
.PHONY: help-legacy
help-legacy:
	@echo "hakmem PoC - Makefile targets (help-legacy):"
	@echo ""
	@echo "=== Phase 7 Optimizations (推奨) ==="
	@echo " make phase7 - Phase 7全最適化ビルド (Task 1+2+3)"
	@echo " make phase7-bench - Phase 7 + クイックベンチマーク"
	@echo " make phase7-test - Phase 7 + サニティテスト"
	@echo ""
	@echo "=== 基本ターゲット ==="
	@echo " make - Build the test program"
	@echo " make run - Build and run the test"
	@echo " make bench - Build benchmark programs"
	@echo " make shared - Build shared library (for LD_PRELOAD)"
	@echo " make clean - Clean build artifacts"
	@echo " make bench-mode - Run Tiny-focused PGO bench (scripts/bench_mode.sh)"
	@echo " make bench-all - Run (near) full mimalloc-bench with timeouts"
	@echo ""
	@echo "Benchmark workflow:"
	@echo " 1. make bench"
	@echo " 2. bash bench_runner.sh --runs 10"
	@echo " 3. python3 analyze_results.py benchmark_results.csv"
	@echo ""
	@echo "mimalloc-bench workflow:"
	@echo " 1. make shared"
	@echo " 2. LD_PRELOAD=./libhakmem.so "

# Step 2: PGO (Profile-Guided Optimization) targets
pgo-profile:
	@echo "========================================="
	@echo "Step 2b: PGO Profile Collection"
	@echo "========================================="
	rm -f *.gcda *.o bench_comprehensive_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto" LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_comprehensive_hakmem
	@echo "Running profile workload..."
	HAKMEM_WRAP_TINY=1 ./bench_comprehensive_hakmem 2>&1 | grep -E "(Test 1:|Throughput:)" | head -6
	@echo "✓ Profile data collected (*.gcda files)"

pgo-build:
	@echo "========================================="
	@echo "Step 2c: PGO Optimized Build (LTO+PGO)"
	@echo "========================================="
	rm -f *.o bench_comprehensive_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto" LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_comprehensive_hakmem
	@echo "✓ LTO+PGO optimized build complete"

# PGO for tiny_hot (Strict Front recommended)
.PHONY: pgo-hot-profile pgo-hot-build
pgo-hot-profile:
	@echo "========================================="
	@echo "PGO Profile (tiny_hot) with Strict Front"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (sizes 16/32/64, batch=100, cycles=60000)"
	HAKMEM_TINY_SPECIALIZE_MASK=0x02 ./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ tiny_hot profile data collected (*.gcda)"

pgo-hot-build:
	@echo "========================================="
	@echo "PGO Build (tiny_hot) with Strict Front"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ tiny_hot PGO build complete"

# Phase 8.2: Memory profiling build (verbose memory breakdown)
bench-memory: CFLAGS += -DHAKMEM_DEBUG_MEMORY
bench-memory: clean bench_comprehensive_hakmem
	@echo ""
	@echo "========================================="
	@echo "Memory profiling build complete!"
	@echo " Run: ./bench_comprehensive_hakmem"
	@echo " Memory breakdown will be printed at end"
	@echo "========================================="

.PHONY: all run bench shared debug clean help pgo-profile pgo-build bench-memory

# PGO for shared library (LD_PRELOAD)
# Step 1: Build instrumented shared lib and collect profile
.PHONY: pgo-profile-shared pgo-build-shared
pgo-profile-shared:
	@echo "========================================="
	@echo "Step: PGO Profile Collection (shared lib)"
	@echo "========================================="
	rm -f *_shared.gcda *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-generate -flto" LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" shared
	@echo "Running profile workload (LD_PRELOAD)..."
	HAKMEM_WRAP_TINY=1 LD_PRELOAD=./$(SHARED_LIB) ./bench_comprehensive_system 2>&1 | grep -E "(SIZE CLASS:|Throughput:)" | head -20 || true
	@echo "✓ Profile data collected (*.gcda for *_shared)"

# Step 2: Build optimized shared lib using profile
pgo-build-shared:
	@echo "========================================="
	@echo "Step: PGO Optimized Build (shared lib)"
	@echo "========================================="
	rm -f *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-use -flto -Wno-error=coverage-mismatch" LDFLAGS="$(LDFLAGS) -fprofile-use -flto" shared
	@echo "✓ LTO+PGO optimized shared library complete"

# Convenience: run Bench Mode script
.PHONY: bench-mode bench-all
bench-mode:
	@bash scripts/bench_mode.sh

bench-all:
	@bash scripts/run_all_benches_with_timeouts.sh

# PGO for bench_sll_only
.PHONY: pgo-benchsll-profile pgo-benchsll-build
pgo-benchsll-profile:
	@echo "========================================="
	@echo "PGO Profile (bench_sll_only)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench_sll_only profile data collected (*.gcda)"

pgo-benchsll-build:
	@echo "========================================="
	@echo "PGO Build (bench_sll_only)"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench_sll_only PGO build complete"

# Variant: SLL-only with REFILL=12 and WARMUP32=192 (tune for 32B)
.PHONY: pgo-benchsll-r12w192-profile pgo-benchsll-r12w192-build
pgo-benchsll-r12w192-profile:
	@echo "========================================="
	@echo "PGO Profile (bench_sll_only r12 w32=192)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ r12 w32=192 profile data collected (*.gcda)"

pgo-benchsll-r12w192-build:
	@echo "========================================="
	@echo "PGO Build (bench_sll_only r12 w32=192)"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ r12 w32=192 PGO build complete"

# Absolute path of the mimalloc release build. $(CURDIR) is make's own notion
# of the working directory, so no shell is spawned at parse time.
MI_RPATH := $(CURDIR)/mimalloc-bench/extern/mi/out/release

# Sanitized builds (compiler-assisted debugging)
.PHONY: asan-larson ubsan-larson tsan-larson
SAN_ASAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_ASAN_LDFLAGS = -fsanitize=address,undefined

SAN_UBSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_UBSAN_LDFLAGS = -fsanitize=undefined

# Allocator-enabled sanitizer variants (no FORCE_LIBC)
# FIXME 2025-11-07: TLS initialization order issue - using libc for now
SAN_ASAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_ASAN_ALLOC_LDFLAGS = -fsanitize=address,undefined

SAN_UBSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_UBSAN_ALLOC_LDFLAGS = -fsanitize=undefined

SAN_TSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_TSAN_LDFLAGS = -fsanitize=thread

# Variant: TSan with allocator enabled (no FORCE_LIBC)
# FIXME 2025-11-07: TLS initialization order issue - using libc for now
SAN_TSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_TSAN_ALLOC_LDFLAGS = -fsanitize=thread

# Each *-larson target cleans, rebuilds larson_hakmem with the sanitizer flags
# passed through EXTRA_CFLAGS/EXTRA_LDFLAGS, then copies the binary to a
# sanitizer-specific name.
asan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_ASAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_ASAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_asan
	@echo "✓ Built larson_hakmem_asan with ASan/UBSan"

ubsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_UBSAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_UBSAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan
	@echo "✓ Built larson_hakmem_ubsan with UBSan"

tsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_TSAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_TSAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan
	@echo "✓ Built larson_hakmem_tsan with TSan (no ASan)"

.PHONY: tsan-larson-alloc
tsan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_TSAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_TSAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan_alloc
	@echo "✓ Built larson_hakmem_tsan_alloc with TSan (allocator enabled)"

.PHONY: asan-larson-alloc ubsan-larson-alloc
asan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_ASAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_ASAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_asan_alloc
	@echo "✓ Built larson_hakmem_asan_alloc with ASan/UBSan (allocator enabled)"

ubsan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_UBSAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_UBSAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan_alloc
	@echo "✓ Built larson_hakmem_ubsan_alloc with UBSan (allocator enabled)"

# Sanitized shared libraries for LD_PRELOAD (allocator enabled)
.PHONY: asan-shared-alloc tsan-shared-alloc
asan-shared-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) SHARED_LIB=libhakmem_asan.so \
	  CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_ASAN_ALLOC_CFLAGS)" \
	  LDFLAGS="$(LDFLAGS) $(SAN_ASAN_ALLOC_LDFLAGS)" shared >/dev/null
	@echo "✓ Built libhakmem_asan.so (LD_PRELOAD, allocator enabled)"

tsan-shared-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) SHARED_LIB=libhakmem_tsan.so \
	  CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_TSAN_ALLOC_CFLAGS)" \
	  LDFLAGS="$(LDFLAGS) $(SAN_TSAN_ALLOC_LDFLAGS)" shared >/dev/null
	@echo "✓ Built libhakmem_tsan.so (LD_PRELOAD, allocator enabled)"

# TSan multithread smoke linking against allocator (direct link)
.PHONY: mt-smoke-tsan
mt-smoke-tsan:
	@$(MAKE) clean >/dev/null
	@$(MAKE) $(TINY_BENCH_OBJS) >/dev/null
	$(CC) -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
	  -o mt_smoke tests/mt_smoke.c $(TINY_BENCH_OBJS) $(LDFLAGS) -fsanitize=thread
	@echo "✓ Built mt_smoke (TSan)"

# ----------------------------------------------------------------------------
# Convenience targets (debug/route/3layer)
# ----------------------------------------------------------------------------
.PHONY: larson_hakmem_3layer larson_hakmem_route

# ----------------------------------------------------------------------------
# Runtime helpers: sanitizer-safe runners for debugging/bench
# ----------------------------------------------------------------------------
# Default run params (overridable):
THREADS ?= 4
SLEEP ?= 10
MIN ?= 8
MAX ?= 128
CHPT ?= 1024
ROUNDS ?= 1
SEED ?= 12345

# Resolve libasan from the active toolchain
ASAN_LIB := $(shell $(CC) -print-file-name=libasan.so)

# LD_PRELOAD value used by the asan-preload runners: libasan first, then the
# sanitized allocator. $(CURDIR) is set by make itself, whereas $(PWD) is only
# whatever the environment happened to export.
ASAN_PRELOAD_CHAIN = $(ASAN_LIB):$(CURDIR)/libhakmem_asan.so

# asan-shared-alloc runs `make clean` internally, so it is built to completion
# before larson_system instead of alongside it in one parallel sub-make.
.PHONY: asan-preload-run
asan-preload-run:
	@$(MAKE) -j asan-shared-alloc >/dev/null
	@$(MAKE) -j larson_system >/dev/null
	@echo "[asan-preload] LD_PRELOAD chain: $(ASAN_PRELOAD_CHAIN)"
	@echo "[asan-preload] Running: ./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
	@LSAN_OPTIONS=detect_leaks=0 \
	LD_PRELOAD="$(ASAN_PRELOAD_CHAIN)" \
	./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)

.PHONY: asan-preload-mailbox-lite
asan-preload-mailbox-lite:
	@$(MAKE) -j asan-shared-alloc >/dev/null
	@$(MAKE) -j larson_system >/dev/null
	@echo "[asan-preload-mailbox-lite] (short-run)"
	@echo "[asan-preload-mailbox-lite] Running: ./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)"
	@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
	HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
	LSAN_OPTIONS=detect_leaks=0 \
	LD_PRELOAD="$(ASAN_PRELOAD_CHAIN)" \
	./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)

.PHONY: ubsan-mailbox-run
ubsan-mailbox-run:
	@$(MAKE) -j ubsan-larson-alloc >/dev/null
	@echo "[ubsan-mailbox] Running: ./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
	@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
	./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)

# ----------------------------------------------------------------------------
# HAKMEM direct-link benches & reproducer helpers
# ----------------------------------------------------------------------------
.PHONY: bench-hakmem
bench-hakmem:
	@$(MAKE) -j larson_hakmem >/dev/null
	@echo "== hakmem 1T ==" && ./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) 1
	@echo "== hakmem $(THREADS)T ==" && ./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)

.PHONY: bench-hakmem-hot64
bench-hakmem-hot64:
	@$(MAKE) -j larson_hakmem >/dev/null
	@echo "== hakmem HOT64 1T ==" && HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) 1
	@echo "== hakmem HOT64 $(THREADS)T ==" && HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) $(THREADS)

# A/B sweep over HAKMEM_TINY_FAST_CAP; `|| true` keeps the loop going when a
# single run fails.
.PHONY: bench-hakmem-hot64-fastcap-ab
bench-hakmem-hot64-fastcap-ab:
	@$(MAKE) -j larson_hakmem >/dev/null
	@for cap in 8 16 32; do \
	  echo "== HOT64 FastCap=$$cap $(THREADS)T (short) =="; \
	  HAKMEM_TINY_REFILL_COUNT_HOT=64 HAKMEM_TINY_FAST_CAP=$$cap \
	  HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
	  ./larson_hakmem 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true; \
	done

.PHONY: valgrind-hakmem-hot64-lite
valgrind-hakmem-hot64-lite:
	@$(MAKE) clean >/dev/null
	@$(MAKE) OPT_LEVEL=0 USE_LTO=0 NATIVE=0 larson_hakmem >/dev/null
	@echo "== valgrind HOT64 lite $(THREADS)T =="
	@HAKMEM_TINY_REFILL_COUNT_HOT=64 \
	valgrind --quiet --leak-check=full --show-leak-kinds=all \
	--errors-for-leak-kinds=all --track-origins=yes --error-exitcode=99 \
	./larson_hakmem 2 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true

# ----------------------------------------------------------------------------
# Unit tests (Box-level)
# ----------------------------------------------------------------------------
.PHONY: unit unit-run
UNIT_BIN_DIR := tests/bin
UNIT_BINS := $(UNIT_BIN_DIR)/test_super_registry $(UNIT_BIN_DIR)/test_ready_ring $(UNIT_BIN_DIR)/test_mailbox_box

unit: $(UNIT_BINS)
	@echo "OK: unit tests built -> $(UNIT_BINS)"

$(UNIT_BIN_DIR)/test_super_registry: tests/unit/test_super_registry.c core/hakmem_super_registry.c core/hakmem_tiny_superslab.c
	@mkdir -p $(UNIT_BIN_DIR)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

$(UNIT_BIN_DIR)/test_ready_ring: tests/unit/test_ready_ring.c
	@mkdir -p $(UNIT_BIN_DIR)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

$(UNIT_BIN_DIR)/test_mailbox_box: tests/unit/test_mailbox_box.c tests/unit/mailbox_test_stubs.c core/box/mailbox_box.c
	@mkdir -p $(UNIT_BIN_DIR)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

unit-run: unit
	@echo "Running unit: test_super_registry" && $(UNIT_BIN_DIR)/test_super_registry
	@echo "Running unit: test_ready_ring" && $(UNIT_BIN_DIR)/test_ready_ring
	@echo "Running unit: test_mailbox_box" && $(UNIT_BIN_DIR)/test_mailbox_box

# Build 3-layer Tiny (new front) with low optimization for debug/testing
larson_hakmem_3layer:
	$(MAKE) clean
	$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 larson_hakmem
	@echo "========================================="
	@echo "Built larson_hakmem with NEW 3-LAYER front"
	@echo " NEW_3LAYER_DEFAULT=1, LTO=OFF, O1"
	@echo "========================================="

# Build 3-layer + route fingerprint enabled (runtime ring still needs ENV)
larson_hakmem_route:
	$(MAKE) clean
	$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 \
	  EXTRA_CFLAGS+=" -DHAKMEM_ROUTE=1" larson_hakmem
	@echo "========================================="
	@echo "Built larson_hakmem (3-layer + route)"
	@echo " HAKMEM_ROUTE build-flag set; runtime ENV still controls output"
	@echo "========================================="

# ----------------------------------------------------------------------------
# Pool TLS Benchmarks (Phase 1.5b)
# ----------------------------------------------------------------------------
# Build HAKMEM shared library first to satisfy -lhakmem
bench_pool_tls_hakmem: benchmarks/bench_pool_tls.c $(SHARED_LIB)
	$(CC) $(CFLAGS) -o $@ $< -L. -lhakmem $(LDFLAGS)

bench_pool_tls_system: benchmarks/bench_pool_tls.c
	$(CC) $(CFLAGS) -DUSE_SYSTEM_MALLOC -o $@ $< $(LDFLAGS)

.PHONY: bench-pool-tls
bench-pool-tls: bench_pool_tls_hakmem bench_pool_tls_system
	@echo "========================================="
	@echo "Pool TLS Benchmark (8KB-52KB allocations)"
	@echo "========================================="
	@echo ""
	@echo "== HAKMEM (Phase 1.5b Pre-warm) =="
	@./bench_pool_tls_hakmem 1 100000 256 42
	@echo ""
	@echo "== System malloc =="
	@./bench_pool_tls_system 1 100000 256 42
	@echo ""
	@echo "========================================="

# Phase E1-CORRECT Debug Bench (minimal test)
test_simple_e1: test_simple_e1.o $(HAKMEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

test_simple_e1.o: test_simple_e1.c
	$(CC) $(CFLAGS) -c -o $@ $<

# ========================================
# Phase 4: PGO (Profile-Guided Optimization) Targets
# ========================================

# Phase 4-Step1: PGO Profile Build
# Builds binaries with -fprofile-generate for profiling
.PHONY: pgo-tiny-profile
pgo-tiny-profile:
	@echo "========================================="
	@echo "Phase 4: Building PGO Profile Binaries"
	@echo "========================================="
	$(MAKE) clean
	$(MAKE) PROFILE_GEN=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
	@echo ""
	@echo "✓ PGO profile binaries built"
	@echo "Next: Run 'make pgo-tiny-collect' to collect profile data"
	@echo ""

# Phase 4-Step1: PGO Profile Collection
# Executes representative workloads to generate .gcda files
.PHONY: pgo-tiny-collect
pgo-tiny-collect:
	@echo "========================================="
	@echo "Phase 4: Collecting PGO Profile Data"
	@echo "========================================="
	./scripts/box/pgo_tiny_profile_box.sh

# Phase 4-Step1: PGO Optimized Build
# Builds binaries with -fprofile-use for optimization
.PHONY: pgo-tiny-build
pgo-tiny-build:
	@echo "========================================="
	@echo "Phase 4: Building PGO-Optimized Binaries"
	@echo "========================================="
	@echo "Building optimized binaries..."
	$(MAKE) clean
	$(MAKE) PROFILE_USE=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
	@echo ""
	@echo "✓ PGO-optimized binaries built"
	@echo "Next: Run './bench_random_mixed_hakmem 1000000 256 42' to test"
	@echo ""

# Phase 4-Step1: Full PGO Workflow
# Complete workflow: profile → collect → build → test
# The three stages must run strictly in this order, so they are invoked as
# sequential sub-makes; as plain prerequisites `make -j` could run them
# concurrently.
.PHONY: pgo-tiny-full
pgo-tiny-full:
	$(MAKE) pgo-tiny-profile
	$(MAKE) pgo-tiny-collect
	$(MAKE) pgo-tiny-build
	@echo "========================================="
	@echo "Phase 4: PGO Full Workflow Complete"
	@echo "========================================="
	@echo "Testing PGO-optimized binary..."
	@echo ""
	./bench_random_mixed_hakmem 1000000 256 42
	@echo ""
	@echo "✓ PGO optimization complete!"
	@echo ""