Files
hakmem/Makefile
Claude b4e4416544 Add mimalloc-bench submodule and simplify larson_hakmem build
Changes:
- Add mimalloc-bench as git submodule for Larson benchmark source
- Simplify Makefile: Remove shim layer (hakmem.o provides malloc/free directly)
- Enable larson.sh script to build and run Larson benchmarks

This allows running: ./scripts/larson.sh hakmem --profile tinyhot_tput 2 4
2025-11-05 03:43:50 +00:00

787 lines
40 KiB
Makefile
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Makefile for hakmem PoC

# C and C++ compilers.
CC  = gcc
CXX = g++

# Directory layout (2025-11-01 reorganization).
SRC_DIR       := core
BENCH_SRC     := benchmarks/src
TEST_SRC      := tests
BUILD_DIR     := build
BENCH_BIN_DIR := benchmarks/bin

# Directories make searches for prerequisites that are named without a path
# and are not found in the current directory. Order is the search order.
VPATH := $(SRC_DIR) \
         $(BENCH_SRC)/tiny \
         $(BENCH_SRC)/mid \
         $(BENCH_SRC)/comprehensive \
         $(BENCH_SRC)/stress \
         $(TEST_SRC)/unit \
         $(TEST_SRC)/integration \
         $(TEST_SRC)/stress
# Timing instrumentation is off by default for performance.
# Enable with: make HAKMEM_TIMING=1
HAKMEM_TIMING ?= 0

# Phase 6.25: aggressive optimization knobs (all default ON, all overridable).
OPT_LEVEL ?= 3
USE_LTO   ?= 1
NATIVE    ?= 1

# TLS ring capacity for the shared build, e.g.: make shared RING_CAP=32
RING_CAP ?= 32

# Flags common to the static and the shared (-fPIC) builds.
# Simply expanded (:=), so $(HAKMEM_TIMING) is read once, right here.
BASE_CFLAGS := -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L \
 -D_GLIBC_USE_ISOC2X=0 -D__isoc23_strtol=strtol -D__isoc23_strtoll=strtoll \
 -D__isoc23_strtoul=strtoul -D__isoc23_strtoull=strtoull -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) \
 -ffast-math -funroll-loops -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
 -fno-semantic-interposition -I core

# Phase 6.25: aggressive optimization + TLS ring extension (shared build).
CFLAGS        = -O$(OPT_LEVEL) $(BASE_CFLAGS)
CFLAGS_SHARED = -O$(OPT_LEVEL) $(BASE_CFLAGS) -fPIC -DPOOL_TLS_RING_CAP=$(RING_CAP)
LDFLAGS       = -lm -lpthread

# Tune for the build machine.
ifeq ($(NATIVE),1)
CFLAGS        += -march=native -mtune=native -fno-plt
CFLAGS_SHARED += -march=native -mtune=native -fno-plt
endif

# Link-time optimization: needed at both compile and link time.
ifeq ($(USE_LTO),1)
CFLAGS        += -flto
CFLAGS_SHARED += -flto
LDFLAGS       += -flto
endif
# Box Theory refactor for Tiny (Phase 6-1.7) is enabled by default.
# Best performing option at the moment (4.19M ops/s).
# Opt out to the legacy path with: make BOX_REFACTOR_DEFAULT=0
BOX_REFACTOR_DEFAULT ?= 1
ifeq ($(BOX_REFACTOR_DEFAULT),1)
CFLAGS        += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
endif

# Phase 6-2: Ultra-Simple was tested and found slower (-15%).
#   Ultra-Simple: 3.56M ops/s, BOX_REFACTOR: 4.19M ops/s
#   Both share the same superslab_refill bottleneck (29% CPU).
# Opt in with: make ULTRA_SIMPLE_DEFAULT=1
ULTRA_SIMPLE_DEFAULT ?= 0
ifeq ($(ULTRA_SIMPLE_DEFAULT),1)
CFLAGS        += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
endif

# Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path).
# Target: 70-80% of System tcache (95-108 M ops/s).
# Enabled by default for testing.
TINY_FAST_PATH_DEFAULT ?= 1
ifeq ($(TINY_FAST_PATH_DEFAULT),1)
CFLAGS        += -DHAKMEM_TINY_FAST_PATH=1
CFLAGS_SHARED += -DHAKMEM_TINY_FAST_PATH=1
endif

# Profile-guided optimization. Defining PROFILE_GEN instruments the build;
# defining PROFILE_USE consumes the collected profile. Only CFLAGS and
# LDFLAGS are extended here, not CFLAGS_SHARED.
ifdef PROFILE_GEN
CFLAGS  += -fprofile-generate
LDFLAGS += -fprofile-generate
endif
ifdef PROFILE_USE
CFLAGS  += -fprofile-use -Wno-error=coverage-mismatch
LDFLAGS += -fprofile-use
endif

# Caller-supplied additions, appended last.
CFLAGS  += $(EXTRA_CFLAGS)
LDFLAGS += $(EXTRA_LDFLAGS)
# Targets
TARGET = test_hakmem

# Core allocator objects, linked into the test program and the benchmark.
HAKMEM_CORE_OBJS = \
    hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o \
    hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o \
    hakmem_tiny.o hakmem_tiny_superslab.o tiny_mailbox.o tiny_sticky.o \
    tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o \
    hakmem_tiny_stats.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o \
    hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o hakmem_mid_mt.o hakmem_super_registry.o \
    hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o \
    hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o \
    hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o \
    hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o \
    hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o

OBJS = $(HAKMEM_CORE_OBJS) test_hakmem.o

# Shared library
SHARED_LIB = libhakmem.so

# Core objects that have no *_shared.o counterpart in the shared library.
# NOTE(review): whether leaving these out of libhakmem.so is intentional
# cannot be determined from this file - confirm.
SHARED_OMITTED_OBJS = hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
    hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o

# foo.o -> foo_shared.o for every core object that is not omitted.
SHARED_OBJS = $(patsubst %.o,%_shared.o,$(filter-out $(SHARED_OMITTED_OBJS),$(HAKMEM_CORE_OBJS)))

# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS = $(HAKMEM_CORE_OBJS) bench_allocators_hakmem.o
BENCH_SYSTEM_OBJS = bench_allocators_system.o
# Default target. Declared phony so a stray file named `all` cannot make
# it look up to date.
all: $(TARGET)
.PHONY: all

# Link the test program from $(OBJS) and print a usage hint.
$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Build successful! Run with:"
	@echo " ./$(TARGET)"
	@echo "========================================="

# Compile C files. Every object depends on the full set of headers listed
# here, so touching any of them rebuilds all objects made by this rule.
# Sources and headers named without a path are located through VPATH.
%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h \
     hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h \
     hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h \
     hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS) -c -o $@ $<
# Build benchmark programs. `bench` is a command, not a file, so it is
# declared phony. The target-specific CFLAGS addition also applies to
# everything built as a prerequisite of `bench`.
.PHONY: bench
bench: CFLAGS += -DHAKMEM_PROF_STATIC=1
bench: $(BENCH_HAKMEM) $(BENCH_SYSTEM)
	@echo ""
	@echo "========================================="
	@echo "Benchmark programs built successfully!"
	@echo " $(BENCH_HAKMEM) - hakmem versions"
	@echo " $(BENCH_SYSTEM) - system/jemalloc/mimalloc"
	@echo ""
	@echo "Run benchmarks with:"
	@echo " bash bench_runner.sh --runs 10"
	@echo "========================================="

# hakmem version (with hakmem linked): same source, compiled with -DUSE_HAKMEM.
bench_allocators_hakmem.o: bench_allocators.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

$(BENCH_HAKMEM): $(BENCH_HAKMEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system version (without hakmem, for LD_PRELOAD testing)
bench_allocators_system.o: bench_allocators.c
	$(CC) $(CFLAGS) -c -o $@ $<

$(BENCH_SYSTEM): $(BENCH_SYSTEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
# Tiny hot microbench: one source, built once per allocator variant.

# --- hakmem (direct link) ---
bench_tiny_hot_hakmem.o: bench_tiny_hot.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

# Links the core objects only: both bench_allocators driver objects are
# filtered out of BENCH_HAKMEM_OBJS.
bench_tiny_hot_hakmem: $(filter-out bench_allocators_hakmem.o bench_allocators_system.o,$(BENCH_HAKMEM_OBJS)) bench_tiny_hot_hakmem.o
	$(CC) -o $@ $^ $(LDFLAGS)

# --- system allocator ---
bench_tiny_hot_system.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_tiny_hot_system: bench_tiny_hot_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# --- mimalloc (direct link against the library under mimalloc-bench/extern/mi) ---
bench_tiny_hot_mi.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_tiny_hot_mi: bench_tiny_hot_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# --- hakmi (direct link via the front API) ---
# malloc/free/realloc are redirected to their hakmi_* counterparts with -D,
# and the API header is force-included so those names are declared.
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

# Objects that make up the hakmi front adapter.
HAKMI_FRONT_OBJS = adapters/hakmi_front/hakmi_front.o \
                   adapters/hakmi_front/hakmi_env.o \
                   adapters/hakmi_front/hakmi_tls_front.o
# ===== Convenience perf targets =====
.PHONY: pgo-gen-tinyhot pgo-use-tinyhot perf-help

# Generate a PGO profile for Tiny Hot (32/100/60000) with the SLL-first fast
# path. The sub-make builds an instrumented binary (PROFILE_GEN=1); the run
# that follows is allowed to fail (`|| true`).
pgo-gen-tinyhot:
	$(MAKE) PROFILE_GEN=1 bench_tiny_hot_hakmem
	HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0 \
	HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_TINY_HOTMAG=0 HAKMEM_SLL_MULTIPLIER=1 \
	./bench_tiny_hot_hakmem 32 100 60000 || true

# Rebuild the Tiny Hot binary using the profile generated above.
pgo-use-tinyhot:
	$(MAKE) PROFILE_USE=1 bench_tiny_hot_hakmem

# Print the recommended runtime environment for reproducible bench runs,
# together with the build flags currently in effect.
perf-help:
	@echo "Recommended runtime envs (Tiny Hot / Larson):"
	@echo " export HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0"
	@echo " export HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_TINY_HOTMAG=0"
	@echo " export HAKMEM_SLL_MULTIPLIER=1"
	@echo "Build flags (overridable): OPT_LEVEL=$(OPT_LEVEL) USE_LTO=$(USE_LTO) NATIVE=$(NATIVE)"
# Explicit compile rules for the hakmi front objects. hakmi_front.o and
# hakmi_tls_front.o are additionally given the mimalloc include directory;
# hakmi_env.o is not.
adapters/hakmi_front/hakmi_front.o: adapters/hakmi_front/hakmi_front.c adapters/hakmi_front/hakmi_front.h include/hakmi/hakmi_api.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<

adapters/hakmi_front/hakmi_env.o: adapters/hakmi_front/hakmi_env.c adapters/hakmi_front/hakmi_env.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

adapters/hakmi_front/hakmi_tls_front.o: adapters/hakmi_front/hakmi_tls_front.c adapters/hakmi_front/hakmi_tls_front.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<

# Tiny hot bench through the hakmi front, linked against libmimalloc.
bench_tiny_hot_hakmi: bench_tiny_hot_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
# Run test: build $(TARGET) if needed, then execute it. Declared phony so
# a file or directory named `run` cannot suppress the run.
.PHONY: run
run: $(TARGET)
	@echo ""
	@echo "========================================="
	@echo "Running hakmem PoC test..."
	@echo "========================================="
	@./$(TARGET)
# Shared library target (for LD_PRELOAD with mimalloc-bench).
#
# foo_shared.o is compiled from foo.c with CFLAGS_SHARED (-fPIC build).
# The header list matches the one on the static `%.o` rule: it previously
# lacked hakmem_tiny_superslab.h, hakmem_mid_mt.h and hakmem_super_registry.h,
# so changes to those headers rebuilt the static objects but left the
# shared ones stale.
%_shared.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h \
            hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h \
            hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h \
            hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS_SHARED) -c -o $@ $<

$(SHARED_LIB): $(SHARED_OBJS)
	$(CC) -shared -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Shared library built successfully!"
	@echo " $(SHARED_LIB)"
	@echo ""
	@echo "Use with LD_PRELOAD:"
	@echo " LD_PRELOAD=./$(SHARED_LIB) <command>"
	@echo "========================================="

# Convenience alias for the library; a command name, hence phony.
.PHONY: shared
shared: $(SHARED_LIB)
# Phase 6.15: Debug build target (verbose logging)
#
# BASE_CFLAGS is simply expanded (:=), so it already contains
# -DHAKMEM_DEBUG_TIMING=<value of HAKMEM_TIMING at parse time>. Setting
# HAKMEM_TIMING=1 for this target alone therefore never reached the compiler.
# Timing is now turned on explicitly: -U drops the earlier definition so
# the following -D does not trigger a macro-redefinition warning.
# The later -O0 overrides the -O$(OPT_LEVEL) that comes first in the flags.
.PHONY: debug
debug: CFLAGS += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=1
debug: CFLAGS_SHARED += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=1
# Kept so anything that reads $(HAKMEM_TIMING) lazily still sees 1.
debug: HAKMEM_TIMING=1
debug: shared
# Phase 6-1.7: Box Theory Refactoring
# Cleans, then rebuilds larson_hakmem in a sub-make. CFLAGS is passed on the
# sub-make command line, fully expanded, with the box-refactor define added.
box-refactor:
	$(MAKE) clean
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1" larson_hakmem
	@echo ""
	@echo "========================================="
	@echo "Built with Box Refactor (Phase 6-1.7)"
	@echo " larson_hakmem (with Box 1/5/6)"
	@echo "========================================="

# Convenience target: build with box-refactor, then run one Larson
# configuration.
test-box-refactor: box-refactor
	@echo ""
	@echo "========================================="
	@echo "Running Box Refactor Test..."
	@echo "========================================="
	./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
#
# The core allocator objects are BENCH_HAKMEM_OBJS minus that benchmark's
# own driver object. Deriving the list (instead of keeping a further
# hand-written copy) means an object added to the core list can no longer be
# forgotten here. Expands to exactly the list that used to be spelled out.
TINY_BENCH_OBJS = $(filter-out bench_allocators_hakmem.o,$(BENCH_HAKMEM_OBJS))

bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny built with hakmem"

bench_tiny_mt: bench_tiny_mt.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_mt built with hakmem"
# Burst+Pause bench (mimalloc stress pattern).
# Each source is compiled three ways: -DUSE_HAKMEM, plain (system malloc),
# and -DUSE_MIMALLOC.

# --- single-threaded: objects ---
bench_burst_pause_hakmem.o: bench_burst_pause.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_system.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mi.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- single-threaded: binaries ---
bench_burst_pause_hakmem: bench_burst_pause_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_hakmem built"

bench_burst_pause_system: bench_burst_pause_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_system built"

bench_burst_pause_mi: bench_burst_pause_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mi built"

# --- multi-threaded (bench_burst_pause_mt.c): objects ---
bench_burst_pause_mt_hakmem.o: bench_burst_pause_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_mt_system.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mt_mi.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- multi-threaded: binaries ---
bench_burst_pause_mt_hakmem: bench_burst_pause_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_hakmem built"

bench_burst_pause_mt_system: bench_burst_pause_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_system built"

bench_burst_pause_mt_mi: bench_burst_pause_mt_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_mi built"
# ----------------------------------------------------------------------------
# Larson benchmarks (Google/mimalloc-bench style)
# ----------------------------------------------------------------------------
LARSON_SRC := mimalloc-bench/bench/larson/larson.cpp

# larson.cpp is C++ but is compiled with the project's C flags. Those carry
# -std=c11, which g++ reports as "valid for C/ObjC but not for C++" and
# ignores; strip it so the C++ compile is warning-free. Recursively expanded
# so later or target-specific additions to CFLAGS are still picked up.
LARSON_CXXFLAGS = $(filter-out -std=c11,$(CFLAGS))

# System variant (uses system malloc/free)
larson_system.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -c -o $@ $<

larson_system: larson_system.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# mimalloc variant (direct link to prebuilt mimalloc)
larson_mi.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

larson_mi: larson_mi.o
	$(CXX) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
larson_hakmem.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -I core -c -o $@ $<

larson_hakmem: larson_hakmem.o $(TINY_BENCH_OBJS)
	$(CXX) -o $@ $^ $(LDFLAGS)
# test_mf2 linked against the core allocator objects.
test_mf2: test_mf2.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ test_mf2 built with hakmem"

# bench_comprehensive.o with USE_HAKMEM flag
bench_comprehensive.o: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_comprehensive_hakmem: bench_comprehensive.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmem built with hakmem"

# System-malloc variant: compiled and linked in one step.
bench_comprehensive_system: bench_comprehensive.c
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	@echo "✓ bench_comprehensive_system built (system malloc)"

# mimalloc direct-link variant (no LD_PRELOAD dependency).
# The source is referenced as $< rather than by bare file name: the
# prerequisite is located through VPATH, and only $< carries the path make
# actually found.
bench_comprehensive_mi: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include \
	$< -o $@ \
	-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_mi built (direct link to mimalloc)"
# hakx (new hybrid) front API stubs.
# Only hakx_api_stub.o has an explicit rule in this section.
HAKX_OBJS = engines/hakx/hakx_api_stub.o \
            engines/hakx/hakx_front_tiny.o \
            engines/hakx/hakx_l25_tuner.o

engines/hakx/hakx_api_stub.o: engines/hakx/hakx_api_stub.c include/hakx/hakx_api.h engines/hakx/hakx_front_tiny.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

# hakx variant of the tiny hot bench (direct link via the hakx API).
# malloc/free/realloc are mapped to the hakx_*_fast names with -D, and both
# hakx headers are force-included.
bench_tiny_hot_hakx.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_tiny_hot_hakx: bench_tiny_hot_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx built (hakx API stub)"

# P0 variant: as above, with -DHAKMEM_TINY_P0_BATCH_REFILL=1 on the bench
# object (batch refill optimization).
bench_tiny_hot_hakx_p0.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -DHAKMEM_TINY_P0_BATCH_REFILL=1 -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_tiny_hot_hakx_p0: bench_tiny_hot_hakx_p0.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx_p0 built (with P0 batch refill)"
# Comparison bench that calls hak_tiny_alloc/free directly.
bench_tiny_hot_direct.o: bench_tiny_hot_direct.c core/hakmem_tiny.h
	$(CC) $(CFLAGS) -c -o $@ $<

bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"
# hakmi variant for comprehensive bench (front + mimalloc backend).
#
# The hakmi front objects are linked by the recipe, so they are listed as
# prerequisites: make now builds them first and relinks when they change
# (previously the link failed unless they happened to exist already).
# The source is passed as $< so the path found through VPATH is used.
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h $(HAKMI_FRONT_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
	$< -o $@ \
	$(HAKMI_FRONT_OBJS) \
	-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"

# hakx variant for comprehensive bench.
# Compiled and linked in one step against the hakx and core objects; the
# source is passed as $< for the same VPATH reason as above.
bench_comprehensive_hakx: bench_comprehensive.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
	$< -o $@ $(HAKX_OBJS) $(TINY_BENCH_OBJS) $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakx built (hakx API stub)"
# Random mixed bench (direct link variants): one source, five allocators.

# --- objects ---
bench_random_mixed_hakmem.o: bench_random_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_random_mixed_system.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_random_mixed_mi.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- binaries ---
bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_random_mixed_system: bench_random_mixed_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_random_mixed_mi: bench_random_mixed_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# --- hakmi variant (front adapter objects + libmimalloc) ---
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# --- hakx variant (hakx objects + core objects) ---
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_random_mixed_hakx: bench_random_mixed_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
# Ultra-fast build for benchmarks: trims unwinding/PLT overhead and
# improves code locality. Use: `make bench_fast` then run the binary.
# NOTE(review): `clean` is an ordinary prerequisite here, so under `make -j`
# nothing orders it before the builds listed after it.
bench_fast: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                      -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fast: LDFLAGS += -Wl,-O2
bench_fast: clean \
            bench_comprehensive_hakmem \
            bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakx
	@echo "✓ bench_fast build complete"

# Perf-Main (safe) bench build: no bench-only macros; same O flags.
perf_main: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                     -fno-unwind-tables -fno-asynchronous-unwind-tables
perf_main: LDFLAGS += -Wl,-O2
perf_main: clean \
           bench_comprehensive_hakmem \
           bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi \
           bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi \
           bench_comprehensive_hakx bench_tiny_hot_hakx bench_random_mixed_hakx
	@echo "✓ perf_main build complete (no bench-only macros)"
# Mid/Large (832KiB) bench: hakmem, system, mimalloc and hakx builds of
# bench_mid_large.c.

# --- objects ---
bench_mid_large_hakmem.o: bench_mid_large.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_system.o: bench_mid_large.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_mi.o: bench_mid_large.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- binaries ---
bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_system: bench_mid_large_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_mi: bench_mid_large_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# --- hakx variant for mid/large (1T) ---
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_hakx: bench_mid_large_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
# Mid/Large MT (832KiB) bench: the same four variants, built from
# bench_mid_large_mt.c.

# --- objects ---
bench_mid_large_mt_hakmem.o: bench_mid_large_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_mt_system.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_mt_mi.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- binaries ---
bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_mt_system: bench_mid_large_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_mid_large_mt_mi: bench_mid_large_mt_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# --- hakx variant for mid/large MT ---
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_mt_hakx: bench_mid_large_mt_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
# Fragmentation stress bench: hakmem, system and mimalloc builds of
# bench_fragment_stress.c.

# --- objects ---
bench_fragment_stress_hakmem.o: bench_fragment_stress.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_fragment_stress_system.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_fragment_stress_mi.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# --- binaries ---
bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_fragment_stress_system: bench_fragment_stress_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_fragment_stress_mi: bench_fragment_stress_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
# Bench build with Minimal Tiny Front (physically excludes optional front tiers).
# The target-specific flags apply to every prerequisite built for this goal.
bench_tiny_front: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                            -fno-unwind-tables -fno-asynchronous-unwind-tables \
                            -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
bench_tiny_front: LDFLAGS += -Wl,-O2
bench_tiny_front: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_tiny_front build complete (HAKMEM_TINY_MINIMAL_FRONT=1)"

# Bench build with Strict Front (compile-out optional front tiers, baseline structure).
bench_front_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                              -fno-unwind-tables -fno-asynchronous-unwind-tables \
                              -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1
bench_front_strict: LDFLAGS += -Wl,-O2
bench_front_strict: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_front_strict build complete (HAKMEM_TINY_STRICT_FRONT=1)"
# Bench build with Ultra (SLL-only front) for the Tiny-Hot microbench.
# - Compiles the hakmem bench with SLL-first/strict front, without
#   Quick/FrontCache, stats off.
# - Only affects bench binaries; normal builds are unchanged.
bench_ultra_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
 -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra_strict: LDFLAGS += -Wl,-O2
bench_ultra_strict: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra_strict build complete (ULTRA+STRICT front)"

# Bench build with Ultra (SLL-only) but without STRICT/MINIMAL;
# Quick/FrontCache compiled out.
bench_ultra: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
 -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra: LDFLAGS += -Wl,-O2
bench_ultra: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra build complete (ULTRA SLL-only, Quick/FrontCache OFF)"

# Bench build with the explicit bench fast path (SLL -> Mag -> tiny refill);
# stats/quick/front off.
bench_fastpath: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
 -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_fastpath: LDFLAGS += -Wl,-O2
bench_fastpath: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath build complete (bench-only fast path)"

# Bench build: SLL-only (<=64B), with warmup. The WARMUP8/16/32/64 defines
# carry the per-size warmup values.
bench_sll_only: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
 -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
 -DHAKMEM_TINY_BENCH_WARMUP32=160 -DHAKMEM_TINY_BENCH_WARMUP64=192 -DHAKMEM_TINY_BENCH_WARMUP8=64 -DHAKMEM_TINY_BENCH_WARMUP16=96 \
 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_sll_only: LDFLAGS += -Wl,-O2
bench_sll_only: clean bench_tiny_hot_hakmem
	@echo "✓ bench_sll_only build complete (bench-only SLL-only + warmup)"
# Bench-fastpath with explicit refill sizes (A/B).
# The three presets differ only in -DHAKMEM_TINY_BENCH_REFILL=<n>. The flags
# before and after it are factored out; flag order on the command line is
# the same as when each preset spelled them out in full.
BENCH_REFILL_HEAD := -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3
BENCH_REFILL_TAIL := -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 \
 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables

bench_fastpath_r8: CFLAGS += $(BENCH_REFILL_HEAD) -DHAKMEM_TINY_BENCH_REFILL=8 $(BENCH_REFILL_TAIL)
bench_fastpath_r8: LDFLAGS += -Wl,-O2
bench_fastpath_r8: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r8 build complete"

bench_fastpath_r12: CFLAGS += $(BENCH_REFILL_HEAD) -DHAKMEM_TINY_BENCH_REFILL=12 $(BENCH_REFILL_TAIL)
bench_fastpath_r12: LDFLAGS += -Wl,-O2
bench_fastpath_r12: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r12 build complete"

bench_fastpath_r16: CFLAGS += $(BENCH_REFILL_HEAD) -DHAKMEM_TINY_BENCH_REFILL=16 $(BENCH_REFILL_TAIL)
bench_fastpath_r16: LDFLAGS += -Wl,-O2
bench_fastpath_r16: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r16 build complete"
# PGO for bench-fastpath.
#
# Step 1: remove old profile data and objects, build an instrumented
# bench_tiny_hot_hakmem in a sub-make (its stdout is discarded), then run it
# for sizes 8/16/32/64. A failing run does not abort the target.
pgo-benchfast-profile:
	@echo "========================================="
	@echo "PGO Profile (bench-fastpath)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench-fastpath profile data collected (*.gcda)"

# Step 2: remove the objects (the *.gcda files are kept) and rebuild with
# -fprofile-use.
pgo-benchfast-build:
	@echo "========================================="
	@echo "PGO Build (bench-fastpath)"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
	LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench-fastpath PGO build complete"
# Debug bench (with counters/prints): debug counters on, debug info, -O2.
# The -O2 appended here comes after -O$(OPT_LEVEL) on the command line.
bench_debug: CFLAGS += -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
bench_debug: clean \
             bench_comprehensive_hakmem \
             bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_debug build complete (debug counters enabled)"
# Clean
#
# Declared phony so a file named `clean` cannot disable it.
#
# The bench_* presets (bench_fast, perf_main, bench_ultra, ...) list `clean`
# as a prerequisite so that their binaries are rebuilt with preset-specific
# flags. That only works if clean removes what those presets build, so the
# tiny-hot / random-mixed / hakx objects and binaries are removed as well;
# before, they survived and were reused with whatever flags they were last
# compiled with.
.PHONY: clean
clean:
	rm -f $(OBJS) $(TARGET) $(BENCH_HAKMEM_OBJS) $(BENCH_SYSTEM_OBJS) $(BENCH_HAKMEM) $(BENCH_SYSTEM) $(SHARED_OBJS) $(SHARED_LIB) *.csv
	rm -f bench_comprehensive.o bench_comprehensive_hakmem bench_comprehensive_system
	rm -f bench_tiny bench_tiny.o bench_tiny_mt bench_tiny_mt.o test_mf2 test_mf2.o bench_tiny_hakmem
	rm -f bench_tiny_hot_hakmem bench_tiny_hot_hakmem.o bench_tiny_hot_system bench_tiny_hot_system.o
	rm -f bench_tiny_hot_mi bench_tiny_hot_mi.o bench_tiny_hot_hakx bench_tiny_hot_hakx.o
	rm -f bench_random_mixed_hakmem bench_random_mixed_hakmem.o bench_random_mixed_system bench_random_mixed_system.o
	rm -f bench_random_mixed_mi bench_random_mixed_mi.o bench_random_mixed_hakx bench_random_mixed_hakx.o
	rm -f bench_comprehensive_hakx $(HAKX_OBJS)
# Help: print the main targets and the two benchmark workflows.
help:
	@echo "hakmem PoC - Makefile targets:"
	@echo " make - Build the test program"
	@echo " make run - Build and run the test"
	@echo " make bench - Build benchmark programs"
	@echo " make shared - Build shared library (for LD_PRELOAD)"
	@echo " make clean - Clean build artifacts"
	@echo " make bench-mode - Run Tiny-focused PGO bench (scripts/bench_mode.sh)"
	@echo " make bench-all - Run (near) full mimalloc-bench with timeouts"
	@echo ""
	@echo "Benchmark workflow:"
	@echo " 1. make bench"
	@echo " 2. bash bench_runner.sh --runs 10"
	@echo " 3. python3 analyze_results.py benchmark_results.csv"
	@echo ""
	@echo "mimalloc-bench workflow:"
	@echo " 1. make shared"
	@echo " 2. LD_PRELOAD=./libhakmem.so <benchmark>"
# Step 2: PGO (Profile-Guided Optimization) targets
#
# pgo-profile: wipe old profile data, objects and the binary, rebuild
# bench_comprehensive_hakmem with -fprofile-generate/-flto, then run it once
# with HAKMEM_WRAP_TINY=1. Only the first six lines matching "Test 1:" or
# "Throughput:" from the run are shown.
pgo-profile:
	@printf '%s\n' \
		"=========================================" \
		"Step 2b: PGO Profile Collection" \
		"========================================="
	rm -f *.gcda *.o bench_comprehensive_hakmem
	$(MAKE) bench_comprehensive_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-generate -flto" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto"
	@printf '%s\n' "Running profile workload..."
	HAKMEM_WRAP_TINY=1 ./bench_comprehensive_hakmem 2>&1 \
		| grep -E "(Test 1:|Throughput:)" \
		| head -6
	@printf '%s\n' "✓ Profile data collected (*.gcda files)"
# pgo-build: rebuild bench_comprehensive_hakmem with -fprofile-use/-flto.
# Objects and the old binary are removed first; *.gcda files are left in place
# so the compiler can read them.
pgo-build:
	@printf '%s\n' \
		"=========================================" \
		"Step 2c: PGO Optimized Build (LTO+PGO)" \
		"========================================="
	rm -f *.o bench_comprehensive_hakmem
	$(MAKE) bench_comprehensive_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto"
	@printf '%s\n' "✓ LTO+PGO optimized build complete"
# PGO for tiny_hot (Strict Front recommended)
#
# pgo-hot-profile: instrumented build of bench_tiny_hot_hakmem with
# -DHAKMEM_TINY_STRICT_FRONT=1, followed by three profiling runs
# (sizes 16/32/64, batch=100, cycles=60000). Run failures are ignored.
# Only the size-16 run sets HAKMEM_TINY_SPECIALIZE_MASK=0x02.
pgo-hot-profile:
	@printf '%s\n' \
		"=========================================" \
		"PGO Profile (tiny_hot) with Strict Front" \
		"========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" \
		>/dev/null
	@printf '%s\n' "[profile-run] bench_tiny_hot_hakmem (sizes 16/32/64, batch=100, cycles=60000)"
	HAKMEM_TINY_SPECIALIZE_MASK=0x02 ./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@printf '%s\n' "✓ tiny_hot profile data collected (*.gcda)"
# pgo-hot-build: rebuild bench_tiny_hot_hakmem with -fprofile-use/-flto and
# -DHAKMEM_TINY_STRICT_FRONT=1. Objects and the old binary are removed first;
# the nested build's output is discarded.
pgo-hot-build:
	@printf '%s\n' \
		"=========================================" \
		"PGO Build (tiny_hot) with Strict Front" \
		"========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" \
		>/dev/null
	@printf '%s\n' "✓ tiny_hot PGO build complete"
# Phase 8.2: memory profiling build (verbose memory breakdown).
# Adds -DHAKMEM_DEBUG_MEMORY to CFLAGS for this target and its prerequisites,
# then prints how to run the resulting binary.
# NOTE(review): `clean` is a plain prerequisite, so its ordering before the
# bench build is only guaranteed without -j.
bench-memory: CFLAGS += -DHAKMEM_DEBUG_MEMORY
bench-memory: \
    clean \
    bench_comprehensive_hakmem
	@printf '%s\n' \
		"" \
		"=========================================" \
		"Memory profiling build complete!" \
		" Run: ./bench_comprehensive_hakmem" \
		" Memory breakdown will be printed at end" \
		"========================================="
# Command-style targets: none of them produces a file with its own name.
# Declaring them phony keeps a stray file or directory of the same name
# (e.g. `clean`, `bench-all`) from making the target look up to date.
# Repeated .PHONY lines accumulate.
.PHONY: all run bench shared debug clean help pgo-profile pgo-build bench-memory
.PHONY: bench_debug bench-mode bench-all
.PHONY: pgo-benchfast-build pgo-hot-profile pgo-hot-build
.PHONY: pgo-profile-shared pgo-build-shared
.PHONY: pgo-benchsll-profile pgo-benchsll-build
.PHONY: pgo-benchsll-r12w192-profile pgo-benchsll-r12w192-build
# PGO for the shared library (LD_PRELOAD use)
#
# Step 1 - pgo-profile-shared: rebuild the `shared` goal with
# -fprofile-generate/-flto added to CFLAGS_SHARED and LDFLAGS, then preload the
# library into ./bench_comprehensive_system to collect a profile. Up to 20
# matching output lines are shown; a failing workload does not fail the target.
# NOTE(review): bench_comprehensive_system is not built by this rule —
# presumably it must already exist; confirm.
pgo-profile-shared:
	@printf '%s\n' \
		"=========================================" \
		"Step: PGO Profile Collection (shared lib)" \
		"========================================="
	rm -f *_shared.gcda *_shared.o $(SHARED_LIB)
	$(MAKE) shared \
		CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-generate -flto" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto"
	@printf '%s\n' "Running profile workload (LD_PRELOAD)..."
	HAKMEM_WRAP_TINY=1 LD_PRELOAD=./$(SHARED_LIB) ./bench_comprehensive_system 2>&1 \
		| grep -E "(SIZE CLASS:|Throughput:)" \
		| head -20 || true
	@printf '%s\n' "✓ Profile data collected (*.gcda for *_shared)"
# Step 2 - pgo-build-shared: rebuild the `shared` goal with -fprofile-use/-flto.
# -Wno-error=coverage-mismatch keeps a coverage-mismatch diagnostic from being
# treated as an error. Shared objects and the library are removed first;
# *_shared.gcda files are kept.
pgo-build-shared:
	@printf '%s\n' \
		"=========================================" \
		"Step: PGO Optimized Build (shared lib)" \
		"========================================="
	rm -f *_shared.o $(SHARED_LIB)
	$(MAKE) shared \
		CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-use -flto -Wno-error=coverage-mismatch" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto"
	@printf '%s\n' "✓ LTO+PGO optimized shared library complete"
# Convenience wrappers around the benchmark scripts (commands are not echoed).

# bench-mode: run scripts/bench_mode.sh under bash.
bench-mode:
	@bash scripts/bench_mode.sh

# bench-all: run scripts/run_all_benches_with_timeouts.sh under bash.
bench-all:
	@bash scripts/run_all_benches_with_timeouts.sh
# PGO for bench_sll_only
#
# pgo-benchsll-profile: instrumented (-fprofile-generate) build of
# bench_tiny_hot_hakmem with the bench-fastpath defines plus
# -DHAKMEM_TINY_BENCH_SLL_ONLY=1, followed by four profiling runs
# (sizes 8/16/32/64, batch=100, cycles=60000). Run failures are ignored.
pgo-benchsll-profile:
	@printf '%s\n' \
		"=========================================" \
		"PGO Profile (bench_sll_only)" \
		"========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" \
		>/dev/null
	@printf '%s\n' "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@printf '%s\n' "✓ bench_sll_only profile data collected (*.gcda)"
# pgo-benchsll-build: rebuild bench_tiny_hot_hakmem with -fprofile-use/-flto
# and the same bench_sll_only defines used for profiling. Objects and the old
# binary are removed first; the nested build's output is discarded.
pgo-benchsll-build:
	@printf '%s\n' \
		"=========================================" \
		"PGO Build (bench_sll_only)" \
		"========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" \
		>/dev/null
	@printf '%s\n' "✓ bench_sll_only PGO build complete"
# Variant: SLL-only with REFILL=12 and WARMUP32=192 (tuned for 32B)
#
# pgo-benchsll-r12w192-profile: instrumented (-fprofile-generate) build of
# bench_tiny_hot_hakmem with the bench_sll_only defines plus
# -DHAKMEM_TINY_BENCH_REFILL32=12 and -DHAKMEM_TINY_BENCH_WARMUP32=192,
# followed by four profiling runs (sizes 8/16/32/64, batch=100, cycles=60000).
# Each run sends stdout to /dev/null and is best-effort (`|| true`), so a
# failing run does not abort profile collection.
pgo-benchsll-r12w192-profile:
	@echo "========================================="
	@echo "PGO Profile (bench_sll_only r12 w32=192)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ r12 w32=192 profile data collected (*.gcda)"
# pgo-benchsll-r12w192-build: rebuild bench_tiny_hot_hakmem with
# -fprofile-use/-flto and the r12/w32=192 bench_sll_only defines. Objects and
# the old binary are removed first; the nested build's output is discarded.
pgo-benchsll-r12w192-build:
	@printf '%s\n' \
		"=========================================" \
		"PGO Build (bench_sll_only r12 w32=192)" \
		"========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" \
		>/dev/null
	@printf '%s\n' "✓ r12 w32=192 PGO build complete"
# Absolute path to the mimalloc release output inside the mimalloc-bench tree,
# relative to the directory make runs in. $(CURDIR) is make's built-in absolute
# working directory, so no shell is spawned at parse time.
MI_RPATH := $(CURDIR)/mimalloc-bench/extern/mi/out/release
# Sanitized builds (compiler-assisted debugging)
.PHONY: asan-larson ubsan-larson tsan-larson

# AddressSanitizer + UndefinedBehaviorSanitizer flag set: -O1 with debug info,
# frame pointers kept, LTO off, no recovery after any sanitizer report, strong
# stack protector.
# NOTE(review): HAKMEM_FORCE_LIBC_ALLOC_BUILD=1 presumably routes allocation to
# libc for these builds — confirm against the sources.
SAN_ASAN_CFLAGS = \
    -O1 -g \
    -fno-omit-frame-pointer \
    -fno-lto \
    -fsanitize=address,undefined \
    -fno-sanitize-recover=all \
    -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
# Link-time counterpart of the ASan/UBSan compile flags.
SAN_ASAN_LDFLAGS = -fsanitize=address,undefined
# UndefinedBehaviorSanitizer-only flag set: -O1 with debug info, frame pointers
# kept, LTO off, no recovery after an undefined-behaviour report, strong stack
# protector.
# NOTE(review): HAKMEM_FORCE_LIBC_ALLOC_BUILD=1 presumably routes allocation to
# libc for these builds — confirm against the sources.
SAN_UBSAN_CFLAGS = \
    -O1 -g \
    -fno-omit-frame-pointer \
    -fno-lto \
    -fsanitize=undefined \
    -fno-sanitize-recover=undefined \
    -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
# Link-time counterpart of the UBSan compile flags.
SAN_UBSAN_LDFLAGS = -fsanitize=undefined
# ThreadSanitizer flag set: -O1 with debug info, frame pointers kept, LTO off.
# NOTE(review): HAKMEM_FORCE_LIBC_ALLOC_BUILD=1 presumably routes allocation to
# libc for these builds — confirm against the sources.
SAN_TSAN_CFLAGS = \
    -O1 -g \
    -fno-omit-frame-pointer \
    -fno-lto \
    -fsanitize=thread \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
# Link-time counterpart of the TSan compile flags.
SAN_TSAN_LDFLAGS = -fsanitize=thread
# asan-larson: clean, build larson_hakmem with the ASan/UBSan flag set passed
# through EXTRA_CFLAGS/EXTRA_LDFLAGS, then copy the binary to
# larson_hakmem_asan. Output of both nested makes is discarded.
asan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_ASAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_ASAN_LDFLAGS)" \
		>/dev/null
	@cp -f larson_hakmem larson_hakmem_asan
	@printf '%s\n' "✓ Built larson_hakmem_asan with ASan/UBSan"
# ubsan-larson: clean, build larson_hakmem with the UBSan flag set passed
# through EXTRA_CFLAGS/EXTRA_LDFLAGS, then copy the binary to
# larson_hakmem_ubsan. Output of both nested makes is discarded.
ubsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_UBSAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_UBSAN_LDFLAGS)" \
		>/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan
	@printf '%s\n' "✓ Built larson_hakmem_ubsan with UBSan"
# tsan-larson: clean, build larson_hakmem with the TSan flag set passed through
# EXTRA_CFLAGS/EXTRA_LDFLAGS, then copy the binary to larson_hakmem_tsan.
# Output of both nested makes is discarded.
tsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_TSAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_TSAN_LDFLAGS)" \
		>/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan
	@printf '%s\n' "✓ Built larson_hakmem_tsan with TSan (no ASan)"