# Changes:
# - Add mimalloc-bench as git submodule for Larson benchmark source
# - Simplify Makefile: Remove shim layer (hakmem.o provides malloc/free directly)
# - Enable larson.sh script to build and run Larson benchmarks
#
# This allows running: ./scripts/larson.sh hakmem --profile tinyhot_tput 2 4
787 lines
40 KiB
Makefile
787 lines
40 KiB
Makefile
# Makefile for hakmem PoC
|
||
|
||
# Toolchain. Override on the command line, e.g. `make CC=clang CXX=clang++`.
CC  = gcc
CXX = g++

# Directory layout (2025-11-01 reorganization)
SRC_DIR       := core
BENCH_SRC     := benchmarks/src
TEST_SRC      := tests
BUILD_DIR     := build
BENCH_BIN_DIR := benchmarks/bin

# Prerequisite search path, assembled one source tree at a time:
# allocator core first, then the benchmark sources, then the tests.
VPATH := $(SRC_DIR)
VPATH := $(VPATH):$(BENCH_SRC)/tiny:$(BENCH_SRC)/mid:$(BENCH_SRC)/comprehensive:$(BENCH_SRC)/stress
VPATH := $(VPATH):$(TEST_SRC)/unit:$(TEST_SRC)/integration:$(TEST_SRC)/stress
|
||
|
||
# ---- Build knobs (all overridable on the command line) ----------------------
# HAKMEM_TIMING: debug timing, OFF (0) by default for performance; set to 1 to
# enable. The value is baked into BASE_CFLAGS when that line is parsed.
HAKMEM_TIMING ?= 0
# Phase 6.25: aggressive optimization flags (default ON).
OPT_LEVEL ?= 3
USE_LTO   ?= 1
NATIVE    ?= 1
# TLS ring capacity used by the shared-library build: make shared RING_CAP=32
RING_CAP  ?= 32

# Flags common to the executable and the shared-library builds.
BASE_CFLAGS := -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L \
               -D_GLIBC_USE_ISOC2X=0 -D__isoc23_strtol=strtol -D__isoc23_strtoll=strtoll \
               -D__isoc23_strtoul=strtoul -D__isoc23_strtoull=strtoull -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) \
               -ffast-math -funroll-loops -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
               -fno-semantic-interposition -I core

# Phase 6.25: aggressive optimization + TLS ring extension.
# CFLAGS        - objects linked into executables
# CFLAGS_SHARED - position-independent objects for the shared library
# LDFLAGS       - link line (appended last by every link recipe in this file)
CFLAGS        = -O$(OPT_LEVEL) $(BASE_CFLAGS)
CFLAGS_SHARED = -O$(OPT_LEVEL) $(BASE_CFLAGS) -fPIC -DPOOL_TLS_RING_CAP=$(RING_CAP)
LDFLAGS       = -lm -lpthread

# Tune for the build machine.
ifeq ($(NATIVE),1)
CFLAGS        += -march=native -mtune=native -fno-plt
CFLAGS_SHARED += -march=native -mtune=native -fno-plt
endif

# Link-time optimization must be enabled for both compile and link steps.
ifeq ($(USE_LTO),1)
CFLAGS        += -flto
CFLAGS_SHARED += -flto
LDFLAGS       += -flto
endif
|
||
|
||
# Box Theory refactor for Tiny (Phase 6-1.7) is enabled by default; it is
# currently the best performing option (4.19M ops/s).
# Opt out to the legacy path with: make BOX_REFACTOR_DEFAULT=0
BOX_REFACTOR_DEFAULT ?= 1
ifeq "$(BOX_REFACTOR_DEFAULT)" "1"
CFLAGS        += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
endif
|
||
|
||
# Phase 6-2: the Ultra-Simple path was tested but is slower (-15%):
#   Ultra-Simple 3.56M ops/s vs. BOX_REFACTOR 4.19M ops/s.
# Both share the same superslab_refill bottleneck (29% CPU).
# Disabled by default; enable with: make ULTRA_SIMPLE_DEFAULT=1
ULTRA_SIMPLE_DEFAULT ?= 0
ifeq "$(ULTRA_SIMPLE_DEFAULT)" "1"
CFLAGS        += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
endif
|
||
|
||
# Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path).
# Target: 70-80% of System tcache (95-108 M ops/s).
# Enabled by default for testing; disable with: make TINY_FAST_PATH_DEFAULT=0
TINY_FAST_PATH_DEFAULT ?= 1
ifeq "$(TINY_FAST_PATH_DEFAULT)" "1"
CFLAGS        += -DHAKMEM_TINY_FAST_PATH=1
CFLAGS_SHARED += -DHAKMEM_TINY_FAST_PATH=1
endif
|
||
|
||
# Profile-guided optimization, step 1: `make PROFILE_GEN=1 <target>` builds an
# instrumented binary (compile and link both need the flag).
ifdef PROFILE_GEN
CFLAGS  += -fprofile-generate
LDFLAGS += -fprofile-generate
endif

# Profile-guided optimization, step 2: `make PROFILE_USE=1 <target>` rebuilds
# using the collected profile; a coverage mismatch is not treated as an error.
ifdef PROFILE_USE
CFLAGS  += -fprofile-use -Wno-error=coverage-mismatch
LDFLAGS += -fprofile-use
endif

# User-supplied additions, appended last:
#   make EXTRA_CFLAGS=... EXTRA_LDFLAGS=...
# Only CFLAGS/LDFLAGS are extended here; CFLAGS_SHARED is left as is.
CFLAGS  += $(EXTRA_CFLAGS)
LDFLAGS += $(EXTRA_LDFLAGS)
|
||
|
||
# ---- Targets and object lists ------------------------------------------------

# Test executable: all allocator objects plus the test driver (listed last).
TARGET = test_hakmem
OBJS = \
  hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o \
  hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o \
  hakmem_tiny.o hakmem_tiny_superslab.o \
  tiny_mailbox.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o \
  hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_query.o \
  hakmem_tiny_rss.o hakmem_tiny_registry.o \
  hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
  hakmem_mid_mt.o hakmem_super_registry.o \
  hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o \
  hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o \
  hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o \
  hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o \
  hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o \
  tiny_fastcache.o \
  test_hakmem.o

# Shared library: the `_shared.o` (PIC) flavour of the allocator objects.
# NOTE(review): the five objects filtered out below are linked into the
# executables but have never been part of the shared library list - confirm
# this omission is intentional.
SHARED_LIB = libhakmem.so
SHARED_OBJS = $(patsubst %.o,%_shared.o,$(filter-out \
  hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
  hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o \
  test_hakmem.o,$(OBJS)))

# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
# Same allocator objects as the test program, with the benchmark driver in
# place of the test driver.
BENCH_HAKMEM_OBJS = $(filter-out test_hakmem.o,$(OBJS)) bench_allocators_hakmem.o
BENCH_SYSTEM_OBJS = bench_allocators_system.o
|
||
|
||
# Default target: build the test program.
# `all` names an action, not a file, so it is declared phony; a stray file
# called `all` can then never make it look up to date.
.PHONY: all
all: $(TARGET)

# Link the test program from the allocator objects and the test driver.
$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Build successful! Run with:"
	@echo " ./$(TARGET)"
	@echo "========================================="
|
||
|
||
# Generic C compile rule. Sources are located through VPATH; every object is
# rebuilt whenever one of the listed core headers changes.
%.o: %.c \
     hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h \
     hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h \
     hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h \
     hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS) -c -o $@ $<
|
||
|
||
# Build benchmark programs.
# `bench` is a command-style target (it produces no file named `bench`), so it
# is declared phony. Its target-specific CFLAGS addition is inherited by every
# prerequisite built on its behalf.
.PHONY: bench
bench: CFLAGS += -DHAKMEM_PROF_STATIC=1
bench: $(BENCH_HAKMEM) $(BENCH_SYSTEM)
	@echo ""
	@echo "========================================="
	@echo "Benchmark programs built successfully!"
	@echo " $(BENCH_HAKMEM) - hakmem versions"
	@echo " $(BENCH_SYSTEM) - system/jemalloc/mimalloc"
	@echo ""
	@echo "Run benchmarks with:"
	@echo " bash bench_runner.sh --runs 10"
	@echo "========================================="

# hakmem version (with hakmem linked): same source, compiled with -DUSE_HAKMEM.
bench_allocators_hakmem.o: bench_allocators.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

$(BENCH_HAKMEM): $(BENCH_HAKMEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system version (without hakmem, for LD_PRELOAD testing)
bench_allocators_system.o: bench_allocators.c
	$(CC) $(CFLAGS) -c -o $@ $<

$(BENCH_SYSTEM): $(BENCH_SYSTEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
|
||
|
||
# ---- Tiny hot microbench ------------------------------------------------------
# One source (bench_tiny_hot.c) compiled once per allocator flavour.

# Objects linked into every hakmi-front binary.
HAKMI_FRONT_OBJS = adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o

# Compile step, one object per flavour:
#   hakmem - direct link against hakmem (-DUSE_HAKMEM)
bench_tiny_hot_hakmem.o: bench_tiny_hot.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

#   system - no allocator define; uses whatever malloc the binary links to
bench_tiny_hot_system.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -c -o $@ $<

#   mi     - mimalloc variant (direct link), needs the mimalloc headers
bench_tiny_hot_mi.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

#   hakmi  - direct link via the front API; malloc/free/realloc are remapped
#            to the hakmi_* entry points on the command line
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

# Link step.
# The hakmem flavour reuses the allocator objects from BENCH_HAKMEM_OBJS with
# the bench_allocators drivers filtered out.
bench_tiny_hot_hakmem: $(filter-out bench_allocators_hakmem.o bench_allocators_system.o,$(BENCH_HAKMEM_OBJS)) bench_tiny_hot_hakmem.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_tiny_hot_system: bench_tiny_hot_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

bench_tiny_hot_mi: bench_tiny_hot_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
||
|
||
# ===== Convenience perf targets =====
.PHONY: pgo-gen-tinyhot pgo-use-tinyhot perf-help

# Generate PGO profile for Tiny Hot (32/100/60000) with SLL-first fast path.
# Objects left over from an earlier build are up to date as far as make is
# concerned and would NOT be recompiled with -fprofile-generate, so they (and
# any stale profile data) are removed first, as pgo-benchfast-profile does.
# The training run is best-effort: a failing run does not fail the target.
pgo-gen-tinyhot:
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) PROFILE_GEN=1 bench_tiny_hot_hakmem
	HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0 \
	HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_TINY_HOTMAG=0 HAKMEM_SLL_MULTIPLIER=1 \
	./bench_tiny_hot_hakmem 32 100 60000 || true

# Use generated PGO profile for Tiny Hot binary.
# The instrumented objects are removed (the *.gcda profile data is kept) so
# that everything is recompiled with -fprofile-use.
pgo-use-tinyhot:
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) PROFILE_USE=1 bench_tiny_hot_hakmem

# Show recommended runtime envs for bench reproducibility
perf-help:
	@echo "Recommended runtime envs (Tiny Hot / Larson):"
	@echo " export HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0"
	@echo " export HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_TINY_HOTMAG=0"
	@echo " export HAKMEM_SLL_MULTIPLIER=1"
	@echo "Build flags (overridable): OPT_LEVEL=$(OPT_LEVEL) USE_LTO=$(USE_LTO) NATIVE=$(NATIVE)"
|
||
|
||
# ---- hakmi front adapter -------------------------------------------------------
# Link the tiny-hot bench against the hakmi front objects and the prebuilt
# mimalloc library.
bench_tiny_hot_hakmi: bench_tiny_hot_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# Explicit compile rules for the front objects. hakmi_front and hakmi_tls_front
# are compiled with the mimalloc include directory; hakmi_env only gets the
# project's own include directory.
adapters/hakmi_front/hakmi_env.o: adapters/hakmi_front/hakmi_env.c adapters/hakmi_front/hakmi_env.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

adapters/hakmi_front/hakmi_front.o: adapters/hakmi_front/hakmi_front.c adapters/hakmi_front/hakmi_front.h include/hakmi/hakmi_api.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<

adapters/hakmi_front/hakmi_tls_front.o: adapters/hakmi_front/hakmi_tls_front.c adapters/hakmi_front/hakmi_tls_front.h
	$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
||
|
||
# Run test: build the test program if needed, then execute it.
# Declared phony so that it always runs, even if a file named `run` exists.
.PHONY: run
run: $(TARGET)
	@echo ""
	@echo "========================================="
	@echo "Running hakmem PoC test..."
	@echo "========================================="
	@./$(TARGET)
|
||
|
||
# Shared library target (for LD_PRELOAD with mimalloc-bench)
#
# foo_shared.o is built from foo.c with CFLAGS_SHARED (-fPIC etc.).
# The header list mirrors the one on the generic %.o rule, including
# hakmem_tiny_superslab.h, hakmem_mid_mt.h and hakmem_super_registry.h, so a
# change to any of those headers rebuilds the shared objects as well.
%_shared.o: %.c \
            hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h \
            hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h \
            hakmem_tiny.h hakmem_tiny_superslab.h hakmem_mid_mt.h hakmem_super_registry.h \
            hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS_SHARED) -c -o $@ $<

$(SHARED_LIB): $(SHARED_OBJS)
	$(CC) -shared -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Shared library built successfully!"
	@echo " $(SHARED_LIB)"
	@echo ""
	@echo "Use with LD_PRELOAD:"
	@echo " LD_PRELOAD=./$(SHARED_LIB) <command>"
	@echo "========================================="

# Convenience alias for the library; not a file, hence phony.
.PHONY: shared
shared: $(SHARED_LIB)
|
||
|
||
# Phase 6.15: Debug build target (verbose logging)
#
# Builds the shared library with verbose logging, debug info, no optimization
# (the trailing -O0 overrides the earlier -O$(OPT_LEVEL)) and timing enabled.
#
# BASE_CFLAGS is a simply-expanded (`:=`) variable, so the
# -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) it contains was fixed when the
# makefile was parsed; the target-specific HAKMEM_TIMING below cannot change
# it. The macro is therefore undefined and redefined explicitly here (-U/-D
# are processed in command-line order).
.PHONY: debug
debug: CFLAGS        += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=1
debug: CFLAGS_SHARED += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=1
debug: HAKMEM_TIMING=1
debug: shared
|
||
|
||
# Both targets below are commands rather than files.
.PHONY: box-refactor test-box-refactor

# Phase 6-1.7: Box Theory Refactoring
# Cleans, then rebuilds larson_hakmem in a sub-make with the box-refactor
# define appended to the current CFLAGS. A command-line CFLAGS replaces every
# CFLAGS assignment inside the sub-make, which is why the fully expanded value
# is passed down.
box-refactor:
	$(MAKE) clean
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1" larson_hakmem
	@echo ""
	@echo "========================================="
	@echo "Built with Box Refactor (Phase 6-1.7)"
	@echo " larson_hakmem (with Box 1/5/6)"
	@echo "========================================="

# Convenience target: build and test box-refactor
test-box-refactor: box-refactor
	@echo ""
	@echo "========================================="
	@echo "Running Box Refactor Test..."
	@echo "========================================="
	./larson_hakmem 10 8 128 1024 1 12345 4
|
||
|
||
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
#
# Allocator objects shared by the direct-link benchmark binaries (no driver
# object included).
TINY_BENCH_OBJS = \
  hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o \
  hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o \
  hakmem_tiny.o hakmem_tiny_superslab.o \
  tiny_mailbox.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o \
  hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_query.o \
  hakmem_tiny_rss.o hakmem_tiny_registry.o \
  hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
  hakmem_mid_mt.o hakmem_super_registry.o \
  hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o \
  hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o \
  hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o \
  hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o \
  hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o \
  tiny_fastcache.o

# Multi-threaded tiny bench.
bench_tiny_mt: bench_tiny_mt.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_mt built with hakmem"

# Tiny bench.
bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny built with hakmem"
|
||
|
||
# ---- Burst+Pause bench (mimalloc stress pattern) ------------------------------
# bench_burst_pause.c is compiled once per allocator flavour and linked into
# one binary per flavour.

# Compile: hakmem / system / mimalloc objects.
bench_burst_pause_hakmem.o: bench_burst_pause.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_system.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mi.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

# Link: the mimalloc binary links the prebuilt library, the system binary links
# nothing extra, the hakmem binary links the allocator objects.
bench_burst_pause_mi: bench_burst_pause_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mi built"

bench_burst_pause_system: bench_burst_pause_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_system built"

bench_burst_pause_hakmem: bench_burst_pause_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_hakmem built"
|
||
|
||
# ---- Burst+Pause MT bench -----------------------------------------------------
# Same layout as the single-threaded burst/pause bench, built from
# bench_burst_pause_mt.c.

# hakmem flavour
bench_burst_pause_mt_hakmem.o: bench_burst_pause_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_burst_pause_mt_hakmem: bench_burst_pause_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_hakmem built"

# system malloc flavour
bench_burst_pause_mt_system.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_burst_pause_mt_system: bench_burst_pause_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_system built"

# mimalloc flavour (direct link to the prebuilt library)
bench_burst_pause_mt_mi.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_burst_pause_mt_mi: bench_burst_pause_mt_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_mi built"
|
||
|
||
# ----------------------------------------------------------------------------
# Larson benchmarks (Google/mimalloc-bench style)
# ----------------------------------------------------------------------------

LARSON_SRC := mimalloc-bench/bench/larson/larson.cpp

# larson.cpp is C++ and is compiled with $(CXX), but CFLAGS carries the C-only
# dialect flag -std=c11, which a C++ compiler rejects with a warning on every
# compile. Strip it and keep everything else. Recursive (`=`) so that later or
# command-line changes to CFLAGS are still picked up.
LARSON_CXXFLAGS = $(filter-out -std=c11,$(CFLAGS))

# System variant (uses system malloc/free)
larson_system.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -c -o $@ $<

larson_system: larson_system.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# mimalloc variant (direct link to prebuilt mimalloc)
larson_mi.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

larson_mi: larson_mi.o
	$(CXX) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
larson_hakmem.o: $(LARSON_SRC)
	$(CXX) $(LARSON_CXXFLAGS) -I core -c -o $@ $<

larson_hakmem: larson_hakmem.o $(TINY_BENCH_OBJS)
	$(CXX) -o $@ $^ $(LDFLAGS)
|
||
|
||
# test_mf2 linked directly against the allocator objects.
test_mf2: test_mf2.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ test_mf2 built with hakmem"

# bench_comprehensive.o with USE_HAKMEM flag
bench_comprehensive.o: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_comprehensive_hakmem: bench_comprehensive.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmem built with hakmem"

# System malloc variant: compiled and linked in one step.
bench_comprehensive_system: bench_comprehensive.c
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	@echo "✓ bench_comprehensive_system built (system malloc)"

# mimalloc direct-link variant (no LD_PRELOAD dependency)
# The source is referenced as $< rather than by its bare name: make resolves
# the prerequisite through VPATH, but a literal file name in a recipe is
# handed to the compiler unchanged and is only found if it sits in the current
# directory.
bench_comprehensive_mi: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include \
	$< -o $@ \
	-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_mi built (direct link to mimalloc)"
|
||
|
||
# ---- hakx (new hybrid) front API stubs ----------------------------------------
HAKX_OBJS = engines/hakx/hakx_api_stub.o engines/hakx/hakx_front_tiny.o engines/hakx/hakx_l25_tuner.o

# Only the API stub has an explicit rule here; the remaining HAKX objects have
# no explicit rule in this part of the file.
engines/hakx/hakx_api_stub.o: engines/hakx/hakx_api_stub.c include/hakx/hakx_api.h engines/hakx/hakx_front_tiny.h
	$(CC) $(CFLAGS) -I include -c -o $@ $<

# Tiny hot bench, hakx flavours (direct link via hakx API). The benchmark's
# malloc/free/realloc are remapped on the command line to the hakx_*_fast
# entry points, with both hakx headers force-included.
#   plain variant
bench_tiny_hot_hakx.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

#   P0 variant with batch refill optimization
bench_tiny_hot_hakx_p0.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -DHAKMEM_TINY_P0_BATCH_REFILL=1 -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_tiny_hot_hakx: bench_tiny_hot_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx built (hakx API stub)"

bench_tiny_hot_hakx_p0: bench_tiny_hot_hakx_p0.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx_p0 built (with P0 batch refill)"

# Comparison bench that calls hak_tiny_alloc/free directly.
bench_tiny_hot_direct.o: bench_tiny_hot_direct.c core/hakmem_tiny.h
	$(CC) $(CFLAGS) -c -o $@ $<

bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"
|
||
|
||
# hakmi variant for comprehensive bench (front + mimalloc backend)
#
# Two fixes relative to a plain "compile the named file" recipe:
#  * the hakmi front objects are linked in, so they are listed as
#    prerequisites; make then builds them (and relinks when they change)
#    instead of assuming they already exist;
#  * the source is referenced as $< so the path make resolved through VPATH is
#    the one given to the compiler.
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h $(HAKMI_FRONT_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
	$< -o $@ \
	$(HAKMI_FRONT_OBJS) \
	-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"

# hakx variant for comprehensive bench
# $^ cannot be used on the link line because the prerequisites include
# headers; the source is taken from $< (VPATH-resolved) and the objects are
# named through their variables.
bench_comprehensive_hakx: bench_comprehensive.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
	$< -o $@ $(HAKX_OBJS) $(TINY_BENCH_OBJS) $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakx built (hakx API stub)"
|
||
|
||
# ---- Random mixed bench (direct link variants) --------------------------------
# bench_random_mixed.c is built once per allocator flavour; each flavour's
# compile rule is followed by its link rule.

# hakmem
bench_random_mixed_hakmem.o: bench_random_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system malloc
bench_random_mixed_system.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_random_mixed_system: bench_random_mixed_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc (prebuilt library)
bench_random_mixed_mi.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_random_mixed_mi: bench_random_mixed_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# hakmi: malloc/free/realloc remapped to the hakmi_* front API
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<

bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# hakx: malloc/free/realloc remapped to the hakx_*_fast API
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_random_mixed_hakx: bench_random_mixed_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
|
||
|
||
# bench_fast and perf_main are build "modes", not files: declare them phony so
# they run even if a file with the same name appears in the tree.
# NOTE(review): `clean` is listed as an ordinary prerequisite next to the
# binaries, so nothing orders it before the compiles under `make -j`; run
# these targets serially.
.PHONY: bench_fast perf_main

# Ultra-fast build for benchmarks: trims unwinding/PLT overhead and
# improves code locality. Use: `make bench_fast` then run the binary.
# The target-specific flags apply to every prerequisite built for this target.
bench_fast: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fast: LDFLAGS += -Wl,-O2
bench_fast: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakx
	@echo "✓ bench_fast build complete"

# Perf-Main (safe) bench build: no bench-only macros; same O flags
perf_main: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
perf_main: LDFLAGS += -Wl,-O2
perf_main: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi bench_comprehensive_hakx bench_tiny_hot_hakx bench_random_mixed_hakx
	@echo "✓ perf_main build complete (no bench-only macros)"
|
||
|
||
# ---- Mid/Large (8–32KiB) bench -------------------------------------------------
# bench_mid_large.c, one binary per allocator flavour.

# hakmem
bench_mid_large_hakmem.o: bench_mid_large.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system malloc
bench_mid_large_system.o: bench_mid_large.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_system: bench_mid_large_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc (prebuilt library)
bench_mid_large_mi.o: bench_mid_large.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_mid_large_mi: bench_mid_large_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# hakx variant for mid/large (1T)
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_hakx: bench_mid_large_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
|
||
|
||
# ---- Mid/Large MT (8–32KiB) bench ----------------------------------------------
# bench_mid_large_mt.c, one binary per allocator flavour.

# hakmem
bench_mid_large_mt_hakmem.o: bench_mid_large_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system malloc
bench_mid_large_mt_system.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_mid_large_mt_system: bench_mid_large_mt_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc (prebuilt library)
bench_mid_large_mt_mi.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_mid_large_mt_mi: bench_mid_large_mt_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)

# hakx variant for mid/large MT
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<

bench_mid_large_mt_hakx: bench_mid_large_mt_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
|
||
|
||
# ---- Fragmentation stress bench -------------------------------------------------
# bench_fragment_stress.c, one binary per allocator flavour.

# hakmem
bench_fragment_stress_hakmem.o: bench_fragment_stress.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# system malloc
bench_fragment_stress_system.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -c -o $@ $<

bench_fragment_stress_system: bench_fragment_stress_system.o
	$(CC) -o $@ $^ $(LDFLAGS)

# mimalloc (prebuilt library)
bench_fragment_stress_mi.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<

bench_fragment_stress_mi: bench_fragment_stress_mi.o
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
||
|
||
# Build-mode targets, not files: declared phony so they always run.
.PHONY: bench_tiny_front bench_front_strict

# Bench build with Minimal Tiny Front (physically excludes optional front tiers)
# The target-specific CFLAGS/LDFLAGS apply to every prerequisite built for this
# target; `clean` is listed first so everything is recompiled with them.
bench_tiny_front: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
bench_tiny_front: LDFLAGS += -Wl,-O2
bench_tiny_front: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_tiny_front build complete (HAKMEM_TINY_MINIMAL_FRONT=1)"

# Bench build with Strict Front (compile-out optional front tiers, baseline structure)
bench_front_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1
bench_front_strict: LDFLAGS += -Wl,-O2
bench_front_strict: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_front_strict build complete (HAKMEM_TINY_STRICT_FRONT=1)"
|
||
|
||
# Build-mode targets for the Tiny-Hot microbench. None of them is a file, so
# they are declared phony. Each one cleans and then rebuilds
# bench_tiny_hot_hakmem with its own target-specific flag set.
.PHONY: bench_ultra_strict bench_ultra bench_fastpath bench_sll_only

# Bench build with Ultra (SLL-only front) for Tiny-Hot microbench
# - Compiles hakmem bench with SLL-first/strict front, without Quick/FrontCache, stats off
# - Only affects bench binaries; normal builds unchanged
bench_ultra_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
	-DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
	-DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra_strict: LDFLAGS += -Wl,-O2
bench_ultra_strict: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra_strict build complete (ULTRA+STRICT front)"

# Bench build with Ultra (SLL-only) but without STRICT/MINIMAL, Quick/FrontCache compiled out
bench_ultra: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
	-DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra: LDFLAGS += -Wl,-O2
bench_ultra: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra build complete (ULTRA SLL-only, Quick/FrontCache OFF)"

# Bench build with explicit bench fast path (SLL→Mag→tiny refill), stats/quick/front off
bench_fastpath: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
	-DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_fastpath: LDFLAGS += -Wl,-O2
bench_fastpath: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath build complete (bench-only fast path)"

# Bench build: SLL-only (≤64B), with warmup
bench_sll_only: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
	-DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
	-DHAKMEM_TINY_BENCH_WARMUP32=160 -DHAKMEM_TINY_BENCH_WARMUP64=192 -DHAKMEM_TINY_BENCH_WARMUP8=64 -DHAKMEM_TINY_BENCH_WARMUP16=96 \
	-DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_sll_only: LDFLAGS += -Wl,-O2
bench_sll_only: clean bench_tiny_hot_hakmem
	@echo "✓ bench_sll_only build complete (bench-only SLL-only + warmup)"
|
||
|
||
# Bench-fastpath with explicit refill sizes (A/B)
# The three targets differ only in -DHAKMEM_TINY_BENCH_REFILL (8 / 12 / 16).
# They are build modes rather than files, hence phony.
.PHONY: bench_fastpath_r8 bench_fastpath_r12 bench_fastpath_r16

bench_fastpath_r8: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=8 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r8: LDFLAGS += -Wl,-O2
bench_fastpath_r8: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r8 build complete"

bench_fastpath_r12: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=12 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r12: LDFLAGS += -Wl,-O2
bench_fastpath_r12: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r12 build complete"

bench_fastpath_r16: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=16 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r16: LDFLAGS += -Wl,-O2
bench_fastpath_r16: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r16 build complete"
|
||
|
||
# PGO for bench-fastpath, stage 1: profile collection.
# Removes stale objects and profile data, rebuilds bench_tiny_hot_hakmem with
# -fprofile-generate (sub-make stdout discarded), then runs it with first
# argument 8, 16, 32 and 64 to emit *.gcda files. A failing run is tolerated
# (`|| true`) so the remaining runs still execute.
pgo-benchfast-profile: pgo_cflags := -fprofile-generate -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchfast-profile:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Profile (bench-fastpath)" \
	  "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench-fastpath profile data collected (*.gcda)"
|
||
|
||
# PGO for bench-fastpath, stage 2: optimized rebuild.
# Drops the instrumented objects (the *.gcda files are kept) and rebuilds
# bench_tiny_hot_hakmem with -fprofile-use and the same bench-fastpath defines
# used during profile collection. Sub-make stdout is discarded.
pgo-benchfast-build: pgo_cflags := -fprofile-use -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchfast-build:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Build (bench-fastpath)" \
	  "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench-fastpath PGO build complete"
|
||
|
||
# Debug bench build (counters/prints enabled).
# Appends -DHAKMEM_DEBUG_COUNTERS=1 -g -O2 to CFLAGS for this goal and its
# prerequisites; the -O2 is appended after the default -O$(OPT_LEVEL).
# NOTE(review): `clean` is a plain prerequisite; ordering against the rebuilds
# is only guaranteed for serial builds.
bench_debug: CFLAGS += -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
bench_debug: clean \
             bench_comprehensive_hakmem \
             bench_tiny_hot_hakmem \
             bench_tiny_hot_system \
             bench_tiny_hot_mi
	@echo "✓ $@ build complete (debug counters enabled)"
|
||
|
||
# Clean: delete objects, binaries and the shared library named by the build
# variables, plus a fixed list of benchmark/test artifacts.
# Note that every *.csv in the current directory is removed as well.
clean:
	rm -f $(OBJS) $(TARGET) \
	      $(BENCH_HAKMEM_OBJS) $(BENCH_SYSTEM_OBJS) \
	      $(BENCH_HAKMEM) $(BENCH_SYSTEM) \
	      $(SHARED_OBJS) $(SHARED_LIB) *.csv
	rm -f bench_comprehensive.o \
	      bench_comprehensive_hakmem bench_comprehensive_system
	rm -f bench_tiny bench_tiny.o \
	      bench_tiny_mt bench_tiny_mt.o \
	      test_mf2 test_mf2.o \
	      bench_tiny_hakmem
|
||
|
||
# Help: print the main goals and the two documented benchmark workflows.
# Each quoted argument below becomes one output line.
help:
	@printf '%s\n' \
	  "hakmem PoC - Makefile targets:" \
	  " make - Build the test program" \
	  " make run - Build and run the test" \
	  " make bench - Build benchmark programs" \
	  " make shared - Build shared library (for LD_PRELOAD)" \
	  " make clean - Clean build artifacts" \
	  " make bench-mode - Run Tiny-focused PGO bench (scripts/bench_mode.sh)" \
	  " make bench-all - Run (near) full mimalloc-bench with timeouts" \
	  "" \
	  "Benchmark workflow:" \
	  " 1. make bench" \
	  " 2. bash bench_runner.sh --runs 10" \
	  " 3. python3 analyze_results.py benchmark_results.csv" \
	  "" \
	  "mimalloc-bench workflow:" \
	  " 1. make shared" \
	  " 2. LD_PRELOAD=./libhakmem.so <benchmark>"
|
||
|
||
# Step 2: PGO (Profile-Guided Optimization) targets.
#
# Step 2b - profile collection for bench_comprehensive_hakmem.
# Rebuilds the benchmark with -fprofile-generate and -flto, runs it once with
# HAKMEM_WRAP_TINY=1, and prints only the first six matching summary lines.
# NOTE(review): the workload's own exit status is masked by the grep/head
# pipeline unless the recipe shell uses pipefail - confirm that is intended.
pgo-profile: pgo_flags := -fprofile-generate -flto
pgo-profile:
	@printf '%s\n' \
	  "=========================================" \
	  "Step 2b: PGO Profile Collection" \
	  "========================================="
	rm -f *.gcda *.o bench_comprehensive_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_flags)" LDFLAGS="$(LDFLAGS) $(pgo_flags)" bench_comprehensive_hakmem
	@echo "Running profile workload..."
	HAKMEM_WRAP_TINY=1 ./bench_comprehensive_hakmem 2>&1 | grep -E "(Test 1:|Throughput:)" | head -6
	@echo "✓ Profile data collected (*.gcda files)"
|
||
|
||
# Step 2c - optimized rebuild of bench_comprehensive_hakmem.
# Removes the instrumented objects (leaving *.gcda in place) and rebuilds with
# -fprofile-use and -flto on both the compile and link command lines.
pgo-build: pgo_flags := -fprofile-use -flto
pgo-build:
	@printf '%s\n' \
	  "=========================================" \
	  "Step 2c: PGO Optimized Build (LTO+PGO)" \
	  "========================================="
	rm -f *.o bench_comprehensive_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_flags)" LDFLAGS="$(LDFLAGS) $(pgo_flags)" bench_comprehensive_hakmem
	@echo "✓ LTO+PGO optimized build complete"
|
||
|
||
# PGO for tiny_hot (Strict Front recommended), stage 1: profile collection.
# Builds an instrumented bench_tiny_hot_hakmem with
# -DHAKMEM_TINY_STRICT_FRONT=1, then runs it with first argument 16, 32 and 64.
# Only the first run sets HAKMEM_TINY_SPECIALIZE_MASK=0x02 in its environment.
# Failed runs are ignored (`|| true`).
pgo-hot-profile: pgo_cflags := -fprofile-generate -flto -DHAKMEM_TINY_STRICT_FRONT=1
pgo-hot-profile:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Profile (tiny_hot) with Strict Front" \
	  "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (sizes 16/32/64, batch=100, cycles=60000)"
	HAKMEM_TINY_SPECIALIZE_MASK=0x02 ./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ tiny_hot profile data collected (*.gcda)"
|
||
|
||
# PGO for tiny_hot, stage 2: optimized rebuild.
# Deletes the instrumented objects and rebuilds bench_tiny_hot_hakmem with
# -fprofile-use, -flto and -DHAKMEM_TINY_STRICT_FRONT=1. Sub-make stdout is
# discarded.
pgo-hot-build: pgo_cflags := -fprofile-use -flto -DHAKMEM_TINY_STRICT_FRONT=1
pgo-hot-build:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Build (tiny_hot) with Strict Front" \
	  "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ tiny_hot PGO build complete"
|
||
|
||
# Phase 8.2: memory-profiling build (verbose memory breakdown).
# Defines HAKMEM_DEBUG_MEMORY for this goal and its prerequisites, rebuilds
# bench_comprehensive_hakmem after a clean, and prints usage instructions.
# NOTE(review): `clean` is a plain prerequisite; ordering against the rebuild
# is only guaranteed for serial builds.
bench-memory: CFLAGS += -DHAKMEM_DEBUG_MEMORY
bench-memory: clean bench_comprehensive_hakmem
	@printf '%s\n' \
	  "" \
	  "=========================================" \
	  "Memory profiling build complete!" \
	  " Run: ./bench_comprehensive_hakmem" \
	  " Memory breakdown will be printed at end" \
	  "========================================="
|
||
|
||
# Command-style goals: none of these names a file the build produces, so mark
# them phony. Otherwise a stray file or directory with the same name would make
# the goal look up to date and its recipe would silently be skipped.
.PHONY: all run bench shared debug clean help pgo-profile pgo-build bench-memory
.PHONY: bench-mode bench-all bench_debug bench_sll_only \
        bench_fastpath_r8 bench_fastpath_r12 bench_fastpath_r16
.PHONY: pgo-hot-profile pgo-hot-build pgo-profile-shared pgo-build-shared \
        pgo-benchfast-profile pgo-benchfast-build \
        pgo-benchsll-profile pgo-benchsll-build \
        pgo-benchsll-r12w192-profile pgo-benchsll-r12w192-build
|
||
|
||
# PGO for the shared library (LD_PRELOAD).
#
# Step 1: build an instrumented $(SHARED_LIB) and collect a profile by
# preloading it into ./bench_comprehensive_system with HAKMEM_WRAP_TINY=1.
# Only the first 20 matching summary lines are shown, and a failing pipeline is
# ignored (`|| true`).
# NOTE(review): bench_comprehensive_system is not a prerequisite here; if it
# has not been built, the run fails quietly and no profile is produced.
pgo-profile-shared: pgo_flags := -fprofile-generate -flto
pgo-profile-shared:
	@printf '%s\n' \
	  "=========================================" \
	  "Step: PGO Profile Collection (shared lib)" \
	  "========================================="
	rm -f *_shared.gcda *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) $(pgo_flags)" LDFLAGS="$(LDFLAGS) $(pgo_flags)" shared
	@echo "Running profile workload (LD_PRELOAD)..."
	HAKMEM_WRAP_TINY=1 LD_PRELOAD=./$(SHARED_LIB) ./bench_comprehensive_system 2>&1 | grep -E "(SIZE CLASS:|Throughput:)" | head -20 || true
	@echo "✓ Profile data collected (*.gcda for *_shared)"
|
||
|
||
# Step 2: rebuild $(SHARED_LIB) using the collected profile.
# Compiles with -fprofile-use and -flto; -Wno-error=coverage-mismatch keeps a
# coverage-mismatch diagnostic from being treated as an error.
pgo-build-shared: pgo_flags := -fprofile-use -flto
pgo-build-shared:
	@printf '%s\n' \
	  "=========================================" \
	  "Step: PGO Optimized Build (shared lib)" \
	  "========================================="
	rm -f *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) $(pgo_flags) -Wno-error=coverage-mismatch" LDFLAGS="$(LDFLAGS) $(pgo_flags)" shared
	@echo "✓ LTO+PGO optimized shared library complete"
|
||
|
||
# Convenience wrapper: hand off to the Bench Mode script
# (scripts/bench_mode.sh), run under bash without echoing the command.
bench-mode:
	@bash scripts/bench_mode.sh
|
||
|
||
# Convenience wrapper: run scripts/run_all_benches_with_timeouts.sh under bash
# without echoing the command.
bench-all:
	@bash scripts/run_all_benches_with_timeouts.sh
|
||
|
||
# PGO for bench_sll_only, stage 1: profile collection.
# Rebuilds bench_tiny_hot_hakmem with -fprofile-generate plus the SLL-only
# bench defines (sub-make stdout discarded), then runs it with first argument
# 8, 16, 32 and 64 to emit *.gcda files. Failed runs are ignored (`|| true`).
pgo-benchsll-profile: pgo_cflags := -fprofile-generate -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 \
    -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchsll-profile:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Profile (bench_sll_only)" \
	  "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench_sll_only profile data collected (*.gcda)"
|
||
|
||
# PGO for bench_sll_only, stage 2: optimized rebuild.
# Deletes the instrumented objects and rebuilds bench_tiny_hot_hakmem with
# -fprofile-use and the same SLL-only defines used for profile collection.
# Sub-make stdout is discarded.
pgo-benchsll-build: pgo_cflags := -fprofile-use -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 \
    -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchsll-build:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Build (bench_sll_only)" \
	  "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench_sll_only PGO build complete"
|
||
|
||
# Variant: SLL-only with REFILL32=12 and WARMUP32=192 (tuned for 32B).
# Stage 1: profile collection. Rebuilds bench_tiny_hot_hakmem with
# -fprofile-generate plus the variant's defines (sub-make stdout discarded),
# then runs it with first argument 8, 16, 32 and 64 to emit *.gcda files.
# Failed runs are ignored (`|| true`).
#
# Every run must redirect to /dev/null. A misspelt path makes the shell try to
# create a file in the filesystem root; when that fails the benchmark is never
# started, and `|| true` hides the failure, leaving that size out of the
# profile.
pgo-benchsll-r12w192-profile: pgo_cflags := -fprofile-generate -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 \
    -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchsll-r12w192-profile:
	@echo "========================================="
	@echo "PGO Profile (bench_sll_only r12 w32=192)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ r12 w32=192 profile data collected (*.gcda)"
|
||
|
||
# SLL-only r12/w192 variant, stage 2: optimized rebuild.
# Deletes the instrumented objects and rebuilds bench_tiny_hot_hakmem with
# -fprofile-use and the variant's defines (REFILL32=12, WARMUP32=192).
# Sub-make stdout is discarded.
pgo-benchsll-r12w192-build: pgo_cflags := -fprofile-use -flto \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 \
    -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
pgo-benchsll-r12w192-build:
	@printf '%s\n' \
	  "=========================================" \
	  "PGO Build (bench_sll_only r12 w32=192)" \
	  "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) $(pgo_cflags)" \
	        LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ r12 w32=192 PGO build complete"
|
||
# Absolute path of mimalloc-bench/extern/mi/out/release under the directory
# make runs in, computed once at parse time via `pwd`.
# NOTE(review): presumably passed to the linker as an rpath for mimalloc-linked
# benchmarks - its uses are outside this section; confirm.
MI_RPATH := $(shell pwd)/mimalloc-bench/extern/mi/out/release
|
||
# Sanitized builds (compiler-assisted debugging).
# Each flavour has a CFLAGS/LDFLAGS pair that the *-larson goals hand to a
# sub-make as EXTRA_CFLAGS / EXTRA_LDFLAGS. All flavours compile at -O1 with
# debug info, keep frame pointers, turn LTO off, and define
# HAKMEM_FORCE_LIBC_ALLOC_BUILD=1.
.PHONY: asan-larson ubsan-larson tsan-larson

# AddressSanitizer + UndefinedBehaviorSanitizer; every report is fatal.
SAN_ASAN_CFLAGS = \
    -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=address,undefined \
    -fno-sanitize-recover=all \
    -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_ASAN_LDFLAGS = -fsanitize=address,undefined

# UndefinedBehaviorSanitizer only; undefined-behaviour reports are fatal.
SAN_UBSAN_CFLAGS = \
    -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=undefined \
    -fno-sanitize-recover=undefined \
    -fstack-protector-strong \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_UBSAN_LDFLAGS = -fsanitize=undefined

# ThreadSanitizer only.
SAN_TSAN_CFLAGS = \
    -O1 -g -fno-omit-frame-pointer -fno-lto \
    -fsanitize=thread \
    -DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_TSAN_LDFLAGS = -fsanitize=thread
|
||
|
||
# Build larson_hakmem with the ASan/UBSan flag set and keep a copy named
# larson_hakmem_asan. Stdout of both sub-makes is discarded; only the final
# confirmation line is printed.
asan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) EXTRA_CFLAGS="$(SAN_ASAN_CFLAGS)" \
	         EXTRA_LDFLAGS="$(SAN_ASAN_LDFLAGS)" \
	         larson_hakmem >/dev/null
	@cp -f larson_hakmem larson_hakmem_asan
	@echo "✓ Built larson_hakmem_asan with ASan/UBSan"
|
||
|
||
# Build larson_hakmem with the UBSan flag set and keep a copy named
# larson_hakmem_ubsan. Stdout of both sub-makes is discarded; only the final
# confirmation line is printed.
ubsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) EXTRA_CFLAGS="$(SAN_UBSAN_CFLAGS)" \
	         EXTRA_LDFLAGS="$(SAN_UBSAN_LDFLAGS)" \
	         larson_hakmem >/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan
	@echo "✓ Built larson_hakmem_ubsan with UBSan"
|
||
|
||
# Build larson_hakmem with the TSan flag set (no ASan) and keep a copy named
# larson_hakmem_tsan. Stdout of both sub-makes is discarded; only the final
# confirmation line is printed.
tsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) EXTRA_CFLAGS="$(SAN_TSAN_CFLAGS)" \
	         EXTRA_LDFLAGS="$(SAN_TSAN_LDFLAGS)" \
	         larson_hakmem >/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan
	@echo "✓ Built larson_hakmem_tsan with TSan (no ASan)"
|