Files
hakmem/Makefile
2025-12-19 03:45:01 +09:00

1571 lines
81 KiB
Makefile
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Makefile for hakmem PoC
CC := gcc

# Running plain `make` prints the target overview below instead of building anything.
.PHONY: help
.DEFAULT_GOAL := help

# help: print the most commonly used build targets, one line per printf argument.
help:
	@printf '%s\n' \
	  "=========================================" \
	  "HAKMEM Build Targets" \
	  "=========================================" \
	  "" \
	  "Development (Fast builds):" \
	  " make bench_random_mixed_hakmem - Quick build (~1-2 min)" \
	  " make bench_tiny_hot_hakmem - Quick build" \
	  "" \
	  "Benchmarking (PGO-optimized, +6% faster):" \
	  " make pgo-tiny-full - Full PGO workflow (~5-10 min)" \
	  " = Profile + Optimize + Test" \
	  " make pgo-tiny-profile - Step 1: Build profile binaries" \
	  " make pgo-tiny-collect - Step 2: Collect profile data" \
	  " make pgo-tiny-build - Step 3: Build optimized" \
	  "" \
	  "Comparison:" \
	  " make bench - Build allocator comparison benches" \
	  " make bench-pool-tls - Pool TLS benchmark" \
	  "" \
	  "Cleanup:" \
	  " make clean - Clean build artifacts" \
	  "" \
	  "Phase 4 Performance:" \
	  " Baseline: 57.0 M ops/s" \
	  " PGO-optimized: 60.6 M ops/s (+6.25%)" \
	  "" \
	  "TIP: For best performance, use 'make pgo-tiny-full'" \
	  "========================================="
CXX := g++

# Directory structure (2025-11-01 reorganization)
SRC_DIR       := core
BENCH_SRC     := benchmarks/src
TEST_SRC      := tests
BUILD_DIR     := build
BENCH_BIN_DIR := benchmarks/bin

# Search paths for source files: listed one per line, then joined with ':' so the
# resulting VPATH value is the same colon-separated string as before.
hakmem_vpath_empty :=
hakmem_vpath_space := $(hakmem_vpath_empty) $(hakmem_vpath_empty)
hakmem_vpath_dirs  := \
  $(SRC_DIR) \
  $(SRC_DIR)/box \
  $(BENCH_SRC)/tiny \
  $(BENCH_SRC)/mid \
  $(BENCH_SRC)/comprehensive \
  $(BENCH_SRC)/stress \
  $(TEST_SRC)/unit \
  $(TEST_SRC)/integration \
  $(TEST_SRC)/stress
VPATH := $(subst $(hakmem_vpath_space),:,$(strip $(hakmem_vpath_dirs)))

# Timing: default OFF for performance. Set HAKMEM_TIMING=1 to enable.
HAKMEM_TIMING ?= 0
# Phase 6.25: aggressive optimization knobs (default ON, overridable on the command line)
OPT_LEVEL ?= 3
USE_LTO   ?= 1
NATIVE    ?= 1
# Allow overriding TLS ring capacity at build time: make shared RING_CAP=32
RING_CAP  ?= 32

# Flags shared by the executable and shared-library builds.
# Expanded once here (:=), so HAKMEM_TIMING is captured at parse time.
BASE_CFLAGS := -Wall -Wextra -std=c11 \
  -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -D_GLIBC_USE_ISOC2X=0 \
  -D__isoc23_strtol=strtol -D__isoc23_strtoll=strtoll \
  -D__isoc23_strtoul=strtoul -D__isoc23_strtoull=strtoull \
  -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) \
  -ffast-math -funroll-loops -fomit-frame-pointer \
  -fno-unwind-tables -fno-asynchronous-unwind-tables \
  -fno-semantic-interposition \
  -I core -I include

# Executable objects.
CFLAGS = -O$(OPT_LEVEL) $(BASE_CFLAGS)
# Phase 6.25: shared-library objects (PIC + TLS ring extension via RING_CAP).
CFLAGS_SHARED = -O$(OPT_LEVEL) $(BASE_CFLAGS) -fPIC -DPOOL_TLS_RING_CAP=$(RING_CAP)
LDFLAGS = -lm -lpthread

# Host-specific tuning applies to both flag sets.
ifeq ($(NATIVE),1)
  CFLAGS        += -march=native -mtune=native -fno-plt
  CFLAGS_SHARED += -march=native -mtune=native -fno-plt
endif

# LTO must be enabled for compile and link steps alike.
ifeq ($(USE_LTO),1)
  CFLAGS        += -flto
  CFLAGS_SHARED += -flto
  LDFLAGS       += -flto
endif
# ------------------------------------------------------------
# Build hygiene: dependency tracking + flag consistency checks
# ------------------------------------------------------------
# Track header dependencies for explicit compile rules as well.
# Both flag sets get -MMD -MP so that the %_shared.o objects (compiled with
# CFLAGS_SHARED) also emit .d files and rebuild when any included header changes.
CFLAGS        += -MMD -MP
CFLAGS_SHARED += -MMD -MP

# If someone injects -DHAKMEM_POOL_TLS_PHASE1=1 directly into CFLAGS
# but forgets POOL_TLS_PHASE1=1, object lists will miss pool_tls*.o.
# Fail fast to avoid confusing link/runtime errors.
ifneq ($(filter -DHAKMEM_POOL_TLS_PHASE1=1,$(CFLAGS)),)
  ifneq ($(POOL_TLS_PHASE1),1)
    $(error Detected -DHAKMEM_POOL_TLS_PHASE1=1 in CFLAGS but POOL_TLS_PHASE1!=1. Please invoke: make POOL_TLS_PHASE1=1 ...)
  endif
endif

# Include generated .d files if present (safe even if none yet; the leading '-'
# silences missing-file errors).
# Filter to only files (not directories like glibc-2.38/build/iconvdata/gconv-modules.d)
# Also exclude glibc and mimalloc-bench subdirectories
-include $(shell find . -name '*.d' -type f -not -path './glibc*' -not -path './mimalloc-bench*' 2>/dev/null)
# ------------------------------------------------------------
# Build flavor: release/debug (controls HAKMEM_BUILD_* and NDEBUG)
# ------------------------------------------------------------
# Any value other than "release" or "debug" adds no flavor flags at all.
BUILD_FLAVOR ?= release

ifeq ($(BUILD_FLAVOR),release)
  CFLAGS        += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
  CFLAGS_SHARED += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
endif

ifeq ($(BUILD_FLAVOR),debug)
  CFLAGS        += -DHAKMEM_BUILD_DEBUG=1
  CFLAGS_SHARED += -DHAKMEM_BUILD_DEBUG=1
endif
# ------------------------------------------------------------
# Phase 18: Hot Text Isolation (I-cache locality optimization)
# ------------------------------------------------------------
# Usage:   make HOT_TEXT_ISOLATION=1 bench_random_mixed_hakmem
# Default: OFF (research box, requires A/B validation)
# Effect:  only defines HAKMEM_HOT_TEXT_ISOLATION=1 (hot/cold attribute macros).
#
# NOTE (Phase 18 v1 NO-GO):
#   The section-splitting + --gc-sections experiment caused a large I-cache
#   regression, so it lives behind its own opt-in knob (HOT_TEXT_GC_SECTIONS=1).
HOT_TEXT_ISOLATION ?= 0
ifeq (1,$(HOT_TEXT_ISOLATION))
  CFLAGS_SHARED += -DHAKMEM_HOT_TEXT_ISOLATION=1
  CFLAGS        += -DHAKMEM_HOT_TEXT_ISOLATION=1
endif
# Research-only knob (currently NO-GO): per-function/per-data sections at compile
# time plus --gc-sections at link time.
# Enable explicitly only when combined with an ordering strategy.
HOT_TEXT_GC_SECTIONS ?= 0
ifeq (1,$(HOT_TEXT_GC_SECTIONS))
  LDFLAGS       += -Wl,--gc-sections
  CFLAGS        += -ffunction-sections -fdata-sections
  CFLAGS_SHARED += -ffunction-sections -fdata-sections
endif
# Phase 18 v2: BENCH_MINIMAL (remove instrumentation for benchmark builds)
# The define is added to both flag sets, so the bench binaries and the shared
# library are built the same way. It mainly matters for the bench_* binaries,
# where BENCH_MINIMAL is intentionally enabled.
BENCH_MINIMAL ?= 0
ifeq (1,$(BENCH_MINIMAL))
  CFLAGS_SHARED += -DHAKMEM_BENCH_MINIMAL=1
  CFLAGS        += -DHAKMEM_BENCH_MINIMAL=1
endif
# Box Theory refactor for Tiny (Phase 6-1.7), enabled by default.
# Original note: best performing option at the time (4.19M ops/s).
# NOTE(review): an older remark said this was "disabled while testing ULTRA_SIMPLE
# with SFC integration", yet the default below is 1 - confirm which is current.
# Opt out to the legacy path with: make BOX_REFACTOR_DEFAULT=0
# The macro is always defined: 1 when the knob is exactly "1", otherwise 0.
BOX_REFACTOR_DEFAULT ?= 1
ifeq ($(BOX_REFACTOR_DEFAULT),1)
  hakmem_box_refactor_value := 1
else
  hakmem_box_refactor_value := 0
endif
CFLAGS        += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=$(hakmem_box_refactor_value)
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=$(hakmem_box_refactor_value)
# (Removed) legacy BUILD_RELEASE_DEFAULT in favor of BUILD_FLAVOR

# Phase 6-2: Ultra-Simple with SFC integration
# Original note: Ultra-Simple without SFC measured 3.56M ops/s vs 4.19M ops/s for
# BOX_REFACTOR; the SFC (128-slot cache) variant was expected to exceed 5M ops/s.
# Default is OFF here; enable with: make ULTRA_SIMPLE_DEFAULT=1
ULTRA_SIMPLE_DEFAULT ?= 0
ifeq (1,$(ULTRA_SIMPLE_DEFAULT))
  CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
  CFLAGS        += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
endif
# Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path)
# Stated target: 70-80% of System tcache (95-108 M ops/s)
# Enabled by default for testing; disable with: make TINY_FAST_PATH_DEFAULT=0
TINY_FAST_PATH_DEFAULT ?= 1
ifeq (1,$(TINY_FAST_PATH_DEFAULT))
  CFLAGS_SHARED += -DHAKMEM_TINY_FAST_PATH=1
  CFLAGS        += -DHAKMEM_TINY_FAST_PATH=1
endif
# Phase 6-1.8: New 3-Layer Tiny front (A/B)
# Default OFF; switch it on with: make NEW_3LAYER_DEFAULT=1
NEW_3LAYER_DEFAULT ?= 0
ifeq (1,$(NEW_3LAYER_DEFAULT))
  CFLAGS_SHARED += -DHAKMEM_TINY_USE_NEW_3LAYER=1
  CFLAGS        += -DHAKMEM_TINY_USE_NEW_3LAYER=1
endif
# Phase 7: Region-ID Direct Lookup (Header-based class_idx)
# Original notes: ultra-fast free in 3-5 instructions / 5-10 cycles (vs 500+ cycles
# before); target 40-80M ops/s (70-140% of System malloc).
# Default: ON (Phase 7 validated, Fix #16 stable, mimalloc strategy Phase 1)
# Override with: make HEADER_CLASSIDX=0 (or =1 to force on)
HEADER_CLASSIDX ?= 1
ifeq (1,$(HEADER_CLASSIDX))
  CFLAGS_SHARED += -DHAKMEM_TINY_HEADER_CLASSIDX=1
  CFLAGS        += -DHAKMEM_TINY_HEADER_CLASSIDX=1
endif
# Phase 7 Task 2: Aggressive inline TLS cache access
# Usage:    make HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1
# Expected (original note): +10-15% performance (save 5-10 cycles per alloc)
# Default:  ON (mimalloc strategy Phase 1)
AGGRESSIVE_INLINE ?= 1
ifeq (1,$(AGGRESSIVE_INLINE))
  CFLAGS_SHARED += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
  CFLAGS        += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
endif
# Phase 7 Task 3: Pre-warm TLS cache
# Usage:    make PREWARM_TLS=1
# Expected (original note): reduce first-allocation miss penalty
# Default:  ON (mimalloc strategy Phase 1)
PREWARM_TLS ?= 1
ifeq (1,$(PREWARM_TLS))
  CFLAGS_SHARED += -DHAKMEM_TINY_PREWARM_TLS=1
  CFLAGS        += -DHAKMEM_TINY_PREWARM_TLS=1
endif
# Performance optimization: fixed refill for class5 (256B)
# Idea recorded by the original author (ChatGPT-sensei recommendation): eliminate
# branches by fixing want=256.
# Usage:    make CLASS5_FIXED_REFILL=1   (default OFF)
# Expected (original note): fewer branch mispredictions, lower instruction count
CLASS5_FIXED_REFILL ?= 0
ifeq (1,$(CLASS5_FIXED_REFILL))
  CFLAGS_SHARED += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
  CFLAGS        += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
endif
# Phase 91: C6 Intrusive LIFO Inline Slots (per-class LIFO transformation)
# Purpose:  replace the FIFO ring with an intrusive LIFO to reduce per-operation
#           metadata overhead
# Usage:    make BOX_TINY_C6_INLINE_SLOTS_IFL=1
# Expected (original note): +1-2% throughput improvement (C6 only, 57% coverage)
# Default:  ON (research box, reversible via ENV gate HAKMEM_TINY_C6_INLINE_SLOTS_IFL=0)
BOX_TINY_C6_INLINE_SLOTS_IFL ?= 1
ifeq (1,$(BOX_TINY_C6_INLINE_SLOTS_IFL))
  CFLAGS_SHARED += -DHAKMEM_BOX_TINY_C6_INLINE_SLOTS_IFL=1
  CFLAGS        += -DHAKMEM_BOX_TINY_C6_INLINE_SLOTS_IFL=1
endif
# Phase 3 (2025-11-29): mincore removed entirely
#   - mincore() syscall overhead eliminated (was +10.3% with DISABLE flag)
#   - Phase 1b/2 registry-based validation provides sufficient safety
#   - Dead code cleanup: DISABLE_MINCORE flag no longer needed

# PGO instrumentation pass: active whenever PROFILE_GEN has a non-empty value.
ifdef PROFILE_GEN
  LDFLAGS += -fprofile-generate
  CFLAGS  += -fprofile-generate
endif

# PGO optimization pass: consume the collected profile; a coverage mismatch is
# kept from being promoted to an error.
ifdef PROFILE_USE
  LDFLAGS += -fprofile-use
  CFLAGS  += -fprofile-use -Wno-error=coverage-mismatch
endif

# User-supplied extras are appended last.
LDFLAGS       += $(EXTRA_LDFLAGS)
CFLAGS        += $(EXTRA_CFLAGS)
CFLAGS_SHARED += $(EXTRA_CFLAGS)
# Targets
TARGET = test_hakmem

# Objects linked into $(TARGET). The list is spread over continuation lines
# (a bare line break inside the value is a parse error); the object order is
# unchanged.
OBJS_BASE = \
  hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o \
  hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o \
  core/box/ss_allocation_box.o core/box/ss_release_policy_box.o \
  superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o \
  core/superslab_head_stub.o hakmem_smallmid.o \
  tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o \
  hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o \
  hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
  tiny_adaptive_sizing.o hakmem_super_registry.o \
  hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o \
  hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o \
  hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o \
  hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o \
  hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o \
  hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o \
  core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o \
  core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o \
  core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o \
  core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o \
  core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o \
  core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o \
  core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o \
  core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o \
  core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o \
  core/box/alloc_gate_stats_box.o \
  core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o \
  core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o \
  core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o \
  core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o \
  core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o \
  core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o \
  core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o \
  core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o \
  core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o \
  core/box/remote_side_box.o core/box/tiny_free_route_cache_env_box.o \
  core/box/hakmem_env_snapshot_box.o core/box/tiny_c7_preserve_header_env_box.o \
  core/box/tiny_tcache_env_box.o core/box/tiny_unified_lifo_env_box.o \
  core/box/front_fastlane_alloc_legacy_direct_env_box.o core/box/fastlane_direct_env_box.o \
  core/box/tiny_header_hotfull_env_box.o core/box/tiny_inline_slots_fixed_mode_box.o \
  core/box/tiny_inline_slots_switch_dispatch_fixed_box.o \
  core/box/free_path_commit_once_fixed_box.o core/box/free_path_legacy_mask_box.o \
  core/box/tiny_inline_slots_overflow_stats_box.o \
  core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o \
  core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o \
  core/tiny_c6_inline_slots.o core/tiny_c6_inline_slots_ifl.o core/tiny_c5_inline_slots.o \
  core/tiny_c2_local_cache.o core/tiny_c3_inline_slots.o core/tiny_c4_inline_slots.o \
  core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o \
  core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o \
  core/smallsegment_v5.o core/smallobject_cold_iface_v5.o \
  core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o \
  core/smallsegment_v7.o core/smallobject_cold_iface_v7.o \
  core/mid_hotbox_v3.o core/smallobject_policy_v7.o \
  core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o \
  core/smallobject_learner_v2.o core/smallobject_mid_v35.o \
  core/box/small_policy_snapshot_tls_box.o
OBJS = $(OBJS_BASE)

# Shared library
SHARED_LIB = libhakmem.so
# IMPORTANT: keep the shared library in sync with the current hakmem build to avoid
# LD_PRELOAD runtime link errors (undefined symbols) as new boxes/files are added.
# Every foo.o in OBJS_BASE maps to foo_shared.o.
SHARED_OBJS = $(patsubst %.o,%_shared.o,$(OBJS_BASE))
# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1): adds the pool_tls* objects to
# both the executable and shared object lists and defines the matching macro.
ifeq (1,$(POOL_TLS_PHASE1))
  CFLAGS        += -DHAKMEM_POOL_TLS_PHASE1=1
  CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PHASE1=1
  OBJS          += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
  SHARED_OBJS   += pool_tls_shared.o pool_refill_shared.o pool_tls_arena_shared.o pool_tls_registry_shared.o pool_tls_remote_shared.o
endif

# Pool TLS Phase 1.5b - Pre-warm optimization (macro only, no extra objects)
ifeq (1,$(POOL_TLS_PREWARM))
  CFLAGS        += -DHAKMEM_POOL_TLS_PREWARM=1
  CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PREWARM=1
endif

# Pool TLS Bind Box - Registry lookup short-circuit (Phase 1.6)
ifeq (1,$(POOL_TLS_BIND_BOX))
  CFLAGS        += -DHAKMEM_POOL_TLS_BIND_BOX=1
  CFLAGS_SHARED += -DHAKMEM_POOL_TLS_BIND_BOX=1
  OBJS          += pool_tls_bind.o
  SHARED_OBJS   += pool_tls_bind_shared.o
endif
# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system

# The hakmem benchmark links the same allocator objects as $(TARGET) plus its own
# driver object. Deriving the list from OBJS_BASE keeps the two in step: the
# hand-copied list had fallen behind and lacked ss_pt_impl.o,
# hakmem_env_snapshot_box.o, tiny_c7_preserve_header_env_box.o, tiny_tcache_env_box.o,
# tiny_unified_lifo_env_box.o, front_fastlane_alloc_legacy_direct_env_box.o and
# tiny_header_hotfull_env_box.o.
BENCH_HAKMEM_OBJS_BASE = $(OBJS_BASE) bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)

# Optional Pool TLS objects, mirroring what OBJS receives for the same switches.
ifeq ($(POOL_TLS_PHASE1),1)
  BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
endif
ifeq ($(POOL_TLS_BIND_BOX),1)
  BENCH_HAKMEM_OBJS += pool_tls_bind.o
endif

BENCH_SYSTEM_OBJS = bench_allocators_system.o
# Conventional aggregate target: builds the test program.
# Note: a bare `make` runs `help`, because .DEFAULT_GOAL is set to help.
.PHONY: all
all: $(TARGET)
# print-flags: dump the key build-time switches for troubleshooting.
# The last line shows only the -DHAKMEM_BUILD_* entries found in CFLAGS.
.PHONY: print-flags
print-flags:
	@printf '%s\n' \
	  "==== Build Switches ====" \
	  "FLAVOR = $(BUILD_FLAVOR)" \
	  "POOL_TLS_PHASE1 = $(POOL_TLS_PHASE1)" \
	  "POOL_TLS_PREWARM = $(POOL_TLS_PREWARM)" \
	  "HEADER_CLASSIDX = $(HEADER_CLASSIDX)" \
	  "AGGRESSIVE_INLINE = $(AGGRESSIVE_INLINE)" \
	  "PREWARM_TLS = $(PREWARM_TLS)" \
	  "USE_LTO = $(USE_LTO)" \
	  "OPT_LEVEL = $(OPT_LEVEL)" \
	  "NATIVE = $(NATIVE)" \
	  "CFLAGS contains = $(filter -DHAKMEM_BUILD_%,$(CFLAGS))"
# Link the test program from every object in OBJS, then print a short usage banner.
$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)
	@printf '%s\n' \
	  "" \
	  "=========================================" \
	  "Build successful! Run with:" \
	  " ./$(TARGET)" \
	  "========================================="
# Generic C compile rule. Besides the source file, every object depends on the
# core public/internal headers listed here, so touching any of them rebuilds all objects.
%.o: %.c \
  hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h \
  hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h \
  hakmem_tiny.h hakmem_tiny_superslab.h hakmem_super_registry.h \
  hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS) -c $< -o $@
# Build benchmark programs (hakmem-linked and system-malloc variants).
# Declared phony so a stray file named "bench" cannot mask the target.
# -DHAKMEM_PROF_STATIC=1 is added for this target and everything it builds.
.PHONY: bench
bench: CFLAGS += -DHAKMEM_PROF_STATIC=1
bench: $(BENCH_HAKMEM) $(BENCH_SYSTEM)
	@echo ""
	@echo "========================================="
	@echo "Benchmark programs built successfully!"
	@echo " $(BENCH_HAKMEM) - hakmem versions"
	@echo " $(BENCH_SYSTEM) - system/jemalloc/mimalloc"
	@echo ""
	@echo "Run benchmarks with:"
	@echo " bash bench_runner.sh --runs 10"
	@echo "========================================="
# bench_allocators.c is compiled twice: once with -DUSE_HAKMEM for the variant
# linked against hakmem, once without it for the system variant.

# hakmem version (with hakmem linked)
bench_allocators_hakmem.o: bench_allocators.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

$(BENCH_HAKMEM): $(BENCH_HAKMEM_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

# system version (without hakmem, for LD_PRELOAD testing)
bench_allocators_system.o: bench_allocators.c
	$(CC) $(CFLAGS) -c $< -o $@

$(BENCH_SYSTEM): $(BENCH_SYSTEM_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
# Tiny hot microbench (direct link vs system)
bench_tiny_hot_hakmem.o: bench_tiny_hot.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_tiny_hot_system.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -c -o $@ $<

# TINY_BENCH_OBJS is assigned further down in this Makefile. Prerequisite lists
# are expanded as soon as a rule is read, so a plain $(TINY_BENCH_OBJS) here
# expands to nothing and the link would receive only the benchmark object.
# Secondary expansion defers the lookup until the whole Makefile has been read.
.SECONDEXPANSION:
bench_tiny_hot_hakmem: bench_tiny_hot_hakmem.o $$(TINY_BENCH_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

bench_tiny_hot_system: bench_tiny_hot_system.o
	$(CC) -o $@ $^ $(LDFLAGS)
# mimalloc variant for tiny hot bench (direct link against the library under
# mimalloc-bench/extern/mi)
bench_tiny_hot_mi.o: bench_tiny_hot.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c $< -o $@

bench_mi_force.o: bench_mi_force.c
	$(CC) $(CFLAGS) -I mimalloc-bench/extern/mi/include -c $< -o $@

# --no-as-needed is switched on only around -lmimalloc, then restored.
bench_tiny_hot_mi: bench_tiny_hot_mi.o bench_mi_force.o
	$(CC) -o $@ $^ \
	  -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
	  $(LDFLAGS)
# hakmi variant for tiny hot bench (direct link via front API).
# The API header is force-included and malloc/free/realloc are remapped to their
# hakmi_* counterparts on the command line.
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI \
	  -include include/hakmi/hakmi_api.h \
	  -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
	  -c -o $@ $<

# Objects that make up the hakmi front adapter.
HAKMI_FRONT_OBJS = \
  adapters/hakmi_front/hakmi_front.o \
  adapters/hakmi_front/hakmi_env.o \
  adapters/hakmi_front/hakmi_tls_front.o
# ===== Convenience perf targets =====
.PHONY: pgo-gen-tinyhot pgo-use-tinyhot perf-help

# Generate PGO profile for Tiny Hot (32/100/60000) with SLL-first fast path.
# The benchmark's exit status is deliberately ignored (|| true).
pgo-gen-tinyhot:
	$(MAKE) PROFILE_GEN=1 bench_tiny_hot_hakmem
	HAKMEM_TINY_TRACE_RING=0 \
	HAKMEM_SAFE_FREE=0 \
	HAKMEM_TINY_TLS_SLL=1 \
	HAKMEM_TINY_TLS_LIST=1 \
	HAKMEM_SLL_MULTIPLIER=1 \
	./bench_tiny_hot_hakmem 32 100 60000 || true

# Rebuild the Tiny Hot binary using the generated PGO profile
pgo-use-tinyhot:
	$(MAKE) PROFILE_USE=1 bench_tiny_hot_hakmem

# Print the recommended runtime environment for reproducible benchmarks
perf-help:
	@printf '%s\n' \
	  "Recommended runtime envs (Tiny Hot / Larson):" \
	  " export HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0" \
	  " export HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1" \
	  " export HAKMEM_SLL_MULTIPLIER=1" \
	  "Build flags (overridable): OPT_LEVEL=$(OPT_LEVEL) USE_LTO=$(USE_LTO) NATIVE=$(NATIVE)"
# Explicit compile rules for the hakmi front objects. hakmi_front.o and
# hakmi_tls_front.o additionally get the mimalloc include directory; hakmi_env.o
# does not.
hakmi_mi_include := -I mimalloc-bench/extern/mi/include

adapters/hakmi_front/hakmi_front.o: adapters/hakmi_front/hakmi_front.c adapters/hakmi_front/hakmi_front.h include/hakmi/hakmi_api.h
	$(CC) $(CFLAGS) -I include $(hakmi_mi_include) -c $< -o $@

adapters/hakmi_front/hakmi_env.o: adapters/hakmi_front/hakmi_env.c adapters/hakmi_front/hakmi_env.h
	$(CC) $(CFLAGS) -I include -c $< -o $@

adapters/hakmi_front/hakmi_tls_front.o: adapters/hakmi_front/hakmi_tls_front.c adapters/hakmi_front/hakmi_tls_front.h
	$(CC) $(CFLAGS) -I include $(hakmi_mi_include) -c $< -o $@

# Link the hakmi bench against the front adapter and the prebuilt mimalloc library.
bench_tiny_hot_hakmi: bench_tiny_hot_hakmi.o $(HAKMI_FRONT_OBJS)
	$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
# Run test: build $(TARGET) if needed, then execute it.
# Declared phony so a file or directory named "run" cannot mask the target.
.PHONY: run
run: $(TARGET)
	@echo ""
	@echo "========================================="
	@echo "Running hakmem PoC test..."
	@echo "========================================="
	@./$(TARGET)
# Shared library target (for LD_PRELOAD with mimalloc-bench)
# Each foo_shared.o is compiled from foo.c with CFLAGS_SHARED. The header list
# matches the one on the %.o rule (including hakmem_tiny_superslab.h and
# hakmem_super_registry.h), so a change to any of these headers rebuilds the
# shared objects as well as the regular ones.
%_shared.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
	$(CC) $(CFLAGS_SHARED) -c -o $@ $<

$(SHARED_LIB): $(SHARED_OBJS)
	$(CC) -shared -o $@ $^ $(LDFLAGS)
	@echo ""
	@echo "========================================="
	@echo "Shared library built successfully!"
	@echo " $(SHARED_LIB)"
	@echo ""
	@echo "Use with LD_PRELOAD:"
	@echo " LD_PRELOAD=./$(SHARED_LIB) <command>"
	@echo "========================================="

# Convenience alias for the library file; phony so a file named "shared" cannot mask it.
.PHONY: shared
shared: $(SHARED_LIB)
# Phase 6.15: Debug build target (verbose logging)
# Builds the shared library with -g -O0, verbose logging and static profiling.
#
# BASE_CFLAGS is expanded with := when the Makefile is read, so it already holds
# the parse-time -DHAKMEM_DEBUG_TIMING value; the target-specific HAKMEM_TIMING=1
# below cannot change it. The macro is therefore undefined and redefined
# explicitly (-U then -D, processed in command-line order) using the value of
# HAKMEM_TIMING in this target's context.
.PHONY: debug
debug: CFLAGS += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING)
debug: CFLAGS_SHARED += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1 -UHAKMEM_DEBUG_TIMING -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING)
debug: HAKMEM_TIMING=1
debug: shared
# Phase 6-1.7: Box Theory Refactoring
# Cleans, then rebuilds larson_hakmem in a sub-make with the box-refactor macro
# appended to the current CFLAGS. Passing CFLAGS on the sub-make command line
# means the sub-make uses exactly this value.
# Both targets are commands, not files, hence phony.
.PHONY: box-refactor test-box-refactor
box-refactor:
	$(MAKE) clean
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1" larson_hakmem
	@echo ""
	@echo "========================================="
	@echo "Built with Box Refactor (Phase 6-1.7)"
	@echo " larson_hakmem (with Box 1/5/6)"
	@echo "========================================="

# Convenience target: build and test box-refactor
test-box-refactor: box-refactor
	@echo ""
	@echo "========================================="
	@echo "Running Box Refactor Test..."
	@echo "========================================="
	./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
# Allocator objects linked into the bench_* / larson / test_mf2 binaries.
# The list is spread over continuation lines (a bare line break inside the value
# is a parse error); the object order is unchanged.
TINY_BENCH_OBJS_BASE = \
  hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o \
  hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o \
  core/box/ss_allocation_box.o core/box/ss_release_policy_box.o \
  superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o \
  core/superslab_head_stub.o hakmem_smallmid.o \
  core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o \
  core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o \
  core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o \
  core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o \
  core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o \
  core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o \
  core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o \
  core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o \
  core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o \
  core/box/alloc_gate_stats_box.o \
  core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o \
  core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o \
  core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o \
  core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o \
  core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o \
  core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o \
  core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o \
  core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o \
  core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o \
  core/box/remote_side_box.o core/box/tiny_free_route_cache_env_box.o \
  core/box/hakmem_env_snapshot_box.o core/box/tiny_c7_preserve_header_env_box.o \
  core/box/tiny_tcache_env_box.o core/box/tiny_unified_lifo_env_box.o \
  core/box/front_fastlane_alloc_legacy_direct_env_box.o core/box/fastlane_direct_env_box.o \
  core/box/tiny_header_hotfull_env_box.o core/box/tiny_inline_slots_fixed_mode_box.o \
  core/box/tiny_inline_slots_switch_dispatch_fixed_box.o \
  core/box/free_path_commit_once_fixed_box.o core/box/free_path_legacy_mask_box.o \
  core/box/tiny_inline_slots_overflow_stats_box.o \
  core/page_arena.o core/front/tiny_unified_cache.o \
  tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o \
  hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o \
  hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o \
  tiny_adaptive_sizing.o hakmem_super_registry.o \
  hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o \
  hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o \
  hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o \
  hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o \
  hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o \
  hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o \
  core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o \
  core/tiny_c6_inline_slots.o core/tiny_c6_inline_slots_ifl.o core/tiny_c5_inline_slots.o \
  core/tiny_c2_local_cache.o core/tiny_c3_inline_slots.o core/tiny_c4_inline_slots.o \
  core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o \
  core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o \
  core/smallsegment_v5.o core/smallobject_cold_iface_v5.o \
  core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o \
  core/smallsegment_v7.o core/smallobject_cold_iface_v7.o \
  core/mid_hotbox_v3.o core/smallobject_policy_v7.o \
  core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o \
  core/smallobject_learner_v2.o core/smallobject_mid_v35.o \
  core/box/small_policy_snapshot_tls_box.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)

# Optional Pool TLS objects.
# NOTE(review): the arena object is spelled core/pool_tls_arena.o here but
# pool_tls_arena.o in OBJS - confirm whether the difference is intentional.
ifeq ($(POOL_TLS_PHASE1),1)
  TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
endif
ifeq ($(POOL_TLS_BIND_BOX),1)
  TINY_BENCH_OBJS += pool_tls_bind.o
endif
# Tiny Pool benchmarks (single- and multi-threaded drivers), each linked against
# the full hakmem object set.
bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_tiny built with hakmem"

bench_tiny_mt: bench_tiny_mt.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_tiny_mt built with hakmem"
# Burst+Pause bench (mimalloc stress pattern)
# bench_burst_pause.c is compiled three ways: hakmem (-DUSE_HAKMEM), system
# (no extra define) and mimalloc (-DUSE_MIMALLOC plus its include path).

# --- objects ---
bench_burst_pause_hakmem.o: bench_burst_pause.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_burst_pause_system.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_burst_pause_mi.o: bench_burst_pause.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c $< -o $@

# --- executables ---
bench_burst_pause_hakmem: bench_burst_pause_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_burst_pause_hakmem built"

bench_burst_pause_system: bench_burst_pause_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_burst_pause_system built"

bench_burst_pause_mi: bench_burst_pause_mi.o
	$(CC) $^ -o $@ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mi built"
# Multi-threaded Burst+Pause bench: bench_burst_pause_mt.c compiled for hakmem
# (-DUSE_HAKMEM), system malloc, and mimalloc (-DUSE_MIMALLOC).

# --- objects ---
bench_burst_pause_mt_hakmem.o: bench_burst_pause_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_burst_pause_mt_system.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_burst_pause_mt_mi.o: bench_burst_pause_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c $< -o $@

# --- executables ---
bench_burst_pause_mt_hakmem: bench_burst_pause_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_hakmem built"

bench_burst_pause_mt_system: bench_burst_pause_mt_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_system built"

bench_burst_pause_mt_mi: bench_burst_pause_mt_mi.o
	$(CC) $^ -o $@ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_burst_pause_mt_mi built"
# ----------------------------------------------------------------------------
# Hako FFI stub (optional; for front-end integration smoke)
# ----------------------------------------------------------------------------
# hako_ffi_stub is a command-style alias for the archive, so it is declared phony.
.PHONY: hako_ffi_stub
hako_ffi_stub: libhako_ffi_stub.a
	@echo "✓ libhako_ffi_stub.a built"

hako_ffi_stub.o: src/hako/ffi_stub.c include/hako/ffi.h include/hako/types.h
	$(CC) $(CFLAGS) -c -o $@ $<

# $(AR) defaults to `ar` and lets the archiver be overridden like $(CC).
libhako_ffi_stub.a: hako_ffi_stub.o
	$(AR) rcs $@ $^

# Smoke test for Hako FFI stubs.
# Depends on the phony alias above, so the smoke binary is relinked on every request.
hako_smoke: hako_ffi_stub tests/hako_smoke.c
	$(CC) $(CFLAGS) -o hako_smoke tests/hako_smoke.c libhako_ffi_stub.a
	@echo "✓ hako_smoke built"
# ----------------------------------------------------------------------------
# Larson benchmarks (Google/mimalloc-bench style)
# ----------------------------------------------------------------------------
# One C++ source, three allocator backends.
LARSON_SRC := mimalloc-bench/bench/larson/larson.cpp

# System variant (uses system malloc/free)
larson_system.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -c $< -o $@

larson_system: larson_system.o
	$(CXX) $^ -o $@ $(LDFLAGS)

# mimalloc variant (direct link to prebuilt mimalloc)
larson_mi.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -DUSE_MIMALLOC \
		-I mimalloc-bench/extern/mi/include -c $< -o $@

larson_mi: larson_mi.o bench_mi_force.o
	$(CXX) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)

# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
larson_hakmem.o: $(LARSON_SRC)
	$(CXX) $(CFLAGS) -I core -c $< -o $@

larson_hakmem: larson_hakmem.o $(TINY_BENCH_OBJS)
	$(CXX) $^ -o $@ $(LDFLAGS)
# test_mf2 test binary, linked against the hakmem objects.
test_mf2: test_mf2.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ test_mf2 built with hakmem"
# Comprehensive bench: the object is compiled with -DUSE_HAKMEM and linked
# with the hakmem objects; the system variant compiles and links the source
# in one step without that define.
bench_comprehensive.o: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<

bench_comprehensive_hakmem: bench_comprehensive.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmem built with hakmem"

bench_comprehensive_system: bench_comprehensive.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
	@echo "✓ bench_comprehensive_system built (system malloc)"
# mimalloc direct-link variant (no LD_PRELOAD dependency).
# The source is passed as $< rather than by bare file name: this Makefile sets
# VPATH, and only the automatic variable carries the path where make actually
# found the prerequisite (the sibling bench_comprehensive_system rule already
# does this).
bench_comprehensive_mi: bench_comprehensive.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include \
		$< -o $@ \
		-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
	@echo "✓ bench_comprehensive_mi built (direct link to mimalloc)"
# hakx (new hybrid) front API stubs.
# HAKX_OBJS is the object set linked into every *_hakx benchmark below.
HAKX_OBJS = engines/hakx/hakx_api_stub.o \
            engines/hakx/hakx_front_tiny.o \
            engines/hakx/hakx_l25_tuner.o

engines/hakx/hakx_api_stub.o: engines/hakx/hakx_api_stub.c \
                              include/hakx/hakx_api.h \
                              engines/hakx/hakx_front_tiny.h
	$(CC) $(CFLAGS) -I include -c $< -o $@
# hakx variant for tiny hot bench (direct link via hakx API).
# malloc/free/realloc are redirected to the hakx fast entry points with -D,
# and the hakx headers are force-included so those names are declared.
bench_tiny_hot_hakx.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX \
		-include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h \
		-Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		-c $< -o $@

bench_tiny_hot_hakx: bench_tiny_hot_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx built (hakx API stub)"

# P0 variant with batch refill optimization
# (same as above plus -DHAKMEM_TINY_P0_BATCH_REFILL=1 on the bench object).
bench_tiny_hot_hakx_p0.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -DHAKMEM_TINY_P0_BATCH_REFILL=1 -I include -DUSE_HAKX \
		-include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h \
		-Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		-c $< -o $@

bench_tiny_hot_hakx_p0: bench_tiny_hot_hakx_p0.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_hakx_p0 built (with P0 batch refill)"
# Comparison bench that calls hak_tiny_alloc/free directly.
bench_tiny_hot_direct.o: bench_tiny_hot_direct.c core/hakmem_tiny.h
	$(CC) $(CFLAGS) -c $< -o $@

bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
	@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"
# hakmi variant for comprehensive bench (front + mimalloc backend).
# The source is passed as $< (the first prerequisite) instead of a bare file
# name, so a copy located through VPATH is compiled from its real path.
# NOTE(review): the adapters/hakmi_front/*.o objects linked here are not
# prerequisites of this rule, so they must already exist — confirm whether
# $(HAKMI_FRONT_OBJS) should be listed as prerequisites.
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
		$< -o $@ \
		adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"
# hakx variant for comprehensive bench.
# Source and objects are taken from the prerequisite list ($< and the *.o
# entries of $^) instead of being spelled out again by name, so paths
# resolved through VPATH are honoured and the link line cannot drift from
# the dependency list.
bench_comprehensive_hakx: bench_comprehensive.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		$< -o $@ $(filter %.o,$^) $(LDFLAGS)
	@echo "✓ bench_comprehensive_hakx built (hakx API stub)"
# Random mixed bench (direct link variants)
# Phase 7-Step2: Enable PGO mode for bench builds (compile-time unified gate):
# the hakmem object is compiled with -DHAKMEM_TINY_FRONT_PGO=1.
bench_random_mixed_hakmem.o: bench_random_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -DHAKMEM_TINY_FRONT_PGO=1 -c $< -o $@

bench_random_mixed_system.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_random_mixed_mi.o: bench_random_mixed.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC \
		-I mimalloc-bench/extern/mi/include -c $< -o $@

bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
# Phase 35-A: BENCH_MINIMAL target (eliminates gate function overhead)
#   Usage:   make bench_random_mixed_hakmem_minimal
#   Note:    rebuilds all objects with -DHAKMEM_BENCH_MINIMAL=1
#   Purpose: pure performance measurement (FAST build)
# Steps: clean, rebuild the standard binary with the extra define, then rename
# the result to this target's name.
.PHONY: bench_random_mixed_hakmem_minimal
bench_random_mixed_hakmem_minimal:
	$(MAKE) clean
	$(MAKE) EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem $@
# Phase 63: FAST profile fixed target (BENCH_MINIMAL + FAST_PROFILE_FIXED)
#   Usage:   make bench_random_mixed_hakmem_fast_fixed
#   Note:    rebuilds all objects with BENCH_MINIMAL + FAST_PROFILE_FIXED.
#   Purpose: FAST build with compile-time constant gates matching
#            MIXED_TINYV3_C7_SAFE defaults.
.PHONY: bench_random_mixed_hakmem_fast_fixed
bench_random_mixed_hakmem_fast_fixed:
	$(MAKE) clean
	$(MAKE) EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_FAST_PROFILE_FIXED=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem $@
# Phase 65: Hot Symbol Ordering was investigated but is BLOCKED under the current
# GCC+LTO toolchain constraints (see docs/analysis/PHASE65_HOT_SYMBOL_ORDERING_1_RESULTS.md).
# We intentionally do not provide a build target that disables LTO or swaps linkers,
# because it makes baseline comparisons unfair and tends to introduce layout tax.
# Phase 64: Backend pruning target
# (BENCH_MINIMAL + FAST_PROFILE_FIXED + FAST_PROFILE_PRUNE_BACKENDS)
#   Usage:   make bench_random_mixed_hakmem_fast_pruned
#   Note:    rebuilds all objects with the three defines above.
#   Purpose: LTO DCE optimization - makes MID_V3, POOL_V2 unreachable at
#            compile-time for +5-10% gain
.PHONY: bench_random_mixed_hakmem_fast_pruned
bench_random_mixed_hakmem_fast_pruned:
	$(MAKE) clean
	$(MAKE) EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_FAST_PROFILE_FIXED=1 -DHAKMEM_FAST_PROFILE_PRUNE_BACKENDS=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem $@
# Phase 66: PGO (Profile-Guided Optimization) for FAST minimal build (keeps GCC+LTO)
# Usage: make pgo-fast-full
# Stages: pgo-fast-profile (instrumented build) -> pgo-fast-collect (run the
# profiling script) -> pgo-fast-build (rebuild using the collected profile).
.PHONY: pgo-fast-profile pgo-fast-collect pgo-fast-build pgo-fast-full

# Stage 1: clean, then build the instrumented binaries (PROFILE_GEN=1).
pgo-fast-profile:
	@echo "========================================="
	@echo "Phase 66: Building PGO Profile Binaries (FAST minimal)"
	@echo "========================================="
	$(MAKE) clean
	$(MAKE) PROFILE_GEN=1 EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1' bench_random_mixed_hakmem bench_tiny_hot_hakmem
	@echo ""
	@echo "✓ PGO profile binaries built (FAST minimal)"
	@echo "Next: make pgo-fast-collect"
	@echo ""

# Stage 2: run the profile-collection script with the FAST profile config.
pgo-fast-collect:
	@echo "========================================="
	@echo "Phase 66: Collecting PGO Profile Data (FAST minimal)"
	@echo "========================================="
	PGO_CONFIG=pgo_fast_profile_config.sh ./scripts/box/pgo_tiny_profile_box.sh
	@echo ""
	@echo "✓ PGO profile collection complete"
	@echo "Next: make pgo-fast-build"
	@echo ""

# Stage 3: rebuild with PROFILE_USE=1 and store the result as
# bench_random_mixed_hakmem_minimal_pgo. An existing standard binary is moved
# aside before the clean and moved back afterwards.
pgo-fast-build:
	@echo "========================================="
	@echo "Phase 66: Building PGO-Optimized Binary (FAST minimal)"
	@echo "========================================="
	@[ ! -x bench_random_mixed_hakmem ] || mv bench_random_mixed_hakmem bench_random_mixed_hakmem.standard_saved
	$(MAKE) clean
	$(MAKE) PROFILE_USE=1 EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem bench_random_mixed_hakmem_minimal_pgo
	@[ ! -x bench_random_mixed_hakmem.standard_saved ] || mv bench_random_mixed_hakmem.standard_saved bench_random_mixed_hakmem
	@echo ""
	@echo "✓ PGO-optimized FAST minimal binary built: bench_random_mixed_hakmem_minimal_pgo"
	@echo "Next: BENCH_BIN=./bench_random_mixed_hakmem_minimal_pgo scripts/run_mixed_10_cleanenv.sh"
	@echo ""
# Alias for pgo-fast-build. It never creates a file called `pgo-fast-bin`, so
# it is declared phony; otherwise a stray file of that name would silently
# turn the target into a no-op.
.PHONY: pgo-fast-bin
pgo-fast-bin: pgo-fast-build

# Convenience alias (SSOT runner expects this name to be buildable).
# Usage: make bench_random_mixed_hakmem_minimal_pgo
# Kept phony so the PGO build is always re-run on request.
.PHONY: bench_random_mixed_hakmem_minimal_pgo
bench_random_mixed_hakmem_minimal_pgo: pgo-fast-build
# Full FAST-minimal PGO workflow: profile build -> collect -> optimized build,
# then the 10-run benchmark on the resulting binary.
# The three stages are invoked as sequential sub-makes instead of being listed
# as prerequisites: each stage cleans and rebuilds in the same tree, and
# prerequisites may be run concurrently under `make -j`, which would let a
# later stage's `clean` race with an earlier stage's build.
pgo-fast-full:
	$(MAKE) pgo-fast-profile
	$(MAKE) pgo-fast-collect
	$(MAKE) pgo-fast-build
	@echo "========================================="
	@echo "Phase 66: PGO Full Workflow Complete (FAST minimal)"
	@echo "========================================="
	BENCH_BIN=./bench_random_mixed_hakmem_minimal_pgo scripts/run_mixed_10_cleanenv.sh
# Phase 47: FAST+PGO target (BENCH_MINIMAL + TINY_FRONT_PGO)
#   Usage:   make bench_random_mixed_hakmem_fast_pgo
#   Note:    rebuilds all objects with BENCH_MINIMAL + TINY_FRONT_PGO
#   Purpose: FAST build with compile-time fixed front config (phase 47 A/B test)
# An existing standard binary is moved aside before the clean and moved back
# once the renamed result is in place.
.PHONY: bench_random_mixed_hakmem_fast_pgo
bench_random_mixed_hakmem_fast_pgo:
	@[ ! -x bench_random_mixed_hakmem ] || mv bench_random_mixed_hakmem bench_random_mixed_hakmem.standard_saved
	$(MAKE) clean
	$(MAKE) EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_TINY_FRONT_PGO=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem $@
	@[ ! -x bench_random_mixed_hakmem.standard_saved ] || mv bench_random_mixed_hakmem.standard_saved bench_random_mixed_hakmem
# Phase 35-B: OBSERVE target (enables diagnostic counters for behavior observation)
#   Usage:   make bench_random_mixed_hakmem_observe
#   Note:    rebuilds all objects with stats/trace compiled in
#   Purpose: behavior observation & debugging (OBSERVE build)
# An existing standard binary is moved aside before the clean and moved back
# once the renamed result is in place.
.PHONY: bench_random_mixed_hakmem_observe
bench_random_mixed_hakmem_observe:
	@[ ! -x bench_random_mixed_hakmem ] || mv bench_random_mixed_hakmem bench_random_mixed_hakmem.standard_saved
	$(MAKE) clean
	$(MAKE) EXTRA_CFLAGS='-DHAKMEM_TINY_CLASS_STATS_COMPILED=1 -DHAKMEM_TINY_FREE_STATS_COMPILED=1 -DHAKMEM_UNIFIED_CACHE_STATS_COMPILED=1 -DHAKMEM_TINY_FREE_TRACE_COMPILED=1 -DHAKMEM_INLINE_SLOTS_OVERFLOW_STATS_COMPILED=1' bench_random_mixed_hakmem
	mv bench_random_mixed_hakmem $@
	@[ ! -x bench_random_mixed_hakmem.standard_saved ] || mv bench_random_mixed_hakmem.standard_saved bench_random_mixed_hakmem
# Phase 38: Automated perf workflow targets
# Usage: make perf_fast - Build FAST binary and run 10-run benchmark
# Usage: make perf_observe - Build OBSERVE binary and run health check + 1-run perf
#
# perf_fast: builds bench_random_mixed_hakmem_minimal (a phony target, so it is
# rebuilt from clean every time) and hands the binary to
# scripts/run_mixed_10_cleanenv.sh through the BENCH_BIN environment variable.
.PHONY: perf_fast
perf_fast: bench_random_mixed_hakmem_minimal
	@echo "========================================"
	@echo "Phase 38: FAST build 10-run benchmark"
	@echo "========================================"
	BENCH_BIN=./bench_random_mixed_hakmem_minimal scripts/run_mixed_10_cleanenv.sh
	@echo "========================================"
	@echo "FAST benchmark complete. See results above."
	@echo "========================================"
# perf_observe: build the OBSERVE binary, then run three checks:
#   1. the optional health-profile script,
#   2. one run with HAKMEM_SS_OS_STATS=1, showing bracketed and Throughput lines,
#   3. one plain run, showing only the Throughput line.
# Steps 2 and 3 fail the target when grep finds no matching line.
.PHONY: perf_observe
perf_observe: bench_random_mixed_hakmem_observe
	@echo "========================================"
	@echo "Phase 38: OBSERVE build health check"
	@echo "========================================"
	@echo "[1/3] Health profiles check..."
	@# Tell a missing script apart from a failing one: previously any non-zero
	@# exit was reported as "script not found". The step stays non-fatal.
	@if [ -x scripts/verify_health_profiles.sh ]; then \
		scripts/verify_health_profiles.sh || echo "Health check FAILED (see output above), continuing"; \
	else \
		echo "Health check script not found, skipping"; \
	fi
	@echo "[2/3] Syscall stats (1-run)..."
	HAKMEM_SS_OS_STATS=1 ./bench_random_mixed_hakmem_observe 20000000 400 1 2>&1 | grep -E "^\[|^Throughput"
	@echo "[3/3] Single perf run..."
	./bench_random_mixed_hakmem_observe 20000000 400 1 2>&1 | grep "^Throughput"
	@echo "========================================"
	@echo "OBSERVE health check complete."
	@echo "========================================"
# perf_all: run the FAST benchmark, then the OBSERVE health check.
# Both steps clean and rebuild the same tree, so they are run as sequential
# sub-makes; as plain prerequisites they could be started concurrently under
# `make -j` and corrupt each other's build.
.PHONY: perf_all
perf_all:
	$(MAKE) perf_fast
	$(MAKE) perf_observe
	@echo "========================================"
	@echo "Phase 38: All perf checks complete"
	@echo "========================================"
# Random mixed bench, system-malloc variant (no allocator objects linked in).
bench_random_mixed_system: bench_random_mixed_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
# Mid MT gap benchmark (1KB-8KB allocations) - Phase 5-Step2 verification.
# hakmem and system-malloc variants built from the same source.
bench_mid_mt_gap_hakmem.o: bench_mid_mt_gap.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_mid_mt_gap_system.o: bench_mid_mt_gap.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_mid_mt_gap_hakmem: bench_mid_mt_gap_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_mid_mt_gap_system: bench_mid_mt_gap_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
# Fixed-size microbench (direct link variants).
# The source path is given explicitly (benchmarks/src/fixed/).
bench_fixed_size_hakmem.o: benchmarks/src/fixed/bench_fixed_size.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_fixed_size_system.o: benchmarks/src/fixed/bench_fixed_size.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_fixed_size_hakmem: bench_fixed_size_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_fixed_size_system: bench_fixed_size_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
# Random mixed bench, mimalloc variant. libmimalloc is wrapped in
# --no-as-needed/--as-needed so the linker keeps it on the link line.
bench_random_mixed_mi: bench_random_mixed_mi.o bench_mi_force.o
	$(CC) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)
# hakmi variant for random mixed bench: malloc/free/realloc are redirected to
# the hakmi_* entry points, and the binary links the hakmi front objects plus
# libmimalloc.
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKMI \
		-include include/hakmi/hakmi_api.h \
		-Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
		-c $< -o $@

bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS) bench_mi_force.o
	$(CC) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)
# hakx variant for random mixed bench (malloc/free/realloc redirected to the
# hakx fast entry points).
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX \
		-include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h \
		-Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		-c $< -o $@

bench_random_mixed_hakx: bench_random_mixed_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
# VM-mixed bench around L2.5 (512KB<2MB); hakmem and system-malloc variants.
bench_vm_mixed_hakmem.o: bench_vm_mixed.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_vm_mixed_system.o: bench_vm_mixed.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_vm_mixed_hakmem: bench_vm_mixed_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_vm_mixed_system: bench_vm_mixed_system.o
	$(CC) $^ -o $@ $(LDFLAGS)
# Ultra-fast build for benchmarks: trims unwinding/PLT overhead and
# improves code locality. Use: `make bench_fast` then run the binary.
# The target-specific flags below also apply to every prerequisite.
# NOTE(review): `clean` is an ordinary prerequisite, so this target looks
# safe only for serial builds (no -j) — confirm.
bench_fast: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                      -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fast: LDFLAGS += -Wl,-O2
bench_fast: clean \
            bench_comprehensive_hakmem \
            bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakx
	@echo "✓ bench_fast build complete"
# Perf-Main (safe) bench build: no bench-only macros; same O flags.
# Rebuilds the comprehensive, tiny-hot and random-mixed benches for every
# backend after a clean.
perf_main: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                     -fno-unwind-tables -fno-asynchronous-unwind-tables
perf_main: LDFLAGS += -Wl,-O2
perf_main: clean \
           bench_comprehensive_hakmem \
           bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi \
           bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi \
           bench_comprehensive_hakx bench_tiny_hot_hakx bench_random_mixed_hakx
	@echo "✓ perf_main build complete (no bench-only macros)"
# Mid/Large (832KiB) bench: hakmem, system-malloc and mimalloc variants.
bench_mid_large_hakmem.o: bench_mid_large.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_mid_large_system.o: bench_mid_large.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_mid_large_mi.o: bench_mid_large.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC \
		-I mimalloc-bench/extern/mi/include -c $< -o $@

bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_mid_large_system: bench_mid_large_system.o
	$(CC) $^ -o $@ $(LDFLAGS)

bench_mid_large_mi: bench_mid_large_mi.o bench_mi_force.o
	$(CC) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)
# hakx variant for mid/large (1T): allocator calls are redirected to the hakx
# fast entry points.
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX \
		-include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h \
		-Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		-c $< -o $@

bench_mid_large_hakx: bench_mid_large_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
# Mid/Large MT (832KiB) bench: hakmem, system-malloc and mimalloc variants.
bench_mid_large_mt_hakmem.o: bench_mid_large_mt.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_mid_large_mt_system.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_mid_large_mt_mi.o: bench_mid_large_mt.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC \
		-I mimalloc-bench/extern/mi/include -c $< -o $@

bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_mid_large_mt_system: bench_mid_large_mt_system.o
	$(CC) $^ -o $@ $(LDFLAGS)

bench_mid_large_mt_mi: bench_mid_large_mt_mi.o bench_mi_force.o
	$(CC) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)
# hakx variant for mid/large MT: allocator calls are redirected to the hakx
# fast entry points.
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
	$(CC) $(CFLAGS) -I include -DUSE_HAKX \
		-include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h \
		-Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
		-c $< -o $@

bench_mid_large_mt_hakx: bench_mid_large_mt_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)
# Fragmentation stress bench: hakmem, system-malloc and mimalloc variants.
bench_fragment_stress_hakmem.o: bench_fragment_stress.c hakmem.h
	$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@

bench_fragment_stress_system.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -c $< -o $@

bench_fragment_stress_mi.o: bench_fragment_stress.c
	$(CC) $(CFLAGS) -DUSE_MIMALLOC \
		-I mimalloc-bench/extern/mi/include -c $< -o $@

bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
	$(CC) $^ -o $@ $(LDFLAGS)

bench_fragment_stress_system: bench_fragment_stress_system.o
	$(CC) $^ -o $@ $(LDFLAGS)

bench_fragment_stress_mi: bench_fragment_stress_mi.o bench_mi_force.o
	$(CC) $^ -o $@ \
		-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed \
		$(LDFLAGS)
# Bench build with Minimal Tiny Front (physically excludes optional front tiers).
# Adds the bench code-generation flags plus the MINIMAL_FRONT defines to every
# prerequisite, then rebuilds the benches after a clean.
bench_tiny_front: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                            -fno-unwind-tables -fno-asynchronous-unwind-tables \
                            -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
bench_tiny_front: LDFLAGS += -Wl,-O2
bench_tiny_front: clean \
                  bench_comprehensive_hakmem \
                  bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_tiny_front build complete (HAKMEM_TINY_MINIMAL_FRONT=1)"
# Bench build with Strict Front (compile-out optional front tiers, baseline structure).
bench_front_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
                              -fno-unwind-tables -fno-asynchronous-unwind-tables \
                              -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1
bench_front_strict: LDFLAGS += -Wl,-O2
bench_front_strict: clean \
                    bench_comprehensive_hakmem \
                    bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_front_strict build complete (HAKMEM_TINY_STRICT_FRONT=1)"
# Bench build with Ultra (SLL-only front) for Tiny-Hot microbench
# - Compiles hakmem bench with SLL-first/strict front, without Quick/FrontCache, stats off
# - Only affects bench binaries; normal builds unchanged
bench_ultra_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables \
    -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 \
    -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra_strict: LDFLAGS += -Wl,-O2
bench_ultra_strict: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra_strict build complete (ULTRA+STRICT front)"
# Bench build with Ultra (SLL-only) but without STRICT/MINIMAL, Quick/FrontCache compiled out
bench_ultra: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables \
    -DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_ultra: LDFLAGS += -Wl,-O2
bench_ultra: clean bench_tiny_hot_hakmem
	@echo "✓ bench_ultra build complete (ULTRA SLL-only, Quick/FrontCache OFF)"
# Bench build with explicit bench fast path (SLL→Mag→tiny refill), stats/quick/front off
bench_fastpath: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_fastpath: LDFLAGS += -Wl,-O2
bench_fastpath: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath build complete (bench-only fast path)"
# Bench build: SLL-only (≤64B), with warmup
# (per-size warmup counts are passed as HAKMEM_TINY_BENCH_WARMUP{8,16,32,64}).
bench_sll_only: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables \
    -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_WARMUP32=160 -DHAKMEM_TINY_BENCH_WARMUP64=192 \
    -DHAKMEM_TINY_BENCH_WARMUP8=64 -DHAKMEM_TINY_BENCH_WARMUP16=96 \
    -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
bench_sll_only: LDFLAGS += -Wl,-O2
bench_sll_only: clean bench_tiny_hot_hakmem
	@echo "✓ bench_sll_only build complete (bench-only SLL-only + warmup)"
# Bench-fastpath with explicit refill sizes (A/B).
# The three targets differ only in -DHAKMEM_TINY_BENCH_REFILL (8, 12, 16).
bench_fastpath_r8: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_REFILL=8 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 \
    -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r8: LDFLAGS += -Wl,-O2
bench_fastpath_r8: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r8 build complete"

bench_fastpath_r12: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_REFILL=12 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 \
    -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r12: LDFLAGS += -Wl,-O2
bench_fastpath_r12: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r12 build complete"

bench_fastpath_r16: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
    -DHAKMEM_TINY_BENCH_REFILL=16 -DHAKMEM_BENCH_TINY_ONLY=1 \
    -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 \
    -fno-plt -fno-semantic-interposition -fomit-frame-pointer \
    -fno-unwind-tables -fno-asynchronous-unwind-tables
bench_fastpath_r16: LDFLAGS += -Wl,-O2
bench_fastpath_r16: clean bench_tiny_hot_hakmem
	@echo "✓ bench_fastpath_r16 build complete"
# PGO for bench-fastpath.
# Step 1 builds an instrumented bench_tiny_hot_hakmem and runs it for sizes
# 8/16/32/64; the runs are best-effort (`|| true`).
pgo-benchfast-profile:
	@echo "========================================="
	@echo "PGO Profile (bench-fastpath)"
	@echo "========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	for sz in 8 16 32 64; do \
		./bench_tiny_hot_hakmem $$sz 100 60000 >/dev/null || true; \
	done
	@echo "✓ bench-fastpath profile data collected (*.gcda)"

# Step 2 drops the objects (keeping the *.gcda files) and rebuilds with
# -fprofile-use.
pgo-benchfast-build:
	@echo "========================================="
	@echo "PGO Build (bench-fastpath)"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" >/dev/null
	@echo "✓ bench-fastpath PGO build complete"
# Debug bench (with counters/prints): appends -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
# to CFLAGS for every prerequisite and rebuilds the benches after a clean.
bench_debug: CFLAGS += -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
bench_debug: clean \
             bench_comprehensive_hakmem \
             bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
	@echo "✓ bench_debug build complete (debug counters enabled)"
# Debug build for random_mixed (enable counters for SFC stats).
# The sub-make receives the parent's fully expanded CFLAGS with the debug
# flags appended, matching the other recursive builds in this file. A
# command-line `CFLAGS+=...` is still a command-line variable, so it takes
# precedence over the makefile's own CFLAGS assignments and the sub-make would
# compile without the project's base flags.
.PHONY: bench_random_mixed_debug
bench_random_mixed_debug:
	@echo "[debug] Rebuilding bench_random_mixed_hakmem with HAKMEM_DEBUG_COUNTERS=1"
	$(MAKE) clean >/dev/null
	$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_DEBUG_COUNTERS=1 -O2 -g" bench_random_mixed_hakmem >/dev/null
	@echo "✓ bench_random_mixed_debug built"
# ========================================
# Phase 7 convenience targets (the important constants are set as defaults)
# ========================================
# Phase 7: enable all optimizations (Task 1+2+3)
# Usage: make phase7
#    or: make phase7-bench for an automatic benchmark run
.PHONY: phase7 phase7-bench phase7-test

# Clean, then rebuild the random-mixed and Larson hakmem binaries with the
# three Phase 7 switches turned on.
phase7:
	@echo "========================================="
	@echo "Phase 7: Building with all optimizations"
	@echo "========================================="
	@echo "Flags:"
	@echo " HEADER_CLASSIDX=1 (Task 1: Skip magic validation)"
	@echo " AGGRESSIVE_INLINE=1 (Task 2: Inline TLS macros)"
	@echo " PREWARM_TLS=1 (Task 3: Pre-warm cache)"
	@echo ""
	$(MAKE) clean
	$(MAKE) HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 bench_random_mixed_hakmem larson_hakmem
	@echo ""
	@echo "✓ Phase 7 build complete!"
	@echo " Run: make phase7-bench (quick benchmark)"
	@echo " Run: make phase7-test (sanity test)"
# Quick benchmark on the Phase 7 binaries: one Larson run (only its
# "Throughput =" line is shown) and three random-mixed runs (only the last
# output line of each is shown). Always rebuilds first via `phase7`.
phase7-bench: phase7
	@echo ""
	@echo "========================================="
	@echo "Phase 7 Quick Benchmark"
	@echo "========================================="
	@echo "Larson 1T:"
	@./larson_hakmem 1 1 128 1024 1 12345 1 2>&1 | grep "Throughput ="
	@echo ""
	@echo "Random Mixed (128B, 256B, 1024B):"
	@./bench_random_mixed_hakmem 100000 128 1234567 2>&1 | tail -1
	@./bench_random_mixed_hakmem 100000 256 1234567 2>&1 | tail -1
	@./bench_random_mixed_hakmem 100000 1024 1234567 2>&1 | tail -1
# Sanity test on the Phase 7 binaries.
# All three checks are always run and reported, and the target exits non-zero
# if any of them failed. The old `cmd && echo OK || echo FAILED` form always
# exited 0, so a failing test could never fail the make invocation.
phase7-test: phase7
	@echo ""
	@echo "========================================="
	@echo "Phase 7 Sanity Test"
	@echo "========================================="
	@status=0; \
	if ./larson_hakmem 1 1 128 1024 1 12345 1 >/dev/null 2>&1; then \
		echo "✓ Larson 1T OK"; \
	else \
		echo "✗ Larson 1T FAILED"; status=1; \
	fi; \
	if ./bench_random_mixed_hakmem 10000 128 1234567 >/dev/null 2>&1; then \
		echo "✓ Random Mixed 128B OK"; \
	else \
		echo "✗ Random Mixed 128B FAILED"; status=1; \
	fi; \
	if ./bench_random_mixed_hakmem 10000 1024 1234567 >/dev/null 2>&1; then \
		echo "✓ Random Mixed 1024B OK"; \
	else \
		echo "✗ Random Mixed 1024B FAILED"; status=1; \
	fi; \
	exit $$status
# Clean: remove the objects, binaries and libraries named by the build
# variables, every *.csv file, the Hako stub artifacts, and the benchmark
# objects and binaries listed explicitly below.
clean:
	rm -f $(OBJS) $(TARGET) \
		$(BENCH_HAKMEM_OBJS) $(BENCH_SYSTEM_OBJS) $(BENCH_HAKMEM) $(BENCH_SYSTEM) \
		$(SHARED_OBJS) $(SHARED_LIB) \
		*.csv libhako_ffi_stub.a hako_ffi_stub.o
	rm -f bench_comprehensive.o bench_comprehensive_hakmem bench_comprehensive_system \
		bench_tiny bench_tiny.o bench_tiny_mt bench_tiny_mt.o test_mf2 test_mf2.o bench_tiny_hakmem \
		bench_random_mixed_hakmem.o bench_random_mixed_system.o bench_random_mixed_mi.o \
		bench_tiny_hot_hakmem.o bench_tiny_hot_system.o bench_tiny_hot_mi.o bench_mi_force.o
	rm -f bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi bench_random_mixed_hakx \
		bench_random_mixed_hakmem_minimal bench_random_mixed_hakmem_minimal_pgo \
		bench_random_mixed_hakmem_fast_fixed bench_random_mixed_hakmem_fast_pruned bench_random_mixed_hakmem_fast_pgo \
		bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakmi \
		bench_tiny_hot_hakx bench_tiny_hot_hakx_p0 bench_tiny_hot_direct
# Help: prints the list of commonly used targets and the two benchmark
# workflows.
# NOTE(review): a `help` target with its own recipe is already defined near the
# top of this file (and selected via .DEFAULT_GOAL). When a target is given two
# recipes, GNU make uses the later one and warns about overriding, so this
# recipe is the one that actually runs — confirm which of the two is intended.
help:
	@echo "hakmem PoC - Makefile targets:"
	@echo ""
	@echo "=== Phase 7 Optimizations (推奨) ==="
	@echo " make phase7 - Phase 7全最適化ビルド (Task 1+2+3)"
	@echo " make phase7-bench - Phase 7 + クイックベンチマーク"
	@echo " make phase7-test - Phase 7 + サニティテスト"
	@echo ""
	@echo "=== 基本ターゲット ==="
	@echo " make - Build the test program"
	@echo " make run - Build and run the test"
	@echo " make bench - Build benchmark programs"
	@echo " make shared - Build shared library (for LD_PRELOAD)"
	@echo " make clean - Clean build artifacts"
	@echo " make bench-mode - Run Tiny-focused PGO bench (scripts/bench_mode.sh)"
	@echo " make bench-all - Run (near) full mimalloc-bench with timeouts"
	@echo ""
	@echo "Benchmark workflow:"
	@echo " 1. make bench"
	@echo " 2. bash bench_runner.sh --runs 10"
	@echo " 3. python3 analyze_results.py benchmark_results.csv"
	@echo ""
	@echo "mimalloc-bench workflow:"
	@echo " 1. make shared"
	@echo " 2. LD_PRELOAD=./libhakmem.so <benchmark>"
# Step 2: PGO (Profile-Guided Optimization) targets - temporarily disabled
# Both targets are placeholders: they only print a notice pointing at
# ./build.sh and do not build or profile anything.
pgo-profile:
	@echo "========================================="
	@echo "PGO Profile Collection (disabled)"
	@echo "========================================="
	@echo "PGO flow is temporarily parked during Tiny front Phase 4 refactor."
	@echo "Use normal builds instead, e.g.:"
	@echo " ./build.sh release bench_random_mixed_hakmem"
pgo-build:
	@echo "========================================="
	@echo "PGO Optimized Build (disabled)"
	@echo "========================================="
	@echo "PGO flow is temporarily parked during Tiny front Phase 4 refactor."
	@echo "Use normal builds instead, e.g.:"
	@echo " ./build.sh release bench_random_mixed_hakmem"
# PGO for tiny_hot (Strict Front) - temporarily disabled
# Placeholder: prints a notice only. The final message used to claim that
# profile data had been collected even though nothing is built or run here;
# it now states that no data was produced.
pgo-hot-profile:
	@echo "========================================="
	@echo "PGO Profile (tiny_hot) (disabled)"
	@echo "========================================="
	@echo "Tiny-hot PGO profiling is temporarily disabled."
	@echo "Run benches directly instead, e.g.:"
	@echo " ./build.sh release bench_tiny_hot_hakmem"
	@echo "(no tiny_hot profile data collected: target is disabled)"
# PGO build of bench_tiny_hot_hakmem with the Strict Front define: removes the
# objects and the old binary, then rebuilds with -fprofile-use -flto.
# NOTE(review): pgo-hot-profile is currently a disabled placeholder, so this
# presumably relies on *.gcda files from some other run — confirm.
pgo-hot-build:
	@echo "========================================="
	@echo "PGO Build (tiny_hot) with Strict Front"
	@echo "========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) bench_tiny_hot_hakmem \
		CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" >/dev/null
	@echo "✓ tiny_hot PGO build complete"
# Phase 8.2: Memory profiling build (verbose memory breakdown)
# Adds -DHAKMEM_DEBUG_MEMORY to CFLAGS for this target and its prerequisites,
# then rebuilds bench_comprehensive_hakmem after a clean.
bench-memory: CFLAGS += -DHAKMEM_DEBUG_MEMORY
bench-memory: clean bench_comprehensive_hakmem
	@echo ""
	@echo "========================================="
	@echo "Memory profiling build complete!"
	@echo " Run: ./bench_comprehensive_hakmem"
	@echo " Memory breakdown will be printed at end"
	@echo "========================================="
# Command-style goals that never correspond to a file on disk. Declaring them
# phony keeps a stray file of the same name (e.g. ./bench-all) from making the
# goal look "up to date" and silently skipping its recipe.
.PHONY: all run bench shared debug clean help pgo-profile pgo-build bench-memory \
        pgo-hot-profile pgo-hot-build \
        pgo-profile-shared pgo-build-shared \
        bench-mode bench-all \
        pgo-benchsll-profile pgo-benchsll-build \
        pgo-benchsll-r12w192-profile pgo-benchsll-r12w192-build
# PGO for the shared library (LD_PRELOAD), step 1 of 2.
# Removes stale shared-lib objects/profile data, rebuilds $(SHARED_LIB) with
# -fprofile-generate -flto, then preloads it into bench_comprehensive_system to
# produce profile data. Only the first 20 matching report lines are shown, and
# the trailing `|| true` keeps the recipe going whatever the pipeline returns.
pgo-profile-shared:
	@printf '%s\n' \
		"=========================================" \
		"Step: PGO Profile Collection (shared lib)" \
		"========================================="
	rm -f *_shared.gcda *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-generate -flto" LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" shared
	@echo "Running profile workload (LD_PRELOAD)..."
	HAKMEM_WRAP_TINY=1 LD_PRELOAD=./$(SHARED_LIB) ./bench_comprehensive_system 2>&1 | grep -E "(SIZE CLASS:|Throughput:)" | head -20 || true
	@echo "✓ Profile data collected (*.gcda for *_shared)"
# PGO for the shared library, step 2 of 2.
# Drops the instrumented objects and library, then rebuilds `shared` with
# -fprofile-use -flto. Coverage-mismatch diagnostics are demoted from errors
# via -Wno-error=coverage-mismatch.
pgo-build-shared:
	@printf '%s\n' \
		"=========================================" \
		"Step: PGO Optimized Build (shared lib)" \
		"========================================="
	rm -f *_shared.o $(SHARED_LIB)
	$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-use -flto -Wno-error=coverage-mismatch" LDFLAGS="$(LDFLAGS) -fprofile-use -flto" shared
	@echo "✓ LTO+PGO optimized shared library complete"
# Convenience wrapper: hand control to the Bench Mode script.
# The command line itself is not echoed (leading @).
bench-mode:
	@bash scripts/bench_mode.sh
# Convenience wrapper: run scripts/run_all_benches_with_timeouts.sh via bash.
bench-all:
	@bash scripts/run_all_benches_with_timeouts.sh
# PGO for bench_sll_only, profile step.
# Wipes old profile data, objects and the binary, then builds an instrumented
# bench_tiny_hot_hakmem with the bench fast-path / SLL-only defines. The binary
# is run for sizes 8, 16, 32 and 64 (batch=100, cycles=60000); each run is
# best-effort (`|| true`) and its stdout is discarded.
pgo-benchsll-profile:
	@printf '%s\n' \
		"=========================================" \
		"PGO Profile (bench_sll_only)" \
		"========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ bench_sll_only profile data collected (*.gcda)"
# PGO for bench_sll_only, optimized-build step.
# Keeps the *.gcda files, removes objects and the binary, and rebuilds
# bench_tiny_hot_hakmem with -fprofile-use -flto and the same -D switches that
# the profile step used.
pgo-benchsll-build:
	@printf '%s\n' \
		"=========================================" \
		"PGO Build (bench_sll_only)" \
		"========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ bench_sll_only PGO build complete"
# Variant of the SLL-only profile step tuned for 32B:
# adds -DHAKMEM_TINY_BENCH_REFILL32=12 and -DHAKMEM_TINY_BENCH_WARMUP32=192.
# Otherwise identical in shape: clean, instrumented build, four best-effort
# profile runs (sizes 8/16/32/64) with stdout discarded.
pgo-benchsll-r12w192-profile:
	@printf '%s\n' \
		"=========================================" \
		"PGO Profile (bench_sll_only r12 w32=192)" \
		"========================================="
	rm -f *.gcda *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
	./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
	./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
	@echo "✓ r12 w32=192 profile data collected (*.gcda)"
# Optimized-build step for the r12 / w32=192 variant.
# Rebuilds bench_tiny_hot_hakmem with -fprofile-use -flto and the same -D
# switches as the matching profile step; the *.gcda files are left in place.
pgo-benchsll-r12w192-build:
	@printf '%s\n' \
		"=========================================" \
		"PGO Build (bench_sll_only r12 w32=192)" \
		"========================================="
	rm -f *.o bench_tiny_hot_hakmem
	$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
		LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
	@echo "✓ r12 w32=192 PGO build complete"
# Absolute path to the mimalloc-bench release build of mimalloc.
# $(CURDIR) is make's own notion of the working directory, so no shell has to
# be forked at parse time just to run `pwd`.
MI_RPATH := $(CURDIR)/mimalloc-bench/extern/mi/out/release
# Sanitized builds (compiler-assisted debugging).
# Each SAN_*_CFLAGS / SAN_*_LDFLAGS pair below is passed to a recursive make by
# the sanitizer targets that follow, either as EXTRA_CFLAGS / EXTRA_LDFLAGS or
# appended to CFLAGS_SHARED / LDFLAGS.
# Every set uses -O1 -g with frame pointers kept and LTO disabled, and defines
# HAKMEM_FORCE_LIBC_ALLOC_BUILD=1.
.PHONY: asan-larson ubsan-larson tsan-larson
# ASan + UBSan combined; -fno-sanitize-recover=all disables recovery for all checks.
SAN_ASAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
-fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_ASAN_LDFLAGS = -fsanitize=address,undefined
# UBSan only.
SAN_UBSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
-fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_UBSAN_LDFLAGS = -fsanitize=undefined
# Allocator-enabled sanitizer variants (intended to build without FORCE_LIBC).
# FIXME 2025-11-07: TLS initialization order issue - using libc for now.
# Until that is resolved these still define HAKMEM_FORCE_LIBC_ALLOC_BUILD=1,
# so each *_ALLOC_* set is currently identical to its non-ALLOC counterpart.
SAN_ASAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
-fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_ASAN_ALLOC_LDFLAGS = -fsanitize=address,undefined
SAN_UBSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
-fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_UBSAN_ALLOC_LDFLAGS = -fsanitize=undefined
# TSan only (not combined with ASan in these flag sets).
SAN_TSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_TSAN_LDFLAGS = -fsanitize=thread
# Variant: TSan with allocator enabled (intended to build without FORCE_LIBC).
# FIXME 2025-11-07: TLS initialization order issue - using libc for now,
# so this is currently the same flag set as SAN_TSAN_CFLAGS.
SAN_TSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
SAN_TSAN_ALLOC_LDFLAGS = -fsanitize=thread
# Clean rebuild of larson_hakmem with the ASan/UBSan flag set; the result is
# copied to larson_hakmem_asan. Sub-make stdout is discarded.
asan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_ASAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_ASAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_asan
	@echo "✓ Built larson_hakmem_asan with ASan/UBSan"
# Clean rebuild of larson_hakmem with the UBSan flag set; the result is copied
# to larson_hakmem_ubsan. Sub-make stdout is discarded.
ubsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_UBSAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_UBSAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan
	@echo "✓ Built larson_hakmem_ubsan with UBSan"
# Clean rebuild of larson_hakmem with the TSan flag set; the result is copied
# to larson_hakmem_tsan. Sub-make stdout is discarded.
tsan-larson:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_TSAN_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_TSAN_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan
	@echo "✓ Built larson_hakmem_tsan with TSan (no ASan)"
# TSan build of larson_hakmem using the *_ALLOC_* flag set, copied to
# larson_hakmem_tsan_alloc.
# NOTE(review): the success message says "allocator enabled"; whether that is
# true depends on what SAN_TSAN_ALLOC_CFLAGS currently defines - confirm.
.PHONY: tsan-larson-alloc
tsan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_TSAN_ALLOC_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_TSAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_tsan_alloc
	@echo "✓ Built larson_hakmem_tsan_alloc with TSan (allocator enabled)"
# ASan/UBSan build of larson_hakmem using the *_ALLOC_* flag set, copied to
# larson_hakmem_asan_alloc. The .PHONY line also covers ubsan-larson-alloc.
.PHONY: asan-larson-alloc ubsan-larson-alloc
asan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_ASAN_ALLOC_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_ASAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_asan_alloc
	@echo "✓ Built larson_hakmem_asan_alloc with ASan/UBSan (allocator enabled)"
# UBSan build of larson_hakmem using the *_ALLOC_* flag set, copied to
# larson_hakmem_ubsan_alloc (the binary that ubsan-mailbox-run executes).
ubsan-larson-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem \
		EXTRA_CFLAGS="$(SAN_UBSAN_ALLOC_CFLAGS)" \
		EXTRA_LDFLAGS="$(SAN_UBSAN_ALLOC_LDFLAGS)" >/dev/null
	@cp -f larson_hakmem larson_hakmem_ubsan_alloc
	@echo "✓ Built larson_hakmem_ubsan_alloc with UBSan (allocator enabled)"
# Sanitized shared libraries for LD_PRELOAD (allocator enabled).
# asan-shared-alloc: clean, then build the `shared` goal under the name
# libhakmem_asan.so with the ASan *_ALLOC_* flags appended to CFLAGS_SHARED
# and LDFLAGS. Sub-make stdout is discarded.
.PHONY: asan-shared-alloc tsan-shared-alloc
asan-shared-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) shared \
		SHARED_LIB=libhakmem_asan.so \
		CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_ASAN_ALLOC_CFLAGS)" \
		LDFLAGS="$(LDFLAGS) $(SAN_ASAN_ALLOC_LDFLAGS)" >/dev/null
	@echo "✓ Built libhakmem_asan.so (LD_PRELOAD, allocator enabled)"
# TSan counterpart of the sanitized LD_PRELOAD library: clean, then build the
# `shared` goal as libhakmem_tsan.so with the TSan *_ALLOC_* flags appended.
tsan-shared-alloc:
	@$(MAKE) clean >/dev/null
	@$(MAKE) shared \
		SHARED_LIB=libhakmem_tsan.so \
		CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_TSAN_ALLOC_CFLAGS)" \
		LDFLAGS="$(LDFLAGS) $(SAN_TSAN_ALLOC_LDFLAGS)" >/dev/null
	@echo "✓ Built libhakmem_tsan.so (LD_PRELOAD, allocator enabled)"
# TSan multithread smoke test linked directly against the allocator objects.
# Steps: clean, build $(TINY_BENCH_OBJS) through a sub-make, then compile
# tests/mt_smoke.c with -fsanitize=thread and link it with those objects.
# NOTE(review): the sub-make that builds $(TINY_BENCH_OBJS) receives no
# sanitizer flags, so only mt_smoke.c itself appears to be TSan-instrumented
# here - confirm that this is intended.
.PHONY: mt-smoke-tsan
mt-smoke-tsan:
	@$(MAKE) clean >/dev/null
	@$(MAKE) $(TINY_BENCH_OBJS) >/dev/null
	$(CC) -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
		-o mt_smoke tests/mt_smoke.c $(TINY_BENCH_OBJS) $(LDFLAGS) -fsanitize=thread
	@echo "✓ Built mt_smoke (TSan)"
# ----------------------------------------------------------------------------
# Convenience targets (debug/route/3layer)
# ----------------------------------------------------------------------------
# Both goals are command-style wrappers around recursive makes (defined below).
.PHONY: larson_hakmem_3layer larson_hakmem_route
# ----------------------------------------------------------------------------
# Runtime helpers: sanitizer-safe runners for debugging/bench
# ----------------------------------------------------------------------------
# Default run params (overridable from the command line or environment,
# e.g. `make bench-hakmem THREADS=8`).
# The runner targets pass them positionally to the larson binaries in the
# order: SLEEP MIN MAX CHPT ROUNDS SEED THREADS.
# NOTE(review): the meaning of each position is defined by the larson
# benchmark itself - confirm against its usage text.
THREADS ?= 4
SLEEP ?= 10
MIN ?= 8
MAX ?= 128
CHPT ?= 1024
ROUNDS ?= 1
SEED ?= 12345
# Resolve libasan from the active toolchain
# (evaluated once at parse time; used as the first entry of LD_PRELOAD by the
# asan-preload-* runners).
ASAN_LIB := $(shell $(CC) -print-file-name=libasan.so)
# Run larson_system with libasan and the ASan build of libhakmem preloaded.
#
# * The two builds run one after the other: asan-shared-alloc starts with
#   `$(MAKE) clean`, so building larson_system in the same parallel sub-make
#   could have its outputs deleted mid-build.
# * The library path uses $(CURDIR) (make's working directory) rather than the
#   inherited $(PWD) environment variable, which is wrong under `make -C dir`.
# * The banner prints the preload chain that is actually applied to the run,
#   not whatever LD_PRELOAD happens to be in the caller's environment.
# Leak detection is switched off for the run via LSAN_OPTIONS.
.PHONY: asan-preload-run
asan-preload-run:
	@$(MAKE) -j asan-shared-alloc >/dev/null
	@$(MAKE) -j larson_system >/dev/null
	@echo "[asan-preload] LD_PRELOAD chain: $(ASAN_LIB):$(CURDIR)/libhakmem_asan.so"
	@echo "[asan-preload] Running: ./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
	@LSAN_OPTIONS=detect_leaks=0 \
		LD_PRELOAD="$(ASAN_LIB):$(CURDIR)/libhakmem_asan.so" \
		./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
# Short ASan-preload run of larson_system (fixed 5 / 256 in place of
# SLEEP / CHPT) with the HAKMEM_WRAP_TINY, HAKMEM_TINY_SS_ADOPT,
# HAKMEM_TINY_DEBUG_REMOTE_GUARD and HAKMEM_TINY_TRACE_RING environment
# switches set to 1.
#
# * The two builds run one after the other: asan-shared-alloc starts with
#   `$(MAKE) clean`, which must not race with the larson_system build.
# * The library path uses $(CURDIR) rather than the inherited $(PWD)
#   environment variable, which is wrong under `make -C dir`.
# Leak detection is switched off for the run via LSAN_OPTIONS.
.PHONY: asan-preload-mailbox-lite
asan-preload-mailbox-lite:
	@$(MAKE) -j asan-shared-alloc >/dev/null
	@$(MAKE) -j larson_system >/dev/null
	@echo "[asan-preload-mailbox-lite] (short-run)"
	@echo "[asan-preload-mailbox-lite] Running: ./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)"
	@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
		HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
		LSAN_OPTIONS=detect_leaks=0 \
		LD_PRELOAD="$(ASAN_LIB):$(CURDIR)/libhakmem_asan.so" \
		./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)
# Build the UBSan larson binary (via ubsan-larson-alloc) and run it directly
# with HAKMEM_WRAP_TINY=1 and HAKMEM_TINY_SS_ADOPT=1 in its environment,
# using the overridable default run parameters.
.PHONY: ubsan-mailbox-run
ubsan-mailbox-run:
	@$(MAKE) -j ubsan-larson-alloc >/dev/null
	@echo "[ubsan-mailbox] Running: ./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
	@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
		./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
# ----------------------------------------------------------------------------
# HAKMEM direct-link benches & reproducer helpers
# ----------------------------------------------------------------------------
# bench-hakmem: build larson_hakmem, then run it twice with the default run
# parameters - once with 1 thread and once with $(THREADS) threads.
.PHONY: bench-hakmem
bench-hakmem:
	@$(MAKE) -j larson_hakmem >/dev/null
	@echo "== hakmem 1T =="
	@./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) 1
	@echo "== hakmem $(THREADS)T =="
	@./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
# Same two-run shape as bench-hakmem, but with HAKMEM_TINY_REFILL_COUNT_HOT=64
# in the environment and fixed 5 / 512 in place of SLEEP / CHPT.
.PHONY: bench-hakmem-hot64
bench-hakmem-hot64:
	@$(MAKE) -j larson_hakmem >/dev/null
	@echo "== hakmem HOT64 1T =="
	@HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) 1
	@echo "== hakmem HOT64 $(THREADS)T =="
	@HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) $(THREADS)
# A/B sweep over HAKMEM_TINY_FAST_CAP (8, 16, 32) with
# HAKMEM_TINY_REFILL_COUNT_HOT=64, the remote guard and the trace ring enabled.
# Each short run is best-effort: a failing run does not stop the sweep.
.PHONY: bench-hakmem-hot64-fastcap-ab
bench-hakmem-hot64-fastcap-ab:
	@$(MAKE) -j larson_hakmem >/dev/null
	@for fastcap in 8 16 32; do \
		echo "== HOT64 FastCap=$$fastcap $(THREADS)T (short) =="; \
		HAKMEM_TINY_REFILL_COUNT_HOT=64 HAKMEM_TINY_FAST_CAP=$$fastcap \
		HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
		./larson_hakmem 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true; \
	done
# Short valgrind run of larson_hakmem.
# The binary is rebuilt from scratch with OPT_LEVEL=0 USE_LTO=0 NATIVE=0, then
# executed under valgrind with full leak checking and origin tracking.
# valgrind is told to exit with 99 on errors, but the trailing `|| true` means
# this goal itself never fails on that.
.PHONY: valgrind-hakmem-hot64-lite
valgrind-hakmem-hot64-lite:
	@$(MAKE) clean >/dev/null
	@$(MAKE) larson_hakmem OPT_LEVEL=0 USE_LTO=0 NATIVE=0 >/dev/null
	@echo "== valgrind HOT64 lite $(THREADS)T =="
	@HAKMEM_TINY_REFILL_COUNT_HOT=64 \
		valgrind --quiet --leak-check=full --show-leak-kinds=all \
		--errors-for-leak-kinds=all --track-origins=yes --error-exitcode=99 \
		./larson_hakmem 2 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true
# ----------------------------------------------------------------------------
# Unit tests (Box-level)
# ----------------------------------------------------------------------------
# `unit` builds every test binary listed in UNIT_BINS under $(UNIT_BIN_DIR);
# `unit-run` (defined further down) builds and then executes them.
.PHONY: unit unit-run
UNIT_BIN_DIR := tests/bin
UNIT_BINS := $(addprefix $(UNIT_BIN_DIR)/,test_super_registry test_ready_ring test_mailbox_box madvise_guard_test libm_reloc_guard_test)
unit: $(UNIT_BINS)
	@echo "OK: unit tests built -> $(UNIT_BINS)"
# One rule per unit-test binary. Each compiles and links its listed sources in
# a single $(CC) invocation; the output directory is created on demand from
# the target's own directory part.

# Super registry test: test driver + registry + superslab sources.
$(UNIT_BIN_DIR)/test_super_registry: tests/unit/test_super_registry.c core/hakmem_super_registry.c core/hakmem_tiny_superslab.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# Ready ring test: built from the single test source.
$(UNIT_BIN_DIR)/test_ready_ring: tests/unit/test_ready_ring.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# Mailbox box test: test driver + test stubs + the mailbox box source.
$(UNIT_BIN_DIR)/test_mailbox_box: tests/unit/test_mailbox_box.c tests/unit/mailbox_test_stubs.c core/box/mailbox_box.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# madvise guard test: test driver + the madvise guard box source.
$(UNIT_BIN_DIR)/madvise_guard_test: tests/unit/madvise_guard_test.c core/box/madvise_guard_box.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# libm relocation guard test: test driver + the libm reloc guard box source.
$(UNIT_BIN_DIR)/libm_reloc_guard_test: tests/unit/libm_reloc_guard_test.c core/box/libm_reloc_guard_box.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
# Build all unit tests, then execute them in a fixed order. Each test is
# announced before it runs; the first failing test aborts the goal.
unit-run: unit
	@echo "Running unit: test_super_registry"
	@$(UNIT_BIN_DIR)/test_super_registry
	@echo "Running unit: test_ready_ring"
	@$(UNIT_BIN_DIR)/test_ready_ring
	@echo "Running unit: test_mailbox_box"
	@$(UNIT_BIN_DIR)/test_mailbox_box
	@echo "Running unit: madvise_guard_test"
	@$(UNIT_BIN_DIR)/madvise_guard_test
	@echo "Running unit: libm_reloc_guard_test"
	@$(UNIT_BIN_DIR)/libm_reloc_guard_test
# Build larson_hakmem with the 3-layer Tiny front (new front) at low
# optimization for debug/testing: clean first, then a recursive make with
# NEW_3LAYER_DEFAULT=1, LTO off and OPT_LEVEL=1.
larson_hakmem_3layer:
	$(MAKE) clean
	$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 larson_hakmem
	@printf '%s\n' \
		"=========================================" \
		"Built larson_hakmem with NEW 3-LAYER front" \
		" NEW_3LAYER_DEFAULT=1, LTO=OFF, O1" \
		"========================================="
# Same 3-layer build as larson_hakmem_3layer, plus -DHAKMEM_ROUTE=1 appended to
# EXTRA_CFLAGS to compile in the route fingerprint. As the banner states, the
# runtime ring output is still controlled by an environment variable.
larson_hakmem_route:
	$(MAKE) clean
	$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 \
		EXTRA_CFLAGS+=" -DHAKMEM_ROUTE=1" larson_hakmem
	@printf '%s\n' \
		"=========================================" \
		"Built larson_hakmem (3-layer + route)" \
		" HAKMEM_ROUTE build-flag set; runtime ENV still controls output" \
		"========================================="
# ----------------------------------------------------------------------------
# Pool TLS Benchmarks (Phase 1.5b)
# ----------------------------------------------------------------------------
# HAKMEM variant: $(SHARED_LIB) is a prerequisite so that -lhakmem can be
# resolved from the current directory (-L.) at link time. Only the C source
# ($<) is compiled; the library is pulled in through the linker flags.
# NOTE(review): nothing visible here adds a runtime search path for the
# shared library - confirm LDFLAGS or the environment provides one.
bench_pool_tls_hakmem: benchmarks/bench_pool_tls.c $(SHARED_LIB)
	$(CC) $(CFLAGS) -o $@ $< -L. -lhakmem $(LDFLAGS)
# System-malloc variant of the Pool TLS benchmark: same source, compiled with
# -DUSE_SYSTEM_MALLOC and without linking libhakmem.
bench_pool_tls_system: benchmarks/bench_pool_tls.c
	$(CC) $(CFLAGS) -DUSE_SYSTEM_MALLOC -o $@ $< $(LDFLAGS)
# Build both Pool TLS benchmark binaries and run them back to back with the
# same arguments (1 100000 256 42), HAKMEM first and system malloc second.
.PHONY: bench-pool-tls
bench-pool-tls: bench_pool_tls_hakmem bench_pool_tls_system
	@printf '%s\n' \
		"=========================================" \
		"Pool TLS Benchmark (8KB-52KB allocations)" \
		"=========================================" \
		"" \
		"== HAKMEM (Phase 1.5b Pre-warm) =="
	@./bench_pool_tls_hakmem 1 100000 256 42
	@printf '%s\n' \
		"" \
		"== System malloc =="
	@./bench_pool_tls_system 1 100000 256 42
	@printf '%s\n' \
		"" \
		"========================================="
# Phase E1-CORRECT debug bench (minimal test).
# Link step: the test object plus every object in $(HAKMEM_OBJS).
test_simple_e1: test_simple_e1.o $(HAKMEM_OBJS)
	$(CC) -o $@ $^ $(LDFLAGS)

# Compile step for the test driver itself.
test_simple_e1.o: test_simple_e1.c
	$(CC) $(CFLAGS) -c $< -o $@
# ========================================
# Phase 4: PGO (Profile-Guided Optimization) Targets
# ========================================
# Phase 4-Step1: PGO profile build.
# Cleans the tree, then rebuilds bench_random_mixed_hakmem and
# bench_tiny_hot_hakmem with PROFILE_GEN=1 (instrumented with
# -fprofile-generate for profiling).
.PHONY: pgo-tiny-profile
pgo-tiny-profile:
	@printf '%s\n' \
		"=========================================" \
		"Phase 4: Building PGO Profile Binaries" \
		"========================================="
	$(MAKE) clean
	$(MAKE) PROFILE_GEN=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
	@printf '%s\n' \
		"" \
		"✓ PGO profile binaries built" \
		"Next: Run 'make pgo-tiny-collect' to collect profile data" \
		""
# Phase 4-Step1: PGO profile collection.
# Delegates to scripts/box/pgo_tiny_profile_box.sh, which executes
# representative workloads to generate the .gcda files.
.PHONY: pgo-tiny-collect
pgo-tiny-collect:
	@printf '%s\n' \
		"=========================================" \
		"Phase 4: Collecting PGO Profile Data" \
		"========================================="
	./scripts/box/pgo_tiny_profile_box.sh
# Phase 4-Step1: PGO optimized build.
# Cleans the tree, then rebuilds both bench binaries with PROFILE_USE=1
# (-fprofile-use) so that the collected profile drives optimization.
# NOTE(review): this runs `clean` right before consuming the profile -
# confirm that `clean` leaves the *.gcda files in place.
.PHONY: pgo-tiny-build
pgo-tiny-build:
	@printf '%s\n' \
		"=========================================" \
		"Phase 4: Building PGO-Optimized Binaries" \
		"=========================================" \
		"Building optimized binaries..."
	$(MAKE) clean
	$(MAKE) PROFILE_USE=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
	@printf '%s\n' \
		"" \
		"✓ PGO-optimized binaries built" \
		"Next: Run './bench_random_mixed_hakmem 1000000 256 42' to test" \
		""
# Phase 4-Step1: Full PGO workflow.
# Complete workflow: profile -> collect -> build -> test.
#
# The three stages are strictly ordered (each consumes what the previous one
# produced, and two of them start with `clean`), so they are run as sequential
# sub-makes from the recipe. Listing them as prerequisites would let
# `make -j` run them concurrently, because sibling prerequisites carry no
# ordering guarantee.
.PHONY: pgo-tiny-full
pgo-tiny-full:
	$(MAKE) pgo-tiny-profile
	$(MAKE) pgo-tiny-collect
	$(MAKE) pgo-tiny-build
	@echo "========================================="
	@echo "Phase 4: PGO Full Workflow Complete"
	@echo "========================================="
	@echo "Testing PGO-optimized binary..."
	@echo ""
	./bench_random_mixed_hakmem 1000000 256 42
	@echo ""
	@echo "✓ PGO optimization complete!"
	@echo ""