2025-11-05 12:31:14 +09:00
|
|
|
|
# Makefile for hakmem PoC
|
|
|
|
|
|
|
|
|
|
|
|
CC = gcc
|
2025-11-29 11:28:38 +09:00
|
|
|
|
# Default target: Show help
|
|
|
|
|
|
.DEFAULT_GOAL := help
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: help
|
|
|
|
|
|
help:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "HAKMEM Build Targets"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Development (Fast builds):"
|
|
|
|
|
|
@echo " make bench_random_mixed_hakmem - Quick build (~1-2 min)"
|
|
|
|
|
|
@echo " make bench_tiny_hot_hakmem - Quick build"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Benchmarking (PGO-optimized, +6% faster):"
|
|
|
|
|
|
@echo " make pgo-tiny-full - Full PGO workflow (~5-10 min)"
|
|
|
|
|
|
@echo " = Profile + Optimize + Test"
|
|
|
|
|
|
@echo " make pgo-tiny-profile - Step 1: Build profile binaries"
|
|
|
|
|
|
@echo " make pgo-tiny-collect - Step 2: Collect profile data"
|
|
|
|
|
|
@echo " make pgo-tiny-build - Step 3: Build optimized"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Comparison:"
|
2025-12-18 18:50:00 +09:00
|
|
|
|
@echo " make bench - Build allocator comparison benches"
|
2025-11-29 11:28:38 +09:00
|
|
|
|
@echo " make bench-pool-tls - Pool TLS benchmark"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Cleanup:"
|
|
|
|
|
|
@echo " make clean - Clean build artifacts"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Phase 4 Performance:"
|
|
|
|
|
|
@echo " Baseline: 57.0 M ops/s"
|
|
|
|
|
|
@echo " PGO-optimized: 60.6 M ops/s (+6.25%)"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "TIP: For best performance, use 'make pgo-tiny-full'"
|
|
|
|
|
|
@echo "========================================="
|
2025-11-05 12:31:14 +09:00
|
|
|
|
CXX = g++
|
|
|
|
|
|
|
|
|
|
|
|
# Directory structure (2025-11-01 reorganization)
|
|
|
|
|
|
SRC_DIR := core
|
|
|
|
|
|
BENCH_SRC := benchmarks/src
|
|
|
|
|
|
TEST_SRC := tests
|
|
|
|
|
|
BUILD_DIR := build
|
|
|
|
|
|
BENCH_BIN_DIR := benchmarks/bin
|
|
|
|
|
|
|
|
|
|
|
|
# Search paths for source files
|
2025-11-07 01:27:04 +09:00
|
|
|
|
VPATH := $(SRC_DIR):$(SRC_DIR)/box:$(BENCH_SRC)/tiny:$(BENCH_SRC)/mid:$(BENCH_SRC)/comprehensive:$(BENCH_SRC)/stress:$(TEST_SRC)/unit:$(TEST_SRC)/integration:$(TEST_SRC)/stress
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# Timing: default OFF for performance. Set HAKMEM_TIMING=1 to enable.
|
|
|
|
|
|
HAKMEM_TIMING ?= 0
|
|
|
|
|
|
# Phase 6.25: Aggressive optimization flags (default ON, overridable)
|
|
|
|
|
|
OPT_LEVEL ?= 3
|
|
|
|
|
|
USE_LTO ?= 1
|
|
|
|
|
|
NATIVE ?= 1
|
|
|
|
|
|
|
|
|
|
|
|
BASE_CFLAGS := -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L \
|
|
|
|
|
|
-D_GLIBC_USE_ISOC2X=0 -D__isoc23_strtol=strtol -D__isoc23_strtoll=strtoll \
|
|
|
|
|
|
-D__isoc23_strtoul=strtoul -D__isoc23_strtoull=strtoull -DHAKMEM_DEBUG_TIMING=$(HAKMEM_TIMING) \
|
|
|
|
|
|
-ffast-math -funroll-loops -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
|
2025-11-07 12:09:28 +09:00
|
|
|
|
-fno-semantic-interposition -I core -I include
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
CFLAGS = -O$(OPT_LEVEL) $(BASE_CFLAGS)
|
|
|
|
|
|
ifeq ($(NATIVE),1)
|
|
|
|
|
|
CFLAGS += -march=native -mtune=native -fno-plt
|
|
|
|
|
|
endif
|
|
|
|
|
|
ifeq ($(USE_LTO),1)
|
|
|
|
|
|
CFLAGS += -flto
|
|
|
|
|
|
endif
|
|
|
|
|
|
# Allow overriding TLS ring capacity at build time: make shared RING_CAP=32
|
|
|
|
|
|
RING_CAP ?= 32
|
|
|
|
|
|
# Phase 6.25: Aggressive optimization + TLS Ring 拡張
|
|
|
|
|
|
CFLAGS_SHARED = -O$(OPT_LEVEL) $(BASE_CFLAGS) -fPIC -DPOOL_TLS_RING_CAP=$(RING_CAP)
|
|
|
|
|
|
ifeq ($(NATIVE),1)
|
|
|
|
|
|
CFLAGS_SHARED += -march=native -mtune=native -fno-plt
|
|
|
|
|
|
endif
|
|
|
|
|
|
ifeq ($(USE_LTO),1)
|
|
|
|
|
|
CFLAGS_SHARED += -flto
|
|
|
|
|
|
endif
|
|
|
|
|
|
LDFLAGS = -lm -lpthread
|
|
|
|
|
|
ifeq ($(USE_LTO),1)
|
|
|
|
|
|
LDFLAGS += -flto
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
# Build hygiene: dependency tracking + flag consistency checks
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# Track header dependencies for explicit compile rules as well
|
|
|
|
|
|
CFLAGS += -MMD -MP
|
|
|
|
|
|
|
|
|
|
|
|
# If someone injects -DHAKMEM_POOL_TLS_PHASE1=1 directly into CFLAGS
|
|
|
|
|
|
# but forgets POOL_TLS_PHASE1=1, object lists will miss pool_tls*.o.
|
|
|
|
|
|
# Fail fast to avoid confusing link/runtime errors.
|
|
|
|
|
|
ifneq ($(filter -DHAKMEM_POOL_TLS_PHASE1=1,$(CFLAGS)),)
|
|
|
|
|
|
ifneq ($(POOL_TLS_PHASE1),1)
|
|
|
|
|
|
$(error Detected -DHAKMEM_POOL_TLS_PHASE1=1 in CFLAGS but POOL_TLS_PHASE1!=1. Please invoke: make POOL_TLS_PHASE1=1 ...)
|
|
|
|
|
|
endif
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Include generated .d files if present (safe even if none yet)
|
|
|
|
|
|
# Filter to only files (not directories like glibc-2.38/build/iconvdata/gconv-modules.d)
|
|
|
|
|
|
# Also exclude glibc and mimalloc-bench subdirectories
|
|
|
|
|
|
-include $(shell find . -name '*.d' -type f -not -path './glibc*' -not -path './mimalloc-bench*' 2>/dev/null)
|
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
# Build flavor: release/debug (controls HAKMEM_BUILD_* and NDEBUG)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
BUILD_FLAVOR ?= release
|
|
|
|
|
|
ifeq ($(BUILD_FLAVOR),release)
|
|
|
|
|
|
CFLAGS += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
|
|
|
|
|
|
CFLAGS_SHARED += -DNDEBUG -DHAKMEM_BUILD_RELEASE=1
|
|
|
|
|
|
else ifeq ($(BUILD_FLAVOR),debug)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_BUILD_DEBUG=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_BUILD_DEBUG=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-12-15 05:53:58 +09:00
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
# Phase 18: Hot Text Isolation (I-cache locality optimization)
|
|
|
|
|
|
# ------------------------------------------------------------
|
|
|
|
|
|
# Enable (safe): make HOT_TEXT_ISOLATION=1 bench_random_mixed_hakmem
|
|
|
|
|
|
# Default: OFF (research box, requires A/B validation)
|
|
|
|
|
|
# What it does:
|
|
|
|
|
|
# - Adds -DHAKMEM_HOT_TEXT_ISOLATION=1 (hot/cold attribute macros only)
|
|
|
|
|
|
#
|
|
|
|
|
|
# NOTE (Phase 18 v1 NO-GO):
|
|
|
|
|
|
# - The section-splitting + --gc-sections experiment caused a large I-cache regression.
|
|
|
|
|
|
# - Keep it behind a separate opt-in knob (HOT_TEXT_GC_SECTIONS=1) if needed for research.
|
|
|
|
|
|
HOT_TEXT_ISOLATION ?= 0
|
|
|
|
|
|
ifeq ($(HOT_TEXT_ISOLATION),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_HOT_TEXT_ISOLATION=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_HOT_TEXT_ISOLATION=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Research-only (currently NO-GO): function/data sections + --gc-sections.
|
|
|
|
|
|
# Enable explicitly only when combined with an ordering strategy.
|
|
|
|
|
|
HOT_TEXT_GC_SECTIONS ?= 0
|
|
|
|
|
|
ifeq ($(HOT_TEXT_GC_SECTIONS),1)
|
|
|
|
|
|
CFLAGS += -ffunction-sections -fdata-sections
|
|
|
|
|
|
CFLAGS_SHARED += -ffunction-sections -fdata-sections
|
|
|
|
|
|
LDFLAGS += -Wl,--gc-sections
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
Phase 18 v2: BENCH_MINIMAL — NEUTRAL (+2.32% throughput, -5.06% instructions)
## Summary
Phase 18 v2 attempted instruction count reduction via conditional compilation:
- Stats collection → no-op
- ENV checks → constant propagation
- Binary size: 653K → 649K (-4K, -0.6%)
Result: NEUTRAL (below GO threshold)
- Throughput: +2.32% (target: +5% minimum) ❌
- Instructions: -5.06% (target: -15% minimum) ❌
- Cycles: -3.26% (positive signal)
- Branches: -8.67% (positive signal)
- Cache-misses: +30% (unexpected, likely layout)
## Analysis
Positive signals:
- Implementation correct (Branch -8.67%, Instruction -5.06%)
- Binary size reduced (-4K)
- Modest throughput gain (+2.32%)
- Cycles and branch overhead reduced
Negative signals:
- Instruction reduction insufficient (-5.06% << -15% smoking gun)
- Throughput gain below +5% threshold
- Cache-misses increased (+30%, layout noise?)
## Verdict
Freeze Phase 18 v2 (weak positive, insufficient for production).
Per user guidance: "If instructions don't drop clearly, continuation value is thin."
-5.06% instruction reduction is marginal. Allocator micro-optimization plateau confirmed.
## Key Insight
Phase 17 showed:
- IPC = 2.30 (consistent, memory-bound)
- I-cache gap: 55% (Phase 17: 153K → 68K)
- Instruction gap: 48% (Phase 17: 41.3B → 21.5B)
Phase 18 v1/v2 results confirm:
- Layout tweaks are fragile (v1: I-cache +91%)
- Instruction removal is modest benefit (v2: -5.06%)
- Allocator is NOT the bottleneck (IPC constant, memory-limited)
## Recommendation
Do NOT continue Phase 18 micro-optimizations.
Next frontier requires different approach:
1. Architectural redesign (SIMD, lock-free, batching)
2. Memory layout optimization (cache-friendly structures)
3. Broader profiling (not allocator-focused)
Or: Accept that 48M → 85M (75% gap) is achievable with current architecture.
Files:
- docs/analysis/PHASE18_HOT_TEXT_ISOLATION_2_AB_TEST_RESULTS.md (results)
- CURRENT_TASK.md (Phase 18 complete status)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-15 06:02:28 +09:00
|
|
|
|
# Phase 18 v2: BENCH_MINIMAL (remove instrumentation for benchmark builds)
|
|
|
|
|
|
BENCH_MINIMAL ?= 0
|
|
|
|
|
|
ifeq ($(BENCH_MINIMAL),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_BENCH_MINIMAL=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_BENCH_MINIMAL=1
|
|
|
|
|
|
# Note: Both bench and shared lib will disable instrumentation
|
|
|
|
|
|
# Mainly impacts bench_* binaries (where BENCH_MINIMAL is intentionally enabled)
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# Default: enable Box Theory refactor for Tiny (Phase 6-1.7)
|
|
|
|
|
|
# This is the best performing option currently (4.19M ops/s)
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# NOTE: Disabled while testing ULTRA_SIMPLE with SFC integration
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# To opt-out for legacy path: make BOX_REFACTOR_DEFAULT=0
|
|
|
|
|
|
BOX_REFACTOR_DEFAULT ?= 1
|
|
|
|
|
|
ifeq ($(BOX_REFACTOR_DEFAULT),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1
|
2025-11-07 01:27:04 +09:00
|
|
|
|
else
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=0
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=0
|
2025-11-05 12:31:14 +09:00
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
|
# (Removed) legacy BUILD_RELEASE_DEFAULT in favor of BUILD_FLAVOR
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# Phase 6-2: Ultra-Simple with SFC integration
|
|
|
|
|
|
# Original Ultra-Simple (without SFC): 3.56M ops/s vs BOX_REFACTOR: 4.19M ops/s
|
|
|
|
|
|
# Now testing with SFC (128-slot cache) integration - expecting >5M ops/s
|
|
|
|
|
|
# To disable: make ULTRA_SIMPLE_DEFAULT=0
|
2025-11-05 12:31:14 +09:00
|
|
|
|
ULTRA_SIMPLE_DEFAULT ?= 0
|
|
|
|
|
|
ifeq ($(ULTRA_SIMPLE_DEFAULT),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 6-3: Tiny Fast Path (System tcache style, 3-4 instruction fast path)
|
|
|
|
|
|
# Target: 70-80% of System tcache (95-108 M ops/s)
|
|
|
|
|
|
# Enable by default for testing
|
|
|
|
|
|
TINY_FAST_PATH_DEFAULT ?= 1
|
|
|
|
|
|
ifeq ($(TINY_FAST_PATH_DEFAULT),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_FAST_PATH=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_FAST_PATH=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# Phase 6-1.8: New 3-Layer Tiny front (A/B)
|
|
|
|
|
|
# To enable by default: make NEW_3LAYER_DEFAULT=1
|
|
|
|
|
|
NEW_3LAYER_DEFAULT ?= 0
|
|
|
|
|
|
ifeq ($(NEW_3LAYER_DEFAULT),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_USE_NEW_3LAYER=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_USE_NEW_3LAYER=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-08 03:18:17 +09:00
|
|
|
|
# Phase 7: Region-ID Direct Lookup (Header-based class_idx)
|
|
|
|
|
|
# Ultra-fast free: 3-5 instructions, 5-10 cycles (vs 500+ cycles current)
|
|
|
|
|
|
# Target: 40-80M ops/s (70-140% of System malloc)
|
|
|
|
|
|
# Enable: make HEADER_CLASSIDX=1
|
2025-11-12 13:57:46 +09:00
|
|
|
|
# Default: ON (Phase 7 validated, Fix #16 stable, mimalloc strategy Phase 1)
|
|
|
|
|
|
HEADER_CLASSIDX ?= 1
|
2025-11-08 03:18:17 +09:00
|
|
|
|
ifeq ($(HEADER_CLASSIDX),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_HEADER_CLASSIDX=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_HEADER_CLASSIDX=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-08 12:54:52 +09:00
|
|
|
|
# Phase 7 Task 2: Aggressive inline TLS cache access
|
|
|
|
|
|
# Enable: make HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1
|
|
|
|
|
|
# Expected: +10-15% performance (save 5-10 cycles per alloc)
|
2025-11-12 13:57:46 +09:00
|
|
|
|
# Default: ON (mimalloc strategy Phase 1)
|
|
|
|
|
|
AGGRESSIVE_INLINE ?= 1
|
2025-11-08 12:54:52 +09:00
|
|
|
|
ifeq ($(AGGRESSIVE_INLINE),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_AGGRESSIVE_INLINE=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 7 Task 3: Pre-warm TLS cache
|
|
|
|
|
|
# Enable: make PREWARM_TLS=1
|
|
|
|
|
|
# Expected: Reduce first-allocation miss penalty
|
2025-11-12 13:57:46 +09:00
|
|
|
|
# Default: ON (mimalloc strategy Phase 1)
|
|
|
|
|
|
PREWARM_TLS ?= 1
|
2025-11-08 12:54:52 +09:00
|
|
|
|
ifeq ($(PREWARM_TLS),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_PREWARM_TLS=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_PREWARM_TLS=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-12 13:57:46 +09:00
|
|
|
|
# Performance Optimization: Fixed refill for class5 (256B)
|
|
|
|
|
|
# ChatGPT-sensei recommendation: Eliminate branches by fixing want=256
|
|
|
|
|
|
# Enable: make CLASS5_FIXED_REFILL=1
|
|
|
|
|
|
# Expected: Reduce branch mispredictions and instruction count
|
|
|
|
|
|
CLASS5_FIXED_REFILL ?= 0
|
|
|
|
|
|
ifeq ($(CLASS5_FIXED_REFILL),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_TINY_CLASS5_FIXED_REFILL=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-29 09:04:32 +09:00
|
|
|
|
# Phase 3 (2025-11-29): mincore removed entirely
|
|
|
|
|
|
# - mincore() syscall overhead eliminated (was +10.3% with DISABLE flag)
|
|
|
|
|
|
# - Phase 1b/2 registry-based validation provides sufficient safety
|
|
|
|
|
|
# - Dead code cleanup: DISABLE_MINCORE flag no longer needed
|
2025-11-14 15:32:07 +09:00
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
ifdef PROFILE_GEN
|
|
|
|
|
|
CFLAGS += -fprofile-generate
|
|
|
|
|
|
LDFLAGS += -fprofile-generate
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
ifdef PROFILE_USE
|
|
|
|
|
|
CFLAGS += -fprofile-use -Wno-error=coverage-mismatch
|
|
|
|
|
|
LDFLAGS += -fprofile-use
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
CFLAGS += $(EXTRA_CFLAGS)
|
2025-12-03 12:11:27 +09:00
|
|
|
|
CFLAGS_SHARED += $(EXTRA_CFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
LDFLAGS += $(EXTRA_LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# Targets
|
|
|
|
|
|
TARGET = test_hakmem
|
2025-12-18 22:05:34 +09:00
|
|
|
|
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o core/box/ss_release_policy_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/tiny_free_route_cache_env_box.o core/box/hakmem_env_snapshot_box.o core/box/tiny_c7_preserve_header_env_box.o core/box/tiny_tcache_env_box.o core/box/tiny_unified_lifo_env_box.o core/box/front_fastlane_alloc_legacy_direct_env_box.o core/box/fastlane_direct_env_box.o core/box/tiny_header_hotfull_env_box.o core/box/tiny_inline_slots_fixed_mode_box.o core/box/tiny_inline_slots_switch_dispatch_fixed_box.o core/box/free_path_commit_once_fixed_box.o core/box/free_path_legacy_mask_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/tiny_c6_inline_slots.o core/tiny_c5_inline_slots.o core/tiny_c2_local_cache.o core/tiny_c3_inline_slots.o core/tiny_c4_inline_slots.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o core/box/small_policy_snapshot_tls_box.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
OBJS = $(OBJS_BASE)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# Shared library
|
|
|
|
|
|
SHARED_LIB = libhakmem.so
|
2025-12-18 18:50:00 +09:00
|
|
|
|
# IMPORTANT: keep the shared library in sync with the current hakmem build to avoid
|
|
|
|
|
|
# LD_PRELOAD runtime link errors (undefined symbols) as new boxes/files are added.
|
|
|
|
|
|
SHARED_OBJS = $(patsubst %.o,%_shared.o,$(OBJS_BASE))
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
|
# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1)
|
|
|
|
|
|
ifeq ($(POOL_TLS_PHASE1),1)
|
2025-11-09 18:55:50 +09:00
|
|
|
|
OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
|
|
|
|
|
SHARED_OBJS += pool_tls_shared.o pool_refill_shared.o pool_tls_arena_shared.o pool_tls_registry_shared.o pool_tls_remote_shared.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
CFLAGS += -DHAKMEM_POOL_TLS_PHASE1=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PHASE1=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
# Pool TLS Phase 1.5b - Pre-warm optimization
|
|
|
|
|
|
ifeq ($(POOL_TLS_PREWARM),1)
|
|
|
|
|
|
CFLAGS += -DHAKMEM_POOL_TLS_PREWARM=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PREWARM=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-14 15:32:07 +09:00
|
|
|
|
# Pool TLS Bind Box - Registry lookup short-circuit (Phase 1.6)
|
|
|
|
|
|
ifeq ($(POOL_TLS_BIND_BOX),1)
|
|
|
|
|
|
OBJS += pool_tls_bind.o
|
|
|
|
|
|
SHARED_OBJS += pool_tls_bind_shared.o
|
|
|
|
|
|
CFLAGS += -DHAKMEM_POOL_TLS_BIND_BOX=1
|
|
|
|
|
|
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_BIND_BOX=1
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# Benchmark targets
|
|
|
|
|
|
BENCH_HAKMEM = bench_allocators_hakmem
|
|
|
|
|
|
BENCH_SYSTEM = bench_allocators_system
|
2025-12-18 22:05:34 +09:00
|
|
|
|
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o core/box/ss_release_policy_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/tiny_free_route_cache_env_box.o core/box/fastlane_direct_env_box.o core/box/tiny_inline_slots_fixed_mode_box.o core/box/tiny_inline_slots_switch_dispatch_fixed_box.o core/box/free_path_commit_once_fixed_box.o core/box/free_path_legacy_mask_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/tiny_c6_inline_slots.o core/tiny_c5_inline_slots.o core/tiny_c2_local_cache.o core/tiny_c3_inline_slots.o core/tiny_c4_inline_slots.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o core/box/small_policy_snapshot_tls_box.o bench_allocators_hakmem.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
|
|
|
|
|
|
ifeq ($(POOL_TLS_PHASE1),1)
|
2025-11-09 18:55:50 +09:00
|
|
|
|
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
endif
|
2025-11-05 12:31:14 +09:00
|
|
|
|
BENCH_SYSTEM_OBJS = bench_allocators_system.o
|
|
|
|
|
|
|
|
|
|
|
|
# Default target
|
|
|
|
|
|
all: $(TARGET)
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
# Show key build-time switches for troubleshooting
|
|
|
|
|
|
.PHONY: print-flags
|
|
|
|
|
|
print-flags:
|
2025-11-09 18:55:50 +09:00
|
|
|
|
@echo "==== Build Switches ===="
|
|
|
|
|
|
@echo "FLAVOR = $(BUILD_FLAVOR)"
|
|
|
|
|
|
@echo "POOL_TLS_PHASE1 = $(POOL_TLS_PHASE1)"
|
|
|
|
|
|
@echo "POOL_TLS_PREWARM = $(POOL_TLS_PREWARM)"
|
|
|
|
|
|
@echo "HEADER_CLASSIDX = $(HEADER_CLASSIDX)"
|
|
|
|
|
|
@echo "AGGRESSIVE_INLINE = $(AGGRESSIVE_INLINE)"
|
|
|
|
|
|
@echo "PREWARM_TLS = $(PREWARM_TLS)"
|
|
|
|
|
|
@echo "USE_LTO = $(USE_LTO)"
|
|
|
|
|
|
@echo "OPT_LEVEL = $(OPT_LEVEL)"
|
|
|
|
|
|
@echo "NATIVE = $(NATIVE)"
|
|
|
|
|
|
@echo "CFLAGS contains = $(filter -DHAKMEM_BUILD_%,$(CFLAGS))"
|
2025-11-09 11:50:18 +09:00
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# Build test program
|
|
|
|
|
|
$(TARGET): $(OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Build successful! Run with:"
|
|
|
|
|
|
@echo " ./$(TARGET)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
# Compile C files
|
2025-12-01 23:43:44 +09:00
|
|
|
|
%.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_tiny_superslab.h hakmem_super_registry.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
|
2025-11-05 12:31:14 +09:00
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
# Build benchmark programs
|
|
|
|
|
|
bench: CFLAGS += -DHAKMEM_PROF_STATIC=1
|
|
|
|
|
|
bench: $(BENCH_HAKMEM) $(BENCH_SYSTEM)
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Benchmark programs built successfully!"
|
|
|
|
|
|
@echo " $(BENCH_HAKMEM) - hakmem versions"
|
|
|
|
|
|
@echo " $(BENCH_SYSTEM) - system/jemalloc/mimalloc"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Run benchmarks with:"
|
|
|
|
|
|
@echo " bash bench_runner.sh --runs 10"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
# hakmem version (with hakmem linked)
|
|
|
|
|
|
bench_allocators_hakmem.o: bench_allocators.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
$(BENCH_HAKMEM): $(BENCH_HAKMEM_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# system version (without hakmem, for LD_PRELOAD testing)
|
|
|
|
|
|
bench_allocators_system.o: bench_allocators.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
$(BENCH_SYSTEM): $(BENCH_SYSTEM_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# Tiny hot microbench (direct link vs system)
|
|
|
|
|
|
bench_tiny_hot_hakmem.o: bench_tiny_hot.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_system.o: bench_tiny_hot.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
2025-12-17 21:08:17 +09:00
|
|
|
|
bench_tiny_hot_hakmem: bench_tiny_hot_hakmem.o $(TINY_BENCH_OBJS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_system: bench_tiny_hot_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# mimalloc variant for tiny hot bench (direct link)
|
|
|
|
|
|
bench_tiny_hot_mi.o: bench_tiny_hot.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_mi_force.o: bench_mi_force.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_mi: bench_tiny_hot_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# hakmi variant for tiny hot bench (direct link via front API)
|
|
|
|
|
|
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
HAKMI_FRONT_OBJS = adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o
|
|
|
|
|
|
|
|
|
|
|
|
# ===== Convenience perf targets =====
|
|
|
|
|
|
.PHONY: pgo-gen-tinyhot pgo-use-tinyhot perf-help
|
|
|
|
|
|
|
|
|
|
|
|
# Generate PGO profile for Tiny Hot (32/100/60000) with SLL-first fast path
|
|
|
|
|
|
pgo-gen-tinyhot:
|
|
|
|
|
|
$(MAKE) PROFILE_GEN=1 bench_tiny_hot_hakmem
|
|
|
|
|
|
HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0 \
|
2025-11-26 14:45:26 +09:00
|
|
|
|
HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1 HAKMEM_SLL_MULTIPLIER=1 \
|
2025-11-05 12:31:14 +09:00
|
|
|
|
./bench_tiny_hot_hakmem 32 100 60000 || true
|
|
|
|
|
|
|
|
|
|
|
|
# Use generated PGO profile for Tiny Hot binary
|
|
|
|
|
|
pgo-use-tinyhot:
|
|
|
|
|
|
$(MAKE) PROFILE_USE=1 bench_tiny_hot_hakmem
|
|
|
|
|
|
|
|
|
|
|
|
# Show recommended runtime envs for bench reproducibility
|
|
|
|
|
|
perf-help:
|
|
|
|
|
|
@echo "Recommended runtime envs (Tiny Hot / Larson):"
|
|
|
|
|
|
@echo " export HAKMEM_TINY_TRACE_RING=0 HAKMEM_SAFE_FREE=0"
|
2025-11-26 14:45:26 +09:00
|
|
|
|
@echo " export HAKMEM_TINY_TLS_SLL=1 HAKMEM_TINY_TLS_LIST=1"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo " export HAKMEM_SLL_MULTIPLIER=1"
|
|
|
|
|
|
@echo "Build flags (overridable): OPT_LEVEL=$(OPT_LEVEL) USE_LTO=$(USE_LTO) NATIVE=$(NATIVE)"
|
|
|
|
|
|
|
|
|
|
|
|
# Explicit compile rules for hakmi front objects (require mimalloc headers)
|
|
|
|
|
|
adapters/hakmi_front/hakmi_front.o: adapters/hakmi_front/hakmi_front.c adapters/hakmi_front/hakmi_front.h include/hakmi/hakmi_api.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
adapters/hakmi_front/hakmi_env.o: adapters/hakmi_front/hakmi_env.c adapters/hakmi_front/hakmi_env.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -c -o $@ $<
|
|
|
|
|
|
adapters/hakmi_front/hakmi_tls_front.o: adapters/hakmi_front/hakmi_tls_front.c adapters/hakmi_front/hakmi_tls_front.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_hakmi: bench_tiny_hot_hakmi.o $(HAKMI_FRONT_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# Run test
|
|
|
|
|
|
run: $(TARGET)
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Running hakmem PoC test..."
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@./$(TARGET)
|
|
|
|
|
|
|
|
|
|
|
|
# Shared library target (for LD_PRELOAD with mimalloc-bench)
|
|
|
|
|
|
%_shared.o: %.c hakmem.h hakmem_config.h hakmem_features.h hakmem_internal.h hakmem_bigcache.h hakmem_pool.h hakmem_l25_pool.h hakmem_site_rules.h hakmem_tiny.h hakmem_elo.h hakmem_batch.h hakmem_p2.h hakmem_sizeclass_dist.h hakmem_evo.h
|
|
|
|
|
|
$(CC) $(CFLAGS_SHARED) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
$(SHARED_LIB): $(SHARED_OBJS)
|
|
|
|
|
|
$(CC) -shared -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Shared library built successfully!"
|
|
|
|
|
|
@echo " $(SHARED_LIB)"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Use with LD_PRELOAD:"
|
|
|
|
|
|
@echo " LD_PRELOAD=./$(SHARED_LIB) <command>"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
shared: $(SHARED_LIB)
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 6.15: Debug build target (verbose logging)
|
|
|
|
|
|
debug: CFLAGS += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1
|
|
|
|
|
|
debug: CFLAGS_SHARED += -DHAKMEM_DEBUG_VERBOSE -g -O0 -DHAKMEM_PROF_STATIC=1
|
|
|
|
|
|
debug: HAKMEM_TIMING=1
|
|
|
|
|
|
debug: shared
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 6-1.7: Box Theory Refactoring
|
|
|
|
|
|
box-refactor:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -DHAKMEM_TINY_PHASE6_BOX_REFACTOR=1" larson_hakmem
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Built with Box Refactor (Phase 6-1.7)"
|
|
|
|
|
|
@echo " larson_hakmem (with Box 1/5/6)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
# Convenience target: build and test box-refactor
|
|
|
|
|
|
test-box-refactor: box-refactor
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Running Box Refactor Test..."
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
./larson_hakmem 10 8 128 1024 1 12345 4
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
|
2025-12-18 22:05:34 +09:00
|
|
|
|
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o core/box/ss_allocation_box.o core/box/ss_release_policy_box.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o core/superslab_head_stub.o hakmem_smallmid.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_pt_impl.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/tiny_route_box.o core/box/free_front_v3_env_box.o core/box/free_path_stats_box.o core/box/free_dispatch_stats_box.o core/box/free_cold_shape_env_box.o core/box/free_cold_shape_stats_box.o core/box/alloc_gate_stats_box.o core/box/tiny_c6_ultra_free_box.o core/box/tiny_c5_ultra_free_box.o core/box/tiny_c4_ultra_free_box.o core/box/tiny_ultra_tls_box.o core/box/tiny_page_box.o core/box/tiny_class_policy_box.o core/box/tiny_class_stats_box.o core/box/tiny_policy_learner_box.o core/box/ss_budget_box.o core/box/tiny_mem_stats_box.o core/box/c7_meta_used_counter_box.o core/box/tiny_static_route_box.o core/box/tiny_metadata_cache_hot_box.o core/box/wrapper_env_box.o core/box/free_wrapper_env_snapshot_box.o core/box/malloc_wrapper_env_snapshot_box.o core/box/madvise_guard_box.o core/box/libm_reloc_guard_box.o core/box/ptr_trace_box.o core/box/link_missing_stubs.o core/box/super_reg_box.o core/box/shared_pool_box.o core/box/remote_side_box.o core/box/tiny_free_route_cache_env_box.o core/box/hakmem_env_snapshot_box.o core/box/tiny_c7_preserve_header_env_box.o core/box/tiny_tcache_env_box.o core/box/tiny_unified_lifo_env_box.o core/box/front_fastlane_alloc_legacy_direct_env_box.o core/box/fastlane_direct_env_box.o core/box/tiny_header_hotfull_env_box.o core/box/tiny_inline_slots_fixed_mode_box.o core/box/tiny_inline_slots_switch_dispatch_fixed_box.o core/box/free_path_commit_once_fixed_box.o core/box/free_path_legacy_mask_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/tiny_c7_ultra_segment.o core/tiny_c7_ultra.o core/tiny_c6_inline_slots.o core/tiny_c5_inline_slots.o core/tiny_c2_local_cache.o core/tiny_c3_inline_slots.o core/tiny_c4_inline_slots.o core/link_stubs.o core/tiny_failfast.o core/tiny_destructors.o core/smallobject_hotbox_v3.o core/smallobject_hotbox_v4.o core/smallobject_hotbox_v5.o core/smallsegment_v5.o core/smallobject_cold_iface_v5.o core/smallsegment_v6.o core/smallobject_cold_iface_v6.o core/smallobject_core_v6.o core/region_id_v6.o core/smallsegment_v7.o core/smallobject_cold_iface_v7.o core/mid_hotbox_v3.o core/smallobject_policy_v7.o core/smallobject_segment_mid_v3.o core/smallobject_cold_iface_mid_v3.o core/smallobject_stats_mid_v3.o core/smallobject_learner_v2.o core/smallobject_mid_v35.o core/box/small_policy_snapshot_tls_box.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
|
|
|
|
|
|
ifeq ($(POOL_TLS_PHASE1),1)
|
2025-11-09 18:55:50 +09:00
|
|
|
|
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
2025-11-08 23:53:25 +09:00
|
|
|
|
endif
|
2025-11-14 15:32:07 +09:00
|
|
|
|
ifeq ($(POOL_TLS_BIND_BOX),1)
|
|
|
|
|
|
TINY_BENCH_OBJS += pool_tls_bind.o
|
|
|
|
|
|
endif
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_tiny built with hakmem"
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_mt: bench_tiny_mt.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_tiny_mt built with hakmem"
|
|
|
|
|
|
|
|
|
|
|
|
# Burst+Pause bench (mimalloc stress pattern)
|
|
|
|
|
|
bench_burst_pause_hakmem.o: bench_burst_pause.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_system.o: bench_burst_pause.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mi.o: bench_burst_pause.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_hakmem: bench_burst_pause_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_hakmem built"
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_system: bench_burst_pause_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_system built"
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mi: bench_burst_pause_mi.o
|
|
|
|
|
|
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_mi built"
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_hakmem.o: bench_burst_pause_mt.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_system.o: bench_burst_pause_mt.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_mi.o: bench_burst_pause_mt.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_hakmem: bench_burst_pause_mt_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_mt_hakmem built"
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_system: bench_burst_pause_mt_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_mt_system built"
|
|
|
|
|
|
|
|
|
|
|
|
bench_burst_pause_mt_mi: bench_burst_pause_mt_mi.o
|
|
|
|
|
|
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_burst_pause_mt_mi built"
|
|
|
|
|
|
|
2025-11-07 12:09:28 +09:00
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
# Hako FFI stub (optional; for front-end integration smoke)
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
hako_ffi_stub: libhako_ffi_stub.a
|
|
|
|
|
|
@echo "✓ libhako_ffi_stub.a built"
|
|
|
|
|
|
|
|
|
|
|
|
hako_ffi_stub.o: src/hako/ffi_stub.c include/hako/ffi.h include/hako/types.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o hako_ffi_stub.o src/hako/ffi_stub.c
|
|
|
|
|
|
|
|
|
|
|
|
libhako_ffi_stub.a: hako_ffi_stub.o
|
|
|
|
|
|
ar rcs $@ $^
|
|
|
|
|
|
|
|
|
|
|
|
# Smoke test for Hako FFI stubs
|
|
|
|
|
|
hako_smoke: hako_ffi_stub tests/hako_smoke.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -o hako_smoke tests/hako_smoke.c libhako_ffi_stub.a
|
|
|
|
|
|
@echo "✓ hako_smoke built"
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
# Larson benchmarks (Google/mimalloc-bench style)
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
LARSON_SRC := mimalloc-bench/bench/larson/larson.cpp
|
|
|
|
|
|
|
|
|
|
|
|
# System variant (uses system malloc/free)
|
|
|
|
|
|
larson_system.o: $(LARSON_SRC)
|
|
|
|
|
|
$(CXX) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
larson_system: larson_system.o
|
|
|
|
|
|
$(CXX) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# mimalloc variant (direct link to prebuilt mimalloc)
|
|
|
|
|
|
larson_mi.o: $(LARSON_SRC)
|
|
|
|
|
|
$(CXX) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
larson_mi: larson_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CXX) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
2025-11-05 03:43:50 +00:00
|
|
|
|
# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
|
|
|
|
|
|
larson_hakmem.o: $(LARSON_SRC)
|
|
|
|
|
|
$(CXX) $(CFLAGS) -I core -c -o $@ $<
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
2025-11-05 03:43:50 +00:00
|
|
|
|
larson_hakmem: larson_hakmem.o $(TINY_BENCH_OBJS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
$(CXX) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
test_mf2: test_mf2.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ test_mf2 built with hakmem"
|
|
|
|
|
|
|
|
|
|
|
|
# bench_comprehensive.o with USE_HAKMEM flag
|
|
|
|
|
|
bench_comprehensive.o: bench_comprehensive.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c $< -o $@
|
|
|
|
|
|
|
|
|
|
|
|
bench_comprehensive_hakmem: bench_comprehensive.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_comprehensive_hakmem built with hakmem"
|
|
|
|
|
|
|
|
|
|
|
|
bench_comprehensive_system: bench_comprehensive.c
|
|
|
|
|
|
$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_comprehensive_system built (system malloc)"
|
|
|
|
|
|
|
|
|
|
|
|
# mimalloc direct-link variant (no LD_PRELOAD dependency)
|
|
|
|
|
|
bench_comprehensive_mi: bench_comprehensive.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include \
|
|
|
|
|
|
bench_comprehensive.c -o $@ \
|
|
|
|
|
|
-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_comprehensive_mi built (direct link to mimalloc)"
|
|
|
|
|
|
|
|
|
|
|
|
# hakx (new hybrid) front API stubs
|
|
|
|
|
|
HAKX_OBJS = engines/hakx/hakx_api_stub.o engines/hakx/hakx_front_tiny.o engines/hakx/hakx_l25_tuner.o
|
|
|
|
|
|
|
|
|
|
|
|
engines/hakx/hakx_api_stub.o: engines/hakx/hakx_api_stub.c include/hakx/hakx_api.h engines/hakx/hakx_front_tiny.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
# hakx variant for tiny hot bench (direct link via hakx API)
|
|
|
|
|
|
bench_tiny_hot_hakx.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_hakx: bench_tiny_hot_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_tiny_hot_hakx built (hakx API stub)"
|
|
|
|
|
|
|
|
|
|
|
|
# P0 variant with batch refill optimization
|
|
|
|
|
|
bench_tiny_hot_hakx_p0.o: bench_tiny_hot.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DHAKMEM_TINY_P0_BATCH_REFILL=1 -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_hakx_p0: bench_tiny_hot_hakx_p0.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_tiny_hot_hakx_p0 built (with P0 batch refill)"
|
|
|
|
|
|
|
|
|
|
|
|
# hak_tiny_alloc/free 直叩きの比較用ベンチ
|
|
|
|
|
|
bench_tiny_hot_direct.o: bench_tiny_hot_direct.c core/hakmem_tiny.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"
|
|
|
|
|
|
|
|
|
|
|
|
# hakmi variant for comprehensive bench (front + mimalloc backend)
|
2025-11-09 11:50:18 +09:00
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
|
|
|
|
|
|
bench_comprehensive.c -o $@ \
|
|
|
|
|
|
adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o \
|
2025-11-09 11:50:18 +09:00
|
|
|
|
-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"
|
|
|
|
|
|
|
|
|
|
|
|
# hakx variant for comprehensive bench
|
|
|
|
|
|
bench_comprehensive_hakx: bench_comprehensive.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast \
|
|
|
|
|
|
bench_comprehensive.c -o $@ $(HAKX_OBJS) $(TINY_BENCH_OBJS) $(LDFLAGS)
|
|
|
|
|
|
@echo "✓ bench_comprehensive_hakx built (hakx API stub)"
|
|
|
|
|
|
|
|
|
|
|
|
# Random mixed bench (direct link variants)
|
2025-11-29 16:19:53 +09:00
|
|
|
|
# Phase 7-Step2: Enable PGO mode for bench builds (compile-time unified gate)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
bench_random_mixed_hakmem.o: bench_random_mixed.c hakmem.h
|
2025-11-29 16:19:53 +09:00
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -DHAKMEM_TINY_FRONT_PGO=1 -c -o $@ $<
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
bench_random_mixed_system.o: bench_random_mixed.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_random_mixed_mi.o: bench_random_mixed.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-12-16 15:01:56 +09:00
|
|
|
|
# Phase 35-A: BENCH_MINIMAL target (eliminates gate function overhead)
|
|
|
|
|
|
# Usage: make bench_random_mixed_hakmem_minimal
|
|
|
|
|
|
# Note: This rebuilds all objects with -DHAKMEM_BENCH_MINIMAL=1
|
|
|
|
|
|
# Purpose: Pure performance measurement (FAST build)
|
|
|
|
|
|
.PHONY: bench_random_mixed_hakmem_minimal
|
|
|
|
|
|
bench_random_mixed_hakmem_minimal:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_minimal
|
|
|
|
|
|
|
2025-12-17 21:08:17 +09:00
|
|
|
|
# Phase 63: FAST profile fixed target (BENCH_MINIMAL + FAST_PROFILE_FIXED)
|
|
|
|
|
|
# Usage: make bench_random_mixed_hakmem_fast_fixed
|
|
|
|
|
|
# Note: This rebuilds all objects with BENCH_MINIMAL + FAST_PROFILE_FIXED.
|
|
|
|
|
|
# Purpose: FAST build with compile-time constant gates matching MIXED_TINYV3_C7_SAFE defaults.
|
|
|
|
|
|
.PHONY: bench_random_mixed_hakmem_fast_fixed
|
|
|
|
|
|
bench_random_mixed_hakmem_fast_fixed:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_FAST_PROFILE_FIXED=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_fast_fixed
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 65: Hot Symbol Ordering was investigated but is BLOCKED under the current
|
|
|
|
|
|
# GCC+LTO toolchain constraints (see docs/analysis/PHASE65_HOT_SYMBOL_ORDERING_1_RESULTS.md).
|
|
|
|
|
|
# We intentionally do not provide a build target that disables LTO or swaps linkers,
|
|
|
|
|
|
# because it makes baseline comparisons unfair and tends to introduce layout tax.
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 64: Backend pruning target (BENCH_MINIMAL + FAST_PROFILE_FIXED + FAST_PROFILE_PRUNE_BACKENDS)
|
|
|
|
|
|
# Usage: make bench_random_mixed_hakmem_fast_pruned
|
|
|
|
|
|
# Note: This rebuilds all objects with BENCH_MINIMAL + FAST_PROFILE_FIXED + FAST_PROFILE_PRUNE_BACKENDS.
|
|
|
|
|
|
# Purpose: LTO DCE optimization - makes MID_V3, POOL_V2 unreachable at compile-time for +5-10% gain
|
|
|
|
|
|
.PHONY: bench_random_mixed_hakmem_fast_pruned
|
|
|
|
|
|
bench_random_mixed_hakmem_fast_pruned:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_FAST_PROFILE_FIXED=1 -DHAKMEM_FAST_PROFILE_PRUNE_BACKENDS=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_fast_pruned
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 66: PGO (Profile-Guided Optimization) for FAST minimal build (keeps GCC+LTO)
|
|
|
|
|
|
# Usage: make pgo-fast-full
|
|
|
|
|
|
.PHONY: pgo-fast-profile pgo-fast-collect pgo-fast-build pgo-fast-full
|
|
|
|
|
|
pgo-fast-profile:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 66: Building PGO Profile Binaries (FAST minimal)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) PROFILE_GEN=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1'
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO profile binaries built (FAST minimal)"
|
|
|
|
|
|
@echo "Next: make pgo-fast-collect"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
pgo-fast-collect:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 66: Collecting PGO Profile Data (FAST minimal)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
PGO_CONFIG=pgo_fast_profile_config.sh ./scripts/box/pgo_tiny_profile_box.sh
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO profile collection complete"
|
|
|
|
|
|
@echo "Next: make pgo-fast-build"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
pgo-fast-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 66: Building PGO-Optimized Binary (FAST minimal)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) PROFILE_USE=1 bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_minimal_pgo
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO-optimized FAST minimal binary built: bench_random_mixed_hakmem_minimal_pgo"
|
|
|
|
|
|
@echo "Next: BENCH_BIN=./bench_random_mixed_hakmem_minimal_pgo scripts/run_mixed_10_cleanenv.sh"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
pgo-fast-full: pgo-fast-profile pgo-fast-collect pgo-fast-build
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 66: PGO Full Workflow Complete (FAST minimal)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
BENCH_BIN=./bench_random_mixed_hakmem_minimal_pgo scripts/run_mixed_10_cleanenv.sh
|
|
|
|
|
|
|
Phase 54-60: Memory-Lean mode, Balanced mode stabilization, M1 (50%) achievement
## Summary
Completed Phase 54-60 optimization work:
**Phase 54-56: Memory-Lean mode (LEAN+OFF prewarm suppression)**
- Implemented ss_mem_lean_env_box.h with ENV gates
- Balanced mode (LEAN+OFF) promoted as production default
- Result: +1.2% throughput, better stability, zero syscall overhead
- Added to bench_profile.h: MIXED_TINYV3_C7_BALANCED preset
**Phase 57: 60-min soak finalization**
- Balanced mode: 60-min soak, RSS drift 0%, CV 5.38%
- Speed-first mode: 60-min soak, RSS drift 0%, CV 1.58%
- Syscall budget: 1.25e-7/op (800× under target)
- Status: PRODUCTION-READY
**Phase 59: 50% recovery baseline rebase**
- hakmem FAST (Balanced): 59.184M ops/s, CV 1.31%
- mimalloc: 120.466M ops/s, CV 3.50%
- Ratio: 49.13% (M1 ACHIEVED within statistical noise)
- Superior stability: 2.68× better CV than mimalloc
**Phase 60: Alloc pass-down SSOT (NO-GO)**
- Implemented alloc_passdown_ssot_env_box.h
- Modified malloc_tiny_fast.h for SSOT pattern
- Result: -0.46% (NO-GO)
- Key lesson: SSOT not applicable where early-exit already optimized
## Key Metrics
- Performance: 49.13% of mimalloc (M1 effectively achieved)
- Stability: CV 1.31% (superior to mimalloc 3.50%)
- Syscall budget: 1.25e-7/op (excellent)
- RSS: 33MB stable, 0% drift over 60 minutes
## Files Added/Modified
New boxes:
- core/box/ss_mem_lean_env_box.h
- core/box/ss_release_policy_box.{h,c}
- core/box/alloc_passdown_ssot_env_box.h
Scripts:
- scripts/soak_mixed_single_process.sh
- scripts/analyze_epoch_tail_csv.py
- scripts/soak_mixed_rss.sh
- scripts/calculate_percentiles.py
- scripts/analyze_soak.py
Documentation: Phase 40-60 analysis documents
## Design Decisions
1. Profile separation (core/bench_profile.h):
- MIXED_TINYV3_C7_SAFE: Speed-first (no LEAN)
- MIXED_TINYV3_C7_BALANCED: Balanced mode (LEAN+OFF)
2. Box Theory compliance:
- All ENV gates reversible (HAKMEM_SS_MEM_LEAN, HAKMEM_ALLOC_PASSDOWN_SSOT)
- Single conversion points maintained
- No physical deletions (compile-out only)
3. Lessons learned:
- SSOT effective only where redundancy exists (Phase 60 showed limits)
- Branch prediction extremely effective (~0 cycles for well-predicted branches)
- Early-exit pattern valuable even when seemingly redundant
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-17 06:24:01 +09:00
|
|
|
|
# Phase 47: FAST+PGO target (BENCH_MINIMAL + TINY_FRONT_PGO)
|
|
|
|
|
|
# Usage: make bench_random_mixed_hakmem_fast_pgo
|
|
|
|
|
|
# Note: This rebuilds all objects with BENCH_MINIMAL + TINY_FRONT_PGO
|
|
|
|
|
|
# Purpose: FAST build with compile-time fixed front config (phase 47 A/B test)
|
|
|
|
|
|
.PHONY: bench_random_mixed_hakmem_fast_pgo
|
|
|
|
|
|
bench_random_mixed_hakmem_fast_pgo:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_BENCH_MINIMAL=1 -DHAKMEM_TINY_FRONT_PGO=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_fast_pgo
|
|
|
|
|
|
|
2025-12-16 15:01:56 +09:00
|
|
|
|
# Phase 35-B: OBSERVE target (enables diagnostic counters for behavior observation)
|
|
|
|
|
|
# Usage: make bench_random_mixed_hakmem_observe
|
|
|
|
|
|
# Note: This rebuilds all objects with stats/trace compiled in
|
|
|
|
|
|
# Purpose: Behavior observation & debugging (OBSERVE build)
|
|
|
|
|
|
.PHONY: bench_random_mixed_hakmem_observe
|
|
|
|
|
|
bench_random_mixed_hakmem_observe:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) bench_random_mixed_hakmem EXTRA_CFLAGS='-DHAKMEM_TINY_CLASS_STATS_COMPILED=1 -DHAKMEM_TINY_FREE_STATS_COMPILED=1 -DHAKMEM_UNIFIED_CACHE_STATS_COMPILED=1 -DHAKMEM_TINY_FREE_TRACE_COMPILED=1'
|
|
|
|
|
|
mv bench_random_mixed_hakmem bench_random_mixed_hakmem_observe
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 38: Automated perf workflow targets
|
|
|
|
|
|
# Usage: make perf_fast - Build FAST binary and run 10-run benchmark
|
|
|
|
|
|
# Usage: make perf_observe - Build OBSERVE binary and run health check + 1-run perf
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: perf_fast
|
|
|
|
|
|
perf_fast: bench_random_mixed_hakmem_minimal
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "Phase 38: FAST build 10-run benchmark"
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
BENCH_BIN=./bench_random_mixed_hakmem_minimal scripts/run_mixed_10_cleanenv.sh
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "FAST benchmark complete. See results above."
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: perf_observe
|
|
|
|
|
|
perf_observe: bench_random_mixed_hakmem_observe
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "Phase 38: OBSERVE build health check"
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "[1/3] Health profiles check..."
|
|
|
|
|
|
scripts/verify_health_profiles.sh || echo "Health check script not found, skipping"
|
|
|
|
|
|
@echo "[2/3] Syscall stats (1-run)..."
|
|
|
|
|
|
HAKMEM_SS_OS_STATS=1 ./bench_random_mixed_hakmem_observe 20000000 400 1 2>&1 | grep -E "^\[|^Throughput"
|
|
|
|
|
|
@echo "[3/3] Single perf run..."
|
|
|
|
|
|
./bench_random_mixed_hakmem_observe 20000000 400 1 2>&1 | grep "^Throughput"
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "OBSERVE health check complete."
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: perf_all
|
|
|
|
|
|
perf_all: perf_fast perf_observe
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
@echo "Phase 38: All perf checks complete"
|
|
|
|
|
|
@echo "========================================"
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
bench_random_mixed_system: bench_random_mixed_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-29 14:18:20 +09:00
|
|
|
|
# Mid MT gap benchmark (1KB-8KB allocations) - Phase 5-Step2 verification
|
|
|
|
|
|
bench_mid_mt_gap_hakmem.o: bench_mid_mt_gap.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_mid_mt_gap_system.o: bench_mid_mt_gap.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_mid_mt_gap_hakmem: bench_mid_mt_gap_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
bench_mid_mt_gap_system: bench_mid_mt_gap_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-10 00:25:02 +09:00
|
|
|
|
# Fixed-size microbench (direct link variants)
|
|
|
|
|
|
bench_fixed_size_hakmem.o: benchmarks/src/fixed/bench_fixed_size.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_fixed_size_system.o: benchmarks/src/fixed/bench_fixed_size.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_fixed_size_hakmem: bench_fixed_size_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
bench_fixed_size_system: bench_fixed_size_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_random_mixed_mi: bench_random_mixed_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# hakmi variant for random mixed bench
|
|
|
|
|
|
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<
|
|
|
|
|
|
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS) bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# hakx variant for random mixed bench
|
|
|
|
|
|
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_random_mixed_hakx: bench_random_mixed_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# VM-mixed bench around L2.5 (512KB–<2MB)
|
|
|
|
|
|
bench_vm_mixed_hakmem.o: bench_vm_mixed.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_vm_mixed_system.o: bench_vm_mixed.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_vm_mixed_hakmem: bench_vm_mixed_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
bench_vm_mixed_system: bench_vm_mixed_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# Ultra-fast build for benchmarks: trims unwinding/PLT overhead and
|
|
|
|
|
|
# improves code locality. Use: `make bench_fast` then run the binary.
|
|
|
|
|
|
bench_fast: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
|
|
|
|
|
|
bench_fast: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_fast: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakx
|
|
|
|
|
|
@echo "✓ bench_fast build complete"
|
|
|
|
|
|
|
|
|
|
|
|
# Perf-Main (safe) bench build: no bench-only macros; same O flags
|
|
|
|
|
|
perf_main: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
|
|
|
|
|
|
perf_main: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
perf_main: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi bench_comprehensive_hakx bench_tiny_hot_hakx bench_random_mixed_hakx
|
|
|
|
|
|
@echo "✓ perf_main build complete (no bench-only macros)"
|
|
|
|
|
|
|
|
|
|
|
|
# Mid/Large (8–32KiB) bench
|
|
|
|
|
|
bench_mid_large_hakmem.o: bench_mid_large.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_system.o: bench_mid_large.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_mi.o: bench_mid_large.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
bench_mid_large_system: bench_mid_large_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_mid_large_mi: bench_mid_large_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# hakx variant for mid/large (1T)
|
|
|
|
|
|
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_mid_large_hakx: bench_mid_large_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# Mid/Large MT (8–32KiB) bench
|
|
|
|
|
|
bench_mid_large_mt_hakmem.o: bench_mid_large_mt.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_mt_system.o: bench_mid_large_mt.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_mt_mi.o: bench_mid_large_mt.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
bench_mid_large_mt_system: bench_mid_large_mt_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_mid_large_mt_mi: bench_mid_large_mt_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# hakx variant for mid/large MT
|
|
|
|
|
|
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -I include -DUSE_HAKX -include include/hakx/hakx_api.h -include include/hakx/hakx_fast_inline.h -Dmalloc=hakx_malloc_fast -Dfree=hakx_free_fast -Drealloc=hakx_realloc_fast -c -o $@ $<
|
|
|
|
|
|
|
|
|
|
|
|
bench_mid_large_mt_hakx: bench_mid_large_mt_hakx.o $(HAKX_OBJS) $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
# Fragmentation stress bench
|
|
|
|
|
|
bench_fragment_stress_hakmem.o: bench_fragment_stress.c hakmem.h
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $<
|
|
|
|
|
|
bench_fragment_stress_system.o: bench_fragment_stress.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
|
|
|
|
|
bench_fragment_stress_mi.o: bench_fragment_stress.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
|
|
|
|
|
|
bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
bench_fragment_stress_system: bench_fragment_stress_system.o
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
2025-11-09 11:50:18 +09:00
|
|
|
|
bench_fragment_stress_mi: bench_fragment_stress_mi.o bench_mi_force.o
|
|
|
|
|
|
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# Bench build with Minimal Tiny Front (physically excludes optional front tiers)
|
|
|
|
|
|
bench_tiny_front: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
|
|
|
|
|
|
bench_tiny_front: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_tiny_front: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
|
|
|
|
|
|
@echo "✓ bench_tiny_front build complete (HAKMEM_TINY_MINIMAL_FRONT=1)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench build with Strict Front (compile-out optional front tiers, baseline structure)
|
|
|
|
|
|
bench_front_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1
|
|
|
|
|
|
bench_front_strict: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_front_strict: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
|
|
|
|
|
|
@echo "✓ bench_front_strict build complete (HAKMEM_TINY_STRICT_FRONT=1)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench build with Ultra (SLL-only front) for Tiny-Hot microbench
|
|
|
|
|
|
# - Compiles hakmem bench with SLL-first/strict front, without Quick/FrontCache, stats off
|
|
|
|
|
|
# - Only affects bench binaries; normal builds unchanged
|
|
|
|
|
|
bench_ultra_strict: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
|
|
|
|
|
|
-DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_TINY_STRICT_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 \
|
|
|
|
|
|
-DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
|
|
|
|
|
|
bench_ultra_strict: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_ultra_strict: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_ultra_strict build complete (ULTRA+STRICT front)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench build with Ultra (SLL-only) but without STRICT/MINIMAL, Quick/FrontCache compiled out
|
|
|
|
|
|
bench_ultra: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
|
|
|
|
|
|
-DHAKMEM_TINY_ULTRA=1 -DHAKMEM_TINY_TLS_SLL=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
|
|
|
|
|
|
bench_ultra: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_ultra: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_ultra build complete (ULTRA SLL-only, Quick/FrontCache OFF)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench build with explicit bench fast path (SLL→Mag→tiny reflll), stats/quick/front off
|
|
|
|
|
|
bench_fastpath: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
|
|
|
|
|
|
-DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
|
|
|
|
|
|
bench_fastpath: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_fastpath: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_fastpath build complete (bench-only fast path)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench build: SLL-only (≤64B), with warmup
|
|
|
|
|
|
bench_sll_only: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables \
|
|
|
|
|
|
-DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 \
|
|
|
|
|
|
-DHAKMEM_TINY_BENCH_WARMUP32=160 -DHAKMEM_TINY_BENCH_WARMUP64=192 -DHAKMEM_TINY_BENCH_WARMUP8=64 -DHAKMEM_TINY_BENCH_WARMUP16=96 \
|
|
|
|
|
|
-DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0
|
|
|
|
|
|
bench_sll_only: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_sll_only: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_sll_only build complete (bench-only SLL-only + warmup)"
|
|
|
|
|
|
|
|
|
|
|
|
# Bench-fastpath with explicit refill sizes (A/B)
|
|
|
|
|
|
bench_fastpath_r8: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=8 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
|
|
|
|
|
|
bench_fastpath_r8: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_fastpath_r8: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_fastpath_r8 build complete"
|
|
|
|
|
|
|
|
|
|
|
|
bench_fastpath_r12: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=12 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
|
|
|
|
|
|
bench_fastpath_r12: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_fastpath_r12: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_fastpath_r12 build complete"
|
|
|
|
|
|
|
|
|
|
|
|
bench_fastpath_r16: CFLAGS += -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL=16 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0 -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables
|
|
|
|
|
|
bench_fastpath_r16: LDFLAGS += -Wl,-O2
|
|
|
|
|
|
bench_fastpath_r16: clean bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo "✓ bench_fastpath_r16 build complete"
|
|
|
|
|
|
|
|
|
|
|
|
# PGO for bench-fastpath
|
|
|
|
|
|
pgo-benchfast-profile:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Profile (bench-fastpath)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.gcda *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
|
|
|
|
|
|
./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
|
|
|
|
|
|
@echo "✓ bench-fastpath profile data collected (*.gcda)"
|
|
|
|
|
|
|
|
|
|
|
|
pgo-benchfast-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Build (bench-fastpath)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "✓ bench-fastpath PGO build complete"
|
|
|
|
|
|
|
|
|
|
|
|
# Debug bench (with counters/prints)
|
|
|
|
|
|
bench_debug: CFLAGS += -DHAKMEM_DEBUG_COUNTERS=1 -g -O2
|
|
|
|
|
|
bench_debug: clean bench_comprehensive_hakmem bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi
|
|
|
|
|
|
@echo "✓ bench_debug build complete (debug counters enabled)"
|
|
|
|
|
|
|
2025-11-11 21:49:05 +09:00
|
|
|
|
# Debug build for random_mixed (enable counters for SFC stats)
|
|
|
|
|
|
.PHONY: bench_random_mixed_debug
|
|
|
|
|
|
bench_random_mixed_debug:
|
|
|
|
|
|
@echo "[debug] Rebuilding bench_random_mixed_hakmem with HAKMEM_DEBUG_COUNTERS=1"
|
|
|
|
|
|
$(MAKE) clean >/dev/null
|
|
|
|
|
|
$(MAKE) CFLAGS+=" -DHAKMEM_DEBUG_COUNTERS=1 -O2 -g" bench_random_mixed_hakmem >/dev/null
|
|
|
|
|
|
@echo "✓ bench_random_mixed_debug built"
|
|
|
|
|
|
|
2025-11-08 12:54:52 +09:00
|
|
|
|
# ========================================
|
|
|
|
|
|
# Phase 7 便利ターゲット(重要な定数がデフォルト化されています)
|
|
|
|
|
|
# ========================================
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 7: 全最適化を有効化(Task 1+2+3)
|
|
|
|
|
|
# 使い方: make phase7
|
|
|
|
|
|
# または: make phase7-bench で自動ベンチマーク
|
|
|
|
|
|
.PHONY: phase7 phase7-bench phase7-test
|
|
|
|
|
|
|
|
|
|
|
|
phase7:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 7: Building with all optimizations"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Flags:"
|
|
|
|
|
|
@echo " HEADER_CLASSIDX=1 (Task 1: Skip magic validation)"
|
|
|
|
|
|
@echo " AGGRESSIVE_INLINE=1 (Task 2: Inline TLS macros)"
|
|
|
|
|
|
@echo " PREWARM_TLS=1 (Task 3: Pre-warm cache)"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 \
|
|
|
|
|
|
bench_random_mixed_hakmem larson_hakmem
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ Phase 7 build complete!"
|
|
|
|
|
|
@echo " Run: make phase7-bench (quick benchmark)"
|
|
|
|
|
|
@echo " Run: make phase7-test (sanity test)"
|
|
|
|
|
|
|
|
|
|
|
|
phase7-bench: phase7
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 7 Quick Benchmark"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Larson 1T:"
|
|
|
|
|
|
@./larson_hakmem 1 1 128 1024 1 12345 1 2>&1 | grep "Throughput ="
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Random Mixed (128B, 256B, 1024B):"
|
|
|
|
|
|
@./bench_random_mixed_hakmem 100000 128 1234567 2>&1 | tail -1
|
|
|
|
|
|
@./bench_random_mixed_hakmem 100000 256 1234567 2>&1 | tail -1
|
|
|
|
|
|
@./bench_random_mixed_hakmem 100000 1024 1234567 2>&1 | tail -1
|
|
|
|
|
|
|
|
|
|
|
|
phase7-test: phase7
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 7 Sanity Test"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@./larson_hakmem 1 1 128 1024 1 12345 1 >/dev/null 2>&1 && echo "✓ Larson 1T OK" || echo "✗ Larson 1T FAILED"
|
|
|
|
|
|
@./bench_random_mixed_hakmem 10000 128 1234567 >/dev/null 2>&1 && echo "✓ Random Mixed 128B OK" || echo "✗ Random Mixed 128B FAILED"
|
|
|
|
|
|
@./bench_random_mixed_hakmem 10000 1024 1234567 >/dev/null 2>&1 && echo "✓ Random Mixed 1024B OK" || echo "✗ Random Mixed 1024B FAILED"
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
# Clean
|
|
|
|
|
|
clean:
|
2025-11-07 12:09:28 +09:00
|
|
|
|
rm -f $(OBJS) $(TARGET) $(BENCH_HAKMEM_OBJS) $(BENCH_SYSTEM_OBJS) $(BENCH_HAKMEM) $(BENCH_SYSTEM) $(SHARED_OBJS) $(SHARED_LIB) *.csv libhako_ffi_stub.a hako_ffi_stub.o
|
2025-11-05 12:31:14 +09:00
|
|
|
|
rm -f bench_comprehensive.o bench_comprehensive_hakmem bench_comprehensive_system
|
|
|
|
|
|
rm -f bench_tiny bench_tiny.o bench_tiny_mt bench_tiny_mt.o test_mf2 test_mf2.o bench_tiny_hakmem
|
2025-12-18 01:55:27 +09:00
|
|
|
|
rm -f bench_random_mixed_hakmem.o bench_random_mixed_system.o bench_random_mixed_mi.o
|
|
|
|
|
|
rm -f bench_tiny_hot_hakmem.o bench_tiny_hot_system.o bench_tiny_hot_mi.o bench_mi_force.o
|
|
|
|
|
|
rm -f bench_random_mixed_hakmem bench_random_mixed_system bench_random_mixed_mi bench_random_mixed_hakx
|
|
|
|
|
|
rm -f bench_random_mixed_hakmem_minimal bench_random_mixed_hakmem_minimal_pgo
|
|
|
|
|
|
rm -f bench_random_mixed_hakmem_fast_fixed bench_random_mixed_hakmem_fast_pruned bench_random_mixed_hakmem_fast_pgo
|
|
|
|
|
|
rm -f bench_tiny_hot_hakmem bench_tiny_hot_system bench_tiny_hot_mi bench_tiny_hot_hakmi bench_tiny_hot_hakx bench_tiny_hot_hakx_p0 bench_tiny_hot_direct
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
# Help
|
|
|
|
|
|
help:
|
|
|
|
|
|
@echo "hakmem PoC - Makefile targets:"
|
2025-11-08 12:54:52 +09:00
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "=== Phase 7 Optimizations (推奨) ==="
|
|
|
|
|
|
@echo " make phase7 - Phase 7全最適化ビルド (Task 1+2+3)"
|
|
|
|
|
|
@echo " make phase7-bench - Phase 7 + クイックベンチマーク"
|
|
|
|
|
|
@echo " make phase7-test - Phase 7 + サニティテスト"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "=== 基本ターゲット ==="
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo " make - Build the test program"
|
|
|
|
|
|
@echo " make run - Build and run the test"
|
|
|
|
|
|
@echo " make bench - Build benchmark programs"
|
|
|
|
|
|
@echo " make shared - Build shared library (for LD_PRELOAD)"
|
|
|
|
|
|
@echo " make clean - Clean build artifacts"
|
|
|
|
|
|
@echo " make bench-mode - Run Tiny-focused PGO bench (scripts/bench_mode.sh)"
|
|
|
|
|
|
@echo " make bench-all - Run (near) full mimalloc-bench with timeouts"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "Benchmark workflow:"
|
|
|
|
|
|
@echo " 1. make bench"
|
|
|
|
|
|
@echo " 2. bash bench_runner.sh --runs 10"
|
|
|
|
|
|
@echo " 3. python3 analyze_results.py benchmark_results.csv"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "mimalloc-bench workflow:"
|
|
|
|
|
|
@echo " 1. make shared"
|
|
|
|
|
|
@echo " 2. LD_PRELOAD=./libhakmem.so <benchmark>"
|
|
|
|
|
|
|
2025-11-29 11:58:37 +09:00
|
|
|
|
# Step 2: PGO (Profile-Guided Optimization) targets - temporarily disabled
|
2025-11-05 12:31:14 +09:00
|
|
|
|
pgo-profile:
|
|
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "PGO Profile Collection (disabled)"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "PGO flow is temporarily parked during Tiny front Phase 4 refactor."
|
|
|
|
|
|
@echo "Use normal builds instead, e.g.:"
|
|
|
|
|
|
@echo " ./build.sh release bench_random_mixed_hakmem"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
|
|
pgo-build:
|
|
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "PGO Optimized Build (disabled)"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "PGO flow is temporarily parked during Tiny front Phase 4 refactor."
|
|
|
|
|
|
@echo "Use normal builds instead, e.g.:"
|
|
|
|
|
|
@echo " ./build.sh release bench_random_mixed_hakmem"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
2025-11-29 11:58:37 +09:00
|
|
|
|
# PGO for tiny_hot (Strict Front) - temporarily disabled
|
2025-11-05 12:31:14 +09:00
|
|
|
|
pgo-hot-profile:
|
|
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "PGO Profile (tiny_hot) (disabled)"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo "========================================="
|
2025-11-29 11:58:37 +09:00
|
|
|
|
@echo "Tiny-hot PGO profiling is temporarily disabled."
|
|
|
|
|
|
@echo "Run benches directly instead, e.g.:"
|
|
|
|
|
|
@echo " ./build.sh release bench_tiny_hot_hakmem"
|
2025-11-05 12:31:14 +09:00
|
|
|
|
@echo "✓ tiny_hot profile data collected (*.gcda)"
|
|
|
|
|
|
|
|
|
|
|
|
pgo-hot-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Build (tiny_hot) with Strict Front"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_STRICT_FRONT=1" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "✓ tiny_hot PGO build complete"
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 8.2: Memory profiling build (verbose memory breakdown)
|
|
|
|
|
|
bench-memory: CFLAGS += -DHAKMEM_DEBUG_MEMORY
|
|
|
|
|
|
bench-memory: clean bench_comprehensive_hakmem
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Memory profiling build complete!"
|
|
|
|
|
|
@echo " Run: ./bench_comprehensive_hakmem"
|
|
|
|
|
|
@echo " Memory breakdown will be printed at end"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: all run bench shared debug clean help pgo-profile pgo-build bench-memory
|
|
|
|
|
|
|
|
|
|
|
|
# PGO for shared library (LD_PRELOAD)
|
|
|
|
|
|
# Step 1: Build instrumented shared lib and collect profile
|
|
|
|
|
|
pgo-profile-shared:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Step: PGO Profile Collection (shared lib)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *_shared.gcda *_shared.o $(SHARED_LIB)
|
|
|
|
|
|
$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-generate -flto" LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" shared
|
|
|
|
|
|
@echo "Running profile workload (LD_PRELOAD)..."
|
|
|
|
|
|
HAKMEM_WRAP_TINY=1 LD_PRELOAD=./$(SHARED_LIB) ./bench_comprehensive_system 2>&1 | grep -E "(SIZE CLASS:|Throughput:)" | head -20 || true
|
|
|
|
|
|
@echo "✓ Profile data collected (*.gcda for *_shared)"
|
|
|
|
|
|
|
|
|
|
|
|
# Step 2: Build optimized shared lib using profile
|
|
|
|
|
|
pgo-build-shared:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Step: PGO Optimized Build (shared lib)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *_shared.o $(SHARED_LIB)
|
|
|
|
|
|
$(MAKE) CFLAGS_SHARED="$(CFLAGS_SHARED) -fprofile-use -flto -Wno-error=coverage-mismatch" LDFLAGS="$(LDFLAGS) -fprofile-use -flto" shared
|
|
|
|
|
|
@echo "✓ LTO+PGO optimized shared library complete"
|
|
|
|
|
|
|
|
|
|
|
|
# Convenience: run Bench Mode script
|
|
|
|
|
|
bench-mode:
|
|
|
|
|
|
@bash scripts/bench_mode.sh
|
|
|
|
|
|
|
|
|
|
|
|
bench-all:
|
|
|
|
|
|
@bash scripts/run_all_benches_with_timeouts.sh
|
|
|
|
|
|
|
|
|
|
|
|
# PGO for bench_sll_only
|
|
|
|
|
|
pgo-benchsll-profile:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Profile (bench_sll_only)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.gcda *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
|
|
|
|
|
|
./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
|
|
|
|
|
|
@echo "✓ bench_sll_only profile data collected (*.gcda)"
|
|
|
|
|
|
|
|
|
|
|
|
pgo-benchsll-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Build (bench_sll_only)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "✓ bench_sll_only PGO build complete"
|
|
|
|
|
|
|
|
|
|
|
|
# Variant: SLL-only with REFILL=12 and WARMUP32=192 (tune for 32B)
|
|
|
|
|
|
pgo-benchsll-r12w192-profile:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Profile (bench_sll_only r12 w32=192)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.gcda *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-generate -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-generate -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "[profile-run] bench_tiny_hot_hakmem (8/16/32/64, batch=100, cycles=60000)"
|
|
|
|
|
|
./bench_tiny_hot_hakmem 8 100 60000 >/dev/null || true
|
2025-11-07 01:27:04 +09:00
|
|
|
|
./bench_tiny_hot_hakmem 16 100 60000 >/dev/null || true
|
2025-11-05 12:31:14 +09:00
|
|
|
|
./bench_tiny_hot_hakmem 32 100 60000 >/dev/null || true
|
|
|
|
|
|
./bench_tiny_hot_hakmem 64 100 60000 >/dev/null || true
|
|
|
|
|
|
@echo "✓ r12 w32=192 profile data collected (*.gcda)"
|
|
|
|
|
|
|
|
|
|
|
|
pgo-benchsll-r12w192-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "PGO Build (bench_sll_only r12 w32=192)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
rm -f *.o bench_tiny_hot_hakmem
|
|
|
|
|
|
$(MAKE) CFLAGS="$(CFLAGS) -fprofile-use -flto -DHAKMEM_TINY_BENCH_FASTPATH=1 -DHAKMEM_TINY_BENCH_SLL_ONLY=1 -DHAKMEM_TINY_BENCH_TINY_CLASSES=3 -DHAKMEM_TINY_BENCH_REFILL32=12 -DHAKMEM_TINY_BENCH_WARMUP32=192 -DHAKMEM_TINY_NO_QUICK -DHAKMEM_TINY_NO_FRONT_CACHE -DHAKMEM_TINY_MAG_OWNER=0" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) -fprofile-use -flto" bench_tiny_hot_hakmem >/dev/null
|
|
|
|
|
|
@echo "✓ r12 w32=192 PGO build complete"
|
|
|
|
|
|
MI_RPATH := $(shell pwd)/mimalloc-bench/extern/mi/out/release
|
|
|
|
|
|
# Sanitized builds (compiler-assisted debugging)
|
|
|
|
|
|
.PHONY: asan-larson ubsan-larson tsan-larson
|
|
|
|
|
|
|
|
|
|
|
|
SAN_ASAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
|
|
|
|
|
|
-fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_ASAN_LDFLAGS = -fsanitize=address,undefined
|
|
|
|
|
|
|
|
|
|
|
|
SAN_UBSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
|
|
|
|
|
|
-fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_UBSAN_LDFLAGS = -fsanitize=undefined
|
|
|
|
|
|
|
2025-11-07 12:09:28 +09:00
|
|
|
|
# Allocator-enabled sanitizer variants (no FORCE_LIBC)
|
|
|
|
|
|
# FIXME 2025-11-07: TLS initialization order issue - using libc for now
|
|
|
|
|
|
SAN_ASAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
|
|
|
|
|
|
-fsanitize=address,undefined -fno-sanitize-recover=all -fstack-protector-strong \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_ASAN_ALLOC_LDFLAGS = -fsanitize=address,undefined
|
|
|
|
|
|
|
|
|
|
|
|
SAN_UBSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto \
|
|
|
|
|
|
-fsanitize=undefined -fno-sanitize-recover=undefined -fstack-protector-strong \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_UBSAN_ALLOC_LDFLAGS = -fsanitize=undefined
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
SAN_TSAN_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_TSAN_LDFLAGS = -fsanitize=thread
|
|
|
|
|
|
|
2025-11-07 12:09:28 +09:00
|
|
|
|
# Variant: TSan with allocator enabled (no FORCE_LIBC)
|
|
|
|
|
|
# FIXME 2025-11-07: TLS initialization order issue - using libc for now
|
|
|
|
|
|
SAN_TSAN_ALLOC_CFLAGS = -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
|
|
|
|
|
|
-DHAKMEM_FORCE_LIBC_ALLOC_BUILD=1
|
|
|
|
|
|
SAN_TSAN_ALLOC_LDFLAGS = -fsanitize=thread
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
asan-larson:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_ASAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_ASAN_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_asan
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_asan with ASan/UBSan"
|
|
|
|
|
|
|
|
|
|
|
|
ubsan-larson:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_UBSAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_UBSAN_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_ubsan
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_ubsan with UBSan"
|
|
|
|
|
|
|
|
|
|
|
|
tsan-larson:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_TSAN_CFLAGS)" EXTRA_LDFLAGS="$(SAN_TSAN_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_tsan
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_tsan with TSan (no ASan)"
|
2025-11-07 01:27:04 +09:00
|
|
|
|
|
2025-11-07 12:09:28 +09:00
|
|
|
|
.PHONY: tsan-larson-alloc
|
|
|
|
|
|
tsan-larson-alloc:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_TSAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_TSAN_ALLOC_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_tsan_alloc
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_tsan_alloc with TSan (allocator enabled)"
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: asan-larson-alloc ubsan-larson-alloc
|
|
|
|
|
|
asan-larson-alloc:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_ASAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_ASAN_ALLOC_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_asan_alloc
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_asan_alloc with ASan/UBSan (allocator enabled)"
|
|
|
|
|
|
|
|
|
|
|
|
ubsan-larson-alloc:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) larson_hakmem EXTRA_CFLAGS="$(SAN_UBSAN_ALLOC_CFLAGS)" EXTRA_LDFLAGS="$(SAN_UBSAN_ALLOC_LDFLAGS)" >/dev/null
|
|
|
|
|
|
@cp -f larson_hakmem larson_hakmem_ubsan_alloc
|
|
|
|
|
|
@echo "✓ Built larson_hakmem_ubsan_alloc with UBSan (allocator enabled)"
|
|
|
|
|
|
|
|
|
|
|
|
# Sanitized shared libraries for LD_PRELOAD (allocator enabled)
|
|
|
|
|
|
.PHONY: asan-shared-alloc tsan-shared-alloc
|
|
|
|
|
|
asan-shared-alloc:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) SHARED_LIB=libhakmem_asan.so \
|
|
|
|
|
|
CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_ASAN_ALLOC_CFLAGS)" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) $(SAN_ASAN_ALLOC_LDFLAGS)" shared >/dev/null
|
|
|
|
|
|
@echo "✓ Built libhakmem_asan.so (LD_PRELOAD, allocator enabled)"
|
|
|
|
|
|
|
|
|
|
|
|
tsan-shared-alloc:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) SHARED_LIB=libhakmem_tsan.so \
|
|
|
|
|
|
CFLAGS_SHARED="$(CFLAGS_SHARED) $(SAN_TSAN_ALLOC_CFLAGS)" \
|
|
|
|
|
|
LDFLAGS="$(LDFLAGS) $(SAN_TSAN_ALLOC_LDFLAGS)" shared >/dev/null
|
|
|
|
|
|
@echo "✓ Built libhakmem_tsan.so (LD_PRELOAD, allocator enabled)"
|
|
|
|
|
|
|
|
|
|
|
|
# TSan multithread smoke linking against allocator (direct link)
|
|
|
|
|
|
.PHONY: mt-smoke-tsan
|
|
|
|
|
|
mt-smoke-tsan:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) $(TINY_BENCH_OBJS) >/dev/null
|
|
|
|
|
|
$(CC) -O1 -g -fno-omit-frame-pointer -fno-lto -fsanitize=thread \
|
|
|
|
|
|
-o mt_smoke tests/mt_smoke.c $(TINY_BENCH_OBJS) $(LDFLAGS) -fsanitize=thread
|
|
|
|
|
|
@echo "✓ Built mt_smoke (TSan)"
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
# Convenience targets (debug/route/3layer)
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
.PHONY: larson_hakmem_3layer larson_hakmem_route
|
|
|
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
2025-11-07 12:09:28 +09:00
|
|
|
|
# Runtime helpers: sanitizer-safe runners for debugging/bench
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# Default run params (overridable):
|
|
|
|
|
|
THREADS ?= 4
|
|
|
|
|
|
SLEEP ?= 10
|
|
|
|
|
|
MIN ?= 8
|
|
|
|
|
|
MAX ?= 128
|
|
|
|
|
|
CHPT ?= 1024
|
|
|
|
|
|
ROUNDS ?= 1
|
|
|
|
|
|
SEED ?= 12345
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve libasan from the active toolchain
|
|
|
|
|
|
ASAN_LIB := $(shell $(CC) -print-file-name=libasan.so)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: asan-preload-run
|
|
|
|
|
|
asan-preload-run:
|
|
|
|
|
|
@$(MAKE) -j asan-shared-alloc larson_system >/dev/null
|
|
|
|
|
|
@echo "[asan-preload] LD_PRELOAD chain: $$LD_PRELOAD"
|
|
|
|
|
|
@echo "[asan-preload] Running: ./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
|
|
|
|
|
|
@LSAN_OPTIONS=detect_leaks=0 \
|
|
|
|
|
|
LD_PRELOAD="$(ASAN_LIB):$(PWD)/libhakmem_asan.so" \
|
|
|
|
|
|
./larson_system $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: asan-preload-mailbox-lite
|
|
|
|
|
|
asan-preload-mailbox-lite:
|
|
|
|
|
|
@$(MAKE) -j asan-shared-alloc larson_system >/dev/null
|
|
|
|
|
|
@echo "[asan-preload-mailbox-lite] (short-run)"
|
|
|
|
|
|
@echo "[asan-preload-mailbox-lite] Running: ./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)"
|
|
|
|
|
|
@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
|
|
|
|
|
|
HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
|
|
|
|
|
|
LSAN_OPTIONS=detect_leaks=0 \
|
|
|
|
|
|
LD_PRELOAD="$(ASAN_LIB):$(PWD)/libhakmem_asan.so" \
|
|
|
|
|
|
./larson_system 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: ubsan-mailbox-run
|
|
|
|
|
|
ubsan-mailbox-run:
|
|
|
|
|
|
@$(MAKE) -j ubsan-larson-alloc >/dev/null
|
|
|
|
|
|
@echo "[ubsan-mailbox] Running: ./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)"
|
|
|
|
|
|
@HAKMEM_WRAP_TINY=1 HAKMEM_TINY_SS_ADOPT=1 \
|
|
|
|
|
|
./larson_hakmem_ubsan_alloc $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
|
|
|
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
2025-11-07 18:07:48 +09:00
|
|
|
|
# HAKMEM direct-link benches & reproducer helpers
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: bench-hakmem
|
|
|
|
|
|
bench-hakmem:
|
|
|
|
|
|
@$(MAKE) -j larson_hakmem >/dev/null
|
|
|
|
|
|
@echo "== hakmem 1T ==" && ./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) 1
|
|
|
|
|
|
@echo "== hakmem $(THREADS)T ==" && ./larson_hakmem $(SLEEP) $(MIN) $(MAX) $(CHPT) $(ROUNDS) $(SEED) $(THREADS)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: bench-hakmem-hot64
|
|
|
|
|
|
bench-hakmem-hot64:
|
|
|
|
|
|
@$(MAKE) -j larson_hakmem >/dev/null
|
|
|
|
|
|
@echo "== hakmem HOT64 1T ==" && HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) 1
|
|
|
|
|
|
@echo "== hakmem HOT64 $(THREADS)T ==" && HAKMEM_TINY_REFILL_COUNT_HOT=64 ./larson_hakmem 5 $(MIN) $(MAX) 512 $(ROUNDS) $(SEED) $(THREADS)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: bench-hakmem-hot64-fastcap-ab
|
|
|
|
|
|
bench-hakmem-hot64-fastcap-ab:
|
|
|
|
|
|
@$(MAKE) -j larson_hakmem >/dev/null
|
|
|
|
|
|
@for cap in 8 16 32; do \
|
|
|
|
|
|
echo "== HOT64 FastCap=$$cap $(THREADS)T (short) =="; \
|
|
|
|
|
|
HAKMEM_TINY_REFILL_COUNT_HOT=64 HAKMEM_TINY_FAST_CAP=$$cap \
|
|
|
|
|
|
HAKMEM_TINY_DEBUG_REMOTE_GUARD=1 HAKMEM_TINY_TRACE_RING=1 \
|
|
|
|
|
|
./larson_hakmem 5 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true; \
|
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: valgrind-hakmem-hot64-lite
|
|
|
|
|
|
valgrind-hakmem-hot64-lite:
|
|
|
|
|
|
@$(MAKE) clean >/dev/null
|
|
|
|
|
|
@$(MAKE) OPT_LEVEL=0 USE_LTO=0 NATIVE=0 larson_hakmem >/dev/null
|
|
|
|
|
|
@echo "== valgrind HOT64 lite $(THREADS)T =="
|
|
|
|
|
|
@HAKMEM_TINY_REFILL_COUNT_HOT=64 \
|
|
|
|
|
|
valgrind --quiet --leak-check=full --show-leak-kinds=all \
|
|
|
|
|
|
--errors-for-leak-kinds=all --track-origins=yes --error-exitcode=99 \
|
|
|
|
|
|
./larson_hakmem 2 $(MIN) $(MAX) 256 $(ROUNDS) $(SEED) $(THREADS) || true
|
|
|
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
2025-11-07 01:27:04 +09:00
|
|
|
|
# Unit tests (Box-level)
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
.PHONY: unit unit-run
|
|
|
|
|
|
|
|
|
|
|
|
UNIT_BIN_DIR := tests/bin
|
2025-12-10 09:08:18 +09:00
|
|
|
|
UNIT_BINS := $(UNIT_BIN_DIR)/test_super_registry $(UNIT_BIN_DIR)/test_ready_ring $(UNIT_BIN_DIR)/test_mailbox_box $(UNIT_BIN_DIR)/madvise_guard_test $(UNIT_BIN_DIR)/libm_reloc_guard_test
|
2025-11-07 01:27:04 +09:00
|
|
|
|
|
|
|
|
|
|
unit: $(UNIT_BINS)
|
|
|
|
|
|
@echo "OK: unit tests built -> $(UNIT_BINS)"
|
|
|
|
|
|
|
|
|
|
|
|
$(UNIT_BIN_DIR)/test_super_registry: tests/unit/test_super_registry.c core/hakmem_super_registry.c core/hakmem_tiny_superslab.c
|
|
|
|
|
|
@mkdir -p $(UNIT_BIN_DIR)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
$(UNIT_BIN_DIR)/test_ready_ring: tests/unit/test_ready_ring.c
|
|
|
|
|
|
@mkdir -p $(UNIT_BIN_DIR)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
$(UNIT_BIN_DIR)/test_mailbox_box: tests/unit/test_mailbox_box.c tests/unit/mailbox_test_stubs.c core/box/mailbox_box.c
|
|
|
|
|
|
@mkdir -p $(UNIT_BIN_DIR)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-12-10 09:08:18 +09:00
|
|
|
|
$(UNIT_BIN_DIR)/madvise_guard_test: tests/unit/madvise_guard_test.c core/box/madvise_guard_box.c
|
|
|
|
|
|
@mkdir -p $(UNIT_BIN_DIR)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
$(UNIT_BIN_DIR)/libm_reloc_guard_test: tests/unit/libm_reloc_guard_test.c core/box/libm_reloc_guard_box.c
|
|
|
|
|
|
@mkdir -p $(UNIT_BIN_DIR)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
unit-run: unit
|
|
|
|
|
|
@echo "Running unit: test_super_registry" && $(UNIT_BIN_DIR)/test_super_registry
|
|
|
|
|
|
@echo "Running unit: test_ready_ring" && $(UNIT_BIN_DIR)/test_ready_ring
|
|
|
|
|
|
@echo "Running unit: test_mailbox_box" && $(UNIT_BIN_DIR)/test_mailbox_box
|
2025-12-10 09:08:18 +09:00
|
|
|
|
@echo "Running unit: madvise_guard_test" && $(UNIT_BIN_DIR)/madvise_guard_test
|
|
|
|
|
|
@echo "Running unit: libm_reloc_guard_test" && $(UNIT_BIN_DIR)/libm_reloc_guard_test
|
2025-11-07 01:27:04 +09:00
|
|
|
|
|
|
|
|
|
|
# Build 3-layer Tiny (new front) with low optimization for debug/testing
|
|
|
|
|
|
larson_hakmem_3layer:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 larson_hakmem
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Built larson_hakmem with NEW 3-LAYER front"
|
|
|
|
|
|
@echo " NEW_3LAYER_DEFAULT=1, LTO=OFF, O1"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
|
|
|
|
|
|
# Build 3-layer + route fingerprint enabled (runtime ring still needs ENV)
|
|
|
|
|
|
larson_hakmem_route:
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) NEW_3LAYER_DEFAULT=1 ULTRA_SIMPLE_DEFAULT=0 BOX_REFACTOR_DEFAULT=1 USE_LTO=0 OPT_LEVEL=1 \
|
|
|
|
|
|
EXTRA_CFLAGS+=" -DHAKMEM_ROUTE=1" larson_hakmem
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Built larson_hakmem (3-layer + route)"
|
|
|
|
|
|
@echo " HAKMEM_ROUTE build-flag set; runtime ENV still controls output"
|
|
|
|
|
|
@echo "========================================="
|
2025-11-09 11:50:18 +09:00
|
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
# Pool TLS Benchmarks (Phase 1.5b)
|
|
|
|
|
|
# ----------------------------------------------------------------------------
|
|
|
|
|
|
# Build HAKMEM shared library first to satisfy -lhakmem
|
|
|
|
|
|
bench_pool_tls_hakmem: benchmarks/bench_pool_tls.c $(SHARED_LIB)
|
|
|
|
|
|
$(CC) $(CFLAGS) -o $@ $< -L. -lhakmem $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
bench_pool_tls_system: benchmarks/bench_pool_tls.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -DUSE_SYSTEM_MALLOC -o $@ $< $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
.PHONY: bench-pool-tls
|
|
|
|
|
|
bench-pool-tls: bench_pool_tls_hakmem bench_pool_tls_system
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Pool TLS Benchmark (8KB-52KB allocations)"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "== HAKMEM (Phase 1.5b Pre-warm) =="
|
|
|
|
|
|
@./bench_pool_tls_hakmem 1 100000 256 42
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "== System malloc =="
|
|
|
|
|
|
@./bench_pool_tls_system 1 100000 256 42
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "========================================="
|
2025-11-13 01:45:30 +09:00
|
|
|
|
|
|
|
|
|
|
# Phase E1-CORRECT Debug Bench (minimal test)
|
|
|
|
|
|
test_simple_e1: test_simple_e1.o $(HAKMEM_OBJS)
|
|
|
|
|
|
$(CC) -o $@ $^ $(LDFLAGS)
|
|
|
|
|
|
|
|
|
|
|
|
test_simple_e1.o: test_simple_e1.c
|
|
|
|
|
|
$(CC) $(CFLAGS) -c -o $@ $<
|
2025-11-29 11:28:38 +09:00
|
|
|
|
|
|
|
|
|
|
# ========================================
|
|
|
|
|
|
# Phase 4: PGO (Profile-Guided Optimization) Targets
|
|
|
|
|
|
# ========================================
|
|
|
|
|
|
# Phase 4-Step1: PGO Profile Build
|
|
|
|
|
|
# Builds binaries with -fprofile-generate for profiling
|
|
|
|
|
|
.PHONY: pgo-tiny-profile
|
|
|
|
|
|
pgo-tiny-profile:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 4: Building PGO Profile Binaries"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) PROFILE_GEN=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO profile binaries built"
|
|
|
|
|
|
@echo "Next: Run 'make pgo-tiny-collect' to collect profile data"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 4-Step1: PGO Profile Collection
|
|
|
|
|
|
# Executes representative workloads to generate .gcda files
|
|
|
|
|
|
.PHONY: pgo-tiny-collect
|
|
|
|
|
|
pgo-tiny-collect:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 4: Collecting PGO Profile Data"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
./scripts/box/pgo_tiny_profile_box.sh
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 4-Step1: PGO Optimized Build
|
|
|
|
|
|
# Builds binaries with -fprofile-use for optimization
|
|
|
|
|
|
.PHONY: pgo-tiny-build
|
|
|
|
|
|
pgo-tiny-build:
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 4: Building PGO-Optimized Binaries"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Building optimized binaries..."
|
|
|
|
|
|
$(MAKE) clean
|
|
|
|
|
|
$(MAKE) PROFILE_USE=1 bench_random_mixed_hakmem bench_tiny_hot_hakmem
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO-optimized binaries built"
|
|
|
|
|
|
@echo "Next: Run './bench_random_mixed_hakmem 1000000 256 42' to test"
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 4-Step1: Full PGO Workflow
|
|
|
|
|
|
# Complete workflow: profile → collect → build → test
|
|
|
|
|
|
.PHONY: pgo-tiny-full
|
|
|
|
|
|
pgo-tiny-full: pgo-tiny-profile pgo-tiny-collect pgo-tiny-build
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Phase 4: PGO Full Workflow Complete"
|
|
|
|
|
|
@echo "========================================="
|
|
|
|
|
|
@echo "Testing PGO-optimized binary..."
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
./bench_random_mixed_hakmem 1000000 256 42
|
|
|
|
|
|
@echo ""
|
|
|
|
|
|
@echo "✓ PGO optimization complete!"
|
|
|
|
|
|
@echo ""
|