#!/usr/bin/env bash
# build.sh - Unified build wrapper (Phase 7 + Pool TLS) with discoverable help
#
# Quick use:
#   ./build.sh bench_pool_tls_hakmem          # Recommended target
#   ./build.sh debug bench_pool_tls_hakmem    # Optional leading flavor: release (default) or debug
#   ./build.sh help                           # Show usage/hints/ENV
#   ./build.sh verify bench_pool_tls_hakmem   # Check freshness
#
# Notes:
#   - Flags are pinned to avoid drift (see below). You can pass extra make flags via
#     EXTRA_MAKEFLAGS, e.g. EXTRA_MAKEFLAGS="HAKMEM_DEBUG_VERBOSE=1" ./build.sh <target>
#   - Arena ENV (Pool TLS): HAKMEM_POOL_TLS_ARENA_MB_INIT/MAX/GROWTH_LEVELS
#   - See also: docs/BUILDING_QUICKSTART.md

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Optional leading flavor argument. When the first argument is exactly
# "release" or "debug" it is consumed; anything else is left in place and is
# interpreted as the target below.
FLAVOR="release"
case "${1:-}" in
  release|debug)
    FLAVOR="$1"
    shift
    ;;
esac

# First remaining argument: a make target or one of the subcommands
# (help / list / verify). Falls back to the Mid-Large MT benchmark when
# missing or empty.
TARGET="${1:-bench_mid_large_mt_hakmem}"

#######################################
# Print the full help text to stdout: invocation forms, curated targets,
# build flags and their defaults, EXTRA_MAKEFLAGS, Pool TLS arena ENV and
# verify/perf tips. The heredoc delimiter is quoted, so nothing in the text
# is expanded by the shell.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  cat <<'USAGE'
=========================================
HAKMEM Build Script (help)
=========================================
Usage:
./build.sh [release|debug] <target> # Flavor is optional (default: release)
./build.sh help # Show this help
./build.sh list # Show common targets
./build.sh verify <bin> # Verify binary freshness

Common targets (curated):
- bench_random_mixed_hakmem
- bench_pool_tls_hakmem
- bench_mid_large_mt_hakmem
- larson_hakmem

Pinned build flags (always passed to make):
HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1

Overridable build flags (set the same-named environment variable):
POOL_TLS_PHASE1 # default 1 for bench_mid_large_mt_{hakmem,system}, bench_pool_tls_{hakmem,system}, larson_{hakmem,mi,system}; 0 otherwise
POOL_TLS_PREWARM # default 0
POOL_TLS_BIND_BOX # default 0
DISABLE_MINCORE # default 0

Extra flags (optional):
Use environment var EXTRA_MAKEFLAGS, e.g.:
EXTRA_MAKEFLAGS="HAKMEM_DEBUG_VERBOSE=1" ./build.sh bench_pool_tls_hakmem
EXTRA_MAKEFLAGS="HAKMEM_TINY_SAFE_FREE=1" ./build.sh bench_random_mixed_hakmem

Pool TLS Arena ENV (A/B friendly):
export HAKMEM_POOL_TLS_ARENA_MB_INIT=2 # default 1
export HAKMEM_POOL_TLS_ARENA_MB_MAX=16 # default 8
export HAKMEM_POOL_TLS_ARENA_GROWTH_LEVELS=4 # default 3

Verify & perf tips:
make print-flags
./verify_build.sh <bin>
perf stat -e cycles,instructions,branches,branch-misses,cache-misses -r 3 -- ./<bin> ...
strace -e trace=mmap,madvise,munmap -c ./<bin> ...
USAGE
}

#######################################
# Print the curated list of common build targets, one per line, each with a
# short trailing description. The heredoc delimiter is quoted, so the text is
# emitted literally.
# Arguments: none
# Outputs:   target list on stdout
#######################################
list_targets() {
  cat <<'LIST'
Common build targets:
bench_random_mixed_hakmem # Tiny 1T mixed
bench_pool_tls_hakmem # Pool TLS (8–52KB)
bench_mid_large_mt_hakmem # Mid-Large MT (8–32KB)
larson_hakmem # Larson mixed
bench_random_mixed_system # glibc baseline
bench_pool_tls_system # glibc baseline (PoolTLS workload)
bench_mid_large_mt_system # glibc baseline (Mid-Large workload)
LIST
}

# Subcommand dispatch. help / list / verify are handled here and exit without
# building; any other TARGET value falls through to the build steps below.
case "${TARGET}" in
  help|-h|--help)
    usage
    exit 0
    ;;
  list)
    list_targets
    exit 0
    ;;
  verify)
    # The binary to check is the argument following "verify".
    BIN="${2:-}"
    if [[ -z "${BIN}" ]]; then
      echo "Usage: ./build.sh verify <binary>" >&2
      exit 2
    fi
    # With errexit active, a failing verify_build.sh aborts the script with
    # that failure status; the explicit exit 0 is reached only on success.
    ./verify_build.sh "${BIN}"
    exit 0
    ;;
esac

#######################################
# Resolve the POOL_TLS_PHASE1 value that will be passed to make for a target.
# A non-empty POOL_TLS_PHASE1 environment value always wins; otherwise the
# default depends on the target:
#   - Mid-Large targets (8-34KB workloads) → Pool TLS ON (critical for performance)
#   - Tiny targets (≤1KB workloads) → Pool TLS OFF (avoid TLS overhead for short benchmarks)
# NOTE(review): per the change notes that introduced the OFF default, a
# hardcoded POOL_TLS_PHASE1=1 added pthread_mutex overhead on every free()
# for tiny workloads — confirm before widening the ON list.
# Globals:   POOL_TLS_PHASE1 (read)
# Arguments: $1 - build target name
# Outputs:   resolved value on stdout
#######################################
default_pool_tls_phase1() {
  case "$1" in
    bench_mid_large_mt_hakmem|bench_pool_tls_hakmem|bench_mid_large_mt_system|bench_pool_tls_system|larson_hakmem|larson_mi|larson_system)
      printf '%s\n' "${POOL_TLS_PHASE1:-1}" # ON for Mid-Large and mixed workloads
      ;;
    *)
      printf '%s\n' "${POOL_TLS_PHASE1:-0}" # OFF for Tiny-focused benchmarks
      ;;
  esac
}

# Resolve before printing the banner so the banner reports the value that is
# actually handed to make, not the raw (possibly unset) environment value.
POOL_TLS_PHASE1_DEFAULT="$(default_pool_tls_phase1 "${TARGET}")"

echo "========================================="
echo " HAKMEM Build Script"
echo " Flavor: ${FLAVOR}"
echo " Target: ${TARGET}"
echo " Flags: POOL_TLS_PHASE1=${POOL_TLS_PHASE1_DEFAULT} POOL_TLS_PREWARM=${POOL_TLS_PREWARM:-0} POOL_TLS_BIND_BOX=${POOL_TLS_BIND_BOX:-0} DISABLE_MINCORE=${DISABLE_MINCORE:-0} HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 ${EXTRA_MAKEFLAGS:-}"
echo "========================================="

# Always clean to avoid stale objects when toggling flags.
# Best effort on purpose: output is discarded and a failing clean is ignored.
make clean >/dev/null 2>&1 || true

# Remaining overridable build flags. Each defaults to 0 unless the same-named
# environment variable is set to a non-empty value.
POOL_TLS_PREWARM_DEFAULT="${POOL_TLS_PREWARM:-0}"
POOL_TLS_BIND_BOX_DEFAULT="${POOL_TLS_BIND_BOX:-0}"
DISABLE_MINCORE_DEFAULT="${DISABLE_MINCORE:-0}"

# Base make command-line variable assignments. Each assignment is its own
# quoted array element, so a value containing whitespace still reaches make as
# a single argument. Array literals span lines without backslash continuations.
MAKE_ARGS=(
  "BUILD_FLAVOR=${FLAVOR}"
  "POOL_TLS_PHASE1=${POOL_TLS_PHASE1_DEFAULT}"
  "POOL_TLS_PREWARM=${POOL_TLS_PREWARM_DEFAULT}"
  "POOL_TLS_BIND_BOX=${POOL_TLS_BIND_BOX_DEFAULT}"
  "DISABLE_MINCORE=${DISABLE_MINCORE_DEFAULT}"
  HEADER_CLASSIDX=1
  AGGRESSIVE_INLINE=1
  PREWARM_TLS=1
)

# Apply flavor extras (non-invasive): debug adds verbose output and turns the
# release default off; every other flavor value turns the release default on.
if [[ "${FLAVOR}" == "debug" ]]; then
  MAKE_ARGS+=(HAKMEM_DEBUG_VERBOSE=1)
  MAKE_ARGS+=(BUILD_RELEASE_DEFAULT=0) # Disable release mode for debug builds
  # Uncomment to enable extra safety by default for debug runs (may slow hot path)
  # MAKE_ARGS+=(HAKMEM_TINY_SAFE_FREE=1)
else
  MAKE_ARGS+=(BUILD_RELEASE_DEFAULT=1) # Enable release mode for release builds
fi

# Append user-provided extras and target.
# EXTRA_MAKEFLAGS is expanded unquoted on purpose so that a space-separated
# list such as "A=1 B=2" becomes one make argument per word.
if [[ -n "${EXTRA_MAKEFLAGS:-}" ]]; then
  # shellcheck disable=SC2206
  MAKE_ARGS+=(${EXTRA_MAKEFLAGS})
fi
MAKE_ARGS+=("${TARGET}")

make "${MAKE_ARGS[@]}"

# Post-build summary. Reached only when make succeeded.
echo ""
echo "========================================="
echo " ✅ Build successful"
echo " Run: ./${TARGET}"
echo "-----------------------------------------"

# Place artifacts under out/<flavor>/
OUTDIR="out/${FLAVOR}"
mkdir -p "${OUTDIR}"
# Copy only a regular executable file. A bare -x test is also true for a
# directory, which would make cp fail and abort the script after the success
# banner; targets that produce no same-named binary are skipped.
if [[ -f "./${TARGET}" && -x "./${TARGET}" ]]; then
  cp -f "./${TARGET}" "${OUTDIR}/${TARGET}"
  echo " Saved: ${OUTDIR}/${TARGET}"
fi
echo " Tip: ./build.sh help # flags, ENV, targets"
echo "========================================="