Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
164 lines
4.7 KiB
Bash
Executable File
164 lines
4.7 KiB
Bash
Executable File
#!/bin/bash
# build_pgo_shared.sh - Build libhakmem.so with PGO optimization
#
# Usage:
#   ./build_pgo_shared.sh            - Full PGO build (clean → instrument → profile → optimize)
#   ./build_pgo_shared.sh clean      - Clean all artifacts
#   ./build_pgo_shared.sh instrument - Build instrumented version only
#   ./build_pgo_shared.sh profile    - Run profile collection only
#   ./build_pgo_shared.sh build      - Build optimized version only (requires .gcda files)

# Abort the script as soon as a command fails outside of a condition.
set -e

# Colors
# ANSI escape sequences, stored unexpanded; the logging helpers print them
# with `echo -e`, which turns the \033 sequences into real escape characters.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
readonly RED GREEN YELLOW BLUE NC

# Logging

# log MESSAGE
#   Print MESSAGE on stdout behind a "[PGO-Shared]" tag wrapped in the
#   BLUE/NC colour codes. `echo -e` interprets backslash escapes in the
#   colour variables and in MESSAGE itself.
log() {
    echo -e "${BLUE}[PGO-Shared]${NC} $1"
}

# success MESSAGE
#   Print MESSAGE on stdout behind a check mark wrapped in the GREEN/NC
#   colour codes. `echo -e` interprets backslash escapes in MESSAGE.
success() {
    echo -e "${GREEN}✓${NC} $1"
}

# error MESSAGE
#   Print MESSAGE on stderr behind a red "✗ Error:" tag, then terminate the
#   script with exit status 1. `echo -e` interprets backslash escapes in
#   MESSAGE, so callers may embed "\n" to emit a multi-line message.
error() {
    echo -e "${RED}✗ Error:${NC} $1" >&2
    exit 1
}

# warning MESSAGE
#   Print MESSAGE on stdout behind a warning sign wrapped in the YELLOW/NC
#   colour codes. Does not stop the script.
warning() {
    echo -e "${YELLOW}⚠${NC} $1"
}

# Clean function
#
# do_clean
#   Delete gcov data (*.gcda, *.gcno), the shared-build objects (*_shared.o)
#   and libhakmem.so from the current directory.
do_clean() {
    log "Cleaning all PGO artifacts and shared library..."
    # The ./ prefix and -- keep a file whose name starts with '-' from being
    # parsed as an rm option. Unmatched patterns stay literal and are ignored
    # by rm -f.
    rm -f -- ./*.gcda ./*.gcno ./*_shared.o libhakmem.so
    success "Clean complete"
}

# Build instrumented version
#
# build_instrumented
#   Step 1/3: build libhakmem.so through the Makefile's `shared` target with
#   -fprofile-generate and -flto added to the compile and link flags.
#   Calls error (exit 1) when make fails or no libhakmem.so is produced.
build_instrumented() {
    log "Step 1/3: Building instrumented libhakmem.so..."

    # Base flags from Makefile
    # Add -ftls-model=initial-exec for fast TLS access (same as mimalloc)
    local base_cflags="-O3 -march=native -mtune=native -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -DHAKMEM_DEBUG_TIMING=0 -fPIC -ffast-math -funroll-loops -ftls-model=initial-exec"
    local base_ldflags="-lm -lpthread"
    local make_status

    # Remove stale objects and library (as the optimized build does) so that
    # make really rebuilds with instrumentation and the existence check below
    # cannot be satisfied by a leftover libhakmem.so.
    rm -f -- ./*_shared.o libhakmem.so

    # Add PGO instrumentation
    make CFLAGS_SHARED="$base_cflags -fprofile-generate -flto" \
        LDFLAGS="$base_ldflags -fprofile-generate -flto" \
        shared 2>&1 | tail -5
    # The pipeline's own status is tail's; make's status is in PIPESTATUS[0].
    make_status=${PIPESTATUS[0]}

    if [ "$make_status" -ne 0 ] || [ ! -f "libhakmem.so" ]; then
        error "Instrumented libhakmem.so build failed!"
    fi

    success "Instrumented libhakmem.so complete"
}

# Collect profile data
#
# collect_profile
#   Step 2/3: run ./bench_comprehensive_system with the instrumented
#   libhakmem.so preloaded so that .gcda profile files get written to the
#   current directory. Builds the benchmark through make when it is missing.
#   Calls error (exit 1) when libhakmem.so is missing, the benchmark cannot be
#   built, or no .gcda file exists afterwards.
collect_profile() {
    local make_status bench_status
    local gcda_count=0
    local -a gcda_files

    log "Step 2/3: Collecting profile data..."

    if [ ! -f "libhakmem.so" ]; then
        error "libhakmem.so not found. Run 'instrument' step first."
    fi

    if [ ! -f "bench_comprehensive_system" ]; then
        log "Building bench_comprehensive_system..."
        make bench_comprehensive_system 2>&1 | tail -3
        # tail hides make's exit status; read it from PIPESTATUS instead.
        make_status=${PIPESTATUS[0]}
        if [ "$make_status" -ne 0 ] || [ ! -x "bench_comprehensive_system" ]; then
            error "bench_comprehensive_system build failed!"
        fi
    fi

    log "Running benchmark with LD_PRELOAD to collect profile..."
    # Run with HAKMEM_WRAP_TINY=1 to activate Tiny allocator
    # Only the first 20 lines are shown, but the rest of the output is still
    # drained: if the reader closed the pipe early, the benchmark would be
    # killed by SIGPIPE before it exits normally and writes its .gcda files.
    HAKMEM_WRAP_TINY=1 LD_PRELOAD=./libhakmem.so ./bench_comprehensive_system 2>&1 \
        | { head -20; cat >/dev/null; }
    bench_status=${PIPESTATUS[0]}
    if [ "$bench_status" -ne 0 ]; then
        warning "Benchmark exited with status $bench_status; profile data may be incomplete"
    fi

    # Check if profile data was generated
    # An unmatched glob stays literal, so test the first entry for existence.
    gcda_files=( ./*.gcda )
    if [ -e "${gcda_files[0]}" ]; then
        gcda_count=${#gcda_files[@]}
    fi
    if [ "$gcda_count" -eq 0 ]; then
        error "Profile data not generated! (no .gcda files found)"
    fi

    success "Profile data collected ($gcda_count .gcda files)"
}

# Build optimized version
#
# build_optimized
#   Step 3/3: rebuild libhakmem.so through the Makefile's `shared` target with
#   -fprofile-use and -flto, consuming the .gcda files in the current
#   directory. Calls error (exit 1) when no .gcda file is present, when make
#   fails, or when no libhakmem.so is produced.
build_optimized() {
    local -a gcda_files
    local make_status

    log "Step 3/3: Building PGO-optimized libhakmem.so..."

    # Check if profile data exists
    # An unmatched glob stays literal, so test the first entry for existence.
    gcda_files=( ./*.gcda )
    if [ ! -e "${gcda_files[0]}" ]; then
        error "No profile data found! Run 'profile' step first."
    fi

    # Remove old .o files but keep .gcda
    rm -f -- ./*_shared.o libhakmem.so

    # Base flags
    # Add -ftls-model=initial-exec for fast TLS access (same as mimalloc)
    local base_cflags="-O3 -march=native -mtune=native -Wall -Wextra -std=c11 -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -DHAKMEM_DEBUG_TIMING=0 -fPIC -ffast-math -funroll-loops -ftls-model=initial-exec"
    local base_ldflags="-lm -lpthread"

    # Add PGO optimization
    make CFLAGS_SHARED="$base_cflags -fprofile-use -flto -Wno-error=coverage-mismatch" \
        LDFLAGS="$base_ldflags -fprofile-use -flto" \
        shared 2>&1 | grep -v "coverage mismatch" | tail -5
    # The filters hide make's exit status; read it from PIPESTATUS instead.
    make_status=${PIPESTATUS[0]}

    if [ "$make_status" -ne 0 ] || [ ! -f "libhakmem.so" ]; then
        error "PGO-optimized libhakmem.so build failed!"
    fi

    success "PGO-optimized libhakmem.so complete"
}

# Main workflow
#
# main [clean|instrument|profile|build|full]
#   Dispatch on the first argument (default: full). `full` runs clean,
#   instrument, profile and build in that order and prints a usage hint for
#   the resulting library. Any other word is reported through error, which
#   exits with status 1.
main() {
    case "${1:-full}" in
        clean)
            do_clean
            ;;
        instrument)
            build_instrumented
            ;;
        profile)
            collect_profile
            ;;
        build)
            build_optimized
            ;;
        full)
            log "========================================="
            log "Building PGO-Optimized libhakmem.so"
            log "========================================="
            echo ""

            do_clean
            build_instrumented
            collect_profile
            build_optimized

            echo ""
            log "========================================="
            success "PGO libhakmem.so build complete!"
            log "========================================="
            echo ""
            log "Test with LD_PRELOAD:"
            echo " HAKMEM_WRAP_TINY=1 LD_PRELOAD=./libhakmem.so ./bench_comprehensive_system"
            echo ""
            ;;
        *)
            # error prints with `echo -e`, which expands the \n below.
            error "Unknown command: $1\nUsage: $0 [clean|instrument|profile|build|full]"
            ;;
    esac
}

# Script entry point: forward all command-line arguments to main.
main "$@"