#!/bin/bash
# Layout Tax Forensics Box
#
# Purpose: Compare baseline vs treatment binaries to isolate layout tax causes.
#
# Usage:   ./scripts/box/layout_tax_forensics_box.sh [baseline_bin] [treatment_bin]
# Example: ./scripts/box/layout_tax_forensics_box.sh ./bench_random_mixed_hakmem_minimal_pgo ./bench_random_mixed_hakmem_fast_pruned
#
# Notes:
#   - The benchmark runner is referenced by a relative path
#     (scripts/run_mixed_10_cleanenv.sh), so the script must be started from
#     the directory that contains scripts/.
#   - Throughput samples, perf stat output and a summary report are written
#     below ./results/layout_tax_forensics.
#   - Requires GNU grep (-P); perf and size are optional (sections are skipped
#     with a notice when they are unavailable).

set -euo pipefail

readonly BASELINE_BIN="${1:-./.bench_random_mixed_hakmem_minimal_pgo}"
readonly TREATMENT_BIN="${2:-./.bench_random_mixed_hakmem_fast_pruned}"
readonly ITERS=20000000
readonly WS=400
readonly RUNS=10
readonly RESULT_DIR="./results/layout_tax_forensics"
readonly BENCH_RUNNER="scripts/run_mixed_10_cleanenv.sh"

# Metrics to collect
readonly PERF_EVENTS="cycles,instructions,branches,branch-misses,cache-misses,iTLB-loads,iTLB-load-misses,dTLB-loads,dTLB-load-misses,L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses"

readonly BASELINE_THROUGHPUT_FILE="$RESULT_DIR/baseline_throughput.txt"
readonly TREATMENT_THROUGHPUT_FILE="$RESULT_DIR/treatment_throughput.txt"
readonly BASELINE_PERF_FILE="$RESULT_DIR/baseline_perf.txt"
readonly TREATMENT_PERF_FILE="$RESULT_DIR/treatment_perf.txt"
readonly SUMMARY_FILE="$RESULT_DIR/layout_tax_forensics_summary.txt"

# Results of the most recent measure_throughput call.
STAT_MEAN=""
STAT_MEDIAN=""
STAT_CV=""

#######################################
# Print an error message to stderr and abort with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Run the clean-env benchmark runner once for one binary.
# Globals:   ITERS, WS, BENCH_RUNNER (read)
# Arguments: $1   - benchmark binary (exported to the runner as BENCH_BIN)
#            $2.. - optional wrapper command placed in front of bash
#                   (e.g. perf stat ...)
# Outputs:   whatever the runner (and the wrapper) print
# Returns:   exit status of the wrapper / runner
#######################################
run_benchmark() {
  local bin=$1
  shift
  # Settings go through env(1) instead of being spliced into the bash -c
  # string, so a binary path containing quotes cannot break or inject into
  # the command. env(1) is also used because RUNS/ITERS/WS are readonly here
  # and cannot be used as shell-level temporary assignments.
  env HAKMEM_PROFILE=MIXED_TINYV3_C7_SAFE RUNS=1 ITERS="$ITERS" WS="$WS" \
    BENCH_BIN="$bin" \
    "$@" bash -c "source ${BENCH_RUNNER}"
}

#######################################
# Compute summary statistics over a file holding one numeric sample per line.
# Arguments: $1 - samples file
# Outputs:   "mean median cv_percent count" on stdout (cv is "n/a" when the
#            mean is zero); stddev is the population form (divide by count)
# Returns:   non-zero when the file holds no samples
#######################################
compute_stats() {
  local samples_file=$1
  LC_ALL=C sort -n -- "$samples_file" \
    | LC_ALL=C awk '
        NF { v[++n] = $1; sum += $1 }
        END {
          if (n == 0) exit 1
          mean = sum / n
          for (i = 1; i <= n; i++) ss += (v[i] - mean) ^ 2
          sd = sqrt(ss / n)
          # True median of the samples actually collected: middle element
          # for odd counts, mean of the two middle elements for even counts.
          if (n % 2) median = v[(n + 1) / 2]
          else       median = (v[n / 2] + v[n / 2 + 1]) / 2
          cv = (mean != 0) ? (sd / mean) * 100 : "n/a"
          print mean, median, cv, n
        }'
}

#######################################
# Collect RUNS throughput samples for one binary and print their statistics.
# Globals:   RUNS (read); STAT_MEAN, STAT_MEDIAN, STAT_CV (written)
# Arguments: $1 - section title (e.g. BASELINE)
#            $2 - label used in the sample listing (e.g. Baseline)
#            $3 - benchmark binary
#            $4 - file receiving one throughput sample per line
# Outputs:   human-readable report on stdout
#######################################
measure_throughput() {
  local title=$1 label=$2 bin=$3 out_file=$4
  local run stats count

  echo "=== ${title}: Throughput (${RUNS}-run) ==="
  : > "$out_file"
  for ((run = 1; run <= RUNS; run++)); do
    # Use cleanenv to match canonical benchmark. A failed run or a run
    # without a "Throughput =" line is tolerated here; the sample count is
    # validated once after the loop.
    run_benchmark "$bin" 2>/dev/null \
      | grep -oP "Throughput = +\K[0-9.]+" >> "$out_file" || true
  done

  stats=$(compute_stats "$out_file") \
    || die "No throughput samples collected for $bin (check that ${BENCH_RUNNER} runs from $(pwd))"
  read -r STAT_MEAN STAT_MEDIAN STAT_CV count <<< "$stats"
  if ((count < RUNS)); then
    echo "WARNING: only $count of $RUNS runs produced a throughput sample for $bin" >&2
  fi

  echo "${label} throughput (M ops/s):"
  nl "$out_file"
  echo "Mean: $STAT_MEAN"
  echo "Median: $STAT_MEDIAN"
  echo "CV: $STAT_CV %"
  echo ""
}

#######################################
# Run one representative benchmark under perf stat and print the counters.
# Globals:   PERF_EVENTS (read)
# Arguments: $1 - section title
#            $2 - benchmark binary
#            $3 - perf stat output file
# Outputs:   benchmark output and perf counters on stdout; notices on stderr
#######################################
run_perf_stat() {
  local title=$1 bin=$2 perf_file=$3

  echo "=== ${title}: perf stat (representative run) ==="
  if ! command -v perf > /dev/null 2>&1; then
    echo "WARNING: perf not found in PATH; skipping perf stat for $bin" >&2
  else
    # Drop output left over from an earlier invocation so that a failing
    # perf run cannot be mistaken for fresh counters.
    rm -f -- "$perf_file"
    # Best effort: perf may be unable to open some events on this machine.
    run_benchmark "$bin" perf stat -e "$PERF_EVENTS" -o "$perf_file" 2>&1 || true
    if [[ -f "$perf_file" ]]; then
      cat "$perf_file"
    else
      echo "WARNING: perf stat produced no output file: $perf_file" >&2
    fi
  fi
  echo ""
}

#######################################
# Print the file size and the size(1) section totals of a binary.
# Arguments: $1 - label (e.g. Baseline)
#            $2 - binary path
#######################################
print_binary_metadata() {
  local label=$1 bin=$2
  local size_out

  echo "${label}:"
  ls -lh -- "$bin" | awk '{print " Size:", $5}'
  # Test the status of size(1) itself; testing the status of a
  # `size | tail` pipeline would only ever see tail's success.
  if size_out=$(size "$bin" 2> /dev/null); then
    printf '%s\n' "$size_out" | tail -n 1
  else
    echo " (size info not available)"
  fi
  echo ""
}

#######################################
# Entry point: validate inputs, run all measurements, write the summary.
#######################################
main() {
  local baseline_mean baseline_cv treatment_mean treatment_cv
  local delta abs_delta

  # Ensure binaries and the benchmark runner exist
  [[ -f "$BASELINE_BIN" ]] || die "Baseline binary not found: $BASELINE_BIN"
  [[ -f "$TREATMENT_BIN" ]] || die "Treatment binary not found: $TREATMENT_BIN"
  [[ -f "$BENCH_RUNNER" ]] || die "Benchmark runner not found: $BENCH_RUNNER (run from the repository root)"

  mkdir -p "$RESULT_DIR"

  echo "=========================================="
  echo "Layout Tax Forensics Box"
  echo "=========================================="
  echo "Baseline binary: $BASELINE_BIN"
  echo "Treatment binary: $TREATMENT_BIN"
  echo "Workload: Mixed, ITERS=$ITERS, WS=$WS, RUNS=$RUNS"
  echo "Metrics: $PERF_EVENTS"
  echo "Output: $RESULT_DIR"
  echo ""

  # Throughput (baseline)
  measure_throughput "BASELINE" "Baseline" "$BASELINE_BIN" "$BASELINE_THROUGHPUT_FILE"
  baseline_mean=$STAT_MEAN
  baseline_cv=$STAT_CV

  # Throughput (treatment)
  measure_throughput "TREATMENT" "Treatment" "$TREATMENT_BIN" "$TREATMENT_THROUGHPUT_FILE"
  treatment_mean=$STAT_MEAN
  treatment_cv=$STAT_CV

  # Calculate delta: relative (percent of baseline) and absolute difference.
  # The absolute value is formatted with printf so that small differences
  # are not emitted in exponent notation.
  read -r delta abs_delta <<< "$(awk -v b="$baseline_mean" -v t="$treatment_mean" \
    'BEGIN {
       pct = (b != 0) ? ((t - b) / b) * 100 : "n/a"
       printf "%s %.3f\n", pct, t - b
     }')"
  echo "Performance delta: $delta % (${abs_delta}M ops/s)"
  echo ""

  # perf stat: single representative runs
  run_perf_stat "BASELINE" "$BASELINE_BIN" "$BASELINE_PERF_FILE"
  run_perf_stat "TREATMENT" "$TREATMENT_BIN" "$TREATMENT_PERF_FILE"

  # Binary metadata
  echo "=== Binary Metadata ==="
  print_binary_metadata "Baseline" "$BASELINE_BIN"
  print_binary_metadata "Treatment" "$TREATMENT_BIN"

  # Summary report
  cat > "$SUMMARY_FILE" << EOF
================================================================================
Layout Tax Forensics Summary
================================================================================

Baseline: $BASELINE_BIN
Treatment: $TREATMENT_BIN
Workload: Mixed (ITERS=$ITERS, WS=$WS)

THROUGHPUT RESULTS
==================
Baseline Mean: $baseline_mean M ops/s (CV: $baseline_cv %)
Treatment Mean: $treatment_mean M ops/s (CV: $treatment_cv %)
Delta: $delta %

DETAILED OUTPUT
================
- Throughput samples: $BASELINE_THROUGHPUT_FILE, $TREATMENT_THROUGHPUT_FILE
- perf stat: $BASELINE_PERF_FILE, $TREATMENT_PERF_FILE

NEXT STEPS
==========
Use PHASE67A_LAYOUT_TAX_FORENSICS_SSOT.md to:
1. Categorize delta as GO/NEUTRAL/NO-GO
2. Map perf metrics to root causes (IPC/cache/iTLB/branch-miss)
3. Document symptoms and remediation strategies

================================================================================
EOF

  cat "$SUMMARY_FILE"
  echo ""
  echo "Results saved to: $RESULT_DIR"
}

main "$@"