Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab being disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix inconsistent profile variable naming
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions

View File

@ -0,0 +1,122 @@
#!/bin/bash
# Compare Mid Range MT performance across different allocators
#
# Runs bench_mid_large_mt with:
# - System allocator (glibc)
# - mimalloc
# - HAKX (our implementation)
#
# Usage: ./compare_mid_mt_allocators.sh [threads] [cycles] [ws] [seed] [runs]
set -e

# Locate this script's directory, then the project root one level above it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Positional parameters, each falling back to its default when omitted.
THREADS="${1:-4}"
CYCLES="${2:-60000}"
WORKING_SET="${3:-256}"
SEED="${4:-1}"
RUNS="${5:-3}"

# Command prefix used to pin benchmark runs to CPUs 0-3.
TASKSET="taskset -c 0-3"

# Configuration banner.
cat <<EOF
==========================================
Mid Range MT Allocator Comparison
==========================================
Configuration:
 Threads: $THREADS
 Cycles: $CYCLES
 Working Set: $WORKING_SET
 Seed: $SEED
 Runs/each: $RUNS

EOF

cd "$PROJECT_ROOT"

# Build every benchmark variant whose binary is not already present.
for alloc in system mi hakx; do
    TARGET="bench_mid_large_mt_${alloc}"
    if [[ ! -f "./$TARGET" ]]; then
        printf '%s\n' "Building $TARGET..."
        make "$TARGET"
    fi
done

printf '%s\n' "" "=========================================="
#######################################
# Run one benchmark variant RUNS times and report the median throughput.
#
# Globals:   TASKSET, THREADS, CYCLES, WORKING_SET, SEED, RUNS (all read)
# Arguments: $1 - variant suffix; the binary run is ./bench_mid_large_mt_<variant>
# Outputs:   stdout - the median value only, so that callers capturing
#                     $(run_bench ...) receive a single number
#            stderr - per-run progress and the formatted median line
# Returns:   0 on success; 1 if RUNS is below 1 or a run printed no
#            "Throughput:" value
#######################################
run_bench() {
    local variant=$1
    local results=()
    local output mops median i

    # Progress is written to stderr: stdout is reserved for the median,
    # because callers capture this function with command substitution.
    {
        echo ""
        echo "Testing: $variant"
        echo "----------------------------------------"
    } >&2

    for ((i = 1; i <= RUNS; i++)); do
        # TASKSET is left unquoted on purpose: it holds a command plus its
        # arguments and has to be word-split. The exit status is not relied
        # on; a missing "Throughput:" line below is the failure signal.
        # shellcheck disable=SC2086
        output=$($TASKSET "./bench_mid_large_mt_${variant}" \
            "$THREADS" "$CYCLES" "$WORKING_SET" "$SEED" 2>&1) || true

        # Second field of the first line containing "Throughput:".
        mops=$(awk '/Throughput:/ { print $2; exit }' <<<"$output")
        if [[ -z "$mops" ]]; then
            {
                echo "ERROR: bench_mid_large_mt_${variant} reported no throughput (run $i)"
                echo "$output"
            } >&2
            return 1
        fi

        results+=("$mops")
        printf " Run %d: %s M ops/sec\n" "$i" "$mops" >&2
    done

    if (( ${#results[@]} == 0 )); then
        echo "ERROR: no runs were executed (RUNS='$RUNS')" >&2
        return 1
    fi

    # Median: sort numerically, then take the middle value (odd count) or the
    # mean of the two middle values (even count).
    median=$(printf "%s\n" "${results[@]}" | sort -n | awk '{
        a[NR] = $1
    }
    END {
        if (NR % 2 == 1) {
            print a[int(NR/2) + 1]
        } else {
            median = (a[NR/2] + a[NR/2 + 1]) / 2
            printf "%.2f", median
        }
    }')

    echo " Median: $median M ops/sec" >&2
    echo "$median"
}
# Run benchmarks
echo "Running benchmarks..."

# Keep only the last stdout line of run_bench (its final `echo "$median"`),
# so any progress text that reaches stdout cannot contaminate the numeric
# value handed to printf and bc below.
SYSTEM_RESULT=$(run_bench "system" | tail -n 1)
MI_RESULT=$(run_bench "mi" | tail -n 1)
HAKX_RESULT=$(run_bench "hakx" | tail -n 1)

# Every result is used as a bc operand and two of them as divisors, so each
# one must be a plain positive decimal number.
for value in "$SYSTEM_RESULT" "$MI_RESULT" "$HAKX_RESULT"; do
    if [[ ! "$value" =~ ^[0-9]*[.]?[0-9]+$ ]] || (( $(bc -l <<<"$value <= 0") )); then
        echo "ERROR: expected a positive median throughput, got '$value'" >&2
        exit 1
    fi
done

# Summary
echo ""
echo "=========================================="
echo "Summary"
echo "=========================================="
printf "%-20s %10s %15s\n" "Allocator" "Throughput" "vs System"
echo "----------------------------------------"
printf "%-20s %10.2f M %15s\n" "System (glibc)" "$SYSTEM_RESULT" "1.00x"

# bc prints ratios below one without a leading zero (".87"); pass them through
# printf so they line up with the "1.00x" baseline.
printf -v MI_RATIO '%.2f' "$(bc <<<"scale=2; $MI_RESULT / $SYSTEM_RESULT")"
printf "%-20s %10.2f M %15s\n" "mimalloc" "$MI_RESULT" "${MI_RATIO}x"
printf -v HAKX_RATIO '%.2f' "$(bc <<<"scale=2; $HAKX_RESULT / $SYSTEM_RESULT")"
printf "%-20s %10.2f M %15s\n" "HAKX (Mid MT)" "$HAKX_RESULT" "${HAKX_RATIO}x"

echo ""
echo "HAKX vs mimalloc:"
# Multiply before dividing: with scale=2 the quotient is truncated to two
# decimals, so "a / b * 100" could only ever produce whole percentages.
HAKX_VS_MI=$(bc <<<"scale=2; $HAKX_RESULT * 100 / $MI_RESULT")
printf " %.1f%% of mimalloc performance\n" "$HAKX_VS_MI"

# Winner
echo ""
if (( $(bc -l <<<"$HAKX_RESULT > $MI_RESULT") )); then
    echo "🏆 HAKX is FASTER than mimalloc!"
elif (( $(bc -l <<<"$HAKX_RESULT > $SYSTEM_RESULT * 1.5") )); then
    echo "✅ HAKX significantly faster than system allocator (>1.5x)"
else
    echo "⚠️ HAKX needs optimization"
fi
echo ""
echo "Comparison completed!"

View File

@ -0,0 +1,123 @@
#!/bin/bash
# Mid Range MT Benchmark - Optimal Configuration
#
# Parameters discovered through performance tuning:
# - threads=4: Optimal for quad-core systems
# - cycles=60000: Sufficient iterations for stable results
# - ws=256: Working set that fits in L3 cache (4MB)
# - taskset -c 0-3: Pin to cores 0-3 for consistency
#
# Performance target: 95-99 M ops/sec
# vs System allocator: ~1.87x faster
set -e

# Locate this script's directory, then the project root one level above it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Positional parameters, each falling back to its default when omitted.
THREADS="${1:-4}"
CYCLES="${2:-60000}"
WORKING_SET="${3:-256}"
SEED="${4:-1}"
RUNS="${5:-5}"

# Command prefix used to pin the benchmark to CPUs 0-3.
TASKSET="taskset -c 0-3"

# Configuration banner.
cat <<EOF
======================================
Mid Range MT Benchmark (8-32KB)
======================================
Configuration:
 Threads: $THREADS
 Cycles: $CYCLES
 Working Set: $WORKING_SET
 Seed: $SEED
 Runs: $RUNS
 CPU Affinity: cores 0-3

Working Set Analysis:
EOF

# Footprint estimate: WORKING_SET entries at an assumed 16 KB average each.
WS_SIZE=$((WORKING_SET * 16))
printf '%s\n' " Memory: ~${WS_SIZE} KB per thread"
printf '%s\n' " Total: ~$((WS_SIZE * THREADS / 1024)) MB"
printf '\n'

cd "$PROJECT_ROOT"

# Build the benchmark binary when it is missing.
if [[ ! -f ./bench_mid_large_mt_hakx ]]; then
    printf '%s\n' "ERROR: bench_mid_large_mt_hakx not found!"
    printf '%s\n' "Building benchmark..."
    make bench_mid_large_mt_hakx
    printf '\n'
fi
# Print the throughput value found in benchmark output.
# Arguments: $1 - captured benchmark output
# Outputs:   second field of the first line containing "Throughput:"
# Returns:   0 if such a line exists, 1 otherwise
extract_throughput() {
    awk '/Throughput:/ { print $2; found = 1; exit } END { exit !found }' <<<"$1"
}

# Run benchmark multiple times and collect results
echo "Running benchmark $RUNS times..."
echo ""
RESULTS=()
for ((i = 1; i <= RUNS; i++)); do
    echo "Run $i/$RUNS:"

    # Capture the exit status instead of letting `set -e` abort at the
    # assignment, so the benchmark's output is still shown when it fails.
    # TASKSET is left unquoted on purpose: it holds a command plus arguments.
    BENCH_STATUS=0
    # shellcheck disable=SC2086
    OUTPUT=$($TASKSET ./bench_mid_large_mt_hakx \
        "$THREADS" "$CYCLES" "$WORKING_SET" "$SEED" 2>&1) || BENCH_STATUS=$?
    echo "$OUTPUT"
    if (( BENCH_STATUS != 0 )); then
        echo "ERROR: benchmark exited with status $BENCH_STATUS (run $i)" >&2
        exit "$BENCH_STATUS"
    fi

    # Extract throughput; a run without one would silently skew the statistics.
    if ! MOPS=$(extract_throughput "$OUTPUT"); then
        echo "ERROR: no \"Throughput:\" line in benchmark output (run $i)" >&2
        exit 1
    fi
    RESULTS+=("$MOPS")
    echo ""
done
# Calculate statistics over RESULTS (one throughput value per run).
# Sets SORTED, SUM, AVG, MID, MEDIAN, MIN and MAX, then prints a summary.

# With no results the average would divide by zero and the median/min/max
# lookups would index an empty array, so stop with a clear message instead.
NUM_RESULTS=${#RESULTS[@]}
if (( NUM_RESULTS == 0 )); then
    echo "ERROR: no benchmark results were collected; cannot compute statistics" >&2
    exit 1
fi

echo "======================================"
echo "Summary Statistics"
echo "======================================"

# Sort results for median calculation (one value per array element; the
# global IFS is left untouched).
mapfile -t SORTED < <(printf '%s\n' "${RESULTS[@]}" | sort -n)

# Calculate average: join the values with "+" so bc sums them in one call.
# The IFS change is confined to the command-substitution subshell.
SUM=$(IFS=+; bc <<<"${RESULTS[*]}")
AVG=$(bc <<<"scale=2; $SUM / $NUM_RESULTS")

# Get median: middle element for an odd count, mean of the two middle
# elements for an even count.
MID=$((NUM_RESULTS / 2))
if (( NUM_RESULTS % 2 == 0 )); then
    MEDIAN=$(bc <<<"scale=2; (${SORTED[MID - 1]} + ${SORTED[MID]}) / 2")
else
    MEDIAN=${SORTED[MID]}
fi

# Get min/max from the ends of the sorted array.
MIN=${SORTED[0]}
MAX=${SORTED[NUM_RESULTS - 1]}

echo "Results (M ops/sec):"
for i in "${!RESULTS[@]}"; do
    printf " Run %d: %s\n" "$((i + 1))" "${RESULTS[$i]}"
done
echo ""
echo "Statistics:"
printf " Average: %.2f M ops/sec\n" "$AVG"
printf " Median: %.2f M ops/sec\n" "$MEDIAN"
printf " Min: %.2f M ops/sec\n" "$MIN"
printf " Max: %.2f M ops/sec\n" "$MAX"
printf " Range: %.2f - %.2f M\n" "$MIN" "$MAX"
echo ""
# Performance vs target
TARGET_MIN=95
TARGET_MAX=120

# Print how a median throughput compares with the target band.
# Globals:   TARGET_MIN, TARGET_MAX (read)
# Arguments: $1 - median throughput in M ops/sec
# Outputs:   one "Target Achievement: ..." line on stdout
report_target_achievement() {
    local median=$1
    local pct

    # "+ 0" forces a numeric comparison even if an operand is empty.
    if awk -v m="$median" -v lo="$TARGET_MIN" 'BEGIN { exit !(m + 0 >= lo + 0) }'; then
        # Multiply before dividing so the percentage keeps its precision.
        # Dividing first at one decimal of scale turned e.g. 98/120 into
        # 0.8 and therefore reported 80.0% instead of 81.7%.
        pct=$(awk -v m="$median" -v hi="$TARGET_MAX" 'BEGIN { printf "%.1f", m * 100 / hi }')
        echo "Target Achievement: ${pct}% of ${TARGET_MAX}M target ✅"
    else
        echo "Target Achievement: Below ${TARGET_MIN}M target ❌"
    fi
}

report_target_achievement "$MEDIAN"
echo ""
echo "Benchmark completed successfully!"