Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions
--- a/benchmarks/scripts/mid/compare_mid_mt_allocators.sh
+++ b/benchmarks/scripts/mid/compare_mid_mt_allocators.sh
@ -0,0 +1,122 @@
+#!/bin/bash
+# Compare Mid Range MT performance across different allocators
+#
+# Runs bench_mid_large_mt with:
+# - System allocator (glibc)
+# - mimalloc
+# - HAKX (our implementation)
+#
+# Usage: ./compare_mid_mt_allocators.sh [threads] [cycles] [ws] [seed] [runs]
+#
+# Every argument is optional; omitted ones fall back to the defaults below.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Print the directory the benchmarks are built and run from.
+#
+# Starts at the script's parent directory and walks upward, printing the
+# first directory that contains a makefile: the "make <target>" calls in
+# this script only work from such a directory, and the script may live
+# several levels below it. Falls back to the script's parent directory
+# (the historical value) when no makefile is found on the way up.
+find_project_root() {
+    local start dir
+    start="$(cd "$SCRIPT_DIR/.." && pwd)"
+    dir="$start"
+    while :; do
+        if [ -f "$dir/Makefile" ] || [ -f "$dir/GNUmakefile" ] || [ -f "$dir/makefile" ]; then
+            printf '%s\n' "$dir"
+            return 0
+        fi
+        [ "$dir" = "/" ] && break
+        dir="$(dirname "$dir")"
+    done
+    printf '%s\n' "$start"
+}
+PROJECT_ROOT="$(find_project_root)"
+
+# Default parameters
+THREADS=${1:-4}
+CYCLES=${2:-60000}
+WORKING_SET=${3:-256}
+SEED=${4:-1}
+RUNS=${5:-3}
+
+# CPU affinity: command prefix used to pin every benchmark run to cores 0-3
+TASKSET="taskset -c 0-3"
+
+echo "=========================================="
+echo "Mid Range MT Allocator Comparison"
+echo "=========================================="
+echo "Configuration:"
+echo "  Threads:     $THREADS"
+echo "  Cycles:      $CYCLES"
+echo "  Working Set: $WORKING_SET"
+echo "  Seed:        $SEED"
+echo "  Runs/each:   $RUNS"
+echo ""
+
+cd "$PROJECT_ROOT"
+
+# Make sure a binary exists for every allocator variant; only the missing
+# ones are built, existing binaries are left untouched.
+for variant in system mi hakx; do
+    TARGET="bench_mid_large_mt_${variant}"
+    # Already present: nothing to build for this variant.
+    [ -f "./$TARGET" ] && continue
+    echo "Building $TARGET..."
+    make "$TARGET"
+done
+
+echo ""
+echo "=========================================="
+
+# Run one allocator variant $RUNS times and report its median throughput.
+#
+# Arguments: $1 - variant suffix of the binary (bench_mid_large_mt_<variant>)
+# Globals:   RUNS, TASKSET, THREADS, CYCLES, WORKING_SET, SEED (read)
+# Outputs:   progress lines on stderr; the median alone on stdout, so that
+#            callers capturing $(run_bench ...) receive just the number
+# Returns:   0 on success; 1 if a run exits non-zero, prints no
+#            "Throughput:" line, or no run was performed at all
+run_bench() {
+    local variant=$1
+    local -a results=()
+    local i output mops median status
+
+    {
+        echo ""
+        echo "Testing: $variant"
+        echo "----------------------------------------"
+    } >&2
+
+    for i in $(seq 1 "$RUNS"); do
+        status=0
+        # $TASKSET is left unquoted on purpose: it holds a command plus its
+        # arguments and must be split into words.
+        output=$($TASKSET "./bench_mid_large_mt_${variant}" \
+            "$THREADS" "$CYCLES" "$WORKING_SET" "$SEED" 2>&1) || status=$?
+        if [ "$status" -ne 0 ]; then
+            echo "ERROR: bench_mid_large_mt_${variant} exited with status $status (run $i):" >&2
+            printf '%s\n' "$output" >&2
+            return 1
+        fi
+
+        # The throughput is the second field of the first "Throughput:" line.
+        mops=$(printf '%s\n' "$output" | awk '/Throughput:/ { print $2; exit }')
+        if [ -z "$mops" ]; then
+            echo "ERROR: no \"Throughput:\" line in bench_mid_large_mt_${variant} output (run $i):" >&2
+            printf '%s\n' "$output" >&2
+            return 1
+        fi
+
+        results+=("$mops")
+        printf "  Run %d: %s M ops/sec\n" "$i" "$mops" >&2
+    done
+
+    # A RUNS value below 1 (or a non-numeric one) produces no iterations.
+    if [ "${#results[@]}" -eq 0 ]; then
+        echo "ERROR: no runs were performed for $variant (RUNS=$RUNS)" >&2
+        return 1
+    fi
+
+    # Median of the numerically sorted results: the middle value for an odd
+    # count, the mean of the two middle values for an even count.
+    median=$(printf "%s\n" "${results[@]}" | sort -n | awk '{
+        a[NR] = $1
+    }
+    END {
+        if (NR % 2 == 1) {
+            print a[int(NR/2) + 1]
+        } else {
+            median = (a[NR/2] + a[NR/2 + 1]) / 2
+            printf "%.2f", median
+        }
+    }')
+
+    echo "  Median: $median M ops/sec" >&2
+    echo "$median"
+}
+
+# Run benchmarks
+echo "Running benchmarks..."
+
+SYSTEM_RESULT=$(run_bench "system")
+MI_RESULT=$(run_bench "mi")
+HAKX_RESULT=$(run_bench "hakx")
+
+# Summary
+echo ""
+echo "=========================================="
+echo "Summary"
+echo "=========================================="
+printf "%-20s %10s %15s\n" "Allocator" "Throughput" "vs System"
+echo "----------------------------------------"
+printf "%-20s %10.2f M  %15s\n" "System (glibc)" "$SYSTEM_RESULT" "1.00x"
+
+MI_RATIO=$(echo "scale=2; $MI_RESULT / $SYSTEM_RESULT" | bc)
+printf "%-20s %10.2f M  %15s\n" "mimalloc" "$MI_RESULT" "${MI_RATIO}x"
+
+HAKX_RATIO=$(echo "scale=2; $HAKX_RESULT / $SYSTEM_RESULT" | bc)
+printf "%-20s %10.2f M  %15s\n" "HAKX (Mid MT)" "$HAKX_RESULT" "${HAKX_RATIO}x"
+
+echo ""
+echo "HAKX vs mimalloc:"
+HAKX_VS_MI=$(echo "scale=2; $HAKX_RESULT / $MI_RESULT * 100" | bc)
+printf "  %.1f%% of mimalloc performance\n" "$HAKX_VS_MI"
+
+# Winner
+echo ""
+if (( $(echo "$HAKX_RESULT > $MI_RESULT" | bc -l) )); then
+    echo "🏆 HAKX is FASTER than mimalloc!"
+elif (( $(echo "$HAKX_RESULT > $SYSTEM_RESULT * 1.5" | bc -l) )); then
+    echo "✅ HAKX significantly faster than system allocator (>1.5x)"
+else
+    echo "⚠️  HAKX needs optimization"
+fi
+
+echo ""
+echo "Comparison completed!"
--- a/benchmarks/scripts/mid/run_mid_mt_bench.sh
+++ b/benchmarks/scripts/mid/run_mid_mt_bench.sh
@ -0,0 +1,123 @@
+#!/bin/bash
+# Mid Range MT Benchmark - Optimal Configuration
+#
+# Parameters discovered through performance tuning:
+# - threads=4: Optimal for quad-core systems
+# - cycles=60000: Sufficient iterations for stable results
+# - ws=256: Working set that fits in L3 cache (4MB)
+# - taskset -c 0-3: Pin to cores 0-3 for consistency
+#
+# Performance target: 95-99 M ops/sec
+# vs System allocator: ~1.87x faster
+#
+# Usage: ./run_mid_mt_bench.sh [threads] [cycles] [ws] [seed] [runs]
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Print the directory the benchmark is built and run from.
+#
+# Starts at the script's parent directory and walks upward, printing the
+# first directory that contains a makefile: the "make bench_mid_large_mt_hakx"
+# call in this script only works from such a directory, and the script may
+# live several levels below it. Falls back to the script's parent directory
+# (the historical value) when no makefile is found on the way up.
+find_project_root() {
+    local start dir
+    start="$(cd "$SCRIPT_DIR/.." && pwd)"
+    dir="$start"
+    while :; do
+        if [ -f "$dir/Makefile" ] || [ -f "$dir/GNUmakefile" ] || [ -f "$dir/makefile" ]; then
+            printf '%s\n' "$dir"
+            return 0
+        fi
+        [ "$dir" = "/" ] && break
+        dir="$(dirname "$dir")"
+    done
+    printf '%s\n' "$start"
+}
+PROJECT_ROOT="$(find_project_root)"
+
+# Default parameters (optimized for cache efficiency)
+THREADS=${1:-4}
+CYCLES=${2:-60000}
+WORKING_SET=${3:-256}
+SEED=${4:-1}
+RUNS=${5:-5}
+
+# CPU affinity (use cores 0-3): command prefix applied to every benchmark run
+TASKSET="taskset -c 0-3"
+
+echo "======================================"
+echo "Mid Range MT Benchmark (8-32KB)"
+echo "======================================"
+echo "Configuration:"
+echo "  Threads:     $THREADS"
+echo "  Cycles:      $CYCLES"
+echo "  Working Set: $WORKING_SET"
+echo "  Seed:        $SEED"
+echo "  Runs:        $RUNS"
+echo "  CPU Affinity: cores 0-3"
+echo ""
+echo "Working Set Analysis:"
+# Estimate in KB, using an average of 16KB per allocation
+WS_SIZE=$((WORKING_SET * 16))
+echo "  Memory: ~${WS_SIZE} KB per thread"
+echo "  Total:  ~$((WS_SIZE * THREADS / 1024)) MB"
+echo ""
+
+cd "$PROJECT_ROOT"
+
+# Check if benchmark exists
+if [ ! -f "./bench_mid_large_mt_hakx" ]; then
+    echo "ERROR: bench_mid_large_mt_hakx not found!"
+    echo "Building benchmark..."
+    make bench_mid_large_mt_hakx
+    echo ""
+fi
+
+# Run benchmark multiple times and collect results
+echo "Running benchmark $RUNS times..."
+echo ""
+
+RESULTS=()
+for i in $(seq 1 $RUNS); do
+    echo "Run $i/$RUNS:"
+    OUTPUT=$($TASKSET ./bench_mid_large_mt_hakx $THREADS $CYCLES $WORKING_SET $SEED 2>&1)
+    echo "$OUTPUT"
+
+    # Extract throughput
+    MOPS=$(echo "$OUTPUT" | grep "Throughput:" | awk '{print $2}')
+    RESULTS+=($MOPS)
+    echo ""
+done
+
+# Calculate statistics
+echo "======================================"
+echo "Summary Statistics"
+echo "======================================"
+
+# Sort results for median calculation
+IFS=$'\n' SORTED=($(sort -n <<<"${RESULTS[*]}"))
+unset IFS
+
+# Calculate average
+SUM=0
+for val in "${RESULTS[@]}"; do
+    SUM=$(echo "$SUM + $val" | bc)
+done
+AVG=$(echo "scale=2; $SUM / ${#RESULTS[@]}" | bc)
+
+# Get median
+MID=$((${#RESULTS[@]} / 2))
+if [ $((${#RESULTS[@]} % 2)) -eq 0 ]; then
+    MEDIAN=$(echo "scale=2; (${SORTED[$MID-1]} + ${SORTED[$MID]}) / 2" | bc)
+else
+    MEDIAN=${SORTED[$MID]}
+fi
+
+# Get min/max
+MIN=${SORTED[0]}
+MAX=${SORTED[-1]}
+
+echo "Results (M ops/sec):"
+for i in "${!RESULTS[@]}"; do
+    printf "  Run %d: %s\n" $((i+1)) "${RESULTS[$i]}"
+done
+echo ""
+echo "Statistics:"
+printf "  Average: %.2f M ops/sec\n" $AVG
+printf "  Median:  %.2f M ops/sec\n" $MEDIAN
+printf "  Min:     %.2f M ops/sec\n" $MIN
+printf "  Max:     %.2f M ops/sec\n" $MAX
+printf "  Range:   %.2f - %.2f M\n" $MIN $MAX
+echo ""
+
+# Performance vs target
+# TARGET_MIN is the pass threshold for the median; TARGET_MAX is the value the
+# achievement percentage is measured against (both in M ops/sec).
+TARGET_MIN=95
+TARGET_MAX=120
+if (( $(echo "$MEDIAN >= $TARGET_MIN" | bc -l) )); then
+    # Multiply before dividing: bc truncates each division to "scale" digits,
+    # so MEDIAN / TARGET_MAX at scale=1 would collapse to a single decimal
+    # (e.g. 0.7) before being turned into a percentage.
+    PCT=$(echo "scale=1; $MEDIAN * 100 / $TARGET_MAX" | bc)
+    echo "Target Achievement: ${PCT}% of ${TARGET_MAX}M target ✅"
+else
+    echo "Target Achievement: Below ${TARGET_MIN}M target ❌"
+fi
+
+echo ""
+echo "Benchmark completed successfully!"