# hakmem/analyze_soak.py

#!/usr/bin/env python3
"""Analyze soak test CSV results for Phase 50."""

import sys
import csv
import statistics

def _pct_change(first, last):
    """Return the percentage change from *first* to *last*.

    A zero baseline makes the ratio undefined, so NaN is returned rather
    than letting ZeroDivisionError discard every other metric of the run.
    """
    if first == 0:
        return float('nan')
    return ((last - first) / first) * 100


def analyze_csv(filename):
    """Analyze a single soak-test CSV file and return its metrics.

    Every data row must provide the columns ``throughput_ops_s`` and
    ``peak_rss_mb``; both are parsed as floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        ``None`` when the file has no data rows, otherwise a dict with:

        * ``samples`` - number of rows read
        * ``mean_throughput`` - mean over all rows
        * ``first_throughput`` / ``last_throughput`` - mean of the first /
          last five rows (of all rows when there are fewer than five)
        * ``throughput_drift_pct`` - change from first to last window, in %
        * ``stddev_throughput`` - sample standard deviation (0 for one row)
        * ``cv_pct`` - coefficient of variation, in %
        * ``first_rss`` / ``last_rss`` / ``peak_rss`` - first, last, max RSS
        * ``rss_drift_pct`` - change from first to last RSS row, in %

        A percentage whose baseline is zero is reported as NaN.

    Raises:
        OSError: The file cannot be opened.
        KeyError: A required column is missing.
        ValueError: A cell does not parse as a float.
        TypeError: A row is too short, leaving a required cell empty.
    """
    throughputs = []
    rss_values = []

    # The csv module asks for newline='' so that newlines embedded in
    # quoted fields are handled by the reader, not by the text layer.
    with open(filename, 'r', newline='') as f:
        for row in csv.DictReader(f):
            throughput = float(row['throughput_ops_s'])
            rss = float(row['peak_rss_mb'])
            throughputs.append(throughput)
            rss_values.append(rss)

    if not throughputs:
        return None

    # Slicing already clamps to the list length, so short runs simply use
    # every sample for both windows.
    first_throughput = statistics.mean(throughputs[:5])
    last_throughput = statistics.mean(throughputs[-5:])
    throughput_drift_pct = _pct_change(first_throughput, last_throughput)

    mean_throughput = statistics.mean(throughputs)
    # statistics.stdev needs at least two data points.
    stddev_throughput = statistics.stdev(throughputs) if len(throughputs) > 1 else 0
    if mean_throughput == 0:
        # Coefficient of variation is undefined for a zero mean.
        cv_pct = float('nan')
    else:
        cv_pct = (stddev_throughput / mean_throughput) * 100

    first_rss = rss_values[0]
    last_rss = rss_values[-1]
    rss_drift_pct = _pct_change(first_rss, last_rss)
    peak_rss = max(rss_values)

    return {
        'samples': len(throughputs),
        'mean_throughput': mean_throughput,
        'first_throughput': first_throughput,
        'last_throughput': last_throughput,
        'throughput_drift_pct': throughput_drift_pct,
        'stddev_throughput': stddev_throughput,
        'cv_pct': cv_pct,
        'first_rss': first_rss,
        'last_rss': last_rss,
        'peak_rss': peak_rss,
        'rss_drift_pct': rss_drift_pct,
    }

def main():
    """Report soak metrics for each allocator, then a comparison table.

    Each known allocator's CSV is passed to ``analyze_csv``. A file that
    yields metrics gets a detailed section on stdout and a row in the final
    summary; a file that yields nothing is skipped silently, and any
    exception raised while handling a file is reported on stderr without
    stopping the remaining files.
    """
    rule = '=' * 60
    files = {
        'hakmem FAST': 'soak_fast_5min.csv',
        'mimalloc': 'soak_mimalloc_5min.csv',
        'system malloc': 'soak_system_5min.csv',
    }

    results = {}
    for name, filename in files.items():
        try:
            metrics = analyze_csv(filename)
            if not metrics:
                # No data rows: nothing to report for this allocator.
                continue
            results[name] = metrics

            print(f"\n{rule}")
            print(f"Allocator: {name}")
            print(f"{rule}")
            print(f"Samples: {metrics['samples']}")
            print(f"Mean throughput: {metrics['mean_throughput']/1e6:.2f} M ops/s")
            print(f"First 5 avg: {metrics['first_throughput']/1e6:.2f} M ops/s")
            print(f"Last 5 avg: {metrics['last_throughput']/1e6:.2f} M ops/s")
            print(f"Throughput drift: {metrics['throughput_drift_pct']:+.2f}%")
            print(f"Throughput CV: {metrics['cv_pct']:.2f}%")
            print(f"First RSS: {metrics['first_rss']:.2f} MB")
            print(f"Last RSS: {metrics['last_rss']:.2f} MB")
            print(f"Peak RSS: {metrics['peak_rss']:.2f} MB")
            print(f"RSS drift: {metrics['rss_drift_pct']:+.2f}%")
        except Exception as e:
            print(f"Error processing {name}: {e}", file=sys.stderr)

    # Summary table: one row per allocator that produced metrics.
    print(f"\n{rule}")
    print("Summary")
    print(f"{rule}")
    print(f"{'Allocator':<20} {'Throughput':>12} {'TP Drift':>10} {'CV':>8} {'Peak RSS':>10} {'RSS Drift':>10}")
    print(f"{'':<20} {'(M ops/s)':>12} {'(%)':>10} {'(%)':>8} {'(MB)':>10} {'(%)':>10}")
    print("-" * 80)
    for name in ('hakmem FAST', 'mimalloc', 'system malloc'):
        m = results.get(name)
        if m is None:
            continue
        row = f"{name:<20} {m['mean_throughput']/1e6:>12.2f} {m['throughput_drift_pct']:>10.2f} {m['cv_pct']:>8.2f} {m['peak_rss']:>10.2f} {m['rss_drift_pct']:>10.2f}"
        print(row)

# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()