Files
hakmem/quick_analyze.py

128 lines
4.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Quick benchmark analysis for Phase 6.10.1"""
import csv
import sys
from collections import defaultdict
import statistics
def parse_csv(filename):
    """Collect avg_ns samples from a benchmark log that mixes CSV and noise.

    Only lines that split into exactly 8 comma-separated fields are
    considered. The first field is taken as the allocator, the second as the
    scenario and the fourth as the integer ``avg_ns`` value; samples that are
    not strictly positive are discarded.

    Returns a ``defaultdict(list)`` keyed by ``"allocator:scenario"``.
    """
    samples = defaultdict(list)
    with open(filename, 'r') as handle:
        for raw in handle:
            row = raw.strip()
            # Blank lines, the CSV header and bracketed debug output carry no data.
            if not row or raw.startswith(('allocator,', '[')):
                continue
            fields = row.split(',')
            if len(fields) != 8:
                continue
            try:
                latency = int(fields[3])
            except ValueError:
                # Non-numeric avg_ns column: treat the line as noise.
                continue
            if latency > 0:
                samples[f"{fields[0]}:{fields[1]}"].append(latency)
    return samples
def analyze(results):
    """Print a per-scenario benchmark report followed by a hakmem summary.

    Args:
        results: Mapping of ``"allocator:scenario"`` keys to lists of
            ``avg_ns`` samples. Keys with fewer than three samples are
            ignored.

    For every scenario the allocators are listed in ascending order of their
    median. When the scenario contains ``mimalloc`` (preferred) or ``system``,
    that allocator is marked as the baseline and every other allocator shows
    its relative difference to it. Scenarios without either allocator are
    listed without any baseline annotation.

    The closing summary compares allocators whose name starts with
    ``hakmem`` against the fastest such variant, for scenarios that have at
    least two of them.
    """
    rule = "=" * 60
    print(rule)
    print("Phase 6.10.1 Benchmark Analysis")
    print(rule)
    print()

    # Calculate statistics per allocator:scenario
    stats = {}
    for key, values in sorted(results.items()):
        if len(values) >= 3:  # Need at least 3 samples
            stats[key] = {
                'median': statistics.median(values),
                'mean': statistics.mean(values),
                'stdev': statistics.stdev(values),
                'min': min(values),
                'max': max(values),
                'count': len(values),
            }

    # Group by scenario
    scenarios = defaultdict(dict)
    for key, stat in stats.items():
        allocator, scenario = key.split(':', 1)
        scenarios[scenario][allocator] = stat

    # Print results per scenario
    for scenario in sorted(scenarios):
        print(f"Scenario: {scenario}")
        print("-" * 60)
        allocators_data = scenarios[scenario]

        # Resolve the baseline afresh for every scenario so that a scenario
        # lacking both candidates neither hits an unbound name nor inherits
        # the baseline of a previous scenario.
        baseline_name = next(
            (name for name in ('mimalloc', 'system') if name in allocators_data),
            None,
        )
        baseline_ns = (
            allocators_data[baseline_name]['median']
            if baseline_name is not None
            else None
        )

        # Print sorted by median
        sorted_allocs = sorted(allocators_data.items(), key=lambda x: x[1]['median'])
        for i, (alloc, stat) in enumerate(sorted_allocs, 1):
            median = stat['median']
            if alloc == baseline_name:
                vs_baseline = " (baseline)"
            elif baseline_ns:
                # Truthiness check also guards against dividing by a zero median.
                diff_pct = (median - baseline_ns) / baseline_ns * 100
                vs_baseline = f" ({diff_pct:+.1f}% vs {baseline_name})"
            else:
                vs_baseline = ""
            print(f" {i}. {alloc:20s}: {median:7.0f} ns "
                  f"(±{stat['stdev']:5.0f}){vs_baseline}")
        print()

    # Overall summary
    print(rule)
    print("Summary - hakmem variants:")
    print(rule)
    for scenario in sorted(scenarios):
        print(f"\n{scenario}:")
        allocators_data = scenarios[scenario]

        # Compare hakmem variants
        hakmem_variants = {
            name: stat
            for name, stat in allocators_data.items()
            if name.startswith('hakmem')
        }
        if len(hakmem_variants) >= 2:
            sorted_variants = sorted(hakmem_variants.items(), key=lambda x: x[1]['median'])
            best_median = sorted_variants[0][1]['median']
            for variant, stat in sorted_variants:
                diff = stat['median'] - best_median
                diff_pct = (diff / best_median) * 100 if best_median > 0 else 0
                print(f" {variant:20s}: {stat['median']:7.0f} ns ({diff_pct:+.1f}%)")
def main(argv=None):
    """Run the benchmark analysis for the CSV path given in ``argv``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv`` when omitted.

    Returns:
        The process exit status: 0 after a report was printed, 1 when the
        path argument is missing, the file cannot be read, or it contains no
        valid benchmark rows.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "quick_analyze.py"
        print(f"Usage: {prog} <benchmark_results.csv>", file=sys.stderr)
        return 1

    path = argv[1]
    try:
        results = parse_csv(path)
    except (OSError, UnicodeDecodeError) as err:
        # Report a missing/unreadable/non-text file instead of a raw traceback.
        print(f"Cannot read {path}: {err}", file=sys.stderr)
        return 1

    if not results:
        print("No valid benchmark data found in CSV", file=sys.stderr)
        return 1

    analyze(results)
    return 0


if __name__ == '__main__':
    sys.exit(main())