#!/usr/bin/env python3
"""
analyze_results.py - Analyze benchmark results for paper
"""

import csv
import statistics
import sys
from collections import defaultdict


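# load_results() expects one CSV row per benchmark run and reads these columns:
# allocator, scenario, avg_ns, soft_pf, hard_pf, ops_per_sec. Illustrative row
# (values and column order are made up, not taken from real results):
#   hakmem-baseline,json,1532,48,0,652000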
def load_results(filename):
    """Load CSV rows into a nested dict: scenario -> allocator -> list of runs."""
    data = defaultdict(lambda: defaultdict(list))

    with open(filename, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            allocator = row['allocator']
            scenario = row['scenario']
            avg_ns = int(row['avg_ns'])
            soft_pf = int(row['soft_pf'])
            hard_pf = int(row['hard_pf'])
            ops_per_sec = int(row['ops_per_sec'])

            data[scenario][allocator].append({
                'avg_ns': avg_ns,
                'soft_pf': soft_pf,
                'hard_pf': hard_pf,
                'ops_per_sec': ops_per_sec
            })

    return data


def analyze(data):
    """Print per-scenario latency percentiles, page-fault medians, and a winner verdict."""
    print("=" * 80)
    print("📊 FULL BENCHMARK RESULTS (50 runs)")
    print("=" * 80)
    print()

    for scenario in ['json', 'mir', 'vm', 'mixed']:
        print(f"## {scenario.upper()} Scenario")
        print("-" * 80)

        allocators = ['hakmem-baseline', 'hakmem-evolving', 'system']

        # Header
        print(f"{'Allocator':<20} {'Median (ns)':<15} {'P95 (ns)':<15} {'P99 (ns)':<15} {'PF (median)':<15}")
        print("-" * 80)

        results = {}
        for allocator in allocators:
            if allocator not in data[scenario]:
                continue

            latencies = [r['avg_ns'] for r in data[scenario][allocator]]
            page_faults = [r['soft_pf'] for r in data[scenario][allocator]]

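            # statistics.quantiles(xs, n=k) returns k-1 cut points, so index 18
            # of n=20 is the ~95th percentile and index 98 of n=100 is the
            # ~99th percentile; with fewer than 100 samples the P99 expression
            # below falls back to the observed maximum.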
            median_ns = statistics.median(latencies)
            p95_ns = statistics.quantiles(latencies, n=20)[18]  # 95th percentile
            p99_ns = statistics.quantiles(latencies, n=100)[98] if len(latencies) >= 100 else max(latencies)
            median_pf = statistics.median(page_faults)

            results[allocator] = median_ns

            print(f"{allocator:<20} {median_ns:<15.1f} {p95_ns:<15.1f} {p99_ns:<15.1f} {median_pf:<15.1f}")

        # Winner analysis
        if 'hakmem-baseline' in results and 'system' in results:
            baseline = results['hakmem-baseline']
            system = results['system']
            improvement = ((system - baseline) / system) * 100

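            # Positive improvement means hakmem-baseline has the lower median.
            # Worked example with made-up numbers: system = 1000 ns and
            # baseline = 900 ns gives (1000 - 900) / 1000 * 100 = +10%.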
            # Require a 2% margin on either side; otherwise call it a tie.
            if improvement > 2:
                print(f"\n🥇 Winner: hakmem-baseline ({improvement:+.1f}% faster than system)")
            elif improvement < -2:
                print(f"\n🥈 Winner: system ({-improvement:+.1f}% faster than hakmem)")
            else:
                print(f"\n🤝 Tie: hakmem ≈ system (within 2%)")

        print()


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <results.csv>")
        sys.exit(1)

    data = load_results(sys.argv[1])
    analyze(data)
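
# Typical invocation (the filename is illustrative):
#   python3 analyze_results.py results.csv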