#!/usr/bin/env python3
"""
Calculate percentiles (p50/p90/p99/p999) from epoch soak CSV data.
"""

import csv
import math
import statistics
import sys

# Name of the CSV column holding one throughput sample (ops/sec) per row.
THROUGHPUT_COLUMN = 'throughput_ops_s'

# Order in which statistics are printed by main().
_REPORT_ORDER = ('p50', 'p90', 'p99', 'p999', 'mean', 'std', 'min', 'max')


def percentile(data, p):
    """Calculate percentile p (0-100) from sorted data.

    Linearly interpolates between the two neighbouring ranks.

    Args:
        data: Sequence of numbers, already sorted in ascending order.
        p: Requested percentile. Values outside 0-100 are clamped, so a
            negative p yields the minimum and p > 100 yields the maximum.

    Returns:
        The interpolated percentile value, or 0 when data is empty.
    """
    n = len(data)
    if n == 0:
        return 0
    # Clamp so that a negative p cannot extrapolate below the smallest sample.
    p = min(max(p, 0), 100)
    k = (n - 1) * p / 100.0
    lower = int(k)
    upper = lower + 1
    if upper >= n:
        return data[-1]
    return data[lower] + (data[upper] - data[lower]) * (k - lower)


def _summarize(values):
    """Return p50/p90/p99/p999, mean, min, max and std for a list of numbers.

    When values is empty every statistic is NaN, so callers can still format
    and print the result without special-casing.
    """
    if not values:
        keys = ('p50', 'p90', 'p99', 'p999', 'mean', 'min', 'max', 'std')
        return dict.fromkeys(keys, float('nan'))
    ordered = sorted(values)
    return {
        'p50': percentile(ordered, 50),
        'p90': percentile(ordered, 90),
        'p99': percentile(ordered, 99),
        'p999': percentile(ordered, 99.9),
        'mean': statistics.mean(values),
        'min': ordered[0],
        'max': ordered[-1],
        # Sample standard deviation is undefined for a single data point.
        'std': statistics.stdev(values) if len(values) > 1 else 0,
    }


def calculate_percentiles(csv_file):
    """Calculate percentiles from CSV file containing throughput data.

    Reads the 'throughput_ops_s' column and derives a latency proxy
    (1e9 / throughput, i.e. ns/op) from each positive sample.

    Rows whose throughput cell is missing, non-numeric or non-finite are
    skipped and counted in a warning on stderr. Samples that are zero or
    negative are kept in the throughput statistics but excluded from the
    latency proxy, because 1/throughput is undefined for them.

    Args:
        csv_file: Path to a CSV file with a header row.

    Returns:
        A dict with 'throughput' and 'latency_ns' entries, each mapping
        'p50', 'p90', 'p99', 'p999', 'mean', 'min', 'max' and 'std' to a
        number; or None (after printing an error to stderr) when the column
        is missing or the file holds no usable samples.

    Raises:
        OSError: If the file cannot be opened.
    """
    throughputs = []
    skipped = 0
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, 'r', newline='') as f:
        reader = csv.DictReader(f)
        header = reader.fieldnames
        if header is not None and THROUGHPUT_COLUMN not in header:
            print(f"Error: Column '{THROUGHPUT_COLUMN}' not found in {csv_file}",
                  file=sys.stderr)
            return None
        for row in reader:
            try:
                value = float(row[THROUGHPUT_COLUMN])
            except (TypeError, ValueError):
                # TypeError: short row (cell is None); ValueError: bad text.
                skipped += 1
                continue
            if not math.isfinite(value):
                skipped += 1
                continue
            throughputs.append(value)

    if skipped:
        print(f"Warning: skipped {skipped} row(s) with invalid throughput",
              file=sys.stderr)

    if not throughputs:
        print(f"Error: No data found in {csv_file}", file=sys.stderr)
        return None

    # Latency proxy: throughput is ops/sec, so 1/throughput is sec/op;
    # multiplying by 1e9 gives ns/op.
    latencies = [1e9 / t for t in throughputs if t > 0]
    excluded = len(throughputs) - len(latencies)
    if excluded:
        print(f"Warning: {excluded} non-positive throughput sample(s) "
              "excluded from latency proxy", file=sys.stderr)

    return {
        'throughput': _summarize(throughputs),
        'latency_ns': _summarize(latencies),
    }


def format_number(n, decimals=2):
    """Format number with commas and fixed decimals."""
    return f"{n:,.{decimals}f}"


def main():
    """Command-line entry point: print throughput and latency statistics."""
    if len(sys.argv) != 2:
        print("Usage: calculate_percentiles.py <csv_file>", file=sys.stderr)
        sys.exit(1)

    csv_file = sys.argv[1]
    try:
        results = calculate_percentiles(csv_file)
    except OSError as err:
        print(f"Error: Cannot read {csv_file}: {err}", file=sys.stderr)
        sys.exit(1)

    if results is None:
        sys.exit(1)

    print(f"Results for: {csv_file}")

    # (section title, key in results, decimals used when printing)
    sections = (
        ("\n=== Throughput (ops/sec) ===", 'throughput', 0),
        ("\n=== Latency Proxy (ns/op) ===", 'latency_ns', 2),
    )
    for title, key, decimals in sections:
        print(title)
        stats = results[key]
        for name in _REPORT_ORDER:
            print(f" {name}: {format_number(stats[name], decimals)}")


if __name__ == '__main__':
    main()