Files
hakmem/analyze_phase8_benchmark.py

191 lines
8.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import re
import statistics
# Benchmark throughput samples in ops/s, copied from the benchmark output.
# Keys follow the pattern "<allocator>_<working set size>"; each list holds
# the individual runs for that configuration.
results = {
    'hakmem_256': [
        78_480_676, 78_099_247, 77_034_450, 81_120_430, 81_206_714,
    ],
    'system_256': [
        87_329_938, 86_497_843, 87_514_376, 85_308_713, 86_630_819,
    ],
    'mimalloc_256': [
        115_842_807, 115_180_313, 116_209_200, 112_542_094, 114_950_573,
    ],
    'hakmem_8192': [
        16_504_443, 15_799_180, 16_916_987, 16_687_009, 16_582_555,
    ],
    'system_8192': [
        56_095_157, 57_843_156, 56_999_206, 57_717_254, 56_720_055,
    ],
    'mimalloc_8192': [
        96_824_532, 96_117_137, 95_521_242, 97_733_856, 96_327_554,
    ],
}
def analyze(name, data):
    """Summarize one benchmark series of throughput samples.

    Args:
        name: Label for the series; returned unchanged under ``'name'``.
        data: Non-empty sequence of samples in ops/s.

    Returns:
        A dict with the keys ``name``, ``mean`` (ops/s), ``mean_m``,
        ``min_m`` and ``max_m`` (millions of ops/s), ``stdev_pct`` (sample
        standard deviation as a percentage of the mean) and ``data`` (the
        input sequence itself).

    Raises:
        statistics.StatisticsError: If ``data`` is empty.
    """
    mean = statistics.mean(data)

    # statistics.stdev() needs at least two samples; a single run has no
    # measurable spread, so report 0 instead of raising.
    stdev = statistics.stdev(data) if len(data) > 1 else 0.0

    # An all-zero series has mean 0; avoid dividing by it.
    stdev_pct = (stdev / mean) * 100 if mean else 0.0

    # Convert to M ops/s
    return {
        'name': name,
        'mean': mean,
        'mean_m': mean / 1_000_000,
        'stdev_pct': stdev_pct,
        'min_m': min(data) / 1_000_000,
        'max_m': max(data) / 1_000_000,
        'data': data,
    }
# Report banner: rule, title, rule, then one blank line.
print(
    "=" * 80,
    "Phase 8 Comprehensive Allocator Comparison - Analysis",
    "=" * 80,
    "",
    sep="\n",
)

# Summarize every benchmark series once; the report sections below read
# their numbers from `stats`, keyed the same way as `results`.
stats = {series: analyze(series, samples) for series, samples in results.items()}
# --- Working set 256: markdown summary table ---
print("## Working Set 256 (Hot cache, Phase 7 comparison)")
print()
print("| Allocator | Avg (M ops/s) | StdDev (%) | Min - Max | vs HAKMEM |")
print("|----------------|---------------|------------|----------------|-----------|")

# Mean throughput (ops/s) per allocator; later report sections reuse these.
hakmem_256_mean = stats['hakmem_256']['mean']
system_256_mean = stats['system_256']['mean']
mimalloc_256_mean = stats['mimalloc_256']['mean']

# One template for all rows; the last column is pre-formatted because the
# HAKMEM baseline row prints a literal "1.00x".
row_256 = "| {label} | {s[mean_m]:6.1f} | ±{s[stdev_pct]:4.1f}% | {s[min_m]:5.1f} - {s[max_m]:5.1f} | {ratio} |"
for label_256, series_256, ratio_256 in (
    ("HAKMEM Phase 8", 'hakmem_256', "1.00x"),
    ("System malloc", 'system_256', f"{system_256_mean/hakmem_256_mean:5.2f}x"),
    ("mimalloc", 'mimalloc_256', f"{mimalloc_256_mean/hakmem_256_mean:5.2f}x"),
):
    print(row_256.format(label=label_256, s=stats[series_256], ratio=ratio_256))
print()
# --- Working set 8192: markdown summary table ---
print("## Working Set 8192 (Realistic workload)")
print()
print("| Allocator | Avg (M ops/s) | StdDev (%) | Min - Max | vs HAKMEM |")
print("|----------------|---------------|------------|----------------|-----------|")

# Mean throughput (ops/s) per allocator; later report sections reuse these.
hakmem_8192_mean = stats['hakmem_8192']['mean']
system_8192_mean = stats['system_8192']['mean']
mimalloc_8192_mean = stats['mimalloc_8192']['mean']

# One template for all rows; the last column is pre-formatted because the
# HAKMEM baseline row prints a literal "1.00x".
row_8192 = "| {label} | {s[mean_m]:6.1f} | ±{s[stdev_pct]:4.1f}% | {s[min_m]:5.1f} - {s[max_m]:5.1f} | {ratio} |"
for label_8192, series_8192, ratio_8192 in (
    ("HAKMEM Phase 8", 'hakmem_8192', "1.00x"),
    ("System malloc", 'system_8192', f"{system_8192_mean/hakmem_8192_mean:5.2f}x"),
    ("mimalloc", 'mimalloc_8192', f"{mimalloc_8192_mean/hakmem_8192_mean:5.2f}x"),
):
    print(row_8192.format(label=label_8192, s=stats[series_8192], ratio=ratio_8192))
print()
# --- Performance Analysis: per-working-set throughput and relative gaps ---
print("=" * 80)
print("Performance Analysis")
print("=" * 80)
print()

# Both working sets get the same report, so drive it from one table of
# (section heading, key suffix in `stats`).
for section_heading, ws_suffix in (
    ("### 1. Working Set 256 (Hot Cache) Results", '256'),
    ("### 2. Working Set 8192 (Realistic Workload) Results", '8192'),
):
    hakmem_stats = stats[f'hakmem_{ws_suffix}']
    system_stats = stats[f'system_{ws_suffix}']
    mimalloc_stats = stats[f'mimalloc_{ws_suffix}']

    print(section_heading)
    print()
    print(f"- HAKMEM Phase 8: {hakmem_stats['mean_m']:.1f} M ops/s")
    # "Nx faster" is the other allocator's throughput relative to HAKMEM.
    print(f"- System malloc: {system_stats['mean_m']:.1f} M ops/s ({system_stats['mean'] / hakmem_stats['mean']:.2f}x faster)")
    print(f"- mimalloc: {mimalloc_stats['mean_m']:.1f} M ops/s ({mimalloc_stats['mean'] / hakmem_stats['mean']:.2f}x faster)")
    print()
    # "% slower" is HAKMEM's shortfall relative to the OTHER allocator:
    # (1 - hakmem/other) * 100. The inverse ratio (other/hakmem - 1) would
    # report how much faster the other allocator is and can exceed 100%.
    print("HAKMEM is **{:.1f}% slower** than System malloc and **{:.1f}% slower** than mimalloc".format(
        (1 - hakmem_stats['mean'] / system_stats['mean']) * 100,
        (1 - hakmem_stats['mean'] / mimalloc_stats['mean']) * 100,
    ))
    print()
# --- Critical Observations: how each allocator scales from WS256 to WS8192 ---
print("=" * 80)
print("Critical Observations")
print("=" * 80)
print()
print("### HAKMEM Performance Gap Analysis")
print()

# Calculate performance degradation from WS256 to WS8192
# (ratio of mean throughputs; larger means a bigger slowdown).
hakmem_degradation = stats['hakmem_256']['mean_m'] / stats['hakmem_8192']['mean_m']
system_degradation = stats['system_256']['mean_m'] / stats['system_8192']['mean_m']
mimalloc_degradation = stats['mimalloc_256']['mean_m'] / stats['mimalloc_8192']['mean_m']

print("Performance degradation from WS256 to WS8192:")
# The two throughputs are separated by an arrow so they do not run together
# into a single number.
print(f"- HAKMEM: {hakmem_degradation:.2f}x slowdown ({stats['hakmem_256']['mean_m']:.1f} → {stats['hakmem_8192']['mean_m']:.1f} M ops/s)")
print(f"- System: {system_degradation:.2f}x slowdown ({stats['system_256']['mean_m']:.1f} → {stats['system_8192']['mean_m']:.1f} M ops/s)")
print(f"- mimalloc: {mimalloc_degradation:.2f}x slowdown ({stats['mimalloc_256']['mean_m']:.1f} → {stats['mimalloc_8192']['mean_m']:.1f} M ops/s)")
print()
print(f"HAKMEM degrades **{hakmem_degradation/system_degradation:.2f}x MORE** than System malloc")
print(f"HAKMEM degrades **{hakmem_degradation/mimalloc_degradation:.2f}x MORE** than mimalloc")
print()

# Signed throughput difference of HAKMEM relative to each competitor, in
# percent (negative = HAKMEM is slower). Derived from the measured means so
# the text cannot drift away from the data.
gap_256_vs_system = (stats['hakmem_256']['mean'] / stats['system_256']['mean'] - 1) * 100
gap_256_vs_mimalloc = (stats['hakmem_256']['mean'] / stats['mimalloc_256']['mean'] - 1) * 100
gap_8192_vs_system = (stats['hakmem_8192']['mean'] / stats['system_8192']['mean'] - 1) * 100
gap_8192_vs_mimalloc = (stats['hakmem_8192']['mean'] / stats['mimalloc_8192']['mean'] - 1) * 100

print("### Key Issues Identified")
print()
print("1. **Hot Cache Performance (WS256)**:")
print(f" - HAKMEM: {stats['hakmem_256']['mean_m']:.1f} M ops/s")
print(f" - Gap: {gap_256_vs_system:+.1f}% vs System, {gap_256_vs_mimalloc:+.1f}% vs mimalloc")
print(" - Issue: Fast-path overhead (TLS drain, SuperSlab lookup)")
print()
print("2. **Realistic Workload Performance (WS8192)**:")
print(f" - HAKMEM: {stats['hakmem_8192']['mean_m']:.1f} M ops/s")
print(f" - Gap: {gap_8192_vs_system:+.1f}% vs System, {gap_8192_vs_mimalloc:+.1f}% vs mimalloc")
print(" - Issue: SEVERE - SuperSlab scaling, fragmentation, TLB pressure")
print()
print("3. **Scalability Problem**:")
print(f" - HAKMEM loses {hakmem_degradation:.1f}x performance with larger working sets")
print(f" - System loses only {system_degradation:.1f}x")
print(f" - mimalloc loses only {mimalloc_degradation:.1f}x")
print(" - Root cause: SuperSlab architecture doesn't scale well")
print()
# --- Recommendations: narrative text with figures derived from `stats` ---
print("=" * 80)
print("Recommendations for Phase 9+")
print("=" * 80)
print()

# HAKMEM's throughput shortfall versus each competitor, in percent, sorted so
# the text can quote a "low-high" range. Computed from the measured means
# rather than typed in by hand, so the prose always matches the data.
shortfall_256_low, shortfall_256_high = sorted(
    (1 - stats['hakmem_256']['mean'] / stats[other]['mean']) * 100
    for other in ('system_256', 'mimalloc_256')
)
shortfall_8192_low, shortfall_8192_high = sorted(
    (1 - stats['hakmem_8192']['mean'] / stats[other]['mean']) * 100
    for other in ('system_8192', 'mimalloc_8192')
)
# WS256 -> WS8192 throughput ratio for HAKMEM and for System malloc.
hakmem_ws_slowdown = stats['hakmem_256']['mean'] / stats['hakmem_8192']['mean']
system_ws_slowdown = stats['system_256']['mean'] / stats['system_8192']['mean']

print("### CRITICAL PRIORITY: Fix WS8192 Performance Gap")
print()
print(f"The {shortfall_8192_low:.0f}-{shortfall_8192_high:.0f}% performance gap at realistic working sets is UNACCEPTABLE.")
print()
print("**Immediate Actions Required:**")
print()
print("1. **Investigate SuperSlab Scaling (Phase 9)**")
print(" - Profile: Why does performance collapse with larger working sets?")
print(" - Hypothesis: SuperSlab lookup overhead, fragmentation, or TLB misses")
print(" - Debug logs show 'shared_fail→legacy' messages → shared slab exhaustion")
print()
print("2. **Optimize Fast Path (Phase 10)**")
print(f" - Even WS256 shows {shortfall_256_low:.0f}-{shortfall_256_high:.0f}% gap vs competitors")
print(" - Profile TLS drain overhead")
print(" - Consider reducing drain frequency or lazy draining")
print()
print("3. **Consider Alternative Architectures (Phase 11)**")
print(" - Current SuperSlab model may be fundamentally flawed")
print(f" - Benchmark shows {hakmem_ws_slowdown:.1f}x degradation vs {system_ws_slowdown:.1f}x for System malloc")
print(" - May need hybrid approach: TLS fast path + different backend")
print()
print("4. **Specific Debug Actions**")
print(" - Analyze '[SS_BACKEND] shared_fail→legacy' logs")
print(" - Measure SuperSlab hit rate at different working set sizes")
print(" - Profile cache misses and TLB misses")
print()
# --- Raw samples, echoed so the report can be reproduced from its own output ---
print("=" * 80, "Raw Data (for reproducibility)", "=" * 80, "", sep="\n")

# Fixed display order: the three allocators at WS256, then at WS8192.
dump_order = (
    'hakmem_256', 'system_256', 'mimalloc_256',
    'hakmem_8192', 'system_8192', 'mimalloc_8192',
)
print("\n".join(f"{series:20s}: {stats[series]['data']}" for series in dump_order))
print()

# Closing banner.
print("=" * 80, "Analysis Complete", "=" * 80, sep="\n")