#!/usr/bin/env python3
|
||
|
|
|
||
|
|
import re
|
||
|
|
import statistics
|
||
|
|
|
||
|
|
# Throughput samples in ops/s taken from the benchmark output.
# Keys follow "<allocator>_<working set size>"; each series has five samples.
results = dict(
    hakmem_256=[78_480_676, 78_099_247, 77_034_450, 81_120_430, 81_206_714],
    system_256=[87_329_938, 86_497_843, 87_514_376, 85_308_713, 86_630_819],
    mimalloc_256=[115_842_807, 115_180_313, 116_209_200, 112_542_094, 114_950_573],

    hakmem_8192=[16_504_443, 15_799_180, 16_916_987, 16_687_009, 16_582_555],
    system_8192=[56_095_157, 57_843_156, 56_999_206, 57_717_254, 56_720_055],
    mimalloc_8192=[96_824_532, 96_117_137, 95_521_242, 97_733_856, 96_327_554],
)
|
||
|
|
|
||
|
|
def analyze(name, data):
    """Summarize one benchmark series.

    Args:
        name: Label stored unchanged under the ``'name'`` key.
        data: Non-empty sized collection of throughput samples in ops/s.

    Returns:
        A dict with the keys ``name``, ``mean`` (ops/s), ``mean_m``,
        ``min_m`` and ``max_m`` (mean, minimum and maximum divided by one
        million), ``stdev_pct`` (sample standard deviation as a percentage
        of the mean) and ``data`` (the object that was passed in).

    Raises:
        statistics.StatisticsError: If ``data`` is empty.
        ZeroDivisionError: If the mean of ``data`` is zero.
    """
    mean = statistics.mean(data)
    # statistics.stdev() needs at least two samples.  A single run has no
    # measurable spread, so report 0 rather than raising StatisticsError.
    stdev = statistics.stdev(data) if len(data) > 1 else 0.0
    min_val = min(data)
    max_val = max(data)
    stdev_pct = (stdev / mean) * 100

    # Convert to M ops/s
    mean_m = mean / 1_000_000
    min_m = min_val / 1_000_000
    max_m = max_val / 1_000_000

    return {
        'name': name,
        'mean': mean,
        'mean_m': mean_m,
        'stdev_pct': stdev_pct,
        'min_m': min_m,
        'max_m': max_m,
        'data': data,
    }
|
||
|
|
|
||
|
|
# Report title banner followed by one blank line.
print(
    "=" * 80,
    "Phase 8 Comprehensive Allocator Comparison - Analysis",
    "=" * 80,
    "",
    sep="\n",
)

# Summary statistics for every series, keyed exactly like `results`.
stats = {label: analyze(label, samples) for label, samples in results.items()}
|
||
|
|
|
||
|
|
# Markdown table comparing the allocators on the 256 working set.
print("## Working Set 256 (Hot cache, Phase 7 comparison)")
print()
print("| Allocator | Avg (M ops/s) | StdDev (%) | Min - Max | vs HAKMEM |")
print("|----------------|---------------|------------|----------------|-----------|")

# Mean throughput in ops/s; these names are reused by later report sections.
hakmem_256_mean = stats['hakmem_256']['mean']
system_256_mean = stats['system_256']['mean']
mimalloc_256_mean = stats['mimalloc_256']['mean']

# (row label, stats key, ready-made "vs HAKMEM" cell).  HAKMEM is the
# baseline, so its cell is the fixed text "1.00x".
for row_label, row_key, ratio_cell in (
    ("HAKMEM Phase 8", 'hakmem_256', "1.00x"),
    ("System malloc", 'system_256', f"{system_256_mean/hakmem_256_mean:5.2f}x"),
    ("mimalloc", 'mimalloc_256', f"{mimalloc_256_mean/hakmem_256_mean:5.2f}x"),
):
    row = stats[row_key]
    print(f"| {row_label} | {row['mean_m']:6.1f} | ±{row['stdev_pct']:4.1f}% | {row['min_m']:5.1f} - {row['max_m']:5.1f} | {ratio_cell} |")
print()
|
||
|
|
|
||
|
|
# Markdown table comparing the allocators on the 8192 working set.
print("## Working Set 8192 (Realistic workload)")
print()
print("| Allocator | Avg (M ops/s) | StdDev (%) | Min - Max | vs HAKMEM |")
print("|----------------|---------------|------------|----------------|-----------|")

# Mean throughput in ops/s; these names are reused by later report sections.
hakmem_8192_mean = stats['hakmem_8192']['mean']
system_8192_mean = stats['system_8192']['mean']
mimalloc_8192_mean = stats['mimalloc_8192']['mean']

# (row label, stats key, ready-made "vs HAKMEM" cell).  HAKMEM is the
# baseline, so its cell is the fixed text "1.00x".
for row_label, row_key, ratio_cell in (
    ("HAKMEM Phase 8", 'hakmem_8192', "1.00x"),
    ("System malloc", 'system_8192', f"{system_8192_mean/hakmem_8192_mean:5.2f}x"),
    ("mimalloc", 'mimalloc_8192', f"{mimalloc_8192_mean/hakmem_8192_mean:5.2f}x"),
):
    row = stats[row_key]
    print(f"| {row_label} | {row['mean_m']:6.1f} | ±{row['stdev_pct']:4.1f}% | {row['min_m']:5.1f} - {row['max_m']:5.1f} | {ratio_cell} |")
print()
|
||
|
|
|
||
|
|
# Section banner.
print("=" * 80)
print("Performance Analysis")
print("=" * 80)
print()

# Per-allocator mean throughput for the 256 working set, with each
# competitor's speed expressed as a multiple of HAKMEM's.
print("### 1. Working Set 256 (Hot Cache) Results")
print()
print(f"- HAKMEM Phase 8: {stats['hakmem_256']['mean_m']:.1f} M ops/s")
print(f"- System malloc: {stats['system_256']['mean_m']:.1f} M ops/s ({system_256_mean/hakmem_256_mean:.2f}x faster)")
print(f"- mimalloc: {stats['mimalloc_256']['mean_m']:.1f} M ops/s ({mimalloc_256_mean/hakmem_256_mean:.2f}x faster)")
print()

# "X% slower" is HAKMEM's throughput shortfall relative to the competitor,
# i.e. 1 - hakmem/other.  The ratio other/hakmem - 1 instead measures how
# much *faster* the competitor is and exceeds 100% as soon as the competitor
# is more than twice as fast, which cannot be read as "slower".
system_256_shortfall = (1 - hakmem_256_mean / system_256_mean) * 100
mimalloc_256_shortfall = (1 - hakmem_256_mean / mimalloc_256_mean) * 100
print(
    f"HAKMEM is **{system_256_shortfall:.1f}% slower** than System malloc"
    f" and **{mimalloc_256_shortfall:.1f}% slower** than mimalloc"
)
print()
|
||
|
|
|
||
|
|
# Per-allocator mean throughput for the 8192 working set, with each
# competitor's speed expressed as a multiple of HAKMEM's.
print("### 2. Working Set 8192 (Realistic Workload) Results")
print()
print(f"- HAKMEM Phase 8: {stats['hakmem_8192']['mean_m']:.1f} M ops/s")
print(f"- System malloc: {stats['system_8192']['mean_m']:.1f} M ops/s ({system_8192_mean/hakmem_8192_mean:.2f}x faster)")
print(f"- mimalloc: {stats['mimalloc_8192']['mean_m']:.1f} M ops/s ({mimalloc_8192_mean/hakmem_8192_mean:.2f}x faster)")
print()

# "X% slower" is HAKMEM's throughput shortfall relative to the competitor,
# i.e. 1 - hakmem/other.  The ratio other/hakmem - 1 instead measures how
# much *faster* the competitor is; for this data set it yields values far
# above 100%, which cannot be read as "slower".
system_8192_shortfall = (1 - hakmem_8192_mean / system_8192_mean) * 100
mimalloc_8192_shortfall = (1 - hakmem_8192_mean / mimalloc_8192_mean) * 100
print(
    f"HAKMEM is **{system_8192_shortfall:.1f}% slower** than System malloc"
    f" and **{mimalloc_8192_shortfall:.1f}% slower** than mimalloc"
)
print()
|
||
|
|
|
||
|
|
# Section banner followed by one blank line.
print("=" * 80, "Critical Observations", "=" * 80, "", sep="\n")

print("### HAKMEM Performance Gap Analysis")
print()

# Ratio of mean throughput at WS256 to mean throughput at WS8192 for each
# allocator; a larger value means a bigger drop on the larger working set.
# These names are reused by later report sections.
hakmem_degradation = stats['hakmem_256']['mean_m'] / stats['hakmem_8192']['mean_m']
system_degradation = stats['system_256']['mean_m'] / stats['system_8192']['mean_m']
mimalloc_degradation = stats['mimalloc_256']['mean_m'] / stats['mimalloc_8192']['mean_m']

print("Performance degradation from WS256 to WS8192:")
for tag, prefix, slowdown in (
    ("HAKMEM", 'hakmem', hakmem_degradation),
    ("System", 'system', system_degradation),
    ("mimalloc", 'mimalloc', mimalloc_degradation),
):
    small_ws_m = stats[f'{prefix}_256']['mean_m']
    large_ws_m = stats[f'{prefix}_8192']['mean_m']
    print(f"- {tag}: {slowdown:.2f}x slowdown ({small_ws_m:.1f} → {large_ws_m:.1f} M ops/s)")
print()

# How much steeper HAKMEM's drop is than each competitor's.
for rival, rival_slowdown in (
    ("System malloc", system_degradation),
    ("mimalloc", mimalloc_degradation),
):
    print(f"HAKMEM degrades **{hakmem_degradation/rival_slowdown:.2f}x MORE** than {rival}")
print()
|
||
|
|
|
||
|
|
# Signed throughput gap of HAKMEM relative to each competitor, in percent
# (negative means HAKMEM is slower).  Derived from `stats` so the figures
# always match the samples in `results` instead of going stale.
gap_256_system = (stats['hakmem_256']['mean'] / stats['system_256']['mean'] - 1) * 100
gap_256_mimalloc = (stats['hakmem_256']['mean'] / stats['mimalloc_256']['mean'] - 1) * 100
gap_8192_system = (stats['hakmem_8192']['mean'] / stats['system_8192']['mean'] - 1) * 100
gap_8192_mimalloc = (stats['hakmem_8192']['mean'] / stats['mimalloc_8192']['mean'] - 1) * 100

print("### Key Issues Identified")
print()
print("1. **Hot Cache Performance (WS256)**:")
print(f" - HAKMEM: {stats['hakmem_256']['mean_m']:.1f} M ops/s")
print(f" - Gap: {gap_256_system:+.1f}% vs System, {gap_256_mimalloc:+.1f}% vs mimalloc")
print(" - Issue: Fast-path overhead (TLS drain, SuperSlab lookup)")
print()
print("2. **Realistic Workload Performance (WS8192)**:")
print(f" - HAKMEM: {stats['hakmem_8192']['mean_m']:.1f} M ops/s")
print(f" - Gap: {gap_8192_system:+.1f}% vs System, {gap_8192_mimalloc:+.1f}% vs mimalloc")
print(" - Issue: SEVERE - SuperSlab scaling, fragmentation, TLB pressure")
print()
print("3. **Scalability Problem**:")
print(f" - HAKMEM loses {hakmem_degradation:.1f}x performance with larger working sets")
print(f" - System loses only {system_degradation:.1f}x")
print(f" - mimalloc loses only {mimalloc_degradation:.1f}x")
print(" - Root cause: SuperSlab architecture doesn't scale well")
print()
|
||
|
|
|
||
|
|
# Percentage by which HAKMEM's mean throughput falls short of each
# competitor (1 - hakmem/other), sorted ascending so [0] is the smallest
# and [-1] the largest gap.  Computed from `stats` so the quoted ranges
# use one metric and always match the samples in `results`.
shortfall_256 = sorted(
    (1 - stats['hakmem_256']['mean'] / stats[rival]['mean']) * 100
    for rival in ('system_256', 'mimalloc_256')
)
shortfall_8192 = sorted(
    (1 - stats['hakmem_8192']['mean'] / stats[rival]['mean']) * 100
    for rival in ('system_8192', 'mimalloc_8192')
)

print("=" * 80)
print("Recommendations for Phase 9+")
print("=" * 80)
print()

print("### CRITICAL PRIORITY: Fix WS8192 Performance Gap")
print()
print(f"The {shortfall_8192[0]:.0f}-{shortfall_8192[-1]:.0f}% performance gap at realistic working sets is UNACCEPTABLE.")
print()
print("**Immediate Actions Required:**")
print()
print("1. **Investigate SuperSlab Scaling (Phase 9)**")
print(" - Profile: Why does performance collapse with larger working sets?")
print(" - Hypothesis: SuperSlab lookup overhead, fragmentation, or TLB misses")
print(" - Debug logs show 'shared_fail→legacy' messages → shared slab exhaustion")
print()
print("2. **Optimize Fast Path (Phase 10)**")
print(f" - Even WS256 shows {shortfall_256[0]:.0f}-{shortfall_256[-1]:.0f}% gap vs competitors")
print(" - Profile TLS drain overhead")
print(" - Consider reducing drain frequency or lazy draining")
print()
print("3. **Consider Alternative Architectures (Phase 11)**")
print(" - Current SuperSlab model may be fundamentally flawed")
print(f" - Benchmark shows {hakmem_degradation:.1f}x degradation vs {system_degradation:.1f}x for System malloc")
print(" - May need hybrid approach: TLS fast path + different backend")
print()
print("4. **Specific Debug Actions**")
print(" - Analyze '[SS_BACKEND] shared_fail→legacy' logs")
print(" - Measure SuperSlab hit rate at different working set sizes")
print(" - Profile cache misses and TLB misses")
print()
|
||
|
|
|
||
|
|
# Section banner followed by one blank line.
print("=" * 80, "Raw Data (for reproducibility)", "=" * 80, "", sep="\n")

# Echo the unmodified sample lists in a fixed order, one series per line.
for dataset in (
    'hakmem_256', 'system_256', 'mimalloc_256',
    'hakmem_8192', 'system_8192', 'mimalloc_8192',
):
    samples = stats[dataset]['data']
    print(f"{dataset:20s}: {samples}")

print()
print("=" * 80, "Analysis Complete", "=" * 80, sep="\n")
|