#!/usr/bin/env python3
"""Summarize allocator benchmark throughput (HAKMEM vs system malloc vs mimalloc).

Every figure in the printed report is derived from ``results`` so the prose
sections cannot drift out of sync with the raw data.
"""
import re
import statistics

# Raw data extracted from benchmark results (ops/s)
results = {
    'hakmem_256': [78480676, 78099247, 77034450, 81120430, 81206714],
    'system_256': [87329938, 86497843, 87514376, 85308713, 86630819],
    'mimalloc_256': [115842807, 115180313, 116209200, 112542094, 114950573],
    'hakmem_8192': [16504443, 15799180, 16916987, 16687009, 16582555],
    'system_8192': [56095157, 57843156, 56999206, 57717254, 56720055],
    'mimalloc_8192': [96824532, 96117137, 95521242, 97733856, 96327554],
}

RULE = "=" * 80
TABLE_HEADER = "| Allocator | Avg (M ops/s) | StdDev (%) | Min - Max | vs HAKMEM |"
TABLE_SEPARATOR = "|----------------|---------------|------------|----------------|-----------|"


def analyze(name, data):
    """Return summary statistics for one benchmark series.

    Args:
        name: Label stored under the ``'name'`` key of the result.
        data: Sequence of throughput samples in ops/s.

    Returns:
        A dict with ``name``, ``mean`` (ops/s), ``mean_m``/``min_m``/``max_m``
        (millions of ops/s), ``stdev_pct`` (sample standard deviation as a
        percentage of the mean) and the original ``data``.

    Raises:
        statistics.StatisticsError: If ``data`` has fewer than two samples
            (``statistics.stdev`` needs at least two).
    """
    mean = statistics.mean(data)
    stdev = statistics.stdev(data)
    min_val = min(data)
    max_val = max(data)
    stdev_pct = (stdev / mean) * 100

    # Convert to M ops/s
    mean_m = mean / 1_000_000
    min_m = min_val / 1_000_000
    max_m = max_val / 1_000_000

    return {
        'name': name,
        'mean': mean,
        'mean_m': mean_m,
        'stdev_pct': stdev_pct,
        'min_m': min_m,
        'max_m': max_m,
        'data': data,
    }


def relative_gap_pct(hakmem_mean, other_mean):
    """Return HAKMEM's throughput gap relative to ``other_mean``, in percent.

    Negative means HAKMEM is slower. The gap is expressed as a fraction of the
    competitor's throughput, so "slower" can never exceed 100%.
    """
    return (hakmem_mean / other_mean - 1) * 100


def _relative_phrase(gap_pct):
    """Format a signed gap as a bold 'N% slower' / 'N% faster' phrase."""
    word = "slower" if gap_pct <= 0 else "faster"
    return f"**{abs(gap_pct):.1f}% {word}**"


def _banner(title):
    """Return the rule/title/rule/blank lines that open a report section."""
    return [RULE, title, RULE, ""]


def _table_row(label, entry, ratio_text):
    """Format one allocator row of the comparison table."""
    return (
        f"| {label} | {entry['mean_m']:6.1f} | ±{entry['stdev_pct']:4.1f}% | "
        f"{entry['min_m']:5.1f} - {entry['max_m']:5.1f} | {ratio_text} |"
    )


def _table_lines(stats, ws, heading):
    """Return the markdown comparison table for working-set size ``ws``."""
    hakmem = stats[f'hakmem_{ws}']
    system = stats[f'system_{ws}']
    mimalloc = stats[f'mimalloc_{ws}']
    return [
        heading,
        "",
        TABLE_HEADER,
        TABLE_SEPARATOR,
        _table_row("HAKMEM Phase 8", hakmem, "1.00x"),
        _table_row("System malloc", system, f"{system['mean'] / hakmem['mean']:5.2f}x"),
        _table_row("mimalloc", mimalloc, f"{mimalloc['mean'] / hakmem['mean']:5.2f}x"),
        "",
    ]


def _summary_lines(stats, ws, heading):
    """Return the bullet summary and 'slower than' sentence for ``ws``."""
    hakmem = stats[f'hakmem_{ws}']
    system = stats[f'system_{ws}']
    mimalloc = stats[f'mimalloc_{ws}']
    system_gap = relative_gap_pct(hakmem['mean'], system['mean'])
    mimalloc_gap = relative_gap_pct(hakmem['mean'], mimalloc['mean'])
    return [
        heading,
        "",
        f"- HAKMEM Phase 8: {hakmem['mean_m']:.1f} M ops/s",
        f"- System malloc: {system['mean_m']:.1f} M ops/s "
        f"({system['mean'] / hakmem['mean']:.2f}x faster)",
        f"- mimalloc: {mimalloc['mean_m']:.1f} M ops/s "
        f"({mimalloc['mean'] / hakmem['mean']:.2f}x faster)",
        "",
        # The gap is relative to the competitor, not to HAKMEM: the inverse
        # ratio would report e.g. "245.9% slower", which is meaningless.
        f"HAKMEM is {_relative_phrase(system_gap)} than System malloc "
        f"and {_relative_phrase(mimalloc_gap)} than mimalloc",
        "",
    ]


def _gap_range(stats, ws):
    """Return (smallest, largest) absolute HAKMEM gap in percent for ``ws``."""
    hakmem_mean = stats[f'hakmem_{ws}']['mean']
    gaps = sorted(
        abs(relative_gap_pct(hakmem_mean, stats[f'{other}_{ws}']['mean']))
        for other in ('system', 'mimalloc')
    )
    return gaps[0], gaps[-1]


def _key_issue_lines(stats, ws, title, issue):
    """Return one numbered 'Key Issues' entry with figures computed from data."""
    hakmem = stats[f'hakmem_{ws}']
    system_gap = relative_gap_pct(hakmem['mean'], stats[f'system_{ws}']['mean'])
    mimalloc_gap = relative_gap_pct(hakmem['mean'], stats[f'mimalloc_{ws}']['mean'])
    return [
        title,
        f" - HAKMEM: {hakmem['mean_m']:.1f} M ops/s",
        f" - Gap: {system_gap:+.1f}% vs System, {mimalloc_gap:+.1f}% vs mimalloc",
        f" - Issue: {issue}",
        "",
    ]


def build_report(stats):
    """Render the full comparison report as a single string.

    Args:
        stats: Mapping from ``'<allocator>_<working set>'`` to the dict
            returned by :func:`analyze`, for the allocators ``hakmem``,
            ``system`` and ``mimalloc`` at working sets ``256`` and ``8192``.

    Returns:
        The report text, lines joined with ``\\n`` and no trailing newline.
    """
    lines = _banner("Phase 8 Comprehensive Allocator Comparison - Analysis")
    lines += _table_lines(stats, 256, "## Working Set 256 (Hot cache, Phase 7 comparison)")
    lines += _table_lines(stats, 8192, "## Working Set 8192 (Realistic workload)")

    lines += _banner("Performance Analysis")
    lines += _summary_lines(stats, 256, "### 1. Working Set 256 (Hot Cache) Results")
    lines += _summary_lines(stats, 8192, "### 2. Working Set 8192 (Realistic Workload) Results")

    lines += _banner("Critical Observations")
    lines += ["### HAKMEM Performance Gap Analysis", ""]

    # Calculate performance degradation from WS256 to WS8192
    degradation = {
        allocator: stats[f'{allocator}_256']['mean_m'] / stats[f'{allocator}_8192']['mean_m']
        for allocator in ('hakmem', 'system', 'mimalloc')
    }
    hakmem_degradation = degradation['hakmem']
    system_degradation = degradation['system']
    mimalloc_degradation = degradation['mimalloc']

    lines.append("Performance degradation from WS256 to WS8192:")
    for label, allocator in (("HAKMEM", 'hakmem'), ("System", 'system'), ("mimalloc", 'mimalloc')):
        lines.append(
            f"- {label}: {degradation[allocator]:.2f}x slowdown "
            f"({stats[f'{allocator}_256']['mean_m']:.1f} → "
            f"{stats[f'{allocator}_8192']['mean_m']:.1f} M ops/s)"
        )
    lines += [
        "",
        f"HAKMEM degrades **{hakmem_degradation / system_degradation:.2f}x MORE** than System malloc",
        f"HAKMEM degrades **{hakmem_degradation / mimalloc_degradation:.2f}x MORE** than mimalloc",
        "",
        "### Key Issues Identified",
        "",
    ]
    lines += _key_issue_lines(
        stats, 256,
        "1. **Hot Cache Performance (WS256)**:",
        "Fast-path overhead (TLS drain, SuperSlab lookup)",
    )
    lines += _key_issue_lines(
        stats, 8192,
        "2. **Realistic Workload Performance (WS8192)**:",
        "SEVERE - SuperSlab scaling, fragmentation, TLB pressure",
    )
    lines += [
        "3. **Scalability Problem**:",
        f" - HAKMEM loses {hakmem_degradation:.1f}x performance with larger working sets",
        f" - System loses only {system_degradation:.1f}x",
        f" - mimalloc loses only {mimalloc_degradation:.1f}x",
        " - Root cause: SuperSlab architecture doesn't scale well",
        "",
    ]

    ws8192_low, ws8192_high = _gap_range(stats, 8192)
    ws256_low, ws256_high = _gap_range(stats, 256)

    lines += _banner("Recommendations for Phase 9+")
    lines += [
        "### CRITICAL PRIORITY: Fix WS8192 Performance Gap",
        "",
        f"The {ws8192_low:.0f}-{ws8192_high:.0f}% performance gap at realistic "
        "working sets is UNACCEPTABLE.",
        "",
        "**Immediate Actions Required:**",
        "",
        "1. **Investigate SuperSlab Scaling (Phase 9)**",
        " - Profile: Why does performance collapse with larger working sets?",
        " - Hypothesis: SuperSlab lookup overhead, fragmentation, or TLB misses",
        " - Debug logs show 'shared_fail→legacy' messages → shared slab exhaustion",
        "",
        "2. **Optimize Fast Path (Phase 10)**",
        f" - Even WS256 shows {ws256_low:.0f}-{ws256_high:.0f}% gap vs competitors",
        " - Profile TLS drain overhead",
        " - Consider reducing drain frequency or lazy draining",
        "",
        "3. **Consider Alternative Architectures (Phase 11)**",
        " - Current SuperSlab model may be fundamentally flawed",
        f" - Benchmark shows {hakmem_degradation:.1f}x degradation vs "
        f"{system_degradation:.1f}x for System malloc",
        " - May need hybrid approach: TLS fast path + different backend",
        "",
        "4. **Specific Debug Actions**",
        " - Analyze '[SS_BACKEND] shared_fail→legacy' logs",
        " - Measure SuperSlab hit rate at different working set sizes",
        " - Profile cache misses and TLB misses",
        "",
    ]

    lines += _banner("Raw Data (for reproducibility)")
    for key in ['hakmem_256', 'system_256', 'mimalloc_256',
                'hakmem_8192', 'system_8192', 'mimalloc_8192']:
        lines.append(f"{key:20s}: {stats[key]['data']}")
    lines += ["", RULE, "Analysis Complete", RULE]

    return "\n".join(lines)


def main():
    """Print the report for the embedded benchmark data."""
    print(build_report(stats))


# Analyze all datasets
stats = {key: analyze(key, data) for key, data in results.items()}

if __name__ == "__main__":
    main()