hakmem/analyze_phase8_benchmark.py

#!/usr/bin/env python3

import re
import statistics

# Raw data extracted from benchmark results (ops/s).
# Keys are "<allocator>_<working set size>"; each list holds the individual
# throughput samples for that configuration (five per series here).
# Every series is summarized with analyze() further down in this script.
results = {
    # Working set 256
    'hakmem_256': [78480676, 78099247, 77034450, 81120430, 81206714],
    'system_256': [87329938, 86497843, 87514376, 85308713, 86630819],
    'mimalloc_256': [115842807, 115180313, 116209200, 112542094, 114950573],

    # Working set 8192
    'hakmem_8192': [16504443, 15799180, 16916987, 16687009, 16582555],
    'system_8192': [56095157, 57843156, 56999206, 57717254, 56720055],
    'mimalloc_8192': [96824532, 96117137, 95521242, 97733856, 96327554],
}

def analyze(name, data):
    """Summarize one benchmark series.

    Args:
        name: Label of the series; returned unchanged under ``'name'``.
        data: Non-empty sequence of throughput samples in ops/s.

    Returns:
        A dict with the keys ``name``, ``mean`` (ops/s), ``mean_m``,
        ``min_m`` and ``max_m`` (millions of ops/s), ``stdev_pct`` (sample
        standard deviation as a percentage of the mean) and ``data`` (the
        input object itself, not a copy).

    Raises:
        statistics.StatisticsError: If ``data`` is empty.
    """
    mean = statistics.mean(data)

    # statistics.stdev() requires at least two samples. A single run has no
    # measurable spread, so report 0 instead of raising.
    stdev = statistics.stdev(data) if len(data) > 1 else 0.0

    # An all-zero series has no meaningful relative spread; avoid dividing
    # by a zero mean.
    stdev_pct = (stdev / mean) * 100 if mean else 0.0

    # Convert to M ops/s
    scale = 1_000_000

    return {
        'name': name,
        'mean': mean,
        'mean_m': mean / scale,
        'stdev_pct': stdev_pct,
        'min_m': min(data) / scale,
        'max_m': max(data) / scale,
        'data': data,
    }

# Report banner: title framed by two 80-column rules, then a blank line.
rule = "=" * 80
for banner_line in (rule, "Phase 8 Comprehensive Allocator Comparison - Analysis", rule, ""):
    print(banner_line)

# Summary statistics for every benchmark series, keyed like `results`.
stats = {series: analyze(series, samples) for series, samples in results.items()}

def _print_comparison_table(heading, hakmem, system, mimalloc):
    """Print one markdown comparison table, with HAKMEM as the 1.00x baseline.

    Each argument after `heading` is one entry of `stats` (the dict returned
    by analyze()). A blank line is printed after the table.
    """
    baseline = hakmem['mean']
    # (label, stats entry, pre-padded "vs HAKMEM" cell). The baseline cell is
    # a literal so that its padding differs from the computed cells exactly
    # as in the hand-written rows this helper replaces.
    rows = (
        ("HAKMEM Phase 8", hakmem, "1.00x    "),
        ("System malloc", system, f"{system['mean'] / baseline:5.2f}x   "),
        ("mimalloc", mimalloc, f"{mimalloc['mean'] / baseline:5.2f}x   "),
    )

    print(heading)
    print()
    print("| Allocator      | Avg (M ops/s) | StdDev (%) | Min - Max      | vs HAKMEM |")
    print("|----------------|---------------|------------|----------------|-----------|")
    for label, entry, ratio_cell in rows:
        print(
            f"| {label:<14} "
            f"| {entry['mean_m']:6.1f}        "
            f"| ±{entry['stdev_pct']:4.1f}%     "
            f"| {entry['min_m']:5.1f} - {entry['max_m']:5.1f}  "
            f"| {ratio_cell} |"
        )
    print()


# Mean throughputs (ops/s); the later report sections reuse these names.
hakmem_256_mean = stats['hakmem_256']['mean']
system_256_mean = stats['system_256']['mean']
mimalloc_256_mean = stats['mimalloc_256']['mean']

_print_comparison_table(
    "## Working Set 256 (Hot cache, Phase 7 comparison)",
    stats['hakmem_256'],
    stats['system_256'],
    stats['mimalloc_256'],
)

hakmem_8192_mean = stats['hakmem_8192']['mean']
system_8192_mean = stats['system_8192']['mean']
mimalloc_8192_mean = stats['mimalloc_8192']['mean']

_print_comparison_table(
    "## Working Set 8192 (Realistic workload)",
    stats['hakmem_8192'],
    stats['system_8192'],
    stats['mimalloc_8192'],
)

print("=" * 80)
print("Performance Analysis")
print("=" * 80)
print()

# One subsection per working set.
#
# "N% slower" is expressed relative to the faster allocator's throughput:
#     (1 - hakmem / other) * 100
# so it can never exceed 100%. The previous formula, (other / hakmem - 1) * 100,
# measures how much *faster* the other allocator is and printed roughly 246%
# "slower" for WS8192.
for section_no, (ws, label) in enumerate(
    (("256", "Hot Cache"), ("8192", "Realistic Workload")), start=1
):
    hakmem_mean = stats[f'hakmem_{ws}']['mean']
    system_mean = stats[f'system_{ws}']['mean']
    mimalloc_mean = stats[f'mimalloc_{ws}']['mean']

    print(f"### {section_no}. Working Set {ws} ({label}) Results")
    print()
    print(f"- HAKMEM Phase 8: {stats[f'hakmem_{ws}']['mean_m']:.1f} M ops/s")
    print(f"- System malloc:  {stats[f'system_{ws}']['mean_m']:.1f} M ops/s ({system_mean / hakmem_mean:.2f}x faster)")
    print(f"- mimalloc:       {stats[f'mimalloc_{ws}']['mean_m']:.1f} M ops/s ({mimalloc_mean / hakmem_mean:.2f}x faster)")
    print()
    print("HAKMEM is **{:.1f}% slower** than System malloc and **{:.1f}% slower** than mimalloc".format(
        (1 - hakmem_mean / system_mean) * 100,
        (1 - hakmem_mean / mimalloc_mean) * 100,
    ))
    print()

print("=" * 80)
print("Critical Observations")
print("=" * 80)
print()

print("### HAKMEM Performance Gap Analysis")
print()

# Performance degradation from WS256 to WS8192: ratio of mean throughputs,
# i.e. how many times slower each allocator gets with the larger working set.
hakmem_degradation = (stats['hakmem_256']['mean_m'] / stats['hakmem_8192']['mean_m'])
system_degradation = (stats['system_256']['mean_m'] / stats['system_8192']['mean_m'])
mimalloc_degradation = (stats['mimalloc_256']['mean_m'] / stats['mimalloc_8192']['mean_m'])

print("Performance degradation from WS256 to WS8192:")
print(f"- HAKMEM:   {hakmem_degradation:.2f}x slowdown ({stats['hakmem_256']['mean_m']:.1f} → {stats['hakmem_8192']['mean_m']:.1f} M ops/s)")
print(f"- System:   {system_degradation:.2f}x slowdown ({stats['system_256']['mean_m']:.1f} → {stats['system_8192']['mean_m']:.1f} M ops/s)")
print(f"- mimalloc: {mimalloc_degradation:.2f}x slowdown ({stats['mimalloc_256']['mean_m']:.1f} → {stats['mimalloc_8192']['mean_m']:.1f} M ops/s)")
print()
print(f"HAKMEM degrades **{hakmem_degradation/system_degradation:.2f}x MORE** than System malloc")
print(f"HAKMEM degrades **{hakmem_degradation/mimalloc_degradation:.2f}x MORE** than mimalloc")
print()


def _gap_pct(ws, other):
    """Return HAKMEM's throughput gap vs `other` at working set `ws`, in percent.

    Computed as (hakmem / other - 1) * 100, so a negative value means HAKMEM
    is slower than `other`.
    """
    return (stats[f'hakmem_{ws}']['mean'] / stats[f'{other}_{ws}']['mean'] - 1) * 100


# The figures below are derived from the measured data instead of being typed
# in by hand: the hand-written values had drifted from `results` and mixed two
# different gap definitions between the WS256 and WS8192 entries.
print("### Key Issues Identified")
print()
print("1. **Hot Cache Performance (WS256)**:")
print(f"   - HAKMEM: {stats['hakmem_256']['mean_m']:.1f} M ops/s")
print(f"   - Gap: {_gap_pct('256', 'system'):+.1f}% vs System, {_gap_pct('256', 'mimalloc'):+.1f}% vs mimalloc")
print("   - Issue: Fast-path overhead (TLS drain, SuperSlab lookup)")
print()
print("2. **Realistic Workload Performance (WS8192)**:")
print(f"   - HAKMEM: {stats['hakmem_8192']['mean_m']:.1f} M ops/s")
print(f"   - Gap: {_gap_pct('8192', 'system'):+.1f}% vs System, {_gap_pct('8192', 'mimalloc'):+.1f}% vs mimalloc")
print("   - Issue: SEVERE - SuperSlab scaling, fragmentation, TLB pressure")
print()
print("3. **Scalability Problem**:")
print(f"   - HAKMEM loses {hakmem_degradation:.1f}x performance with larger working sets")
print(f"   - System loses only {system_degradation:.1f}x")
print(f"   - mimalloc loses only {mimalloc_degradation:.1f}x")
print("   - Root cause: SuperSlab architecture doesn't scale well")
print()

print("=" * 80)
print("Recommendations for Phase 9+")
print("=" * 80)
print()

# Gap of HAKMEM behind each competitor, as a percentage of the competitor's
# mean throughput, sorted ascending so [0] / [-1] give the quoted range.
# Derived from the data so the prose cannot drift from the tables; the
# hand-typed WS256 range used the "x faster" ratio instead of this gap.
ws256_gaps = sorted(
    (1 - stats['hakmem_256']['mean'] / stats[rival]['mean']) * 100
    for rival in ('system_256', 'mimalloc_256')
)
ws8192_gaps = sorted(
    (1 - stats['hakmem_8192']['mean'] / stats[rival]['mean']) * 100
    for rival in ('system_8192', 'mimalloc_8192')
)

print("### CRITICAL PRIORITY: Fix WS8192 Performance Gap")
print()
print(f"The {ws8192_gaps[0]:.0f}-{ws8192_gaps[-1]:.0f}% performance gap at realistic working sets is UNACCEPTABLE.")
print()
print("**Immediate Actions Required:**")
print()
print("1. **Investigate SuperSlab Scaling (Phase 9)**")
print("   - Profile: Why does performance collapse with larger working sets?")
print("   - Hypothesis: SuperSlab lookup overhead, fragmentation, or TLB misses")
print("   - Debug logs show 'shared_fail→legacy' messages → shared slab exhaustion")
print()
print("2. **Optimize Fast Path (Phase 10)**")
print(f"   - Even WS256 shows {ws256_gaps[0]:.0f}-{ws256_gaps[-1]:.0f}% gap vs competitors")
print("   - Profile TLS drain overhead")
print("   - Consider reducing drain frequency or lazy draining")
print()
print("3. **Consider Alternative Architectures (Phase 11)**")
print("   - Current SuperSlab model may be fundamentally flawed")
# hakmem_degradation / system_degradation are the WS256-to-WS8192 slowdown
# factors computed in the "Critical Observations" section.
print(f"   - Benchmark shows {hakmem_degradation:.1f}x degradation vs {system_degradation:.1f}x for System malloc")
print("   - May need hybrid approach: TLS fast path + different backend")
print()
print("4. **Specific Debug Actions**")
print("   - Analyze '[SS_BACKEND] shared_fail→legacy' logs")
print("   - Measure SuperSlab hit rate at different working set sizes")
print("   - Profile cache misses and TLB misses")
print()

section_rule = "=" * 80

print(section_rule)
print("Raw Data (for reproducibility)")
print(section_rule)
print()

# Dump the input samples in a fixed order: every allocator for WS256 first,
# then every allocator for WS8192.
for working_set in ("256", "8192"):
    for allocator in ("hakmem", "system", "mimalloc"):
        key = f"{allocator}_{working_set}"
        print(f"{key:20s}: {stats[key]['data']}")

print()
print(section_rule)
print("Analysis Complete")
print(section_rule)
feat: Add ACE allocation failure tracing and debug hooks

This commit introduces a comprehensive tracing mechanism for allocation failures within the Adaptive Cache Engine (ACE) component. This feature allows for precise identification of the root cause of Out-Of-Memory (OOM) issues related to ACE allocations.

Key changes include:

- ACE Tracing Implementation:
  - Added […] environment variable to enable/disable detailed logging of allocation failures.
  - Instrumented […], […], and […] to distinguish between "Threshold" (size class mismatch), "Exhaustion" (pool depletion), and "MapFail" (OS memory allocation failure).
- Build System Fixes:
  - Corrected […] to ensure […] is properly linked into […], resolving an […] error.
- LD_PRELOAD Wrapper Adjustments:
  - Investigated and understood the wrapper's behavior under […], particularly its interaction with […] and […] checks.
  - Enabled debugging flags for the […] environment to prevent unintended fallbacks to […]'s […] for non-tiny allocations, allowing comprehensive testing of the allocator.
- Debugging & Verification:
  - Introduced temporary verbose logging to pinpoint execution flow issues within […] interception and routing. These temporary logs have been removed.
  - Created […] to facilitate testing of the tracing features.

This feature will significantly aid in diagnosing and resolving allocation-related OOM issues in […] by providing clear insights into the failure pathways.

(Note: "[…]" marks inline identifiers that were lost when this commit message was extracted.)

2025-12-01 16:37:59 +09:00

			`#!/usr/bin/env python3`

			`import re`
			`import statistics`

			`# Raw data extracted from benchmark results (ops/s)`
			`results = {`
			`'hakmem_256': [78480676, 78099247, 77034450, 81120430, 81206714],`
			`'system_256': [87329938, 86497843, 87514376, 85308713, 86630819],`
			`'mimalloc_256': [115842807, 115180313, 116209200, 112542094, 114950573],`

			`'hakmem_8192': [16504443, 15799180, 16916987, 16687009, 16582555],`
			`'system_8192': [56095157, 57843156, 56999206, 57717254, 56720055],`
			`'mimalloc_8192': [96824532, 96117137, 95521242, 97733856, 96327554],`
			`}`

			`def analyze(name, data):`
			`mean = statistics.mean(data)`
			`stdev = statistics.stdev(data)`
			`min_val = min(data)`
			`max_val = max(data)`
			`stdev_pct = (stdev / mean) * 100`

			`# Convert to M ops/s`
			`mean_m = mean / 1_000_000`
			`min_m = min_val / 1_000_000`
			`max_m = max_val / 1_000_000`

			`return {`
			`'name': name,`
			`'mean': mean,`
			`'mean_m': mean_m,`
			`'stdev_pct': stdev_pct,`
			`'min_m': min_m,`
			`'max_m': max_m,`
			`'data': data`
			`}`

			`print("=" * 80)`
			`print("Phase 8 Comprehensive Allocator Comparison - Analysis")`
			`print("=" * 80)`
			`print()`

			`# Analyze all datasets`
			`stats = {}`
			`for key, data in results.items():`
			`stats[key] = analyze(key, data)`

			`print("## Working Set 256 (Hot cache, Phase 7 comparison)")`
			`print()`
			`print("\| Allocator \| Avg (M ops/s) \| StdDev (%) \| Min - Max \| vs HAKMEM \|")`
			`print("\|----------------\|---------------\|------------\|----------------\|-----------\|")`

			`hakmem_256_mean = stats['hakmem_256']['mean']`
			`system_256_mean = stats['system_256']['mean']`
			`mimalloc_256_mean = stats['mimalloc_256']['mean']`

			`print(f"\| HAKMEM Phase 8 \| {stats['hakmem_256']['mean_m']:6.1f} \| ±{stats['hakmem_256']['stdev_pct']:4.1f}% \| {stats['hakmem_256']['min_m']:5.1f} - {stats['hakmem_256']['max_m']:5.1f} \| 1.00x \|")`
			`print(f"\| System malloc \| {stats['system_256']['mean_m']:6.1f} \| ±{stats['system_256']['stdev_pct']:4.1f}% \| {stats['system_256']['min_m']:5.1f} - {stats['system_256']['max_m']:5.1f} \| {system_256_mean/hakmem_256_mean:5.2f}x \|")`
			`print(f"\| mimalloc \| {stats['mimalloc_256']['mean_m']:6.1f} \| ±{stats['mimalloc_256']['stdev_pct']:4.1f}% \| {stats['mimalloc_256']['min_m']:5.1f} - {stats['mimalloc_256']['max_m']:5.1f} \| {mimalloc_256_mean/hakmem_256_mean:5.2f}x \|")`
			`print()`

			`print("## Working Set 8192 (Realistic workload)")`
			`print()`
			`print("\| Allocator \| Avg (M ops/s) \| StdDev (%) \| Min - Max \| vs HAKMEM \|")`
			`print("\|----------------\|---------------\|------------\|----------------\|-----------\|")`

			`hakmem_8192_mean = stats['hakmem_8192']['mean']`
			`system_8192_mean = stats['system_8192']['mean']`
			`mimalloc_8192_mean = stats['mimalloc_8192']['mean']`

			`print(f"\| HAKMEM Phase 8 \| {stats['hakmem_8192']['mean_m']:6.1f} \| ±{stats['hakmem_8192']['stdev_pct']:4.1f}% \| {stats['hakmem_8192']['min_m']:5.1f} - {stats['hakmem_8192']['max_m']:5.1f} \| 1.00x \|")`
			`print(f"\| System malloc \| {stats['system_8192']['mean_m']:6.1f} \| ±{stats['system_8192']['stdev_pct']:4.1f}% \| {stats['system_8192']['min_m']:5.1f} - {stats['system_8192']['max_m']:5.1f} \| {system_8192_mean/hakmem_8192_mean:5.2f}x \|")`
			`print(f"\| mimalloc \| {stats['mimalloc_8192']['mean_m']:6.1f} \| ±{stats['mimalloc_8192']['stdev_pct']:4.1f}% \| {stats['mimalloc_8192']['min_m']:5.1f} - {stats['mimalloc_8192']['max_m']:5.1f} \| {mimalloc_8192_mean/hakmem_8192_mean:5.2f}x \|")`
			`print()`

			`print("=" * 80)`
			`print("Performance Analysis")`
			`print("=" * 80)`
			`print()`

			`print("### 1. Working Set 256 (Hot Cache) Results")`
			`print()`
			`print(f"- HAKMEM Phase 8: {stats['hakmem_256']['mean_m']:.1f} M ops/s")`
			`print(f"- System malloc: {stats['system_256']['mean_m']:.1f} M ops/s ({system_256_mean/hakmem_256_mean:.2f}x faster)")`
			`print(f"- mimalloc: {stats['mimalloc_256']['mean_m']:.1f} M ops/s ({mimalloc_256_mean/hakmem_256_mean:.2f}x faster)")`
			`print()`
			`print("HAKMEM is {:.1f}% slower than System malloc and {:.1f}% slower than mimalloc".format(`
			`((system_256_mean/hakmem_256_mean - 1) * 100),`
			`((mimalloc_256_mean/hakmem_256_mean - 1) * 100)`
			`))`
			`print()`

			`print("### 2. Working Set 8192 (Realistic Workload) Results")`
			`print()`
			`print(f"- HAKMEM Phase 8: {stats['hakmem_8192']['mean_m']:.1f} M ops/s")`
			`print(f"- System malloc: {stats['system_8192']['mean_m']:.1f} M ops/s ({system_8192_mean/hakmem_8192_mean:.2f}x faster)")`
			`print(f"- mimalloc: {stats['mimalloc_8192']['mean_m']:.1f} M ops/s ({mimalloc_8192_mean/hakmem_8192_mean:.2f}x faster)")`
			`print()`
			`print("HAKMEM is {:.1f}% slower than System malloc and {:.1f}% slower than mimalloc".format(`
			`((system_8192_mean/hakmem_8192_mean - 1) * 100),`
			`((mimalloc_8192_mean/hakmem_8192_mean - 1) * 100)`
			`))`
			`print()`

			`print("=" * 80)`
			`print("Critical Observations")`
			`print("=" * 80)`
			`print()`

			`print("### HAKMEM Performance Gap Analysis")`
			`print()`

			`# Calculate performance degradation from WS256 to WS8192`
			`hakmem_degradation = (stats['hakmem_256']['mean_m'] / stats['hakmem_8192']['mean_m'])`
			`system_degradation = (stats['system_256']['mean_m'] / stats['system_8192']['mean_m'])`
			`mimalloc_degradation = (stats['mimalloc_256']['mean_m'] / stats['mimalloc_8192']['mean_m'])`

			`print(f"Performance degradation from WS256 to WS8192:")`
			`print(f"- HAKMEM: {hakmem_degradation:.2f}x slowdown ({stats['hakmem_256']['mean_m']:.1f} → {stats['hakmem_8192']['mean_m']:.1f} M ops/s)")`
			`print(f"- System: {system_degradation:.2f}x slowdown ({stats['system_256']['mean_m']:.1f} → {stats['system_8192']['mean_m']:.1f} M ops/s)")`
			`print(f"- mimalloc: {mimalloc_degradation:.2f}x slowdown ({stats['mimalloc_256']['mean_m']:.1f} → {stats['mimalloc_8192']['mean_m']:.1f} M ops/s)")`
			`print()`
			`print(f"HAKMEM degrades {hakmem_degradation/system_degradation:.2f}x MORE than System malloc")`
			`print(f"HAKMEM degrades {hakmem_degradation/mimalloc_degradation:.2f}x MORE than mimalloc")`
			`print()`

			`print("### Key Issues Identified")`
			`print()`
			`print("1. Hot Cache Performance (WS256):")`
			`print(" - HAKMEM: 79.2 M ops/s")`
			`print(" - Gap: -9.1% vs System, -45.8% vs mimalloc")`
			`print(" - Issue: Fast-path overhead (TLS drain, SuperSlab lookup)")`
			`print()`
			`print("2. Realistic Workload Performance (WS8192):")`
			`print(" - HAKMEM: 16.5 M ops/s")`
			`print(" - Gap: -71.1% vs System, -83.1% vs mimalloc")`
			`print(" - Issue: SEVERE - SuperSlab scaling, fragmentation, TLB pressure")`
			`print()`
			`print("3. Scalability Problem:")`
			`print(f" - HAKMEM loses {hakmem_degradation:.1f}x performance with larger working sets")`
			`print(f" - System loses only {system_degradation:.1f}x")`
			`print(f" - mimalloc loses only {mimalloc_degradation:.1f}x")`
			`print(" - Root cause: SuperSlab architecture doesn't scale well")`
			`print()`

			`print("=" * 80)`
			`print("Recommendations for Phase 9+")`
			`print("=" * 80)`
			`print()`

			`print("### CRITICAL PRIORITY: Fix WS8192 Performance Gap")`
			`print()`
			`print("The 71-83% performance gap at realistic working sets is UNACCEPTABLE.")`
			`print()`
			`print("Immediate Actions Required:")`
			`print()`
			`print("1. Investigate SuperSlab Scaling (Phase 9)")`
			`print(" - Profile: Why does performance collapse with larger working sets?")`
			`print(" - Hypothesis: SuperSlab lookup overhead, fragmentation, or TLB misses")`
			`print(" - Debug logs show 'shared_fail→legacy' messages → shared slab exhaustion")`
			`print()`
			`print("2. Optimize Fast Path (Phase 10)")`
			`print(" - Even WS256 shows 9-46% gap vs competitors")`
			`print(" - Profile TLS drain overhead")`
			`print(" - Consider reducing drain frequency or lazy draining")`
			`print()`
			`print("3. Consider Alternative Architectures (Phase 11)")`
			`print(" - Current SuperSlab model may be fundamentally flawed")`
			`print(" - Benchmark shows 4.8x degradation vs 1.5x for System malloc")`
			`print(" - May need hybrid approach: TLS fast path + different backend")`
			`print()`
			`print("4. Specific Debug Actions")`
			`print(" - Analyze '[SS_BACKEND] shared_fail→legacy' logs")`
			`print(" - Measure SuperSlab hit rate at different working set sizes")`
			`print(" - Profile cache misses and TLB misses")`
			`print()`

			`print("=" * 80)`
			`print("Raw Data (for reproducibility)")`
			`print("=" * 80)`
			`print()`

			`for key in ['hakmem_256', 'system_256', 'mimalloc_256', 'hakmem_8192', 'system_8192', 'mimalloc_8192']:`
			`print(f"{key:20s}: {stats[key]['data']}")`

			`print()`
			`print("=" * 80)`
			`print("Analysis Complete")`
			`print("=" * 80)`