// hakmem_tiny_alloc_v3.inc // Phase 3: Allocation Hot Path Simplification (mimalloc-style) // // Goal: Reduce malloc/alloc overhead from 27% to 20% // Expected improvement: +5-10% (16.53 → 17.5-18.0 M ops/sec) // // Key changes: // - Single-tier magazine hot path (2-3 cycles) // - Eliminate 6+ stage fallback chain → 2 stages // - Minimal stack frame // - Zero TLS overhead on hit // ============================================================================ #include "mid_tcache.h" // Phase 3 Helper: Magazine Refill from SuperSlab // ============================================================================ // Refill magazine from SuperSlab (batch allocation) // Returns: number of items added to magazine static int mag_refill_from_superslab_v3(int class_idx, TinyTLSMag* mag, int want) { if (!mag || want <= 0) return 0; int room = mag->cap - mag->top; if (room <= 0) return 0; if (want > room) want = room; int filled = 0; for (int i = 0; i < want; i++) { void* ptr = hak_tiny_alloc_superslab(class_idx); if (!ptr) break; mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER // Owner tracking not critical for SuperSlab allocations mag->items[mag->top].owner = NULL; #endif mag->top++; filled++; } return filled; } // ============================================================================ // Phase 3: Slow Path (Cold, Noinline) // ============================================================================ static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow_v3(size_t size, int class_idx) { (void)size; // size already validated by caller if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) { return NULL; } TinyTLSMag* mag = &g_tls_mags[class_idx]; // Step 1: Try refilling magazine from SuperSlab (batch 32-64 items) int refill_count = (class_idx <= 3) ? 64 : 32; if (mag_refill_from_superslab_v3(class_idx, mag, refill_count) > 0) { // Magazine refilled successfully - pop one item void* ptr = mag->items[--mag->top].ptr; return ptr; } // Step 2: Direct SuperSlab allocation (magazine full or refill failed) void* ptr = hak_tiny_alloc_superslab(class_idx); return ptr; } // ============================================================================ // Phase 3: Hot Path (Ultra-Simple, Inline Candidate) // ============================================================================ void* hak_tiny_alloc_v3(size_t size) { // Phase 3 hot path: minimal branches, single TLS variable // 1. Size → class (branchless LUT) int class_idx = hak_tiny_size_to_class(size); if (__builtin_expect(class_idx < 0, 0)) { return NULL; // >1KB or size=0 } // 2. MidTC (class>=4) — TLS tcache最優先 if (__builtin_expect(class_idx > 3, 0)) { void* mp = midtc_pop(class_idx); if (mp) return mp; } // 3. Single-tier TLS magazine (HOT PATH - 2-3 cycles!) TinyTLSMag* mag = &g_tls_mags[class_idx]; int top = mag->top; if (__builtin_expect(top > 0, 1)) { // Fast path: pop from magazine void* ptr = mag->items[--top].ptr; mag->top = top; return ptr; // ← 最速パス! 🚀 } // 4. Slow path: refill + fallback return hak_tiny_alloc_slow_v3(size, class_idx); }