Files
hakmem/core/hakmem_tiny_alloc_v3.inc

103 lines
3.3 KiB
PHP
Raw Normal View History

// hakmem_tiny_alloc_v3.inc
// Phase 3: Allocation Hot Path Simplification (mimalloc-style)
//
// Goal: Reduce malloc/alloc overhead from 27% to 20%
// Expected improvement: +5-10% (16.53 → 17.5-18.0 M ops/sec)
//
// Key changes:
// - Single-tier magazine hot path (2-3 cycles)
// - Eliminate 6+ stage fallback chain → 2 stages
// - Minimal stack frame
// - Zero TLS overhead on hit
// ============================================================================
#include "mid_tcache.h"
// Phase 3 Helper: Magazine Refill from SuperSlab
// ============================================================================
// Push a batch of blocks obtained from hak_tiny_alloc_superslab() onto a
// TLS magazine.
//
// class_idx: size class forwarded unchanged to hak_tiny_alloc_superslab().
// mag:       magazine to fill; a NULL magazine yields 0.
// want:      number of blocks requested; non-positive yields 0, and the
//            request is clamped to the free room (mag->cap - mag->top).
//
// Returns: number of blocks actually pushed. This is less than the clamped
// request when hak_tiny_alloc_superslab() returns NULL part-way through.
static int mag_refill_from_superslab_v3(int class_idx, TinyTLSMag* mag, int want) {
    if (mag == NULL || want < 1) {
        return 0;
    }

    const int room = mag->cap - mag->top;
    if (room < 1) {
        return 0;
    }

    // Never push more than the magazine can still hold.
    const int limit = (want < room) ? want : room;

    int added = 0;
    while (added < limit) {
        void* block = hak_tiny_alloc_superslab(class_idx);
        if (block == NULL) {
            break;  // source exhausted: keep what was pushed so far
        }

        mag->items[mag->top].ptr = block;
#if HAKMEM_TINY_MAG_OWNER
        // Blocks taken straight from the SuperSlab are stored without an owner.
        mag->items[mag->top].owner = NULL;
#endif
        mag->top += 1;
        added += 1;
    }

    return added;
}
// ============================================================================
// Phase 3: Slow Path (Cold, Noinline)
// ============================================================================
static void* __attribute__((cold, noinline))
hak_tiny_alloc_slow_v3(size_t size, int class_idx) {
(void)size; // size already validated by caller
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return NULL;
}
TinyTLSMag* mag = &g_tls_mags[class_idx];
// Step 1: Try refilling magazine from SuperSlab (batch 32-64 items)
int refill_count = (class_idx <= 3) ? 64 : 32;
if (mag_refill_from_superslab_v3(class_idx, mag, refill_count) > 0) {
// Magazine refilled successfully - pop one item
void* ptr = mag->items[--mag->top].ptr;
return ptr;
}
// Step 2: Direct SuperSlab allocation (magazine full or refill failed)
void* ptr = hak_tiny_alloc_superslab(class_idx);
return ptr;
}
// ============================================================================
// Phase 3: Hot Path (Ultra-Simple, Inline Candidate)
// ============================================================================
// Phase 3 allocation entry point.
//
// Order of attempts:
//   1. Map size to a class via hak_tiny_size_to_class(); a negative class
//      returns NULL (the original note says this covers sizes above 1KB
//      and size 0).
//   2. For classes above 3, try midtc_pop() first.
//   3. Pop from the per-class TLS magazine when it holds at least one item.
//   4. Otherwise defer to hak_tiny_alloc_slow_v3().
//
// Returns: a block, or NULL when none of the steps produced one.
void* hak_tiny_alloc_v3(size_t size) {
    const int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        return NULL;
    }

    // Classes 4 and up consult the MidTC cache with top priority.
    if (__builtin_expect(class_idx > 3, 0)) {
        void* cached = midtc_pop(class_idx);
        if (cached != NULL) {
            return cached;
        }
    }

    TinyTLSMag* mag = &g_tls_mags[class_idx];
    const int depth = mag->top;

    // Empty magazine is the unlikely case: refill + fallback live in the
    // cold slow path.
    if (__builtin_expect(depth <= 0, 0)) {
        return hak_tiny_alloc_slow_v3(size, class_idx);
    }

    // Fastest path: take the top item and publish the new depth.
    const int slot = depth - 1;
    void* block = mag->items[slot].ptr;
    mag->top = slot;
    return block;
}