103 lines
3.3 KiB
PHP
103 lines
3.3 KiB
PHP
|
|
// hakmem_tiny_alloc_v3.inc
|
||
|
|
// Phase 3: Allocation Hot Path Simplification (mimalloc-style)
|
||
|
|
//
|
||
|
|
// Goal: Reduce malloc/alloc overhead from 27% to 20%
|
||
|
|
// Expected improvement: +5-10% (16.53 → 17.5-18.0 M ops/sec)
|
||
|
|
//
|
||
|
|
// Key changes:
|
||
|
|
// - Single-tier magazine hot path (2-3 cycles)
|
||
|
|
// - Eliminate 6+ stage fallback chain → 2 stages
|
||
|
|
// - Minimal stack frame
|
||
|
|
// - Zero TLS overhead on hit
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
#include "mid_tcache.h"
|
||
|
|
// Phase 3 Helper: Magazine Refill from SuperSlab
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// Refill magazine from SuperSlab (batch allocation)
|
||
|
|
// Returns: number of items added to magazine
|
||
|
|
static int mag_refill_from_superslab_v3(int class_idx, TinyTLSMag* mag, int want) {
|
||
|
|
if (!mag || want <= 0) return 0;
|
||
|
|
|
||
|
|
int room = mag->cap - mag->top;
|
||
|
|
if (room <= 0) return 0;
|
||
|
|
if (want > room) want = room;
|
||
|
|
|
||
|
|
int filled = 0;
|
||
|
|
for (int i = 0; i < want; i++) {
|
||
|
|
void* ptr = hak_tiny_alloc_superslab(class_idx);
|
||
|
|
if (!ptr) break;
|
||
|
|
|
||
|
|
mag->items[mag->top].ptr = ptr;
|
||
|
|
#if HAKMEM_TINY_MAG_OWNER
|
||
|
|
// Owner tracking not critical for SuperSlab allocations
|
||
|
|
mag->items[mag->top].owner = NULL;
|
||
|
|
#endif
|
||
|
|
mag->top++;
|
||
|
|
filled++;
|
||
|
|
}
|
||
|
|
|
||
|
|
return filled;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Phase 3: Slow Path (Cold, Noinline)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
static void* __attribute__((cold, noinline))
|
||
|
|
hak_tiny_alloc_slow_v3(size_t size, int class_idx) {
|
||
|
|
(void)size; // size already validated by caller
|
||
|
|
|
||
|
|
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
TinyTLSMag* mag = &g_tls_mags[class_idx];
|
||
|
|
|
||
|
|
// Step 1: Try refilling magazine from SuperSlab (batch 32-64 items)
|
||
|
|
int refill_count = (class_idx <= 3) ? 64 : 32;
|
||
|
|
if (mag_refill_from_superslab_v3(class_idx, mag, refill_count) > 0) {
|
||
|
|
// Magazine refilled successfully - pop one item
|
||
|
|
void* ptr = mag->items[--mag->top].ptr;
|
||
|
|
return ptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Step 2: Direct SuperSlab allocation (magazine full or refill failed)
|
||
|
|
void* ptr = hak_tiny_alloc_superslab(class_idx);
|
||
|
|
return ptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Phase 3: Hot Path (Ultra-Simple, Inline Candidate)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
void* hak_tiny_alloc_v3(size_t size) {
|
||
|
|
// Phase 3 hot path: minimal branches, single TLS variable
|
||
|
|
|
||
|
|
// 1. Size → class (branchless LUT)
|
||
|
|
int class_idx = hak_tiny_size_to_class(size);
|
||
|
|
if (__builtin_expect(class_idx < 0, 0)) {
|
||
|
|
return NULL; // >1KB or size=0
|
||
|
|
}
|
||
|
|
|
||
|
|
// 2. MidTC (class>=4) — TLS tcache最優先
|
||
|
|
if (__builtin_expect(class_idx > 3, 0)) {
|
||
|
|
void* mp = midtc_pop(class_idx);
|
||
|
|
if (mp) return mp;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 3. Single-tier TLS magazine (HOT PATH - 2-3 cycles!)
|
||
|
|
TinyTLSMag* mag = &g_tls_mags[class_idx];
|
||
|
|
int top = mag->top;
|
||
|
|
if (__builtin_expect(top > 0, 1)) {
|
||
|
|
// Fast path: pop from magazine
|
||
|
|
void* ptr = mag->items[--top].ptr;
|
||
|
|
mag->top = top;
|
||
|
|
return ptr; // ← 最速パス! 🚀
|
||
|
|
}
|
||
|
|
|
||
|
|
// 4. Slow path: refill + fallback
|
||
|
|
return hak_tiny_alloc_slow_v3(size, class_idx);
|
||
|
|
}
|