Files
hakmem/core/hakmem_tiny_alloc_v3.inc
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

103 lines
3.3 KiB
C++

// hakmem_tiny_alloc_v3.inc
// Phase 3: Allocation Hot Path Simplification (mimalloc-style)
//
// Goal: Reduce malloc/alloc overhead from 27% to 20%
// Expected improvement: +5-10% (16.53 → 17.5-18.0 M ops/sec)
//
// Key changes:
// - Single-tier magazine hot path (2-3 cycles)
// - Eliminate 6+ stage fallback chain → 2 stages
// - Minimal stack frame
// - Zero TLS overhead on hit
// ============================================================================
#include "mid_tcache.h"
// Phase 3 Helper: Magazine Refill from SuperSlab
// ============================================================================
// Batch-fill a TLS magazine with blocks obtained from
// hak_tiny_alloc_superslab().
//
//   class_idx  size class forwarded unchanged to hak_tiny_alloc_superslab()
//   mag        magazine to push into; a NULL pointer is tolerated
//   want       upper bound on blocks to add; clamped to the free room
//              (mag->cap - mag->top)
//
// Returns the number of blocks actually pushed. That is 0 when mag is NULL,
// want is non-positive, the magazine has no free room, or the very first
// allocation attempt returns NULL.
static int mag_refill_from_superslab_v3(int class_idx, TinyTLSMag* mag, int want) {
    if (mag == NULL || want <= 0) {
        return 0;
    }

    const int room = mag->cap - mag->top;
    if (room <= 0) {
        return 0;
    }

    // Never push more than the magazine can still hold.
    const int target = (want < room) ? want : room;

    int added = 0;
    while (added < target) {
        void* block = hak_tiny_alloc_superslab(class_idx);
        if (block == NULL) {
            break;  // allocator returned nothing; keep what was gathered so far
        }

        mag->items[mag->top].ptr = block;
#if HAKMEM_TINY_MAG_OWNER
        // Blocks pushed by this refill are stored without an owner.
        mag->items[mag->top].owner = NULL;
#endif
        mag->top++;
        added++;
    }

    return added;
}
// ============================================================================
// Phase 3: Slow Path (Cold, Noinline)
// ============================================================================
// Slow-path allocation, marked cold and noinline.
//
//   size       unused here; the visible caller has already mapped it to
//              class_idx
//   class_idx  size class; anything outside [0, TINY_NUM_CLASSES) yields NULL
//
// Strategy:
//   1. Try to batch-refill the class's TLS magazine (64 blocks for classes
//      0..3, 32 otherwise). If at least one block was added, pop the top
//      entry and return it.
//   2. Otherwise make a single direct call to hak_tiny_alloc_superslab()
//      and return whatever it gives back (possibly NULL).
static void* __attribute__((cold, noinline))
hak_tiny_alloc_slow_v3(size_t size, int class_idx) {
    (void)size;

    const int class_in_range =
        (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    if (!class_in_range) {
        return NULL;
    }

    TinyTLSMag* mag = &g_tls_mags[class_idx];

    // Lower-numbered classes get the larger refill batch.
    int batch;
    if (class_idx <= 3) {
        batch = 64;
    } else {
        batch = 32;
    }

    const int refilled = mag_refill_from_superslab_v3(class_idx, mag, batch);
    if (refilled <= 0) {
        // Nothing was added to the magazine: fall back to one direct
        // SuperSlab allocation.
        return hak_tiny_alloc_superslab(class_idx);
    }

    // Refill succeeded: hand out the entry on top of the magazine.
    mag->top -= 1;
    return mag->items[mag->top].ptr;
}
// ============================================================================
// Phase 3: Hot Path (Ultra-Simple, Inline Candidate)
// ============================================================================
// Tiny-allocation entry point.
//
//   size  requested size in bytes
//
// Order of attempts:
//   1. Map size to a class via hak_tiny_size_to_class(); a negative class
//      returns NULL immediately.
//   2. For classes above 3, try midtc_pop() first and return its result if
//      it is non-NULL.
//   3. Pop the top entry of the class's TLS magazine when it is non-empty.
//   4. Otherwise defer to hak_tiny_alloc_slow_v3().
//
// Returns the allocated block, or NULL when the size has no class or the
// slow path fails.
void* hak_tiny_alloc_v3(size_t size) {
    const int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        // No tiny class for this size (original note: >1KB or size=0).
        return NULL;
    }

    // MidTC (class >= 4): the TLS tcache takes top priority.
    if (__builtin_expect(class_idx > 3, 0)) {
        void* cached = midtc_pop(class_idx);
        if (cached != NULL) {
            return cached;
        }
    }

    TinyTLSMag* mag = &g_tls_mags[class_idx];
    const int count = mag->top;
    if (__builtin_expect(count <= 0, 0)) {
        // Magazine empty: refill + fallback handled by the slow path.
        return hak_tiny_alloc_slow_v3(size, class_idx);
    }

    // Fastest path: pop the top magazine entry.
    const int slot = count - 1;
    void* ptr = mag->items[slot].ptr;
    mag->top = slot;
    return ptr;
}