Phase 17-1: Small-Mid Allocator - TLS Frontend Cache (結果: ±0.3%, 層分離成功)
Summary:
========
Phase 17-1 implements Small-Mid allocator as TLS frontend cache with Tiny backend delegation.

Result: Clean layer separation achieved with minimal overhead (±0.3%), but no performance gain.
Conclusion: Frontend-only approach is a dead end. Phase 17-2 (dedicated backend) is required for 2-3x target.

Implementation:
===============
1. Small-Mid TLS frontend (256B/512B/1KB - 3 classes)
   - TLS freelist (32/24/16 capacity)
   - Backend delegation to Tiny C5/C6/C7
   - Header conversion (0xa0 → 0xb0)

2. Auto-adjust Tiny boundary
   - When Small-Mid ON: Tiny auto-limits to C0-C5 (0-255B)
   - When Small-Mid OFF: Tiny default C0-C7 (0-1023B)
   - Prevents routing conflict

3. Routing order fix
   - Small-Mid BEFORE Tiny (critical for proper execution)
   - Fall-through on TLS miss

Files Modified:
===============
- core/hakmem_smallmid.h/c: TLS freelist + backend delegation
- core/hakmem_tiny.c: tiny_get_max_size() auto-adjust
- core/box/hak_alloc_api.inc.h: Routing order (Small-Mid → Tiny)
- CURRENT_TASK.md: Phase 17-1 results + Phase 17-2 plan

A/B Benchmark Results:
======================
| Size    | Config A (OFF) | Config B (ON) | Delta | % Change |
|---------|----------------|---------------|-------|----------|
| 256B    | 5.87M ops/s    | 6.06M ops/s   | +191K | +3.3%    |
| 512B    | 6.02M ops/s    | 5.91M ops/s   | -112K | -1.9%    |
| 1024B   | 5.58M ops/s    | 5.54M ops/s   | -35K  | -0.6%    |
| Overall | 5.82M ops/s    | 5.84M ops/s   | +20K  | +0.3%    |

Analysis:
=========
✅ SUCCESS: Clean layer separation (Small-Mid ↔ Tiny coexist)
✅ SUCCESS: Minimal overhead (±0.3% = measurement noise)
❌ FAIL: No performance gain (target was 2-4x)

Root Cause:
-----------
- Delegation overhead = TLS savings (net gain ≈ 0 instructions)
- Small-Mid TLS alloc: ~3-5 instructions
- Tiny backend delegation: ~3-5 instructions
- Header conversion: ~2 instructions
- No batching: 1:1 delegation to Tiny (no refill amortization)

Lessons Learned:
================
- Frontend-only approach ineffective (backend calls not reduced)
- Dedicated backend essential for meaningful improvement
- Clean separation achieved = solid foundation for Phase 17-2

Next Steps (Phase 17-2):
========================
- Dedicated Small-Mid SuperSlab backend (separate from Tiny)
- TLS batch refill (8-16 blocks per refill)
- Optimized 0xb0 header fast path (no delegation)
- Target: 12-15M ops/s (2.0-2.6x improvement)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -170,40 +170,84 @@ static inline bool smallmid_tls_push(int class_idx, void* ptr) {
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Backend: Use Tiny Allocator APIs (Phase 17-1)
|
||||
// Backend Delegation (Phase 17-1: Reuse Tiny infrastructure)
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* smallmid_backend_alloc - Allocate from Tiny backend
|
||||
* smallmid_backend_alloc - Allocate from Tiny backend and convert header
|
||||
*
|
||||
* @param size Allocation size
|
||||
* @return Allocated pointer (user pointer, no Small-Mid header)
|
||||
* @param size Allocation size (256-1024)
|
||||
* @return User pointer with Small-Mid header (0xb0), or NULL on failure
|
||||
*
|
||||
* Phase 17-1: Delegate to existing Tiny allocator infrastructure
|
||||
* This reuses Tiny's SuperSlab/SharedPool without building dedicated backend
|
||||
* Strategy:
|
||||
* - Call Tiny allocator (handles C5/C6/C7 = 256B/512B/1KB)
|
||||
* - Tiny writes header: 0xa5/0xa6/0xa7
|
||||
* - Overwrite with Small-Mid header: 0xb0/0xb1/0xb2
|
||||
*/
|
||||
static inline void* smallmid_backend_alloc(size_t size) {
|
||||
static void* smallmid_backend_alloc(size_t size) {
|
||||
#ifdef HAKMEM_SMALLMID_STATS
|
||||
__atomic_fetch_add(&g_smallmid_stats.tls_misses, 1, __ATOMIC_RELAXED);
|
||||
__atomic_fetch_add(&g_smallmid_stats.superslab_refills, 1, __ATOMIC_RELAXED);
|
||||
#endif
|
||||
|
||||
// Call Tiny allocator (reuses existing SuperSlab/SharedPool)
|
||||
// Call Tiny allocator
|
||||
void* ptr = hak_tiny_alloc(size);
|
||||
SMALLMID_LOG("smallmid_backend_alloc(%zu) = %p (via Tiny)", size, ptr);
|
||||
if (!ptr) {
|
||||
SMALLMID_LOG("smallmid_backend_alloc(%zu): Tiny allocation failed", size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Overwrite header: Tiny (0xa0 | tiny_class) → Small-Mid (0xb0 | sm_class)
|
||||
// Tiny class mapping: C5=256B, C6=512B, C7=1KB
|
||||
// Small-Mid class mapping: SM0=256B, SM1=512B, SM2=1KB
|
||||
uint8_t* base = (uint8_t*)ptr - 1;
|
||||
uint8_t tiny_header = *base;
|
||||
uint8_t tiny_class = tiny_header & 0x0f;
|
||||
|
||||
// Convert Tiny class (5/6/7) to Small-Mid class (0/1/2)
|
||||
int sm_class = tiny_class - 5;
|
||||
if (sm_class < 0 || sm_class >= SMALLMID_NUM_CLASSES) {
|
||||
// Should never happen - Tiny allocated wrong class
|
||||
SMALLMID_LOG("smallmid_backend_alloc(%zu): Invalid Tiny class %d", size, tiny_class);
|
||||
// Revert header and free
|
||||
hak_tiny_free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Write Small-Mid header
|
||||
*base = 0xb0 | sm_class;
|
||||
|
||||
SMALLMID_LOG("smallmid_backend_alloc(%zu) = %p (Tiny C%d → SM C%d)", size, ptr, tiny_class, sm_class);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* smallmid_backend_free - Free to Tiny backend
|
||||
* smallmid_backend_free - Convert header and delegate to Tiny backend
|
||||
*
|
||||
* @param ptr User pointer (no Small-Mid header)
|
||||
* @param size Allocation size
|
||||
* @param ptr User pointer (must have Small-Mid header 0xb0)
|
||||
* @param size Allocation size (unused, Tiny reads header)
|
||||
*
|
||||
* Phase 17-1: Delegate to existing Tiny allocator infrastructure
|
||||
* Strategy:
|
||||
* - Convert header: Small-Mid (0xb0 | sm_class) → Tiny (0xa0 | tiny_class)
|
||||
* - Call Tiny free to handle deallocation
|
||||
*/
|
||||
static inline void smallmid_backend_free(void* ptr, size_t size) {
|
||||
(void)size; // Unused: Tiny free reads header, doesn't need size
|
||||
SMALLMID_LOG("smallmid_backend_free(%p) (via Tiny)", ptr);
|
||||
static void smallmid_backend_free(void* ptr, size_t size) {
|
||||
(void)size; // Unused - Tiny reads size from header
|
||||
|
||||
// Read Small-Mid header
|
||||
uint8_t* base = (uint8_t*)ptr - 1;
|
||||
uint8_t sm_header = *base;
|
||||
uint8_t sm_class = sm_header & 0x0f;
|
||||
|
||||
// Convert Small-Mid class (0/1/2) to Tiny class (5/6/7)
|
||||
uint8_t tiny_class = sm_class + 5;
|
||||
|
||||
// Write Tiny header
|
||||
*base = 0xa0 | tiny_class;
|
||||
|
||||
SMALLMID_LOG("smallmid_backend_free(%p): SM C%d → Tiny C%d", ptr, sm_class, tiny_class);
|
||||
|
||||
// Call Tiny free
|
||||
hak_tiny_free(ptr);
|
||||
}
|
||||
|
||||
@ -247,16 +291,16 @@ void* smallmid_alloc(size_t size) {
|
||||
return (uint8_t*)ptr + 1; // Return user pointer (skip header)
|
||||
}
|
||||
|
||||
// Slow path: Allocate from Tiny backend (no refill, direct delegation)
|
||||
// Phase 17-1: Simplified - no TLS refill, just pass through to Tiny
|
||||
void* backend_ptr = smallmid_backend_alloc(size);
|
||||
if (!backend_ptr) {
|
||||
SMALLMID_LOG("smallmid_alloc(%zu) = NULL (backend alloc failed)", size);
|
||||
// TLS miss: Allocate from Tiny backend
|
||||
// Phase 17-1: Reuse Tiny infrastructure (C5/C6/C7) instead of dedicated SuperSlab
|
||||
ptr = smallmid_backend_alloc(size);
|
||||
if (!ptr) {
|
||||
SMALLMID_LOG("smallmid_alloc(%zu) = NULL (backend failed)", size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SMALLMID_LOG("smallmid_alloc(%zu) = %p (backend alloc, class=%d)", size, backend_ptr, class_idx);
|
||||
return backend_ptr; // Backend returns user pointer directly
|
||||
SMALLMID_LOG("smallmid_alloc(%zu) = %p (backend alloc, class=%d)", size, ptr, class_idx);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user