Files
hakmem/core/tiny_ultra_fast.inc.h

103 lines
3.8 KiB
C
Raw Normal View History

#ifndef TINY_ULTRA_FAST_INC_H
#define TINY_ULTRA_FAST_INC_H
// ============================================================================
// HAKMEM Ultra Fast Path
// ============================================================================
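// Phase E5: Ultra-lightweight fast path on par with system malloc
//
// Goals:
// - Turn FastCache / SFC / statistics / profiling all OFF
// - Simple implementation with a single TLS SLL layer only
// - Complete alloc/free in 8-10 instructions
//
// Expectations:
// - Performance on par with system malloc (90M+ ops/s)
// - Quantify the cost of the "smart" features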
// ============================================================================
#include "hakmem_tiny.h"
// External TLS arrays (defined in hakmem_tiny.c)
// Phase 3d-B: TLS Cache Merge - Unified structure (type in hakmem_tiny.h)
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
// ============================================================================
// Ultra-Fast Allocation (8-10 instructions)
// ============================================================================
// Pop one block from the calling thread's TLS singly-linked free list (SLL)
// for the smallest size class that fits `size`.
//
// Size classes as listed for g_tiny_class_sizes:
//   C0=8B, C1=16B, C2=32B, C3=64B, C4=128B, C5=256B, C6=512B, C7=1024B
//
// Returns the USER pointer (BASE + 1; the byte at BASE is the header), or
// NULL when `size` is above 1024 or when the list for the class is empty.
// A NULL from an empty list is a cache miss, not an out-of-memory signal.
// A `size` of 0 is treated as 1.
static inline void* tiny_alloc_ultra_fast(size_t size) {
    // 1. Size to class (direct calculation, no LUT)
    if (size == 0) size = 1;
    if (size > 1024) return NULL;  // outside the Tiny range

    int cl;
    if (size <= 8) cl = 0;
    else if (size <= 16) cl = 1;
    else if (size <= 32) cl = 2;
    else if (size <= 64) cl = 3;
    else if (size <= 128) cl = 4;
    else if (size <= 256) cl = 5;
    else if (size <= 512) cl = 6;
    else cl = 7;  // size <= 1024

    // 2. TLS SLL pop: the link to the next free block lives in the first
    //    pointer-sized word at BASE.
    void* ptr = g_tls_sll[cl].head;
    if (!ptr) return NULL;  // miss
    void* next = *(void**)ptr;
    g_tls_sll[cl].head = next;
    g_tls_sll[cl].count--;

    // 3. Re-stamp the header byte. While the block sat on the list, BASE[0]
    //    held the low byte of the link pointer, not a class index, and
    //    tiny_free_ultra_fast() decodes the class from (BASE[0] & 0x0F).
    //    Without this store a later free would read stale pointer bits as the
    //    class.
    //    NOTE(review): only the low nibble is consumed in this file; if the
    //    project's header format also carries magic/flag bits in the high
    //    nibble, OR them in here - confirm against the regular header writer.
    *(uint8_t*)ptr = (uint8_t)cl;

    // 4. Return USER pointer (ptr is BASE, +1 skips the header byte)
    return (char*)ptr + 1;
}
// ============================================================================
// Ultra-Fast Free (6-8 instructions)
// ============================================================================
// Push a block onto the calling thread's TLS singly-linked free list.
//
// The class index is decoded from the low 4 bits of the byte immediately
// before `ptr` (the header byte at BASE = ptr - 1).
//
// Returns 1 when the block was pushed. Returns 0, modifying nothing, when
// `ptr` is NULL or the decoded class index is not below TINY_NUM_CLASSES;
// the caller is then expected to take another path.
static inline int tiny_free_ultra_fast(void* ptr) {
    if (ptr == NULL) {
        return 0;
    }

    // USER -> BASE: step back over the one-byte header.
    uint8_t* base_bytes = (uint8_t*)ptr - 1;

    // Header's low nibble carries the class index.
    uint8_t tag = *base_bytes;
    uint8_t class_idx = tag & 0x0F;
    if (class_idx >= TINY_NUM_CLASSES) {
        return 0;  // not a class this path can handle
    }

    // Link the block in front of the current list head. The link is stored in
    // the first pointer-sized word at BASE, which overwrites the header byte.
    void* base = base_bytes;
    void* old_top = g_tls_sll[class_idx].head;
    *(void**)base = old_top;
    g_tls_sll[class_idx].head = base;
    g_tls_sll[class_idx].count++;

    return 1;
}
// ============================================================================
// Ultra Mode Entry Point - TLS SLL Only (no fallback)
// ============================================================================
// NOTE: Ultra mode expects TLS SLL to be warm. If miss, returns NULL.
// Caller (wrapper) will fallback to full tiny_alloc_fast path.
// Ultra-mode allocation entry point: forwards to tiny_alloc_ultra_fast() and
// returns its result unchanged, including NULL. No fallback is attempted
// here; a NULL result is left for the caller to handle.
static inline void* tiny_alloc_fast_ultra(size_t size) {
    void* user = tiny_alloc_ultra_fast(size);
    return user;
}
// Ultra-mode free entry point: forwards to tiny_free_ultra_fast() and
// discards its int result.
// NOTE(review): when the helper returns 0 for a non-NULL pointer the block is
// neither pushed nor handed to any other path from here - confirm that
// callers only reach this wrapper with pointers the helper will accept.
static inline void tiny_free_fast_ultra(void* ptr) {
    (void)tiny_free_ultra_fast(ptr);
}
#endif // TINY_ULTRA_FAST_INC_H