#ifndef TINY_ULTRA_FAST_INC_H #define TINY_ULTRA_FAST_INC_H // ============================================================================ // HAKMEM Ultra Fast Path // ============================================================================ // Phase E5: System malloc並みの超軽量fast path // // 目的: // - FastCache/SFC/統計/プロファイリングを全てOFF // - TLS SLL 1層のみのシンプル実装 // - 8-10命令でalloc/freeを完結 // // 期待: // - System malloc並みの性能 (90M+ ops/s) // - 「賢い機能」のコストを定量化 // ============================================================================ #include "hakmem_tiny.h" // External TLS arrays (defined in hakmem_tiny.c) // Phase 3d-B: TLS Cache Merge - Unified structure (type in hakmem_tiny.h) extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES]; // ============================================================================ // Ultra-Fast Allocation (8-10 instructions) // ============================================================================ static inline void* tiny_alloc_ultra_fast(size_t size) { // 1. Size to class (direct calculation, no LUT) // HAKMEM Tiny classes (from g_tiny_class_sizes): // C0=8B, C1=16B, C2=32B, C3=64B, C4=128B, C5=256B, C6=512B, C7=1024B if (size == 0) size = 1; if (size > 1024) return NULL; // Tiny範囲外 // Direct mapping: use BSR-style or simple branching int cl; if (size <= 8) cl = 0; else if (size <= 16) cl = 1; else if (size <= 32) cl = 2; else if (size <= 64) cl = 3; else if (size <= 128) cl = 4; else if (size <= 256) cl = 5; else if (size <= 512) cl = 6; else cl = 7; // size <= 1024 // 2. TLS SLL pop (3-4 instructions) // Phase 3d-B: Use unified struct (head+count in same cache line) void* ptr = g_tls_sll[cl].head; // 1 load if (!ptr) return NULL; // 1 branch (miss → slow path) void* next = *(void**)ptr; // 1 load (next pointer) g_tls_sll[cl].head = next; // 1 store g_tls_sll[cl].count--; // 1 decrement // 3. Return USER pointer (ptr is BASE, +1 for header) // Phase 7 header-based fast free requires this return (char*)ptr + 1; } // ============================================================================ // Ultra-Fast Free (6-8 instructions) // ============================================================================ static inline int tiny_free_ultra_fast(void* ptr) { if (!ptr) return 0; // 1. Read header to get class_idx (Phase 7 header-based) uint8_t header = *((uint8_t*)ptr - 1); uint8_t class_idx = header & 0x0F; // 2. Bounds check (safety - minimal overhead) if (class_idx >= TINY_NUM_CLASSES) return 0; // Route to slow path // 3. Convert USER → BASE void* base = (char*)ptr - 1; // 4. TLS SLL push (3-4 instructions) // Phase 3d-B: Use unified struct (head+count in same cache line) void* head = g_tls_sll[class_idx].head; // 1 load *(void**)base = head; // 1 store (link) g_tls_sll[class_idx].head = base; // 1 store g_tls_sll[class_idx].count++; // 1 increment return 1; // Success } // ============================================================================ // Ultra Mode Entry Point - TLS SLL Only (no fallback) // ============================================================================ // NOTE: Ultra mode expects TLS SLL to be warm. If miss, returns NULL. // Caller (wrapper) will fallback to full tiny_alloc_fast path. static inline void* tiny_alloc_fast_ultra(size_t size) { // Try ultra-fast path (TLS SLL only) return tiny_alloc_ultra_fast(size); } static inline void tiny_free_fast_ultra(void* ptr) { // Try ultra-fast free (TLS SLL push only) tiny_free_ultra_fast(ptr); } #endif // TINY_ULTRA_FAST_INC_H