// tiny_fastcache.h - Ultra-Simple Tiny Fast Path (System tcache style)
// Phase 6-3: Bypass Magazine/SuperSlab for Tiny allocations (<=128B)
// Goal: 3-4 instruction fast path, 70-80% of System tcache performance

#pragma once

#include <stddef.h>  // size_t
#include <stdint.h>  // uint32_t
#include <string.h>  // memset

// ========== Configuration ==========

// Enable Tiny Fast Path (default: ON for Phase 6-3)
#ifndef HAKMEM_TINY_FAST_PATH
#define HAKMEM_TINY_FAST_PATH 1
#endif

// Tiny class count. Classes 0-10 serve sizes
// 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128; classes 11-15 are reserved.
#define TINY_FAST_CLASS_COUNT 16

// Fast cache capacity per class (default: 64 slots, like System tcache)
#ifndef TINY_FAST_CACHE_CAP
#define TINY_FAST_CACHE_CAP 64
#endif

// Tiny size threshold (<=128B goes to fast path)
#define TINY_FAST_THRESHOLD 128

// ========== TLS Cache (System tcache style) ==========

// Per-thread fast cache: array of freelist heads (defined in tiny_fastcache.c)
extern __thread void* g_tiny_fast_cache[TINY_FAST_CLASS_COUNT];

// Per-thread cache counts (for capacity management)
extern __thread uint32_t g_tiny_fast_count[TINY_FAST_CLASS_COUNT];

// Initialized flag
extern __thread int g_tiny_fast_initialized;

// ========== Size to Class Mapping ==========

// Map a request size in bytes to its tiny class index.
//
// Class mapping (same as existing Tiny classes):
//   0: 16B, 1: 24B, 2: 32B, 3: 40B, 4: 48B, 5: 56B, 6: 64B
//   7: 80B, 8: 96B, 9: 112B, 10: 128B, 11-15: reserved
//
// Returns the index of the smallest class whose size is >= `size`
// (a size of 0 maps to class 0), or -1 when `size` exceeds 128 bytes
// and is therefore not a tiny allocation.
static inline int tiny_fast_size_to_class(size_t size) {
    // Classes 0-6 step by 8 bytes.
    if (size <= 16) return 0;
    if (size <= 24) return 1;
    if (size <= 32) return 2;
    if (size <= 40) return 3;
    if (size <= 48) return 4;
    if (size <= 56) return 5;
    if (size <= 64) return 6;
    // Classes 7-10 step by 16 bytes.
    if (size <= 80) return 7;
    if (size <= 96) return 8;
    if (size <= 112) return 9;
    if (size <= 128) return 10;
    return -1;  // Not tiny
}

// ========== Forward Declarations ==========

// Slow path: refill from Magazine/SuperSlab (implemented in tiny_fastcache.c)
void* tiny_fast_refill(int class_idx);
void tiny_fast_drain(int class_idx);

// ========== Fast Path: Alloc
(3-4 instructions!) ========== static inline void* tiny_fast_alloc(size_t size) { // Step 1: Size to class (1-2 instructions, branch predictor friendly) int cls = tiny_fast_size_to_class(size); if (__builtin_expect(cls < 0, 0)) return NULL; // Not tiny (rare) // Step 2: Pop from TLS cache (2-3 instructions) void* ptr = g_tiny_fast_cache[cls]; if (__builtin_expect(ptr != NULL, 1)) { // Fast path: Pop head, decrement count g_tiny_fast_cache[cls] = *(void**)ptr; g_tiny_fast_count[cls]--; return ptr; } // Step 3: Slow path - refill from Magazine/SuperSlab return tiny_fast_refill(cls); } // ========== Fast Path: Free (2-3 instructions!) ========== static inline void tiny_fast_free(void* ptr, size_t size) { // Step 1: Size to class int cls = tiny_fast_size_to_class(size); if (__builtin_expect(cls < 0, 0)) return; // Not tiny (error) // Step 2: Check capacity if (__builtin_expect(g_tiny_fast_count[cls] >= TINY_FAST_CACHE_CAP, 0)) { // Cache full - drain to Magazine/SuperSlab tiny_fast_drain(cls); } // Step 3: Push to TLS cache (2 instructions) *(void**)ptr = g_tiny_fast_cache[cls]; g_tiny_fast_cache[cls] = ptr; g_tiny_fast_count[cls]++; } // ========== Initialization ========== static inline void tiny_fast_init(void) { if (g_tiny_fast_initialized) return; memset(g_tiny_fast_cache, 0, sizeof(g_tiny_fast_cache)); memset(g_tiny_fast_count, 0, sizeof(g_tiny_fast_count)); g_tiny_fast_initialized = 1; }