// hakmem_tiny_hot_pop_v4.inc.h // Phase 4-A1: TLS-BUMP Immediate-Value Hot Functions // // This file contains Phase 4-A1 optimized hot-path functions with: // - Immediate-value block sizes (no g_tiny_class_sizes[] lookup) // - Direct TLS bump allocation (2-register path) // - Branch minimization // // Expected improvement: +5-8% (16.53 → 17.5-18.0 M ops/sec) #ifndef HAKMEM_TINY_HOT_POP_V4_INC_H #define HAKMEM_TINY_HOT_POP_V4_INC_H #include "hakmem_tiny.h" #include // External TLS variables extern int g_fast_enable; extern uint16_t g_fast_cap[TINY_NUM_CLASSES]; extern __thread void* g_fast_head[TINY_NUM_CLASSES]; extern __thread uint16_t g_fast_count[TINY_NUM_CLASSES]; extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES]; extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES]; // ============================================================================ // Phase 4-A1: Immediate-Value TLS-BUMP Functions // ============================================================================ // Class 0: 8B (immediate value) static inline __attribute__((always_inline)) void* tiny_hot_bump_class0_v4(void) { uint8_t* p = g_tls_bcur[0]; if (__builtin_expect(p != NULL, 1)) { uint8_t* n = p + 8; // ← Immediate value! uint8_t* end = g_tls_bend[0]; if (__builtin_expect(n <= end, 1)) { g_tls_bcur[0] = n; return p; // 2-register hot path! 🚀 } // Window exhausted - clear and fallback g_tls_bcur[0] = NULL; g_tls_bend[0] = NULL; } return NULL; // Fallback to next tier } // Class 1: 16B (immediate value) static inline __attribute__((always_inline)) void* tiny_hot_bump_class1_v4(void) { uint8_t* p = g_tls_bcur[1]; if (__builtin_expect(p != NULL, 1)) { uint8_t* n = p + 16; // ← Immediate value! uint8_t* end = g_tls_bend[1]; if (__builtin_expect(n <= end, 1)) { g_tls_bcur[1] = n; return p; } g_tls_bcur[1] = NULL; g_tls_bend[1] = NULL; } return NULL; } // Class 2: 32B (immediate value) static inline __attribute__((always_inline)) void* tiny_hot_bump_class2_v4(void) { uint8_t* p = g_tls_bcur[2]; if (__builtin_expect(p != NULL, 1)) { uint8_t* n = p + 32; // ← Immediate value! uint8_t* end = g_tls_bend[2]; if (__builtin_expect(n <= end, 1)) { g_tls_bcur[2] = n; return p; } g_tls_bcur[2] = NULL; g_tls_bend[2] = NULL; } return NULL; } // ============================================================================ // Phase 4-A1: Hot-Class Wrapper Functions (BUMP → Linked-List) // ============================================================================ // Phase 4-A1: Replace original hot-pop functions (keep same names for compatibility) static inline __attribute__((always_inline)) void* tiny_hot_pop_class0(void) { // Layer 1: TLS-BUMP (immediate-value, 2-register) void* bump = tiny_hot_bump_class0_v4(); if (__builtin_expect(bump != NULL, 1)) { return bump; } // Layer 2: Linked-list fallback (existing hot path) if (__builtin_expect(!g_fast_enable, 0)) return NULL; uint16_t cap = g_fast_cap[0]; if (__builtin_expect(cap == 0, 0)) return NULL; void* head = g_fast_head[0]; if (__builtin_expect(head == NULL, 0)) return NULL; g_fast_head[0] = *(void**)head; uint16_t count = g_fast_count[0]; if (count > 0) { g_fast_count[0] = (uint16_t)(count - 1); } else { g_fast_count[0] = 0; } return head; } static inline __attribute__((always_inline)) void* tiny_hot_pop_class1(void) { void* bump = tiny_hot_bump_class1_v4(); if (__builtin_expect(bump != NULL, 1)) { return bump; } if (__builtin_expect(!g_fast_enable, 0)) return NULL; uint16_t cap = g_fast_cap[1]; if (__builtin_expect(cap == 0, 0)) return NULL; void* head = g_fast_head[1]; if (__builtin_expect(head == NULL, 0)) return NULL; g_fast_head[1] = *(void**)head; uint16_t count = g_fast_count[1]; if (count > 0) { g_fast_count[1] = (uint16_t)(count - 1); } else { g_fast_count[1] = 0; } return head; } static inline __attribute__((always_inline)) void* tiny_hot_pop_class2(void) { void* bump = tiny_hot_bump_class2_v4(); if (__builtin_expect(bump != NULL, 1)) { return bump; } if (__builtin_expect(!g_fast_enable, 0)) return NULL; uint16_t cap = g_fast_cap[2]; if (__builtin_expect(cap == 0, 0)) return NULL; void* head = g_fast_head[2]; if (__builtin_expect(head == NULL, 0)) return NULL; g_fast_head[2] = *(void**)head; uint16_t count = g_fast_count[2]; if (count > 0) { g_fast_count[2] = (uint16_t)(count - 1); } else { g_fast_count[2] = 0; } return head; } // Class 3: 64B - Keep original (no immediate optimization for now) // Forward declaration static inline void* superslab_tls_bump_fast(int class_idx); static inline __attribute__((always_inline)) void* tiny_hot_pop_class3(void) { // Use original implementation (from hakmem_tiny_hot_pop.inc.h) extern int g_ultra_bump_shadow; if (__builtin_expect(g_ultra_bump_shadow != 0, 1)) { // Expect enabled! void* bump = superslab_tls_bump_fast(3); if (__builtin_expect(bump != NULL, 1)) { return bump; } } if (__builtin_expect(!g_fast_enable, 0)) return NULL; uint16_t cap = g_fast_cap[3]; if (__builtin_expect(cap == 0, 0)) return NULL; void* head = g_fast_head[3]; if (__builtin_expect(head == NULL, 0)) return NULL; g_fast_head[3] = *(void**)head; uint16_t count = g_fast_count[3]; if (count > 0) { g_fast_count[3] = (uint16_t)(count - 1); } else { g_fast_count[3] = 0; } return head; } #endif // HAKMEM_TINY_HOT_POP_V4_INC_H