184 lines
5.8 KiB
C
184 lines
5.8 KiB
C
|
|
// hakmem_tiny_hot_pop_v4.inc.h
|
||
|
|
// Phase 4-A1: TLS-BUMP Immediate-Value Hot Functions
|
||
|
|
//
|
||
|
|
// This file contains Phase 4-A1 optimized hot-path functions with:
|
||
|
|
// - Immediate-value block sizes (no g_tiny_class_sizes[] lookup)
|
||
|
|
// - Direct TLS bump allocation (2-register path)
|
||
|
|
// - Branch minimization
|
||
|
|
//
|
||
|
|
// Expected improvement: +5-8% (16.53 → 17.5-18.0 M ops/sec)
|
||
|
|
|
||
|
|
#ifndef HAKMEM_TINY_HOT_POP_V4_INC_H
|
||
|
|
#define HAKMEM_TINY_HOT_POP_V4_INC_H
|
||
|
|
|
||
|
|
#include "hakmem_tiny.h"
|
||
|
|
#include <stdint.h>
|
||
|
|
|
||
|
|
// External TLS variables
|
||
|
|
extern int g_fast_enable;
|
||
|
|
extern uint16_t g_fast_cap[TINY_NUM_CLASSES];
|
||
|
|
extern __thread void* g_fast_head[TINY_NUM_CLASSES];
|
||
|
|
extern __thread uint16_t g_fast_count[TINY_NUM_CLASSES];
|
||
|
|
extern __thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES];
|
||
|
|
extern __thread uint8_t* g_tls_bend[TINY_NUM_CLASSES];
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Phase 4-A1: Immediate-Value TLS-BUMP Functions
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// Class 0: 8B (immediate value)
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_bump_class0_v4(void) {
|
||
|
|
uint8_t* p = g_tls_bcur[0];
|
||
|
|
if (__builtin_expect(p != NULL, 1)) {
|
||
|
|
uint8_t* n = p + 8; // ← Immediate value!
|
||
|
|
uint8_t* end = g_tls_bend[0];
|
||
|
|
if (__builtin_expect(n <= end, 1)) {
|
||
|
|
g_tls_bcur[0] = n;
|
||
|
|
return p; // 2-register hot path! 🚀
|
||
|
|
}
|
||
|
|
// Window exhausted - clear and fallback
|
||
|
|
g_tls_bcur[0] = NULL;
|
||
|
|
g_tls_bend[0] = NULL;
|
||
|
|
}
|
||
|
|
return NULL; // Fallback to next tier
|
||
|
|
}
|
||
|
|
|
||
|
|
// Class 1: 16B (immediate value)
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_bump_class1_v4(void) {
|
||
|
|
uint8_t* p = g_tls_bcur[1];
|
||
|
|
if (__builtin_expect(p != NULL, 1)) {
|
||
|
|
uint8_t* n = p + 16; // ← Immediate value!
|
||
|
|
uint8_t* end = g_tls_bend[1];
|
||
|
|
if (__builtin_expect(n <= end, 1)) {
|
||
|
|
g_tls_bcur[1] = n;
|
||
|
|
return p;
|
||
|
|
}
|
||
|
|
g_tls_bcur[1] = NULL;
|
||
|
|
g_tls_bend[1] = NULL;
|
||
|
|
}
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Class 2: 32B (immediate value)
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_bump_class2_v4(void) {
|
||
|
|
uint8_t* p = g_tls_bcur[2];
|
||
|
|
if (__builtin_expect(p != NULL, 1)) {
|
||
|
|
uint8_t* n = p + 32; // ← Immediate value!
|
||
|
|
uint8_t* end = g_tls_bend[2];
|
||
|
|
if (__builtin_expect(n <= end, 1)) {
|
||
|
|
g_tls_bcur[2] = n;
|
||
|
|
return p;
|
||
|
|
}
|
||
|
|
g_tls_bcur[2] = NULL;
|
||
|
|
g_tls_bend[2] = NULL;
|
||
|
|
}
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Phase 4-A1: Hot-Class Wrapper Functions (BUMP → Linked-List)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// Phase 4-A1: Replace original hot-pop functions (keep same names for compatibility)
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_pop_class0(void) {
|
||
|
|
// Layer 1: TLS-BUMP (immediate-value, 2-register)
|
||
|
|
void* bump = tiny_hot_bump_class0_v4();
|
||
|
|
if (__builtin_expect(bump != NULL, 1)) {
|
||
|
|
return bump;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Layer 2: Linked-list fallback (existing hot path)
|
||
|
|
if (__builtin_expect(!g_fast_enable, 0)) return NULL;
|
||
|
|
uint16_t cap = g_fast_cap[0];
|
||
|
|
if (__builtin_expect(cap == 0, 0)) return NULL;
|
||
|
|
void* head = g_fast_head[0];
|
||
|
|
if (__builtin_expect(head == NULL, 0)) return NULL;
|
||
|
|
g_fast_head[0] = *(void**)head;
|
||
|
|
uint16_t count = g_fast_count[0];
|
||
|
|
if (count > 0) {
|
||
|
|
g_fast_count[0] = (uint16_t)(count - 1);
|
||
|
|
} else {
|
||
|
|
g_fast_count[0] = 0;
|
||
|
|
}
|
||
|
|
return head;
|
||
|
|
}
|
||
|
|
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_pop_class1(void) {
|
||
|
|
void* bump = tiny_hot_bump_class1_v4();
|
||
|
|
if (__builtin_expect(bump != NULL, 1)) {
|
||
|
|
return bump;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (__builtin_expect(!g_fast_enable, 0)) return NULL;
|
||
|
|
uint16_t cap = g_fast_cap[1];
|
||
|
|
if (__builtin_expect(cap == 0, 0)) return NULL;
|
||
|
|
void* head = g_fast_head[1];
|
||
|
|
if (__builtin_expect(head == NULL, 0)) return NULL;
|
||
|
|
g_fast_head[1] = *(void**)head;
|
||
|
|
uint16_t count = g_fast_count[1];
|
||
|
|
if (count > 0) {
|
||
|
|
g_fast_count[1] = (uint16_t)(count - 1);
|
||
|
|
} else {
|
||
|
|
g_fast_count[1] = 0;
|
||
|
|
}
|
||
|
|
return head;
|
||
|
|
}
|
||
|
|
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_pop_class2(void) {
|
||
|
|
void* bump = tiny_hot_bump_class2_v4();
|
||
|
|
if (__builtin_expect(bump != NULL, 1)) {
|
||
|
|
return bump;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (__builtin_expect(!g_fast_enable, 0)) return NULL;
|
||
|
|
uint16_t cap = g_fast_cap[2];
|
||
|
|
if (__builtin_expect(cap == 0, 0)) return NULL;
|
||
|
|
void* head = g_fast_head[2];
|
||
|
|
if (__builtin_expect(head == NULL, 0)) return NULL;
|
||
|
|
g_fast_head[2] = *(void**)head;
|
||
|
|
uint16_t count = g_fast_count[2];
|
||
|
|
if (count > 0) {
|
||
|
|
g_fast_count[2] = (uint16_t)(count - 1);
|
||
|
|
} else {
|
||
|
|
g_fast_count[2] = 0;
|
||
|
|
}
|
||
|
|
return head;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Class 3: 64B - Keep original (no immediate optimization for now)
|
||
|
|
// Forward declaration
|
||
|
|
static inline void* superslab_tls_bump_fast(int class_idx);
|
||
|
|
|
||
|
|
static inline __attribute__((always_inline))
|
||
|
|
void* tiny_hot_pop_class3(void) {
|
||
|
|
// Use original implementation (from hakmem_tiny_hot_pop.inc.h)
|
||
|
|
extern int g_ultra_bump_shadow;
|
||
|
|
if (__builtin_expect(g_ultra_bump_shadow != 0, 1)) { // Expect enabled!
|
||
|
|
void* bump = superslab_tls_bump_fast(3);
|
||
|
|
if (__builtin_expect(bump != NULL, 1)) {
|
||
|
|
return bump;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if (__builtin_expect(!g_fast_enable, 0)) return NULL;
|
||
|
|
uint16_t cap = g_fast_cap[3];
|
||
|
|
if (__builtin_expect(cap == 0, 0)) return NULL;
|
||
|
|
void* head = g_fast_head[3];
|
||
|
|
if (__builtin_expect(head == NULL, 0)) return NULL;
|
||
|
|
g_fast_head[3] = *(void**)head;
|
||
|
|
uint16_t count = g_fast_count[3];
|
||
|
|
if (count > 0) {
|
||
|
|
g_fast_count[3] = (uint16_t)(count - 1);
|
||
|
|
} else {
|
||
|
|
g_fast_count[3] = 0;
|
||
|
|
}
|
||
|
|
return head;
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif // HAKMEM_TINY_HOT_POP_V4_INC_H
|