2025-11-08 23:53:25 +09:00
|
|
|
#include "pool_tls.h"
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdbool.h>
|
2025-11-09 18:55:50 +09:00
|
|
|
#include <sys/syscall.h>
|
|
|
|
|
#include <unistd.h>
|
2025-11-14 14:18:56 +09:00
|
|
|
#include <stdatomic.h>
|
2025-11-09 18:55:50 +09:00
|
|
|
#include "pool_tls_registry.h"
|
2025-11-14 15:00:13 +09:00
|
|
|
#ifdef HAKMEM_POOL_TLS_BIND_BOX
|
|
|
|
|
#include "pool_tls_bind.h"
|
|
|
|
|
#else
|
|
|
|
|
// gettid_cached is provided by pool_tls_bind.h when HAKMEM_POOL_TLS_BIND_BOX
// is defined; this is the fallback used otherwise.
//
// Returns the kernel thread ID of the calling thread. The gettid syscall is
// issued only on the first call made by each thread; the result is then kept
// in a thread-local variable and returned directly on later calls.
//
// NOTE(review): the cached value is never invalidated, so a child created by
// fork() would presumably keep seeing its parent's TID - confirm whether this
// allocator has to support fork().
static inline pid_t gettid_cached(void) {
    static __thread pid_t cached_tid = 0;  // 0 means "not queried yet"

    if (__builtin_expect(cached_tid == 0, 0)) {
        cached_tid = (pid_t)syscall(SYS_gettid);
    }
    return cached_tid;
}
|
2025-11-14 15:00:13 +09:00
|
|
|
#endif
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
#include <stdio.h>
|
2025-11-08 23:53:25 +09:00
|
|
|
|
|
|
|
|
// Class sizes: 8KB, 16KB, 24KB, 32KB, 40KB, 48KB, 52KB
|
|
|
|
|
const size_t POOL_CLASS_SIZES[POOL_SIZE_CLASSES] = {
|
|
|
|
|
8192, 16384, 24576, 32768, 40960, 49152, 53248
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// TLS state (per-thread)
|
|
|
|
|
__thread void* g_tls_pool_head[POOL_SIZE_CLASSES];
|
|
|
|
|
__thread uint32_t g_tls_pool_count[POOL_SIZE_CLASSES];
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
// Phase 1.5b: Lazy pre-warm flag (per-thread)
|
|
|
|
|
#ifdef HAKMEM_POOL_TLS_PREWARM
|
|
|
|
|
__thread int g_tls_pool_prewarmed = 0;
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
// Fixed refill counts (Phase 1: no learning)
|
|
|
|
|
static const uint32_t DEFAULT_REFILL_COUNT[POOL_SIZE_CLASSES] = {
|
|
|
|
|
64, 48, 32, 32, 24, 16, 16 // Larger classes = smaller refill
|
|
|
|
|
};
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
// Pre-warm counts optimized for memory usage (Phase 1.5b)
|
|
|
|
|
// Total memory: ~1.6MB per thread
|
|
|
|
|
// Hot classes (8-24KB): 16 blocks - common in real workloads
|
|
|
|
|
// Warm classes (32-40KB): 8 blocks
|
|
|
|
|
// Cold classes (48-52KB): 4 blocks - rare
|
|
|
|
|
static const int PREWARM_COUNTS[POOL_SIZE_CLASSES] = {
|
|
|
|
|
16, 16, 12, // Hot: 8KB, 16KB, 24KB
|
|
|
|
|
8, 8, // Warm: 32KB, 40KB
|
|
|
|
|
4, 4 // Cold: 48KB, 52KB
|
|
|
|
|
};
|
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
// Forward declaration of the refill function (from Box 2).
// pool_alloc() calls it when the TLS freelist for a class is empty and
// returns its result to the caller; a NULL result is treated as failure.
extern void* pool_refill_and_alloc(int class_idx);
|
|
|
|
|
|
|
|
|
|
// Size to class mapping.
//
// Returns the index of the smallest size class whose block size is >= size,
// or -1 when size exceeds the largest class (53248 bytes). Sizes at or below
// 8192 bytes (including 0) map to class 0.
//
// The bounds mirror POOL_CLASS_SIZES. With only 7 classes a linear scan with
// early exit is used; a binary search would be overkill.
static inline int pool_size_to_class(size_t size) {
    static const size_t class_upper_bound[] = {
        8192, 16384, 24576, 32768, 40960, 49152, 53248
    };
    const int class_count =
        (int)(sizeof class_upper_bound / sizeof class_upper_bound[0]);

    for (int cls = 0; cls < class_count; cls++) {
        if (size <= class_upper_bound[cls]) {
            return cls;
        }
    }
    return -1;  // Too large for Pool
}
|
|
|
|
|
|
|
|
|
|
// Ultra-fast allocation (5-6 cycles)
|
|
|
|
|
void* pool_alloc(size_t size) {
|
2025-11-09 18:55:50 +09:00
|
|
|
// Phase 1.5b: Lazy pre-warm on first allocation per thread
|
|
|
|
|
#ifdef HAKMEM_POOL_TLS_PREWARM
|
|
|
|
|
if (__builtin_expect(!g_tls_pool_prewarmed, 0)) {
|
|
|
|
|
g_tls_pool_prewarmed = 1; // Set flag FIRST to prevent recursion!
|
|
|
|
|
pool_tls_prewarm(); // Pre-populate TLS caches
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
// Quick bounds check
|
2025-11-14 15:32:07 +09:00
|
|
|
if (size < 8192 || size > 53248) {
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
static _Atomic int debug_reject_count = 0;
|
|
|
|
|
int reject_num = atomic_fetch_add(&debug_reject_count, 1);
|
|
|
|
|
if (reject_num < 20) {
|
|
|
|
|
fprintf(stderr, "[POOL_TLS_REJECT] size=%zu (out of bounds 8192-53248)\n", size);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2025-11-08 23:53:25 +09:00
|
|
|
|
|
|
|
|
int class_idx = pool_size_to_class(size);
|
|
|
|
|
if (class_idx < 0) return NULL;
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
// Drain a small batch of remote frees for this class
|
|
|
|
|
extern int pool_remote_pop_chain(int class_idx, int max_take, void** out_chain);
|
|
|
|
|
void* chain = NULL;
|
|
|
|
|
int drained = pool_remote_pop_chain(class_idx, 32, &chain);
|
|
|
|
|
if (drained > 0 && chain) {
|
|
|
|
|
// Splice into TLS freelist
|
|
|
|
|
void* tail = chain;
|
|
|
|
|
int n = 1;
|
|
|
|
|
while (*(void**)tail) { tail = *(void**)tail; n++; }
|
|
|
|
|
*(void**)tail = g_tls_pool_head[class_idx];
|
|
|
|
|
g_tls_pool_head[class_idx] = chain;
|
|
|
|
|
g_tls_pool_count[class_idx] += n;
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
void* head = g_tls_pool_head[class_idx];
|
|
|
|
|
|
|
|
|
|
if (__builtin_expect(head != NULL, 1)) { // LIKELY
|
|
|
|
|
// Pop from freelist (3-4 instructions)
|
|
|
|
|
g_tls_pool_head[class_idx] = *(void**)head;
|
|
|
|
|
g_tls_pool_count[class_idx]--;
|
|
|
|
|
|
|
|
|
|
#if POOL_USE_HEADERS
|
|
|
|
|
// Write header (1 byte before ptr)
|
|
|
|
|
*((uint8_t*)head - POOL_HEADER_SIZE) = POOL_MAGIC | class_idx;
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-09 18:55:50 +09:00
|
|
|
// Low-water integration: if TLS count is low, opportunistically drain remotes
|
|
|
|
|
if (g_tls_pool_count[class_idx] < 4) {
|
|
|
|
|
extern int pool_remote_pop_chain(int class_idx, int max_take, void** out_chain);
|
|
|
|
|
void* chain2 = NULL; int got = pool_remote_pop_chain(class_idx, 32, &chain2);
|
|
|
|
|
if (got > 0 && chain2) {
|
|
|
|
|
void* tail = chain2; while (*(void**)tail) tail = *(void**)tail;
|
|
|
|
|
*(void**)tail = g_tls_pool_head[class_idx];
|
|
|
|
|
g_tls_pool_head[class_idx] = chain2;
|
|
|
|
|
g_tls_pool_count[class_idx] += got;
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-11-08 23:53:25 +09:00
|
|
|
return head;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Cold path: refill
|
2025-11-14 14:18:56 +09:00
|
|
|
void* refill_ret = pool_refill_and_alloc(class_idx);
|
|
|
|
|
if (!refill_ret) {
|
|
|
|
|
// DEBUG: Log refill failure
|
|
|
|
|
static _Atomic int refill_fail_count = 0;
|
|
|
|
|
int fail_num = atomic_fetch_add(&refill_fail_count, 1);
|
|
|
|
|
if (fail_num < 10) {
|
|
|
|
|
fprintf(stderr, "[POOL_TLS] pool_refill_and_alloc FAILED: class=%d, size=%zu\n",
|
|
|
|
|
class_idx, POOL_CLASS_SIZES[class_idx]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return refill_ret;
|
2025-11-08 23:53:25 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Cross-thread free hand-off (defined outside this file).
extern int pool_remote_push(int class_idx, void* ptr, int owner_tid);

// Return a block obtained from pool_alloc() to the pool.
//
// ptr: block to free; NULL is ignored.
//
// With POOL_USE_HEADERS the size class is recovered from the header byte
// stored POOL_HEADER_SIZE bytes before ptr. A block whose header does not
// carry POOL_MAGIC, or whose class index is out of range, is NOT freed here:
// the function simply returns and the caller is responsible for routing it
// elsewhere.
//
// If the page registry knows the block and its owner is a different thread,
// the block is handed to pool_remote_push() for the owner; otherwise it is
// pushed onto the calling thread's TLS freelist.
//
// Without POOL_USE_HEADERS the class cannot be determined (registry-only
// lookup is not implemented in Phase 1), so the call is a no-op.
void pool_free(void* ptr) {
    if (!ptr) return;

#if POOL_USE_HEADERS
    // Read class from header
    uint8_t header = *((uint8_t*)ptr - POOL_HEADER_SIZE);
    if ((header & 0xF0) != POOL_MAGIC) {
        // Not ours, route elsewhere
        return;
    }
    int class_idx = header & 0x0F;
    if (class_idx >= POOL_SIZE_CLASSES) return;  // Invalid class

    // Owner resolution via page registry
    pid_t owner_tid = 0;
    int reg_cls = -1;  // filled in by the lookup; the header class is the one used below
    if (pool_reg_lookup(ptr, &owner_tid, &reg_cls)) {
#ifdef HAKMEM_POOL_TLS_BIND_BOX
        // POOL_TLS_BIND_BOX: Fast TID comparison (no repeated gettid syscalls)
        bool owned_by_me = pool_tls_is_mine_tid(owner_tid);
#else
        // Original gettid comparison
        bool owned_by_me = (owner_tid == gettid_cached());
#endif
        if (!owned_by_me) {
            // Cross-thread free: hand the block to its owner's remote queue.
            // NOTE(review): the result is ignored, as before - confirm what a
            // failed push means for the block.
            (void)pool_remote_push(class_idx, ptr, owner_tid);
            return;
        }
    }

    // Same-thread (or unknown to the registry): push to TLS freelist.
    *(void**)ptr = g_tls_pool_head[class_idx];
    g_tls_pool_head[class_idx] = ptr;
    g_tls_pool_count[class_idx]++;

    // Phase 1: No drain logic (keep it simple)
#else
    // Need registry lookup (slower fallback) - not implemented in Phase 1
    return;
#endif
}
|
|
|
|
|
|
|
|
|
|
// Install refilled chain (called by Box 2)
|
|
|
|
|
void pool_install_chain(int class_idx, void* chain, int count) {
|
|
|
|
|
if (class_idx < 0 || class_idx >= POOL_SIZE_CLASSES) return;
|
|
|
|
|
g_tls_pool_head[class_idx] = chain;
|
|
|
|
|
g_tls_pool_count[class_idx] = count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get refill count for a class
|
|
|
|
|
int pool_get_refill_count(int class_idx) {
|
|
|
|
|
if (class_idx < 0 || class_idx >= POOL_SIZE_CLASSES) return 0;
|
|
|
|
|
return DEFAULT_REFILL_COUNT[class_idx];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Thread init/cleanup
|
|
|
|
|
void pool_thread_init(void) {
|
|
|
|
|
memset(g_tls_pool_head, 0, sizeof(g_tls_pool_head));
|
|
|
|
|
memset(g_tls_pool_count, 0, sizeof(g_tls_pool_count));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Thread cleanup. Currently a deliberate no-op: blocks cached in the exiting
// thread's TLS freelists are not released.
void pool_thread_cleanup(void) {
    // Phase 1: No cleanup (keep it simple)
    // TODO: Drain back to global pool
}
|
|
|
|
|
|
|
|
|
|
// Pre-warm TLS cache (Phase 1.5b optimization)
|
|
|
|
|
// Eliminates cold-start penalty by pre-populating TLS freelists
|
|
|
|
|
// Expected improvement: +180-740% (based on Phase 7 Task 3 success)
|
|
|
|
|
void pool_tls_prewarm(void) {
|
|
|
|
|
// Forward declare refill function (from Box 2)
|
|
|
|
|
extern void* backend_batch_carve(int class_idx, int count);
|
|
|
|
|
|
|
|
|
|
for (int class_idx = 0; class_idx < POOL_SIZE_CLASSES; class_idx++) {
|
|
|
|
|
int count = PREWARM_COUNTS[class_idx];
|
|
|
|
|
|
|
|
|
|
// Directly refill TLS cache (bypass alloc/free during init)
|
|
|
|
|
// This avoids issues with g_initializing=1 affecting routing
|
|
|
|
|
void* chain = backend_batch_carve(class_idx, count);
|
|
|
|
|
if (chain) {
|
|
|
|
|
// Install entire chain directly into TLS
|
|
|
|
|
pool_install_chain(class_idx, chain, count);
|
|
|
|
|
}
|
|
|
|
|
// If OOM, continue with other classes (graceful degradation)
|
|
|
|
|
}
|
|
|
|
|
}
|