diff --git a/core/box/hak_alloc_api.inc.h b/core/box/hak_alloc_api.inc.h
index 20e38cfe..652887fe 100644
--- a/core/box/hak_alloc_api.inc.h
+++ b/core/box/hak_alloc_api.inc.h
@@ -30,6 +30,9 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
 #endif
     if (!g_initialized) hak_init();
 
+    // Adaptive CAS: Register thread on first allocation
+    hakmem_thread_register();
+
     uintptr_t site_id = (uintptr_t)site;
 
     // Phase 17-1: Small-Mid Front Box (256B-1KB) - TRY FIRST!
diff --git a/core/box/slab_freelist_atomic.h b/core/box/slab_freelist_atomic.h
index c7c6e4c1..b2474745 100644
--- a/core/box/slab_freelist_atomic.h
+++ b/core/box/slab_freelist_atomic.h
@@ -25,6 +25,9 @@
 #include "../superslab/superslab_types.h"
 #include "tiny_next_ptr_box.h" // Phase 1: Include for tiny_next_read/write
 
+// Adaptive CAS: extern declaration (defined in hakmem_tiny.c)
+extern _Atomic uint32_t g_hakmem_active_threads;
+
 // ============================================================================
 // HOT PATH: Lock-Free CAS Operations
 // ============================================================================
@@ -52,6 +55,19 @@
 // Performance: 6-10 cycles (optimistic case, no contention)
 //
 static inline void* slab_freelist_pop_lockfree(TinySlabMeta* meta, int class_idx) {
+    // Adaptive CAS: Single-threaded fast path (skip CAS loop)
+    uint32_t num_threads = atomic_load_explicit(&g_hakmem_active_threads, memory_order_relaxed);
+    if (__builtin_expect(num_threads <= 1, 0)) {
+        // Single-threaded: Use relaxed load/store (no contention expected)
+        void* head = atomic_load_explicit(&meta->freelist, memory_order_relaxed);
+        if (!head) return NULL;
+
+        void* next = tiny_next_read(class_idx, head);
+        atomic_store_explicit(&meta->freelist, next, memory_order_relaxed);
+        return head; // ← Skip CAS, just store (safe if single-threaded)
+    }
+
+    // Multi-threaded: Full CAS loop for MT safety
     // Load current head (acquire: see next pointer)
     void* head = atomic_load_explicit(&meta->freelist, memory_order_acquire);
 
@@ -99,6 +115,17 @@ static inline void* slab_freelist_pop_lockfree(TinySlabMeta* meta, int class_idx
 // Performance: 6-10 cycles (optimistic case, no contention)
 //
 static inline void slab_freelist_push_lockfree(TinySlabMeta* meta, int class_idx, void* node) {
+    // Adaptive CAS: Single-threaded fast path (skip CAS loop)
+    uint32_t num_threads = atomic_load_explicit(&g_hakmem_active_threads, memory_order_relaxed);
+    if (__builtin_expect(num_threads <= 1, 0)) {
+        // Single-threaded: Use relaxed load/store (no contention expected)
+        void* head = atomic_load_explicit(&meta->freelist, memory_order_relaxed);
+        tiny_next_write(class_idx, node, head);
+        atomic_store_explicit(&meta->freelist, node, memory_order_relaxed);
+        return; // ← Skip CAS, just store (safe if single-threaded)
+    }
+
+    // Multi-threaded: Full CAS loop for MT safety
     // Load current head (relaxed: we'll overwrite node->next anyway)
     void* head = atomic_load_explicit(&meta->freelist, memory_order_relaxed);
 
diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c
index d3a990c5..fdf63c21 100644
--- a/core/hakmem_tiny.c
+++ b/core/hakmem_tiny.c
@@ -235,6 +235,23 @@ int g_refill_one_on_miss = 0;
 // NOTE: Non-static because used in hakmem_tiny_refill.inc.h
 _Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
 
+// Adaptive CAS: Active thread counter (for single-threaded optimization)
+// Incremented on thread init, decremented on thread shutdown
+_Atomic uint32_t g_hakmem_active_threads = 0;
+
+// Per-thread registration flag (TLS variable)
+static __thread int g_thread_registered = 0;
+
+// Adaptive CAS: Register current thread (called on first allocation)
+// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
+__attribute__((always_inline))
+inline void hakmem_thread_register(void) {
+    if (__builtin_expect(g_thread_registered == 0, 0)) {
+        g_thread_registered = 1;
+        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
+    }
+}
+
 // Forward declarations for helpers referenced by frontend_refill_fc
 static inline int ultra_batch_for_class(int class_idx);
 enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };
diff --git a/core/hakmem_tiny.h b/core/hakmem_tiny.h
index 00068ea6..fb79718f 100644
--- a/core/hakmem_tiny.h
+++ b/core/hakmem_tiny.h
@@ -156,6 +156,12 @@ extern SlabRegistryEntry g_slab_registry[SLAB_REGISTRY_SIZE];
 // Tiny Pool initialization flag (extern for inline function access)
 extern int g_tiny_initialized;
 
+// Adaptive CAS: Active thread counter (for single-threaded optimization)
+extern _Atomic uint32_t g_hakmem_active_threads;
+
+// Adaptive CAS: Thread registration (called on first allocation)
+void hakmem_thread_register(void);
+
 // Per-class locks to protect slab lists and bitmaps (padded to avoid false sharing)
 typedef struct __attribute__((aligned(64))) { pthread_mutex_t m; char _pad[64]; } PaddedLock;
 extern PaddedLock g_tiny_class_locks[TINY_NUM_CLASSES];
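
Note: the comment above g_hakmem_active_threads says the counter is "decremented on thread shutdown", but this patch only adds the increment side. Below is a minimal standalone sketch (not part of the patch) of one way the decrement could be armed with a pthread_key_t destructor; hakmem_thread_unregister, g_thread_exit_key, g_key_once, and hakmem_make_exit_key are hypothetical names invented for illustration, while g_hakmem_active_threads and g_thread_registered mirror the patch.

/* Sketch only, assuming POSIX threads; not the patch's implementation. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

_Atomic uint32_t g_hakmem_active_threads = 0;   /* lives in hakmem_tiny.c in the patch */
static __thread int g_thread_registered = 0;

static pthread_key_t  g_thread_exit_key;               /* hypothetical */
static pthread_once_t g_key_once = PTHREAD_ONCE_INIT;  /* hypothetical */

/* Runs in the exiting thread for every thread that set the key. */
static void hakmem_thread_unregister(void* unused) {
    (void)unused;
    if (g_thread_registered) {
        g_thread_registered = 0;
        atomic_fetch_sub_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}

static void hakmem_make_exit_key(void) {
    pthread_key_create(&g_thread_exit_key, hakmem_thread_unregister);
}

/* Illustration of how the register hook could also arm the destructor;
 * the patch's hakmem_thread_register() only performs the increment. */
void hakmem_thread_register(void) {
    if (__builtin_expect(g_thread_registered == 0, 0)) {
        g_thread_registered = 1;
        pthread_once(&g_key_once, hakmem_make_exit_key);
        pthread_setspecific(g_thread_exit_key, (void*)1);  /* non-NULL so the destructor fires */
        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}

With this wiring, the counter drops back as worker threads exit, so the single-threaded fast path in slab_freelist_atomic.h can re-engage once only one registered thread remains; the main thread's destructor typically does not run unless it calls pthread_exit, which is harmless here since the count only needs to fall back to 1.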