diff --git a/core/hakmem_tiny_refill.inc.h b/core/hakmem_tiny_refill.inc.h
index 55efb4b7..7588d4e9 100644
--- a/core/hakmem_tiny_refill.inc.h
+++ b/core/hakmem_tiny_refill.inc.h
@@ -204,6 +204,46 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
     TinySlabMeta* meta = tls->meta;
     if (!meta) return 0;
 
+    // Class7 special-case: simple batch refill (favor linear carve, minimal branching)
+    if (__builtin_expect(class_idx == 7, 0)) {
+        uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
+        int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
+        if (room <= 0) return 0;
+        int take = max_take < room ? max_take : room;
+        int taken = 0;
+        size_t bs = g_tiny_class_sizes[class_idx];
+        for (; taken < take;) {
+            // Linear first (LIKELY for class7)
+            if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
+                uint8_t* base = tiny_slab_base_for(tls->ss, tls->slab_idx);
+                void* p = (void*)(base + ((size_t)meta->used * bs));
+                meta->used++;
+                *(void**)p = g_tls_sll_head[class_idx];
+                g_tls_sll_head[class_idx] = p;
+                g_tls_sll_count[class_idx]++;
+                ss_active_inc(tls->ss);
+                taken++;
+                continue;
+            }
+            // Freelist fallback
+            if (__builtin_expect(meta->freelist != NULL, 0)) {
+                void* p = meta->freelist;
+                meta->freelist = *(void**)p;
+                meta->used++;
+                *(void**)p = g_tls_sll_head[class_idx];
+                g_tls_sll_head[class_idx] = p;
+                g_tls_sll_count[class_idx]++;
+                ss_active_inc(tls->ss);
+                taken++;
+                continue;
+            }
+            // Need another slab with space
+            if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break;
+            meta = tls->meta; // refresh after refill
+        }
+        return taken;
+    }
+
     // Compute how many we can actually push into SLL without overflow
     uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
     int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
@@ -214,13 +254,13 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
     size_t bs = g_tiny_class_sizes[class_idx];
     while (taken < take) {
         void* p = NULL;
-        if (meta->freelist) {
+        if (__builtin_expect(meta->freelist != NULL, 0)) {
             p = meta->freelist;
             meta->freelist = *(void**)p;
             meta->used++;
             // Track active blocks reserved into TLS SLL
             ss_active_inc(tls->ss);
-        } else if (meta->used < meta->capacity) {
+        } else if (__builtin_expect(meta->used < meta->capacity, 1)) {
             void* slab_start = tiny_slab_base_for(tls->ss, tls->slab_idx);
             p = (char*)slab_start + ((size_t)meta->used * bs);
             meta->used++;
diff --git a/core/tiny_superslab_alloc.inc.h b/core/tiny_superslab_alloc.inc.h
index cd277377..7031c515 100644
--- a/core/tiny_superslab_alloc.inc.h
+++ b/core/tiny_superslab_alloc.inc.h
@@ -16,8 +16,8 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
     TinySlabMeta* meta = &ss->slabs[slab_idx];
 
-    // Ensure remote queue is drained before handing blocks back to TLS
-    if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0) {
+    // Ensure remote queue is drained before handing blocks back to TLS (UNLIKELY in 1T)
+    if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
         uint32_t self_tid = tiny_self_u32();
         SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
         if (slab_is_valid(&h)) {
@@ -68,7 +68,7 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
 
     // Phase 6.24: Linear allocation mode (freelist == NULL)
     // This avoids the 4000-8000 cycle cost of building freelist on init
-    if (meta->freelist == NULL && meta->used < meta->capacity) {
+    if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
         // Linear allocation: use canonical tiny_slab_base_for() only
         size_t block_size = g_tiny_class_sizes[ss->size_class];
         uint8_t* base = tiny_slab_base_for(ss, slab_idx);
@@ -80,7 +80,7 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
     }
 
     // Freelist mode (after first free())
-    if (meta->freelist) {
+    if (__builtin_expect(meta->freelist != NULL, 0)) {
         void* block = meta->freelist;
         // CORRUPTION DEBUG: Validate freelist head before popping
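
Note: the two idioms this patch leans on are (a) the GCC/Clang __builtin_expect
branch hint, here marking linear carve as the likely path and freelist pop as
the cold one, and (b) an intrusive singly-linked-list push that stores the old
head in the freed block's first word. A minimal standalone sketch of both
follows; the names sll_head, sll_count, and sll_push are illustrative stand-ins
for the patch's g_tls_sll_* globals, which in the real code are thread-local:

    /* sll_push_sketch.c: builds with gcc or clang. */
    #include <stdio.h>

    #define LIKELY(x)   __builtin_expect(!!(x), 1)
    #define UNLIKELY(x) __builtin_expect(!!(x), 0)

    static void*    sll_head  = NULL; /* head of the intrusive free list */
    static unsigned sll_count = 0;

    /* Push a free block: its first sizeof(void*) bytes link to the old head,
     * so the list needs no side storage. */
    static void sll_push(void* p) {
        *(void**)p = sll_head;
        sll_head   = p;
        sll_count++;
    }

    int main(void) {
        /* Pointer-aligned rows stand in for linearly carved slab blocks. */
        static void* blocks[4][4];
        for (int i = 0; i < 4; i++) {
            if (UNLIKELY(sll_count >= 4)) break; /* cap check, as with sll_cap */
            sll_push(blocks[i]);
        }
        printf("pushed %u blocks, head=%p\n", sll_count, sll_head);
        return 0;
    }

Without the hints the code is behaviorally identical; __builtin_expect only
steers block layout so the expected branch falls through without a jump, which
is why the patch can sprinkle it over hot paths without changing semantics.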