// superslab_inline.h - SuperSlab Hot Path Inline Functions
// Purpose: Performance-critical inline helpers for SuperSlab allocator
// Extracted from hakmem_tiny_superslab.h (Phase 6-2.8 Refactoring)

#ifndef SUPERSLAB_INLINE_H
#define SUPERSLAB_INLINE_H

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdatomic.h>
#include <inttypes.h>
#include <signal.h>
#include <time.h>
#include <pthread.h>

#include "superslab_types.h"
#include "hakmem_tiny_superslab_constants.h"
#include "tiny_debug_ring.h"
#include "tiny_remote.h"

// External declarations
extern int g_debug_remote_guard;
extern int g_tiny_safe_free_strict;
extern _Atomic uint64_t g_ss_active_dec_calls;
extern _Atomic uint64_t g_ss_remote_push_calls;
extern _Atomic int g_ss_remote_seen;
extern int g_remote_side_enable;
extern int g_remote_force_notify;

// Function declarations
uint32_t tiny_remote_drain_threshold(void);
void tiny_publish_notify(int class_idx, struct SuperSlab* ss, int slab_idx);

// ============================================================================
// Fast Path Inline Functions
// ============================================================================

// Runtime-safe slab count for a given SuperSlab (MUST BE FIRST - used by other functions)
static inline int ss_slabs_capacity(const SuperSlab* ss) {
    size_t ss_size = (size_t)1 << ss->lg_size;
    return (int)(ss_size / SLAB_SIZE);  // 16 or 32
}

// Fail-fast validation level (0=off, 1=basic, 2=paranoid)
static inline int tiny_refill_failfast_level(void) {
    static int g_failfast_level = -1;
    if (__builtin_expect(g_failfast_level == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
        if (env && *env) {
            g_failfast_level = atoi(env);
        } else {
            g_failfast_level = 1;
        }
    }
    return g_failfast_level;
}

// Fail-fast logging (level 2 only)
static inline void tiny_failfast_log(const char* stage, int class_idx, SuperSlab* ss,
                                     TinySlabMeta* meta, const void* node, const void* next) {
    if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
    uintptr_t base = ss ? (uintptr_t)ss : 0;
    size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
    uintptr_t limit = base + size;
    fprintf(stderr,
            "[TRC_FREELIST_LOG] stage=%s cls=%d node=%p next=%p head=%p base=%p limit=%p\n",
            stage ? stage : "(null)", class_idx, node, next,
            meta ? meta->freelist : NULL, (void*)base, (void*)limit);
    fflush(stderr);
}

// Fail-fast abort with detailed diagnostics
static inline void tiny_failfast_abort_ptr(const char* stage, SuperSlab* ss, int slab_idx,
                                           const void* ptr, const char* reason) {
    if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
    uintptr_t base = ss ? (uintptr_t)ss : 0;
    size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
    uintptr_t limit = base + size;
    size_t cap = 0;
    uint32_t used = 0;
    if (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
        cap = ss->slabs[slab_idx].capacity;
        used = ss->slabs[slab_idx].used;
    }
    size_t offset = 0;
    if (ptr && base && ptr >= (void*)base) {
        offset = (size_t)((uintptr_t)ptr - base);
    }
    fprintf(stderr,
            "[TRC_FAILFAST_PTR] stage=%s cls=%d slab_idx=%d ptr=%p reason=%s base=%p limit=%p cap=%zu used=%u offset=%zu\n",
            stage ? stage : "(null)", ss ? (int)ss->size_class : -1, slab_idx, ptr,
            reason ? reason : "(null)", (void*)base, (void*)limit, cap, used, offset);
    fflush(stderr);
    abort();
}
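// Example (illustrative only): the fail-fast helpers above are gated by the
// HAKMEM_TINY_REFILL_FAILFAST environment variable (0=off, 1=basic, 2=paranoid).
// A debugging run could therefore look like:
//
//   $ HAKMEM_TINY_REFILL_FAILFAST=2 ./my_app    # 'my_app' is a placeholder binary name
//
// At level 2, tiny_failfast_log() traces freelist stages and
// tiny_failfast_abort_ptr() aborts with pointer diagnostics; below level 2 both
// return immediately and stay off the hot path.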
// Get slab index within SuperSlab (DEPRECATED - use slab_index_for)
static inline int ptr_to_slab_index(void* p) {
    uintptr_t offset = (uintptr_t)p & SUPERSLAB_MASK;
    return (int)(offset >> 16);  // Divide by 64KB (2^16)
}

// Safe slab index computation using SuperSlab base (supports 1MB/2MB)
static inline int slab_index_for(const SuperSlab* ss, const void* p) {
    uintptr_t base = (uintptr_t)ss;
    uintptr_t addr = (uintptr_t)p;
    uintptr_t off = addr - base;
    int idx = (int)(off >> 16);  // 64KB
    int cap = ss_slabs_capacity(ss);
    return (idx >= 0 && idx < cap) ? idx : -1;
}

// Get slab data start address
static inline void* slab_data_start(SuperSlab* ss, int slab_idx) {
    return (char*)ss + (slab_idx * SLAB_SIZE);
}

// Get slab base address (accounts for SUPERSLAB_SLAB0_DATA_OFFSET)
static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx) {
    uint8_t* base = (uint8_t*)slab_data_start(ss, slab_idx);
    // Phase 6-2.5 FIX: Use SUPERSLAB_SLAB0_DATA_OFFSET constant
    // sizeof(SuperSlab)=1088, aligned to next 1024-boundary=2048
    // This ensures proper alignment for class 7 (1024-byte blocks)
    if (slab_idx == 0) base += SUPERSLAB_SLAB0_DATA_OFFSET;
    return base;
}

// Refcount helpers (for future MT-safe empty reclamation)
static inline void superslab_ref_inc(SuperSlab* ss) {
    atomic_fetch_add_explicit(&ss->refcount, 1u, memory_order_relaxed);
}

static inline unsigned superslab_ref_dec(SuperSlab* ss) {
    return atomic_fetch_sub_explicit(&ss->refcount, 1u, memory_order_acq_rel) - 1u;
}

static inline unsigned superslab_ref_get(SuperSlab* ss) {
    return atomic_load_explicit(&ss->refcount, memory_order_acquire);
}

// Active block counter helper (saturating decrement for free operations)
static inline void ss_active_dec_one(SuperSlab* ss) {
    atomic_fetch_add_explicit(&g_ss_active_dec_calls, 1, memory_order_relaxed);
    uint32_t old = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
    while (old != 0) {
        if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks, &old, old - 1u,
                                                  memory_order_relaxed, memory_order_relaxed)) {
            break;
        }
        // CAS failed: old is reloaded by the CAS intrinsic
    }
}

// Low-cost timestamp (nanoseconds, monotonic) - inline for hot path
static inline uint64_t hak_now_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

// Forward declaration of ACE state (defined in main header)
typedef struct {
    uint8_t  current_lg;
    uint8_t  target_lg;
    uint16_t hot_score;
    uint32_t alloc_count;
    uint32_t refill_count;
    uint32_t spill_count;
    uint32_t live_blocks;
    uint64_t last_tick_ns;
} SuperSlabACEState;

extern SuperSlabACEState g_ss_ace[8];  // TINY_NUM_CLASSES_SS

// Get next lg_size for new SuperSlab allocation (uses target_lg)
static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) {
    uint8_t lg = g_ss_ace[class_idx].target_lg ? g_ss_ace[class_idx].target_lg
                                               : g_ss_ace[class_idx].current_lg;
    return lg ? lg : SUPERSLAB_LG_DEFAULT;  // Use default if uninitialized
}
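// Worked example (assuming SLAB_SIZE == 64 KB, as the comments above state): a
// SuperSlab with lg_size == 20 spans 1 MB, so ss_slabs_capacity() returns 16;
// lg_size == 21 spans 2 MB and yields 32 slabs. hak_tiny_superslab_next_lg()
// prefers the ACE target_lg, falls back to current_lg, and finally to
// SUPERSLAB_LG_DEFAULT when the class has not been tuned yet.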
// Remote free push (MPSC stack) - returns 1 if transitioned from empty
static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
    atomic_fetch_add_explicit(&g_ss_remote_push_calls, 1, memory_order_relaxed);
    static _Atomic int g_remote_push_count = 0;
    int count = atomic_fetch_add_explicit(&g_remote_push_count, 1, memory_order_relaxed);
    if (count < 5) {
        fprintf(stderr, "[DEBUG ss_remote_push] Call #%d ss=%p slab_idx=%d\n",
                count + 1, (void*)ss, slab_idx);
        fflush(stderr);
    }
    if (g_debug_remote_guard && count < 5) {
        fprintf(stderr, "[REMOTE_PUSH] ss=%p slab_idx=%d ptr=%p count=%d\n",
                (void*)ss, slab_idx, ptr, count);
    }
    // Unconditional sanity checks (Fail-Fast without crashing)
    {
        uintptr_t ptr_val = (uintptr_t)ptr;
        uintptr_t base = (uintptr_t)ss;
        size_t ss_size = (size_t)1ULL << ss->lg_size;
        int cap = ss_slabs_capacity(ss);
        int in_range = (ptr_val >= base) && (ptr_val < base + ss_size);
        int aligned = ((ptr_val & (sizeof(void*) - 1)) == 0);
        if (!in_range || slab_idx < 0 || slab_idx >= cap || !aligned) {
            uintptr_t code = 0xB001u;
            if (!in_range) code |= 0x01u;
            if (!aligned) code |= 0x02u;
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr,
                                   ((uintptr_t)slab_idx << 32) | code);
            return 0;
        }
    }
    // A/B: global disable for remote MPSC - fallback to legacy freelist push
    do {
        static int g_disable_remote_glob = -1;
        if (__builtin_expect(g_disable_remote_glob == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
            g_disable_remote_glob = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_disable_remote_glob, 0)) {
            TinySlabMeta* meta = &ss->slabs[slab_idx];
            void* prev = meta->freelist;
            *(void**)ptr = prev;
            meta->freelist = ptr;
            // Reflect accounting (callers also decrement used; keep idempotent here)
            ss_active_dec_one(ss);
            if (prev == NULL) {
                // first item: mark this slab visible to adopters
                uint32_t bit = (1u << slab_idx);
                atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
                return 1;
            }
            return 0;
        }
    } while (0);
    _Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
    uintptr_t old;
    do {
        old = atomic_load_explicit(head, memory_order_acquire);
        if (!g_remote_side_enable) {
            *(void**)ptr = (void*)old;  // legacy embedding
        }
    } while (!atomic_compare_exchange_weak_explicit(head, &old, (uintptr_t)ptr,
                                                    memory_order_release, memory_order_relaxed));
    tiny_remote_side_set(ss, slab_idx, ptr, old);
    tiny_remote_track_on_remote_push(ss, slab_idx, ptr, "remote_push", 0);
    if (__builtin_expect(g_debug_remote_guard, 0)) {
        // One-shot verify just-written next/ptr alignment and range
        uintptr_t base = (uintptr_t)ss;
        size_t ss_size = (size_t)1ULL << ss->lg_size;
        uintptr_t pv = (uintptr_t)ptr;
        int ptr_in = (pv >= base && pv < base + ss_size);
        int ptr_al = ((pv & (sizeof(void*) - 1)) == 0);
        int old_in = (old == 0) || ((old >= base) && (old < base + ss_size));
        int old_al = (old == 0) || ((old & (sizeof(void*) - 1)) == 0);
        if (!ptr_in || !ptr_al || !old_in || !old_al) {
            uintptr_t flags = ((uintptr_t)ptr_al << 3) | ((uintptr_t)ptr_in << 2) |
                              ((uintptr_t)old_al << 1) | (uintptr_t)old_in;
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr,
                                   0xB100u | (flags & 0xFu));
            if (g_tiny_safe_free_strict) {
                raise(SIGUSR2);
            }
        }
        fprintf(stderr, "[REMOTE_PUSH] cls=%u slab=%d ptr=%p old=%p transitioned=%d\n",
                ss->size_class, slab_idx, ptr, (void*)old, old == 0);
        // Pack: [slab_idx<<32 | bit0:old==0 | bit1:old_al | bit2:ptr_al]
        uintptr_t aux = ((uintptr_t)slab_idx << 32) |
                        ((old == 0) ? 1u : 0u) |
                        ((old_al ? 1u : 0u) << 1) |
                        ((ptr_al ? 1u : 0u) << 2);
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH, (uint16_t)ss->size_class, ptr, aux);
    } else {
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH, (uint16_t)ss->size_class, ptr,
                               ((uintptr_t)slab_idx << 32) | (uint32_t)(old == 0));
    }
    atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1u, memory_order_relaxed);
    ss_active_dec_one(ss);  // Fix: Decrement active blocks on cross-thread free
    atomic_store_explicit(&g_ss_remote_seen, 1, memory_order_relaxed);
    int transitioned = (old == 0);
    // (optional hint to Ready ring moved to mailbox/aggregator to avoid header coupling)
    if (transitioned) {
        // First remote observed for this slab: mark slab_listed and notify publisher paths
        unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
        (void)prev;  // best-effort
        tiny_publish_notify((int)ss->size_class, ss, slab_idx);
    } else {
        // Optional: best-effort notify if already non-empty but not listed
        if (__builtin_expect(g_remote_force_notify, 0)) {
            unsigned listed = atomic_load_explicit(&ss->slab_listed[slab_idx], memory_order_acquire);
            if (listed == 0) {
                unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
                (void)prev;
                tiny_publish_notify((int)ss->size_class, ss, slab_idx);
            }
        }
    }
    return transitioned;
}
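// Usage sketch (illustrative; variable names are hypothetical): a cross-thread
// free path pushes the block onto the owning SuperSlab's MPSC stack instead of
// touching the owner's freelist directly:
//
//   int idx = slab_index_for(ss, ptr);
//   if (idx >= 0 && ss_remote_push(ss, idx, ptr)) {
//       // Return value 1 means this slab's remote stack transitioned from
//       // empty; ss_remote_push() has already marked the slab listed and
//       // notified the publisher path via tiny_publish_notify().
//   }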
// Drain remote queue into freelist (no change to used/active; already adjusted at free)
// INTERNAL UNSAFE VERSION - Only called by slab_handle.h after ownership verified!
// DO NOT call directly - use slab_drain_remote() via SlabHandle instead.
static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) {
    do {
        // one-shot debug print when enabled
        static int en = -1;
        static _Atomic int printed;
        if (__builtin_expect(en == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
            en = (e && *e && *e != '0') ? 1 : 0;
        }
        if (en) {
            int exp = 0;
            if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
                fprintf(stderr, "[DRAIN_OPT] chain splice active (cls=%u slab=%d)\n",
                        ss ? ss->size_class : 0u, slab_idx);
            }
        }
    } while (0);
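    // Drain algorithm overview: atomically detach the whole remote stack with a
    // single exchange of remote_heads[slab_idx], walk the detached nodes
    // (validating range/alignment/sentinel when guards are enabled), link them
    // into a private chain, and splice that chain into meta->freelist with a
    // single write at the end.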
    _Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
    uintptr_t p = atomic_exchange_explicit(head, (uintptr_t)NULL, memory_order_acq_rel);
    if (p == 0) return;
    uint32_t drained = 0;
    uintptr_t base = (uintptr_t)ss;
    size_t ss_size = (size_t)1ULL << ss->lg_size;
    uint32_t drain_tid = (uint32_t)(uintptr_t)pthread_self();
    // Build a local chain then splice once into freelist to reduce writes
    void* chain_head = NULL;
    void* chain_tail = NULL;
    while (p != 0) {
        // Guard: range/alignment before deref
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            if (p < base || p >= base + ss_size) {
                uintptr_t aux = tiny_remote_pack_diag(0xA210u, base, ss_size, p);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                break;
            }
            if ((p & (uintptr_t)(sizeof(void*) - 1)) != 0) {
                uintptr_t aux = tiny_remote_pack_diag(0xA211u, base, ss_size, p);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                break;
            }
        }
        void* node = (void*)p;
        uintptr_t next = tiny_remote_side_get(ss, slab_idx, node);
        tiny_remote_watch_note("drain_pull", ss, slab_idx, node, 0xA238u, drain_tid, 0);
        if (__builtin_expect(g_remote_side_enable, 0)) {
            if (!tiny_remote_sentinel_ok(node)) {
                uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, (uintptr_t)node);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
                uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
                tiny_remote_report_corruption("drain", node, observed);
                TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
                fprintf(stderr,
                        "[REMOTE_SENTINEL-DRAIN] cls=%u slab=%d node=%p drained=%u observed=0x%016" PRIxPTR
                        " owner=%u used=%u freelist=%p\n",
                        ss->size_class, slab_idx, node, drained, observed,
                        slab_meta->owner_tid, (unsigned)slab_meta->used, slab_meta->freelist);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            }
            tiny_remote_side_clear(ss, slab_idx, node);
        }
        tiny_remote_watch_note("drain_link", ss, slab_idx, node, 0xA239u, drain_tid, 0);
        tiny_remote_track_on_remote_drain(ss, slab_idx, node, "remote_drain", drain_tid);
        if (__builtin_expect(g_debug_remote_guard && drained < 3, 0)) {
            // First few nodes: record low info for triage
            uintptr_t aux = ((uintptr_t)slab_idx << 32) | (uintptr_t)(drained & 0xFFFF);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, node, aux);
        }
        // Link into local chain (avoid touching meta->freelist per node)
        if (chain_head == NULL) {
            chain_head = node;
            chain_tail = node;
            *(void**)node = NULL;
        } else {
            *(void**)node = chain_head;
            chain_head = node;
        }
        p = next;
        drained++;
    }
    // Splice the drained chain into freelist (single meta write)
    if (chain_head != NULL) {
        if (chain_tail != NULL) {
            *(void**)chain_tail = meta->freelist;
        }
        void* prev = meta->freelist;
        meta->freelist = chain_head;
        tiny_failfast_log("remote_drain", ss->size_class, ss, meta, chain_head, prev);
        // Optional: set freelist bit when transitioning from empty
        do {
            static int g_mask_en = -1;
            if (__builtin_expect(g_mask_en == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
                g_mask_en = (e && *e && *e != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_mask_en, 0)) {
                uint32_t bit = (1u << slab_idx);
                atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
            }
        } while (0);
    }
    // Reset remote count after full drain
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0u, memory_order_relaxed);
    tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, ss,
                           ((uintptr_t)slab_idx << 32) | drained);
}

// Legacy wrapper for compatibility (UNSAFE - ownership NOT checked!)
// DEPRECATED: Use slab_drain_remote() via SlabHandle instead
static inline void ss_remote_drain_to_freelist(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];
    _ss_remote_drain_to_freelist_unsafe(ss, slab_idx, meta);
}

// Try to acquire exclusive ownership of slab (REQUIRED before draining remote queue!)
// Returns 1 on success (now own slab), 0 on failure (another thread owns it)
// CRITICAL: Only succeeds if slab is unowned (owner_tid==0) or already owned by us.
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t self_tid) {
    uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
    if (cur == self_tid) return 1;  // Already owner - success
    if (cur != 0) return 0;         // Another thread owns it - FAIL immediately
    // Slab is unowned (cur==0) - try to claim it
    uint32_t expected = 0;
    return __atomic_compare_exchange_n(&m->owner_tid, &expected, self_tid, false,
                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
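// Usage sketch (illustrative; the canonical safe entry point is
// slab_drain_remote() via SlabHandle in slab_handle.h): a thread must own the
// slab before draining its remote queue, mirroring ss_remote_drain_light() below:
//
//   uint32_t self_tid = (uint32_t)(uintptr_t)pthread_self();
//   TinySlabMeta* m = &ss->slabs[idx];
//   if (ss_owner_try_acquire(m, self_tid)) {
//       ss_remote_drain_to_freelist(ss, idx);  // safe: we hold ownership now
//   }
//   // else: another thread owns the slab; skip it rather than risk freelist corruption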
// Drain remote queues where activity was observed (lightweight sweep).
// CRITICAL: Must acquire ownership before draining each slab!
static inline void ss_remote_drain_light(SuperSlab* ss) {
    if (!ss) return;
    uint32_t threshold = tiny_remote_drain_threshold();
    uint32_t self_tid = (uint32_t)(uintptr_t)pthread_self();
    int cap = ss_slabs_capacity(ss);
    for (int s = 0; s < cap; s++) {
        uint32_t rc = atomic_load_explicit(&ss->remote_counts[s], memory_order_relaxed);
        if (rc <= threshold) continue;
        if (atomic_load_explicit(&ss->remote_heads[s], memory_order_acquire) != 0) {
            // BUGFIX: Must acquire ownership BEFORE draining!
            // Without this, we can drain a slab owned by another thread → freelist corruption
            TinySlabMeta* m = &ss->slabs[s];
            if (!ss_owner_try_acquire(m, self_tid)) {
                continue;  // Failed to acquire - skip this slab
            }
            ss_remote_drain_to_freelist(ss, s);
        }
    }
}

// Best-effort CAS to transfer slab ownership (DEPRECATED - use ss_owner_try_acquire!)
static inline void ss_owner_cas(TinySlabMeta* m, uint32_t self_tid) {
    (void)ss_owner_try_acquire(m, self_tid);  // Ignore result (unsafe)
}

#endif  // SUPERSLAB_INLINE_H