// hakmem_tiny_superslab.h - SuperSlab allocator for Tiny Pool (Phase 6.22)
// Purpose: mimalloc-inspired 2MB aligned slab allocation for fast pointer→slab lookup
// License: MIT
// Date: 2025-10-24

#ifndef HAKMEM_TINY_SUPERSLAB_H
#define HAKMEM_TINY_SUPERSLAB_H

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <time.h>       // Phase 8.3: For clock_gettime() in hak_now_ns()
#include <pthread.h>
#include <stdio.h>      // For fprintf() debugging
#include <signal.h>
#include <inttypes.h>

#include "tiny_debug_ring.h"
#include "tiny_remote.h"

// Debug instrumentation flags (defined in hakmem_tiny.c)
extern int g_debug_remote_guard;
extern int g_tiny_safe_free_strict;
uint32_t tiny_remote_drain_threshold(void);

// ============================================================================
// SuperSlab Configuration
// ============================================================================

// Phase 8.3: ACE - Variable SuperSlab size (1MB ↔ 2MB)
#define SUPERSLAB_SIZE_MAX   (2 * 1024 * 1024)  // 2MB max size
#define SUPERSLAB_SIZE_MIN   (1 * 1024 * 1024)  // 1MB min size
#define SUPERSLAB_LG_MAX     21                 // lg(2MB)
#define SUPERSLAB_LG_MIN     20                 // lg(1MB)
#define SUPERSLAB_LG_DEFAULT 21                 // Default: 2MB (syscall reduction, ACE will adapt)
#define SLAB_SIZE            (64 * 1024)        // 64KB per slab (fixed)

// Legacy defines (kept for backward compatibility, use lg_size instead)
#define SUPERSLAB_SIZE SUPERSLAB_SIZE_MAX       // Default to 2MB (syscall reduction)
#define SUPERSLAB_MASK (SUPERSLAB_SIZE - 1)

// IMPORTANT: Support variable-size SuperSlab (1MB=16 slabs, 2MB=32 slabs)
// Arrays below must be sized for the MAX to avoid OOB when lg_size=21 (2MB)
#define SLABS_PER_SUPERSLAB_MIN (SUPERSLAB_SIZE_MIN / SLAB_SIZE)  // 16 for 1MB
#define SLABS_PER_SUPERSLAB_MAX (SUPERSLAB_SIZE_MAX / SLAB_SIZE)  // 32 for 2MB

// Magic number for validation
#define SUPERSLAB_MAGIC 0x48414B4D454D5353ULL   // "HAKMEMSS"

// ============================================================================
// SuperSlab Metadata Structure
// ============================================================================

// Per-slab metadata (16 bytes)
typedef struct TinySlabMeta {
    void*    freelist;    // Freelist head (NULL = linear mode, Phase 6.24)
    uint16_t used;        // Blocks currently used
    uint16_t capacity;    // Total blocks in slab
    uint32_t owner_tid;   // Owner thread ID (for same-thread fast path)
    // Phase 6.24: freelist == NULL → linear allocation mode (lazy init)
    // Linear mode: allocate sequentially without building freelist
    // Freelist mode: use freelist after first free() call
} TinySlabMeta;
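// Illustrative sketch (not compiled): the linear-vs-freelist split described in the
// TinySlabMeta comments above. Field usage follows those comments; the helper name, the
// slab_base/block_size parameters, and the exact bump-pointer math are assumptions, not
// the allocator's actual code.
#if 0
static void* example_alloc_from_slab(TinySlabMeta* m, uint8_t* slab_base, size_t block_size) {
    if (m->freelist != NULL) {              // freelist mode: pop the head node
        void* blk = m->freelist;
        m->freelist = *(void**)blk;
        m->used++;
        return blk;
    }
    if (m->used < m->capacity) {            // linear mode: hand out the next sequential block
        void* blk = slab_base + (size_t)m->used * block_size;
        m->used++;
        return blk;
    }
    return NULL;                            // slab exhausted
}
#endif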
// SuperSlab header (cache-line aligned, 64B)
typedef struct SuperSlab {
    // Header fields (64B total)
    uint64_t magic;         // Magic number (SUPERSLAB_MAGIC, "HAKMEMSS")
    uint8_t  size_class;    // Size class (0-7 for 8-64B)
    uint8_t  active_slabs;  // Number of active slabs (0-32 for 2MB, 0-16 for 1MB)
    uint8_t  lg_size;       // Phase 8.3: ACE - SuperSlab size (20=1MB, 21=2MB)
    uint8_t  _pad0;         // Padding
    uint32_t slab_bitmap;   // 32-bit bitmap (1=active, 0=free)
    _Atomic uint32_t freelist_mask;   // Bit i=1 when slab i freelist is non-empty (opt-in)

    // Phase 6-2.1: ChatGPT Pro P0 optimization - O(1) non-empty slab lookup
    uint32_t nonempty_mask;           // Bit i = 1 if slabs[i].freelist != NULL (O(1) lookup via ctz)

    // Phase 7.6: Deallocation support
    atomic_uint total_active_blocks;  // Total blocks in use (all slabs combined)
    atomic_uint refcount;             // MT-safe refcount for empty detection/free (reserved for future use)
    atomic_uint listed;               // 0/1: published to partial adopt ring (publish gating)
    uint32_t    partial_epoch;        // Last partial madvise epoch (optional)
    uint8_t     publish_hint;         // Best slab index hint for adopt (0..31), 0xFF=none
    uint8_t     _pad1[3];             // Padding

    // Per-slab metadata (16B each)
    // Sized for MAX; use ss->lg_size to bound loops at runtime
    TinySlabMeta slabs[SLABS_PER_SUPERSLAB_MAX];

    // Remote free queues (per slab): MPSC stack heads + counts
    _Atomic(uintptr_t) remote_heads[SLABS_PER_SUPERSLAB_MAX];
    _Atomic(uint32_t)  remote_counts[SLABS_PER_SUPERSLAB_MAX];

    // Per-slab publish state: 0/1 = not listed/listed (for slab-granular republish hints)
    atomic_uint slab_listed[SLABS_PER_SUPERSLAB_MAX];

    // Partial adopt overflow linkage (singly linked, best-effort)
    struct SuperSlab* partial_next;

    // Padding to fill remaining space (2MB - 64B - 512B)
    // Note: Actual slab data starts at offset SLAB_SIZE (64KB)
} __attribute__((aligned(64))) SuperSlab;

static inline int ss_slabs_capacity(const SuperSlab* ss);

static inline int tiny_refill_failfast_level(void) {
    static int g_failfast_level = -1;
    if (__builtin_expect(g_failfast_level == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
        if (env && *env) {
            g_failfast_level = atoi(env);
        } else {
            g_failfast_level = 1;
        }
    }
    return g_failfast_level;
}

static inline void tiny_failfast_log(const char* stage, int class_idx, SuperSlab* ss,
                                     TinySlabMeta* meta, const void* node, const void* next) {
    if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
    uintptr_t base = ss ? (uintptr_t)ss : 0;
    size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
    uintptr_t limit = base + size;
    fprintf(stderr,
            "[TRC_FREELIST_LOG] stage=%s cls=%d node=%p next=%p head=%p base=%p limit=%p\n",
            stage ? stage : "(null)", class_idx, node, next,
            meta ? meta->freelist : NULL, (void*)base, (void*)limit);
    fflush(stderr);
}

static inline void tiny_failfast_abort_ptr(const char* stage, SuperSlab* ss, int slab_idx,
                                           const void* ptr, const char* reason) {
    if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
    uintptr_t base = ss ? (uintptr_t)ss : 0;
    size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
    uintptr_t limit = base + size;
    size_t cap = 0;
    uint32_t used = 0;
    if (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
        cap = ss->slabs[slab_idx].capacity;
        used = ss->slabs[slab_idx].used;
    }
    size_t offset = 0;
    if (ptr && base && ptr >= (void*)base) {
        offset = (size_t)((uintptr_t)ptr - base);
    }
    fprintf(stderr,
            "[TRC_FAILFAST_PTR] stage=%s cls=%d slab_idx=%d ptr=%p reason=%s base=%p limit=%p cap=%zu used=%u offset=%zu\n",
            stage ? stage : "(null)", ss ? (int)ss->size_class : -1, slab_idx, ptr,
            reason ? reason : "(null)", (void*)base, (void*)limit, cap, used, offset);
    fflush(stderr);
    abort();
}
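// Note (illustrative): the fail-fast helpers above are gated by the HAKMEM_TINY_REFILL_FAILFAST
// environment variable read in tiny_refill_failfast_level(): unset (or "1") stays quiet, while a
// value of 2 or higher enables the verbose freelist tracing and the pointer abort path, e.g. a
// hypothetical debugging run such as `HAKMEM_TINY_REFILL_FAILFAST=2 ./app`.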
// Compile-time assertions
_Static_assert(sizeof(TinySlabMeta) == 16, "TinySlabMeta must be 16 bytes");
// Phase 8.3: Variable-size SuperSlab assertions (1MB=16 slabs, 2MB=32 slabs)
_Static_assert((SUPERSLAB_SIZE_MIN / SLAB_SIZE) == 16, "1MB SuperSlab must have 16 slabs");
_Static_assert((SUPERSLAB_SIZE_MAX / SLAB_SIZE) == 32, "2MB SuperSlab must have 32 slabs");
_Static_assert((SUPERSLAB_SIZE & SUPERSLAB_MASK) == 0, "SUPERSLAB_SIZE must be power of 2");

// ============================================================================
// Fast Inline Functions (mimalloc-style)
// ============================================================================

// DEPRECATED (Phase 1): This function causes false positives! Use hak_super_lookup() instead.
// Problem: L2.5 allocations at 1MB boundary are misidentified as SuperSlabs
// Solution: Use registry-based hak_super_lookup() from hakmem_super_registry.h
#if 0  // DISABLED - unsafe function removed in Phase 1
static inline SuperSlab* ptr_to_superslab(void* p) {
    return (SuperSlab*)((uintptr_t)p & ~(uintptr_t)SUPERSLAB_MASK);
}
#endif

// Get slab index within SuperSlab (shift operation, 0-31)
// Deprecated: relies on the fixed SUPERSLAB_MASK, which cannot handle variable-size
// (1MB/2MB) SuperSlabs. Use slab_index_for() instead.
static inline int ptr_to_slab_index(void* p) {
    uintptr_t offset = (uintptr_t)p & SUPERSLAB_MASK;
    return (int)(offset >> 16);  // Divide by 64KB (2^16)
}

// Runtime-safe slab count for a given SuperSlab
static inline int ss_slabs_capacity(const SuperSlab* ss) {
    size_t ss_size = (size_t)1 << ss->lg_size;
    return (int)(ss_size / SLAB_SIZE);  // 16 or 32
}

// Safe slab index computation using SuperSlab base (supports 1MB/2MB)
static inline int slab_index_for(const SuperSlab* ss, const void* p) {
    uintptr_t base = (uintptr_t)ss;
    uintptr_t addr = (uintptr_t)p;
    uintptr_t off = addr - base;
    int idx = (int)(off >> 16);  // 64KB
    int cap = ss_slabs_capacity(ss);
    return (idx >= 0 && idx < cap) ? idx : -1;
}

// DEPRECATED (Phase 1): Uses unsafe ptr_to_superslab() internally
// Use hak_super_lookup() + ptr_to_slab_index() instead
#if 0  // DISABLED - uses unsafe ptr_to_superslab()
static inline TinySlabMeta* ptr_to_slab_meta(void* p) {
    SuperSlab* ss = ptr_to_superslab(p);
    int idx = ptr_to_slab_index(p);
    return &ss->slabs[idx];
}
#endif

// Get slab data start address
static inline void* slab_data_start(SuperSlab* ss, int slab_idx) {
    return (char*)ss + (slab_idx * SLAB_SIZE);
}

static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx) {
    uint8_t* base = (uint8_t*)slab_data_start(ss, slab_idx);
    if (slab_idx == 0) base += 1024;
    return base;
}
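// Illustrative sketch (not compiled): the safe pointer→metadata path recommended by the
// deprecation notes above. hak_super_lookup() is assumed to come from hakmem_super_registry.h
// as those notes describe; the helper name and error handling here are hypothetical.
#if 0
static inline TinySlabMeta* example_lookup_meta(void* p) {
    SuperSlab* ss = hak_super_lookup(p);             // registry lookup, no mask guessing
    if (!ss || ss->magic != SUPERSLAB_MAGIC) return NULL;
    int idx = slab_index_for(ss, p);                 // bounds-checked, 1MB/2MB aware
    return (idx >= 0) ? &ss->slabs[idx] : NULL;
}
#endif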
// DEPRECATED (Phase 1): Uses unsafe ptr_to_superslab() internally (false positives!)
// Use: SuperSlab* ss = hak_super_lookup(p); if (ss && ss->magic == SUPERSLAB_MAGIC) { ... }
#if 0  // DISABLED - uses unsafe ptr_to_superslab(), causes crashes on L2.5 boundaries
static inline int is_superslab_pointer(void* p) {
    SuperSlab* ss = ptr_to_superslab(p);
    return ss->magic == SUPERSLAB_MAGIC;
}
#endif

// Refcount helpers (for future MT-safe empty-SuperSlab reclamation)
static inline void superslab_ref_inc(SuperSlab* ss) {
    atomic_fetch_add_explicit(&ss->refcount, 1u, memory_order_relaxed);
}

static inline unsigned superslab_ref_dec(SuperSlab* ss) {
    return atomic_fetch_sub_explicit(&ss->refcount, 1u, memory_order_acq_rel) - 1u;
}

static inline unsigned superslab_ref_get(SuperSlab* ss) {
    return atomic_load_explicit(&ss->refcount, memory_order_acquire);
}

// Debug counter extern declaration
extern _Atomic uint64_t g_ss_active_dec_calls;

// Active block counter helpers (saturating decrement for free operations)
static inline void ss_active_dec_one(SuperSlab* ss) {
    atomic_fetch_add_explicit(&g_ss_active_dec_calls, 1, memory_order_relaxed);
    uint32_t old = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
    while (old != 0) {
        if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks, &old, old - 1u,
                                                  memory_order_relaxed, memory_order_relaxed)) {
            break;
        }
        // CAS failed: old is reloaded by the CAS intrinsic
    }
}

// ============================================================================
// SuperSlab Management Functions
// ============================================================================

// Allocate a new SuperSlab (2MB aligned)
SuperSlab* superslab_allocate(uint8_t size_class);

// Free a SuperSlab
void superslab_free(SuperSlab* ss);

// Initialize a slab within SuperSlab
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid);

// Mark a slab as active
void superslab_activate_slab(SuperSlab* ss, int slab_idx);

// Mark a slab as inactive
void superslab_deactivate_slab(SuperSlab* ss, int slab_idx);

// Find first free slab index (-1 if none)
int superslab_find_free_slab(SuperSlab* ss);

// Statistics
void superslab_print_stats(SuperSlab* ss);

// Phase 8.3: ACE statistics
void superslab_ace_print_stats(void);

// ============================================================================
// Phase 8.3: ACE (Adaptive Cache Engine) - SuperSlab adaptive sizing
// ============================================================================

#define TINY_NUM_CLASSES_SS 8  // Same as TINY_NUM_CLASSES (avoid circular include)

// Per-class ACE state (lightweight observation + decision)
typedef struct {
    uint8_t  current_lg;    // Current lg_size in use (20=1MB, 21=2MB)
    uint8_t  target_lg;     // Target lg_size for next allocation (20/21)
    uint16_t hot_score;     // Hotness score (0-1000) for visualization
    uint32_t alloc_count;   // Allocs since last tick
    uint32_t refill_count;  // Refills since last tick
    uint32_t spill_count;   // Spills since last tick
    uint32_t live_blocks;   // Estimated live blocks (alloc-free EMA)
    uint64_t last_tick_ns;  // Last tick timestamp (ns)
} SuperSlabACEState;

// Global ACE state (one per tiny class)
extern SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS];

// ACE tick function (called periodically, ~150ms interval)
// Observes metrics and decides promotion (1MB→2MB) or demotion (2MB→1MB)
void hak_tiny_superslab_ace_tick(int class_idx, uint64_t now_ns);

// Phase 8.4: ACE Observer (called from Learner thread - zero hot-path overhead)
void hak_tiny_superslab_ace_observe_all(void);

// Low-cost timestamp (nanoseconds, monotonic) - inline for hot path
static inline uint64_t hak_now_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
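// Illustrative sketch (not compiled): one way a background learner thread could drive the
// ACE tick declared above at roughly the ~150ms cadence its comment mentions. The thread
// function name and loop shape are hypothetical; only hak_tiny_superslab_ace_tick(),
// TINY_NUM_CLASSES_SS, and hak_now_ns() come from this header.
#if 0
static void* example_ace_learner_thread(void* arg) {
    (void)arg;
    for (;;) {
        uint64_t now = hak_now_ns();
        for (int c = 0; c < TINY_NUM_CLASSES_SS; c++) {
            hak_tiny_superslab_ace_tick(c, now);         // observe metrics, promote/demote 1MB↔2MB
        }
        struct timespec ts = { 0, 150 * 1000 * 1000 };   // ~150ms between ticks
        nanosleep(&ts, NULL);
    }
    return NULL;
}
#endif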
// Get next lg_size for new SuperSlab allocation (uses target_lg)
static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) {
    uint8_t lg = g_ss_ace[class_idx].target_lg ? g_ss_ace[class_idx].target_lg
                                               : g_ss_ace[class_idx].current_lg;
    return lg ? lg : SUPERSLAB_LG_DEFAULT;  // Use default if uninitialized
}

// ----------------------------------------------------------------------------
// Partial SuperSlab adopt/publish (per-class single-slot)
// ----------------------------------------------------------------------------

// Publish a SuperSlab with available freelist for other threads to adopt.
void ss_partial_publish(int class_idx, SuperSlab* ss);

// Adopt published SuperSlab for the class (returns NULL if none).
SuperSlab* ss_partial_adopt(int class_idx);

// ----------------------------------------------------------------------------
// SuperSlab adopt gate (publish/adopt wiring helper)
// ----------------------------------------------------------------------------
// Environment-aware switch that keeps free/alloc sides in sync. Default:
// - Disabled until cross-thread free is observed.
// - `HAKMEM_TINY_SS_ADOPT=1` forces ON, `=0` forces OFF.
int  tiny_adopt_gate_should_publish(void);
int  tiny_adopt_gate_should_adopt(void);
void tiny_adopt_gate_on_remote_seen(int class_idx);
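// Illustrative sketch (not compiled): how the publish/adopt pair and the gate above are
// expected to fit together. The surrounding refill logic and function names are hypothetical;
// only the ss_partial_* and tiny_adopt_gate_* calls come from this header.
#if 0
static void example_publish_partial(int class_idx, SuperSlab* ss) {
    // Producer side: a SuperSlab with a usable freelist is made visible to other threads.
    if (tiny_adopt_gate_should_publish()) {
        ss_partial_publish(class_idx, ss);
    }
}

static SuperSlab* example_adopt_partial(int class_idx) {
    // Consumer side: during refill, try adopting a published SuperSlab before allocating a new one.
    if (!tiny_adopt_gate_should_adopt()) return NULL;
    return ss_partial_adopt(class_idx);   // NULL when nothing is published for this class
}
#endif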
// Remote free push (MPSC stack) - returns 1 if transitioned from empty
extern _Atomic int g_ss_remote_seen;  // set to 1 on first remote free observed
extern int g_debug_remote_guard;

static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
    extern _Atomic uint64_t g_ss_remote_push_calls;
    atomic_fetch_add_explicit(&g_ss_remote_push_calls, 1, memory_order_relaxed);

    static _Atomic int g_remote_push_count = 0;
    int count = atomic_fetch_add_explicit(&g_remote_push_count, 1, memory_order_relaxed);
    if (count < 5) {
        fprintf(stderr, "[DEBUG ss_remote_push] Call #%d ss=%p slab_idx=%d\n",
                count + 1, (void*)ss, slab_idx);
        fflush(stderr);
    }
    if (g_debug_remote_guard && count < 5) {
        fprintf(stderr, "[REMOTE_PUSH] ss=%p slab_idx=%d ptr=%p count=%d\n",
                (void*)ss, slab_idx, ptr, count);
    }

    // Unconditional sanity checks (Fail-Fast without crashing)
    {
        uintptr_t ptr_val = (uintptr_t)ptr;
        uintptr_t base = (uintptr_t)ss;
        size_t ss_size = (size_t)1ULL << ss->lg_size;
        int cap = ss_slabs_capacity(ss);
        int in_range = (ptr_val >= base) && (ptr_val < base + ss_size);
        int aligned = ((ptr_val & (sizeof(void*) - 1)) == 0);
        if (!in_range || slab_idx < 0 || slab_idx >= cap || !aligned) {
            uintptr_t code = 0xB001u;
            if (!in_range) code |= 0x01u;
            if (!aligned)  code |= 0x02u;
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr,
                                   ((uintptr_t)slab_idx << 32) | code);
            return 0;
        }
    }

    // A/B: global disable for remote MPSC — fallback to legacy freelist push
    do {
        static int g_disable_remote_glob = -1;
        if (__builtin_expect(g_disable_remote_glob == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
            g_disable_remote_glob = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_disable_remote_glob, 0)) {
            TinySlabMeta* meta = &ss->slabs[slab_idx];
            void* prev = meta->freelist;
            *(void**)ptr = prev;
            meta->freelist = ptr;
            // Reflect accounting (callers also decrement used; keep idempotent here)
            ss_active_dec_one(ss);
            if (prev == NULL) {
                // first item: mark this slab visible to adopters
                uint32_t bit = (1u << slab_idx);
                atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
                return 1;
            }
            return 0;
        }
    } while (0);

    _Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
    uintptr_t old;
    do {
        old = atomic_load_explicit(head, memory_order_acquire);
        if (!g_remote_side_enable) {
            *(void**)ptr = (void*)old;  // legacy embedding
        }
    } while (!atomic_compare_exchange_weak_explicit(head, &old, (uintptr_t)ptr,
                                                    memory_order_release, memory_order_relaxed));
    tiny_remote_side_set(ss, slab_idx, ptr, old);
    tiny_remote_track_on_remote_push(ss, slab_idx, ptr, "remote_push", 0);

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        // One-shot verify just-written next/ptr alignment and range
        uintptr_t base = (uintptr_t)ss;
        size_t ss_size = (size_t)1ULL << ss->lg_size;
        uintptr_t pv = (uintptr_t)ptr;
        int ptr_in = (pv >= base && pv < base + ss_size);
        int ptr_al = ((pv & (sizeof(void*) - 1)) == 0);
        int old_in = (old == 0) || ((old >= base) && (old < base + ss_size));
        int old_al = (old == 0) || ((old & (sizeof(void*) - 1)) == 0);
        if (!ptr_in || !ptr_al || !old_in || !old_al) {
            uintptr_t flags = ((uintptr_t)ptr_al << 3) | ((uintptr_t)ptr_in << 2) |
                              ((uintptr_t)old_al << 1) | (uintptr_t)old_in;
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr,
                                   0xB100u | (flags & 0xFu));
            if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
        }
        fprintf(stderr, "[REMOTE_PUSH] cls=%u slab=%d ptr=%p old=%p transitioned=%d\n",
                ss->size_class, slab_idx, ptr, (void*)old, old == 0);
        // Pack: [slab_idx<<32 | bit0:old==0 | bit1:old_al | bit2:ptr_al]
        uintptr_t aux = ((uintptr_t)slab_idx << 32) |
                        ((old == 0) ? 1u : 0u) |
                        ((old_al ? 1u : 0u) << 1) |
                        ((ptr_al ? 1u : 0u) << 2);
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH, (uint16_t)ss->size_class, ptr, aux);
    } else {
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH, (uint16_t)ss->size_class, ptr,
                               ((uintptr_t)slab_idx << 32) | (uint32_t)(old == 0));
    }

    atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1u, memory_order_relaxed);
    ss_active_dec_one(ss);  // Fix: Decrement active blocks on cross-thread free
    atomic_store_explicit(&g_ss_remote_seen, 1, memory_order_relaxed);

    int transitioned = (old == 0);
    // (optional hint to Ready ring moved to mailbox/aggregator to avoid header coupling)
    if (transitioned) {
        // First remote observed for this slab: mark slab_listed and notify publisher paths
        unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
        (void)prev;  // best-effort
        extern void tiny_publish_notify(int class_idx, struct SuperSlab* ss, int slab_idx);
        tiny_publish_notify((int)ss->size_class, ss, slab_idx);
    } else {
        // Optional: best-effort notify if already non-empty but not listed
        extern int g_remote_force_notify;
        if (__builtin_expect(g_remote_force_notify, 0)) {
            unsigned listed = atomic_load_explicit(&ss->slab_listed[slab_idx], memory_order_acquire);
            if (listed == 0) {
                unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
                (void)prev;
                extern void tiny_publish_notify(int class_idx, struct SuperSlab* ss, int slab_idx);
                tiny_publish_notify((int)ss->size_class, ss, slab_idx);
            }
        }
    }
    return transitioned;
}
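// Illustrative sketch (not compiled): a cross-thread free path built on ss_remote_push().
// hak_super_lookup() is assumed to come from hakmem_super_registry.h (see the deprecation
// notes earlier in this header); the overall flow and the function name are hypothetical.
#if 0
static int example_remote_free(void* p) {
    SuperSlab* ss = hak_super_lookup(p);
    if (!ss || ss->magic != SUPERSLAB_MAGIC) return 0;   // not a SuperSlab-backed pointer
    int idx = slab_index_for(ss, p);
    if (idx < 0) return 0;
    uint32_t self_tid = (uint32_t)(uintptr_t)pthread_self();
    if (ss->slabs[idx].owner_tid == self_tid) return 0;  // same-thread free takes the fast path
    // Returns 1 when this push made the slab's remote queue non-empty (publish opportunity).
    return ss_remote_push(ss, idx, p);
}
#endif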
// Drain remote queue into freelist (no change to used/active; already adjusted at free)
// INTERNAL UNSAFE VERSION - Only called by slab_handle.h after ownership verified!
// DO NOT call directly - use slab_drain_remote() via SlabHandle instead.
static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) {
    do {
        // one-shot debug print when enabled
        static int en = -1;
        static _Atomic int printed;
        if (__builtin_expect(en == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
            en = (e && *e && *e != '0') ? 1 : 0;
        }
        if (en) {
            int exp = 0;
            if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
                fprintf(stderr, "[DRAIN_OPT] chain splice active (cls=%u slab=%d)\n",
                        ss ? ss->size_class : 0u, slab_idx);
            }
        }
    } while (0);

    _Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
    uintptr_t p = atomic_exchange_explicit(head, (uintptr_t)NULL, memory_order_acq_rel);
    if (p == 0) return;

    uint32_t drained = 0;
    uintptr_t base = (uintptr_t)ss;
    size_t ss_size = (size_t)1ULL << ss->lg_size;
    uint32_t drain_tid = (uint32_t)(uintptr_t)pthread_self();

    // Build a local chain then splice once into freelist to reduce writes
    void* chain_head = NULL;
    void* chain_tail = NULL;
    while (p != 0) {
        // Guard: range/alignment before deref
        if (__builtin_expect(g_debug_remote_guard, 0)) {
            if (p < base || p >= base + ss_size) {
                uintptr_t aux = tiny_remote_pack_diag(0xA210u, base, ss_size, p);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                break;
            }
            if ((p & (uintptr_t)(sizeof(void*) - 1)) != 0) {
                uintptr_t aux = tiny_remote_pack_diag(0xA211u, base, ss_size, p);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                break;
            }
        }
        void* node = (void*)p;
        uintptr_t next = tiny_remote_side_get(ss, slab_idx, node);
        tiny_remote_watch_note("drain_pull", ss, slab_idx, node, 0xA238u, drain_tid, 0);
        if (__builtin_expect(g_remote_side_enable, 0)) {
            if (!tiny_remote_sentinel_ok(node)) {
                uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, (uintptr_t)node);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
                uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
                tiny_remote_report_corruption("drain", node, observed);
                TinySlabMeta* meta = &ss->slabs[slab_idx];
                fprintf(stderr,
                        "[REMOTE_SENTINEL-DRAIN] cls=%u slab=%d node=%p drained=%u observed=0x%016" PRIxPTR
                        " owner=%u used=%u freelist=%p\n",
                        ss->size_class, slab_idx, node, drained, observed,
                        meta->owner_tid, (unsigned)meta->used, meta->freelist);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            }
            tiny_remote_side_clear(ss, slab_idx, node);
        }
        tiny_remote_watch_note("drain_link", ss, slab_idx, node, 0xA239u, drain_tid, 0);
        tiny_remote_track_on_remote_drain(ss, slab_idx, node, "remote_drain", drain_tid);
        if (__builtin_expect(g_debug_remote_guard && drained < 3, 0)) {
            // First few nodes: record low info for triage
            uintptr_t aux = ((uintptr_t)slab_idx << 32) | (uintptr_t)(drained & 0xFFFF);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, node, aux);
        }
        // Link into local chain (avoid touching meta->freelist per node)
        if (chain_head == NULL) {
            chain_head = node;
            chain_tail = node;
            *(void**)node = NULL;
        } else {
            *(void**)node = chain_head;
            chain_head = node;
        }
        p = next;
        drained++;
    }

    // Splice the drained chain into freelist (single meta write)
    if (chain_head != NULL) {
        if (chain_tail != NULL) {
            *(void**)chain_tail = meta->freelist;
        }
        void* prev = meta->freelist;
        meta->freelist = chain_head;
        tiny_failfast_log("remote_drain", ss->size_class, ss, meta, chain_head, prev);
        // Optional: set freelist bit when transitioning from empty
        do {
            static int g_mask_en = -1;
            if (__builtin_expect(g_mask_en == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
                g_mask_en = (e && *e && *e != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_mask_en, 0)) {
                uint32_t bit = (1u << slab_idx);
                atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
            }
        } while (0);
    }

    // Reset remote count after full drain
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0u, memory_order_relaxed);
    tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, ss,
                           ((uintptr_t)slab_idx << 32) | drained);
}

// Legacy wrapper for compatibility (UNSAFE - ownership NOT checked!)
// DEPRECATED: Use slab_drain_remote() via SlabHandle instead
static inline void ss_remote_drain_to_freelist(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];
    _ss_remote_drain_to_freelist_unsafe(ss, slab_idx, meta);
}

// Try to acquire exclusive ownership of slab (REQUIRED before draining remote queue!)
// Returns 1 on success (now own slab), 0 on failure (another thread owns it)
// CRITICAL: Only succeeds if slab is unowned (owner_tid==0) or already owned by us.
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t self_tid) {
    uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
    if (cur == self_tid) return 1;  // Already owner - success
    if (cur != 0) return 0;         // Another thread owns it - FAIL immediately
    // Slab is unowned (cur==0) - try to claim it
    uint32_t expected = 0;
    return __atomic_compare_exchange_n(&m->owner_tid, &expected, self_tid, false,
                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

// Drain remote queues where activity was observed (lightweight sweep).
// CRITICAL: Must acquire ownership before draining each slab!
static inline void ss_remote_drain_light(SuperSlab* ss) {
    if (!ss) return;
    uint32_t threshold = tiny_remote_drain_threshold();
    uint32_t self_tid = (uint32_t)(uintptr_t)pthread_self();
    int cap = ss_slabs_capacity(ss);
    for (int s = 0; s < cap; s++) {
        uint32_t rc = atomic_load_explicit(&ss->remote_counts[s], memory_order_relaxed);
        if (rc <= threshold) continue;
        if (atomic_load_explicit(&ss->remote_heads[s], memory_order_acquire) != 0) {
            // BUGFIX: Must acquire ownership BEFORE draining!
            // Without this, we can drain a slab owned by another thread → freelist corruption
            TinySlabMeta* m = &ss->slabs[s];
            if (!ss_owner_try_acquire(m, self_tid)) {
                continue;  // Failed to acquire - skip this slab
            }
            ss_remote_drain_to_freelist(ss, s);
        }
    }
}

// Best-effort CAS to transfer slab ownership (DEPRECATED - use ss_owner_try_acquire!)
static inline void ss_owner_cas(TinySlabMeta* m, uint32_t self_tid) {
    (void)ss_owner_try_acquire(m, self_tid);  // Ignore result (unsafe)
}

#endif // HAKMEM_TINY_SUPERSLAB_H