diff --git a/core/box/free_local_box.c b/core/box/free_local_box.c
index cb915c2c..984bb248 100644
--- a/core/box/free_local_box.c
+++ b/core/box/free_local_box.c
@@ -19,11 +19,11 @@
 void tiny_failfast_log(const char* stage, void* ptr, void* prev);
 
-void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid) {
+int tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid) {
     extern _Atomic uint64_t g_free_local_box_calls;
     atomic_fetch_add_explicit(&g_free_local_box_calls, 1, memory_order_relaxed);
-    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
-    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return;
+    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return 0;
+    if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return 0;
     (void)my_tid;
 
     // ✅ Phase E1-CORRECT: ALL classes have headers, calculate BASE pointer once
@@ -177,11 +177,16 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
     // Track local free (debug helpers may be no-op)
     tiny_remote_track_on_local_free(ss, slab_idx, ptr, "local_free", my_tid);
-    meta->used--;
+
+    // BUGFIX Phase 9-2: Use atomic_fetch_sub to detect 1->0 transition reliably
+    // meta->used--;  // old
+    uint16_t prev_used = atomic_fetch_sub_explicit(&meta->used, 1, memory_order_release);
+    int is_empty = (prev_used == 1);  // Transitioned from 1 to 0
+
     ss_active_dec_one(ss);
 
     // Phase 12-1.1: EMPTY slab detection (immediate reuse optimization)
-    if (meta->used == 0) {
+    if (is_empty) {
         // Slab became EMPTY → mark for highest-priority reuse
         ss_mark_slab_empty(ss, slab_idx);
@@ -206,4 +211,6 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
         uint8_t cls0 = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
         tiny_free_publish_first_free((int)cls0, ss, slab_idx);
     }
+
+    return is_empty;
 }
diff --git a/core/box/free_local_box.h b/core/box/free_local_box.h
index 7e565e7b..1e2303da 100644
--- a/core/box/free_local_box.h
+++ b/core/box/free_local_box.h
@@ -4,5 +4,6 @@
 #include "hakmem_tiny_superslab.h"
 
 // Perform same-thread freelist push. On first-free (prev==NULL), publishes via Ready/Mailbox.
-void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid);
+// Returns: 1 if slab transitioned to EMPTY (used=0), 0 otherwise.
+int tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void* ptr, uint32_t my_tid);
 
diff --git a/core/box/tls_sll_drain_box.h b/core/box/tls_sll_drain_box.h
index 4aaf51a8..86517589 100644
--- a/core/box/tls_sll_drain_box.h
+++ b/core/box/tls_sll_drain_box.h
@@ -204,7 +204,8 @@ static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
         // Call tiny_free_local_box() to:
         //   1. Push block to slab freelist
         //   2. Decrement meta->used (THIS IS THE KEY!)
-        tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
+        // Phase 9-2 FIX: Capture 'is_empty' return value to detect ownership of 1->0 transition
+        int is_empty = tiny_free_local_box(ss, slab_idx, meta, user_ptr, my_tid);
 
 #if !HAKMEM_BUILD_RELEASE
         // Trace drain operation (debug only)
@@ -220,15 +221,9 @@
         drained++;
 
         // Phase 9-2: Track touched slab for later EMPTY check
-        // We track (ss, slab_idx) pairs to check after loop completes
-        int already_tracked = 0;
-        for (int t = 0; t < num_touched; t++) {
-            if (touched[t].ss == ss && touched[t].slab_idx == slab_idx) {
-                already_tracked = 1;
-                break;
-            }
-        }
-        if (!already_tracked && num_touched < MAX_TOUCHED_SLABS) {
+        // CRITICAL FIX: Only recycle if WE caused the transition to EMPTY (is_empty == 1)
+        // This prevents multiple threads from racing to release the same slab
+        if (is_empty && num_touched < MAX_TOUCHED_SLABS) {
             touched[num_touched].ss = ss;
             touched[num_touched].slab_idx = slab_idx;
             num_touched++;
diff --git a/core/hakmem_shared_pool_acquire.c b/core/hakmem_shared_pool_acquire.c
index 3f7cba84..5f641747 100644
--- a/core/hakmem_shared_pool_acquire.c
+++ b/core/hakmem_shared_pool_acquire.c
@@ -418,19 +418,8 @@ stage2_fallback:
     }
 
     // Before creating a new SuperSlab, consult learning-layer soft cap.
-    // If current active slots for this class already exceed the policy cap,
-    // fail early so caller can fall back to legacy backend.
-    uint32_t limit = sp_class_active_limit(class_idx);
-    if (limit > 0) {
-        uint32_t cur = g_shared_pool.class_active_slots[class_idx];
-        if (cur >= limit) {
-            if (g_lock_stats_enabled == 1) {
-                atomic_fetch_add(&g_lock_release_count, 1);
-            }
-            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
-            return -1;  // Soft cap reached for this class
-        }
-    }
+    // Phase 9-2: Soft Cap removed to allow Shared Pool to fully replace Legacy Backend.
+    // We now rely on LRU eviction and EMPTY recycling to manage memory pressure.
 
     // Create metadata for this new SuperSlab
     SharedSSMeta* new_meta = sp_meta_find_or_create(new_ss);
diff --git a/core/hakmem_shared_pool_release.c b/core/hakmem_shared_pool_release.c
index a51dfeef..de1c6cb5 100644
--- a/core/hakmem_shared_pool_release.c
+++ b/core/hakmem_shared_pool_release.c
@@ -26,6 +26,23 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
         return;
     }
 
+    // Phase 9-2 FIX: Promote Legacy SuperSlabs to Shared Pool on first recycle
+    // If we are recycling a slot from a Legacy SS, we must remove it from the
+    // Legacy list (g_superslab_heads) to prevent Legacy Backend from allocating
+    // from it simultaneously (Double Allocation Race).
+    // This effectively transfers ownership to Shared Pool.
+    extern void remove_superslab_from_legacy_head(SuperSlab* ss);
+    remove_superslab_from_legacy_head(ss);
+
+    // BUGFIX: Re-check used count after removal. Legacy Backend might have
+    // allocated from this slab while we were waiting for the lock in remove().
+    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
+    if (atomic_load_explicit(&slab_meta->used, memory_order_acquire) != 0) {
+        // Legacy Backend stole this slab. It's now an orphan (removed from list).
+        // We abort recycling. It will be recycled when Legacy frees it later.
+        return;
+    }
+
     // Debug logging
 #if !HAKMEM_BUILD_RELEASE
     static int dbg = -1;
@@ -46,9 +63,9 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
     pthread_mutex_lock(&g_shared_pool.alloc_lock);
 
-    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
+    // TinySlabMeta* slab_meta = &ss->slabs[slab_idx];  // Already declared above
     if (slab_meta->used != 0) {
-        // Not actually empty; nothing to do
+        // Not actually empty (double check under lock)
         if (g_lock_stats_enabled == 1) {
             atomic_fetch_add(&g_lock_release_count, 1);
         }
@@ -160,15 +177,28 @@ shared_pool_release_slab(SuperSlab* ss, int slab_idx)
     pthread_mutex_unlock(&g_shared_pool.alloc_lock);
 
-    // Remove from legacy backend list (if present) to prevent dangling pointers
-    extern void remove_superslab_from_legacy_head(SuperSlab* ss);
-    remove_superslab_from_legacy_head(ss);
+    // Remove from legacy backend list (moved to top of function)
+    // extern void remove_superslab_from_legacy_head(SuperSlab* ss);
+    // remove_superslab_from_legacy_head(ss);
 
     // Free SuperSlab:
     //   1. Try LRU cache (hak_ss_lru_push) - lazy deallocation
     //   2. Or munmap if LRU is full - eager deallocation
-    extern void superslab_free(SuperSlab* ss);
-    superslab_free(ss);
+
+    // BUGFIX: Double check total_active_blocks. Legacy Backend might have
+    // allocated from ANOTHER slab in this SS just before we removed it.
+    // If so, we must NOT free the SS.
+    if (atomic_load(&ss->total_active_blocks) == 0) {
+        extern void superslab_free(SuperSlab* ss);
+        superslab_free(ss);
+    } else {
+    #if !HAKMEM_BUILD_RELEASE
+        if (dbg == 1) {
+            fprintf(stderr, "[SP_SLOT_RELEASE] SKIP free ss=%p: total_active_blocks=%u > 0\n",
+                    (void*)ss, atomic_load(&ss->total_active_blocks));
+        }
+    #endif
+    }
 
     return;
 }
diff --git a/core/superslab_backend.c b/core/superslab_backend.c
index f465f462..3adf5de5 100644
--- a/core/superslab_backend.c
+++ b/core/superslab_backend.c
@@ -28,6 +28,9 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
         g_superslab_heads[class_idx] = head;
     }
 
+    // LOCK expansion_lock to protect list traversal (vs remove_superslab_from_legacy_head)
+    pthread_mutex_lock(&head->expansion_lock);
+
     SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk;
 
     while (chunk) {
@@ -62,12 +65,19 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
                 meta->used++;
                 atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
+
+                // UNLOCK before return
+                pthread_mutex_unlock(&head->expansion_lock);
+
                 HAK_RET_ALLOC_BLOCK_TRACED(class_idx, base, ALLOC_PATH_BACKEND);
             }
         }
 
         chunk = chunk->next_chunk;
     }
 
+    // UNLOCK before expansion (which takes lock internally)
+    pthread_mutex_unlock(&head->expansion_lock);
+
     if (expand_superslab_head(head) < 0) {
         return NULL;
     }
@@ -212,74 +222,23 @@
  * Box API entry:
  * - Single front-door for tiny-side Superslab allocations.
  *
- * Phase 9-2 Root Fix: Shared Pool backend unified mode (default ON)
+ * Phase 9-2 Final: Shared Pool ONLY (Legacy Backend Removed)
  * Policy:
- * - HAKMEM_TINY_SS_SHARED=2 (default) → Shared Pool backend ONLY (no legacy fallback)
- * - HAKMEM_TINY_SS_SHARED=1 → Shared Pool backend with legacy fallback (testing mode)
- * - HAKMEM_TINY_SS_SHARED=0 → Legacy backend only (compatibility mode)
- *
- * Root Cause: Legacy backend (g_superslab_heads) has TLS_SLL_DUP issue
- * Solution: Disable legacy backend by default, keep as "reversible box" via env var
+ * - HAKMEM_TINY_SS_SHARED is now ignored (or used only for logging).
+ * - Always uses Shared Pool backend.
+ * - Legacy backend (g_superslab_heads) is no longer used for allocation.
  */
 void* hak_tiny_alloc_superslab_box(int class_idx)
 {
-    static int g_ss_shared_mode = -1;
-    static _Atomic uint32_t g_ss_backend_log = 0;
-    if (__builtin_expect(g_ss_shared_mode == -1, 0)) {
-        const char* e = getenv("HAKMEM_TINY_SS_SHARED");
-        if (!e || !*e) {
-            g_ss_shared_mode = 2;  // Phase 9-2 Root Fix: Shared Pool ONLY (no legacy fallback)
-        } else {
-            int v = atoi(e);
-            g_ss_shared_mode = v;  // 0=legacy only, 1=shared+fallback, 2=shared only
-        }
-#if !HAKMEM_BUILD_RELEASE
-        const char* mode_str = (g_ss_shared_mode == 2) ? "shared_only" :
-                               (g_ss_shared_mode == 1) ? "shared+fallback" : "legacy_only";
-        fprintf(stderr, "[SS_BACKEND] Mode: %s (HAKMEM_TINY_SS_SHARED=%d)\n", mode_str, g_ss_shared_mode);
-#endif
-    }
-
-    // Mode 2: Shared Pool ONLY (default, no legacy fallback)
-    if (g_ss_shared_mode == 2) {
-        void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
-        if (p != NULL) {
-            uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-            if (n < 4) {
-                fprintf(stderr, "[SS_BACKEND] shared_only cls=%d ptr=%p\n", class_idx, p);
-            }
-            return p;
-        }
-        // Phase 9-2: NO fallback to legacy - return NULL on failure
-        uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
+    // Always use Shared Pool (Mode 2 equivalent)
+    void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
+
+    if (p == NULL) {
+        static _Atomic uint32_t g_ss_oom_log = 0;
+        uint32_t n = atomic_fetch_add_explicit(&g_ss_oom_log, 1, memory_order_relaxed);
         if (n < 4) {
-            fprintf(stderr, "[SS_BACKEND] shared_fail→NULL (no legacy) cls=%d\n", class_idx);
+            fprintf(stderr, "[SS_BACKEND] shared_fail→NULL (OOM) cls=%d\n", class_idx);
         }
-        return NULL;
     }
-
-    // Mode 1: Shared Pool with legacy fallback (testing mode)
-    if (g_ss_shared_mode == 1) {
-        void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
-        if (p != NULL) {
-            uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-            if (n < 4) {
-                fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p);
-            }
-            return p;
-        }
-        // Fallback to legacy
-        uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-        if (n < 4) {
-            fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx);
-        }
-        return hak_tiny_alloc_superslab_backend_legacy(class_idx);
-    }
-
-    // Mode 0: Legacy backend only (compatibility mode)
-    uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
-    if (n < 4) {
-        fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx);
-    }
-    return hak_tiny_alloc_superslab_backend_legacy(class_idx);
+    return p;
 }
diff --git a/core/superslab_slab.c b/core/superslab_slab.c
index 6e4d6724..dfbbe4bf 100644
--- a/core/superslab_slab.c
+++ b/core/superslab_slab.c
@@ -27,6 +27,8 @@ void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMe
     // and splice in front of current freelist preserving relative order.
     void* prev = meta->freelist;
     int cls = (int)meta->class_idx;
+    uint16_t drained_count = 0;  // Phase 9-2: Batched used decrement
+
     HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
     if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
         static _Atomic int g_remote_drain_cls_oob = 0;
@@ -104,14 +106,21 @@ void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMe
         tiny_next_write(cls, (void*)cur, prev);
         prev = (void*)cur;
         cur = next;
+        drained_count++;
     }
     meta->freelist = prev;
 
     // Reset remote count after full drain
     atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
 
-    // Phase 9-2: Try to recycle slab if EMPTY after remote drain
-    // This fixes the bug where EMPTY slabs accumulate and never get returned to freelist
-    SLAB_TRY_RECYCLE(ss, slab_idx, meta);
+    // Phase 9-2: Batched decrement of used count (Atomic)
+    // Remote frees don't decrement used until they land in freelist.
+    if (drained_count > 0) {
+        uint16_t old_used = atomic_fetch_sub_explicit(&meta->used, drained_count, memory_order_release);
+        // If used became 0 (old_used == drained_count), try to recycle
+        if (old_used == drained_count) {
+            SLAB_TRY_RECYCLE(ss, slab_idx, meta);
+        }
+    }
 
     // Update freelist/nonempty visibility bits
     uint32_t bit = (1u << slab_idx);
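The common thread in the free_local_box.c, tls_sll_drain_box.h, and superslab_slab.c hunks above is that `atomic_fetch_sub` returns the counter's previous value, so exactly one caller observes the transition to zero and therefore owns the recycle step. Below is a minimal standalone sketch of that pattern; it is not the allocator's real API: `slab_meta_t`, `free_one_block`, `release_blocks`, and `recycle_slab` are illustrative stand-ins for `TinySlabMeta`, `tiny_free_local_box`, the remote-drain batch, and `SLAB_TRY_RECYCLE`.

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for TinySlabMeta: only the atomic 'used' counter matters here. */
typedef struct {
    _Atomic uint16_t used;
} slab_meta_t;

/* Hypothetical recycle hook: must run exactly once per EMPTY transition. */
static void recycle_slab(slab_meta_t* meta) {
    (void)meta;
    puts("slab became EMPTY -> recycle (exactly once)");
}

/* Single free: returns 1 only for the caller whose decrement moved used from 1 to 0. */
static int free_one_block(slab_meta_t* meta) {
    uint16_t prev = atomic_fetch_sub_explicit(&meta->used, 1, memory_order_release);
    return prev == 1;  /* previous value 1 means this call performed the 1 -> 0 transition */
}

/* Batched variant (mirrors the remote-drain hunk): subtract the whole batch at once. */
static void release_blocks(slab_meta_t* meta, uint16_t count) {
    if (count == 0) return;
    uint16_t prev = atomic_fetch_sub_explicit(&meta->used, count, memory_order_release);
    if (prev == count) {  /* counter reached zero with this batch */
        recycle_slab(meta);
    }
}

int main(void) {
    slab_meta_t meta = { .used = 3 };

    if (free_one_block(&meta)) {   /* 3 -> 2: not empty yet, no recycle */
        recycle_slab(&meta);
    }
    release_blocks(&meta, 2);      /* 2 -> 0: recycled here, by exactly one owner */
    return 0;
}
```

Because only the caller (or drain batch) that sees the previous value equal to its decrement proceeds, two threads can no longer both read `used == 0` after the fact and race to release the same slab, which is the race the Phase 9-2 comments in the patch describe.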