// superslab_slab.c - Slab initialization and management
// Purpose: Slab lifecycle and bitmap management within SuperSlabs
// License: MIT
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "box/slab_recycling_box.h"
#include "hakmem_env_cache.h"  // Priority-2: ENV cache (eliminate syscalls)
#include <stdio.h>             // fprintf/stderr diagnostics (header name missing in original; stdio.h assumed)

// ============================================================================
// Remote Drain (MPSC queue to freelist conversion)
// ============================================================================

// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;

    static _Atomic uint32_t g_remote_drain_diag_once = 0;
    static int g_remote_drain_diag_en = -1;

    // Atomically take the whole remote list
    uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0, memory_order_acq_rel);
    if (head == 0) return;

    // Convert remote stack (offset 0 next) into freelist encoding via Box API
    // and splice in front of current freelist, preserving relative order.
    void* prev = meta->freelist;
    int cls = (int)meta->class_idx;
    uint16_t drained_count = 0;  // Phase 9-2: Batched used decrement

    HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
    if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
        static _Atomic int g_remote_drain_cls_oob = 0;
        if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) {
            fprintf(stderr,
                    "[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n",
                    (void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head);
        }
        return;
    }

    uintptr_t cur = head;
    while (cur != 0) {
        uintptr_t next = *(uintptr_t*)cur;  // remote-next stored at offset 0

        // Priority-2: Use cached ENV (eliminate lazy-init static overhead)
        if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
#if !HAKMEM_BUILD_RELEASE
            g_remote_drain_diag_en = HAK_ENV_TINY_SLL_DIAG();
#else
            g_remote_drain_diag_en = 0;
#endif
        }
        if (__builtin_expect(g_remote_drain_diag_en, 0)) {
            uintptr_t addr = (uintptr_t)next;
            if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) {
                uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
                if (shot < 8) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n",
                            cls, slab_idx, (void*)cur, (void*)next, (unsigned long)head, prev,
                            (unsigned)meta->used);
                }
            }
#if HAKMEM_TINY_HEADER_CLASSIDX
            int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1);
            if (hdr_cls >= 0 && hdr_cls != cls) {
                uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
                if (shot < 8) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n",
                            cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head);
                }
            }
#endif
        }

#if HAKMEM_TINY_HEADER_CLASSIDX
        // Cross-check header vs meta before writing next (even if diag is off)
        {
            int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1);
            if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) {
                static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0;
                uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed);
                if (n < 16) {
                    fprintf(stderr,
                            "[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n",
                            cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx);
                }
            }
        }
#endif
        // Restore header for header-classes (classes 1-6), which were clobbered by remote push
#if HAKMEM_TINY_HEADER_CLASSIDX
        if (cls != 0) {
            uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
            *(uint8_t*)(uintptr_t)cur = expected;
        }
#endif

        // Rewrite next pointer to Box representation for this class
        tiny_next_write(cls, (void*)cur, prev);
        prev = (void*)cur;
        cur = next;
        drained_count++;
    }

    meta->freelist = prev;

    // Reset remote count after full drain
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);

    // Phase 9-2: Batched decrement of used count (atomic).
    // Remote frees don't decrement used until they land in the freelist.
    if (drained_count > 0) {
        uint16_t old_used = atomic_fetch_sub_explicit(&meta->used, drained_count, memory_order_release);
        // If used became 0 (old_used == drained_count), try to recycle
        if (old_used == drained_count) {
            SLAB_TRY_RECYCLE(ss, slab_idx, meta);
        }
    }

    // Update freelist/nonempty visibility bits
    uint32_t bit = (1u << slab_idx);
    atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
    atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}

// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================

void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    // Phase E1-CORRECT unified geometry:
    // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
    // - usable bytes are determined by slab index (slab0 vs others)
    // - capacity = usable / stride for ALL classes (including former C7)
    size_t usable_size = (slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE : SUPERSLAB_SLAB_USABLE_SIZE;
    size_t stride = block_size;
    uint16_t capacity = (uint16_t)(usable_size / stride);

#if !HAKMEM_BUILD_RELEASE
    if (slab_idx == 0) {
        fprintf(stderr, "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
                usable_size, stride, (unsigned)capacity);
    }
#endif

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    meta->freelist = NULL;   // NULL = linear allocation mode
    meta->used = 0;
    meta->active = 0;        // P1.3: blocks in use by user (starts at 0)
    meta->tls_cached = 0;    // P2.2: blocks cached in TLS SLL (starts at 0)
    meta->capacity = capacity;
    meta->carved = 0;
    // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);

    // Fail-safe: stamp class_idx from geometry (stride → class).
    // This ensures legacy/shared/legacy-refill paths all end with a correct class.
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tiny_class_sizes[i] == stride) {
            meta->class_idx = (uint8_t)i;
            // P1.1: Update class_map for out-of-band lookup on free path
            ss->class_map[slab_idx] = (uint8_t)i;
            break;
        }
    }

#if HAKMEM_BUILD_RELEASE
    static _Atomic int rel_c7_init_logged = 0;
    if (meta->class_idx == 7 && atomic_load_explicit(&rel_c7_init_logged, memory_order_relaxed) == 0) {
        fprintf(stderr, "[REL_C7_INIT] ss=%p slab=%d cls=%u cap=%u used=%u carved=%u stride=%zu\n",
                (void*)ss, slab_idx, (unsigned)meta->class_idx, (unsigned)meta->capacity,
                (unsigned)meta->used, (unsigned)meta->carved, stride);
        atomic_store_explicit(&rel_c7_init_logged, 1, memory_order_relaxed);
    }
#else
    static __thread int dbg_c7_init_logged = 0;
    if (meta->class_idx == 7 && dbg_c7_init_logged == 0) {
        fprintf(stderr, "[DBG_C7_INIT] ss=%p slab=%d cls=%u cap=%u used=%u carved=%u stride=%zu\n",
                (void*)ss, slab_idx, (unsigned)meta->class_idx, (unsigned)meta->capacity,
                (unsigned)meta->used, (unsigned)meta->carved, stride);
        dbg_c7_init_logged = 1;
    }
#endif

    superslab_activate_slab(ss, slab_idx);
}

// ============================================================================
// Slab Bitmap Management
// ============================================================================

void superslab_activate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }
    uint32_t mask = 1u << slab_idx;
    if ((ss->slab_bitmap & mask) == 0) {
        ss->slab_bitmap |= mask;
        ss->active_slabs++;
        // Phase 3d-C: Update hot/cold indices after activating new slab
        ss_update_hot_cold_indices(ss);
    }
}

void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }
    uint32_t mask = 1u << slab_idx;
    if (ss->slab_bitmap & mask) {
        ss->slab_bitmap &= ~mask;
        ss->active_slabs--;
    }
}

int superslab_find_free_slab(SuperSlab* ss) {
    if (!ss) return -1;
    if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) {
        return -1;  // No free slabs
    }
    // Find first 0 bit in bitmap
    int cap = ss_slabs_capacity(ss);
    for (int i = 0; i < cap; i++) {
        if ((ss->slab_bitmap & (1u << i)) == 0) {
            return i;
        }
    }
    return -1;
}
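
// ----------------------------------------------------------------------------
// Illustrative sketch (compiled out): the producer-side push protocol that
// _ss_remote_drain_to_freelist_unsafe() above assumes. A remote thread stores
// the current head into the freed block at offset 0 (which is why the drain
// must restore the header byte for header classes) and then publishes the
// block with a CAS on remote_heads[slab_idx]. Field names follow this file;
// the helper name itself is hypothetical and this is a sketch, not the actual
// remote-free path used elsewhere in the allocator.
// ----------------------------------------------------------------------------
#if 0
static void ss_remote_push_sketch(SuperSlab* ss, int slab_idx, void* block) {
    uintptr_t node = (uintptr_t)block;
    uintptr_t old_head = atomic_load_explicit(&ss->remote_heads[slab_idx],
                                              memory_order_relaxed);
    do {
        // Link the new node in front of the current remote list
        // (next pointer lives at offset 0, clobbering any header byte).
        *(uintptr_t*)node = old_head;
    } while (!atomic_compare_exchange_weak_explicit(&ss->remote_heads[slab_idx],
                                                    &old_head, node,
                                                    memory_order_release,
                                                    memory_order_relaxed));
    // Track pending remote frees; the drain resets this to 0.
    atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1,
                              memory_order_relaxed);
}
#endif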