#include "hakmem_shared_pool.h" #include "hakmem_tiny_superslab.h" #include "hakmem_tiny_superslab_constants.h" #include #include // Phase 12-2: SharedSuperSlabPool skeleton implementation // Goal: // - Centralize SuperSlab allocation/registration // - Provide acquire_slab/release_slab APIs for later refill/free integration // - Keep logic simple & conservative; correctness and observability first. // // Notes: // - Concurrency: protected by g_shared_pool.alloc_lock for now. // - class_hints is best-effort: read lock-free, written under lock. // - LRU hooks left as no-op placeholders. SharedSuperSlabPool g_shared_pool = { .slabs = NULL, .capacity = 0, .total_count = 0, .active_count = 0, .alloc_lock = PTHREAD_MUTEX_INITIALIZER, .class_hints = { NULL }, .lru_head = NULL, .lru_tail = NULL, .lru_count = 0 }; static void shared_pool_ensure_capacity_unlocked(uint32_t min_capacity) { if (g_shared_pool.capacity >= min_capacity) { return; } uint32_t new_cap = g_shared_pool.capacity ? g_shared_pool.capacity : 16; while (new_cap < min_capacity) { new_cap *= 2; } SuperSlab** new_slabs = (SuperSlab**)realloc(g_shared_pool.slabs, new_cap * sizeof(SuperSlab*)); if (!new_slabs) { // Allocation failure: keep old state; caller must handle NULL later. return; } // Zero new entries to keep scanning logic simple. memset(new_slabs + g_shared_pool.capacity, 0, (new_cap - g_shared_pool.capacity) * sizeof(SuperSlab*)); g_shared_pool.slabs = new_slabs; g_shared_pool.capacity = new_cap; } void shared_pool_init(void) { // Idempotent init; safe to call from multiple early paths. // pthread_mutex_t with static initializer is already valid. pthread_mutex_lock(&g_shared_pool.alloc_lock); if (g_shared_pool.capacity == 0 && g_shared_pool.slabs == NULL) { shared_pool_ensure_capacity_unlocked(16); } pthread_mutex_unlock(&g_shared_pool.alloc_lock); } /* * Internal: allocate and register a new SuperSlab for the shared pool. * * Phase 12 NOTE: * - We MUST use the real superslab_allocate() path so that: * - backing memory is a full SuperSlab region (1–2MB), * - header/layout are initialized correctly, * - registry integration stays consistent. * - shared_pool is responsible only for: * - tracking pointers, * - marking per-slab class_idx as UNASSIGNED initially. * It does NOT bypass registry/LRU. * * Caller must hold alloc_lock. */ static SuperSlab* shared_pool_allocate_superslab_unlocked(void) { // Use size_class 0 as a neutral hint; Phase 12 per-slab class_idx is authoritative. extern SuperSlab* superslab_allocate(uint8_t size_class); SuperSlab* ss = superslab_allocate(0); if (!ss) { return NULL; } // superslab_allocate() already: // - zeroes slab metadata / remote queues, // - sets magic/lg_size/etc, // - registers in global registry. // For shared-pool semantics we normalize all slab class_idx to UNASSIGNED. int max_slabs = ss_slabs_capacity(ss); for (int i = 0; i < max_slabs; i++) { ss->slabs[i].class_idx = 255; // UNASSIGNED } if (g_shared_pool.total_count >= g_shared_pool.capacity) { shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1); if (g_shared_pool.total_count >= g_shared_pool.capacity) { // Pool table expansion failed; leave ss alive (registry-owned), // but do not treat it as part of shared_pool. return NULL; } } g_shared_pool.slabs[g_shared_pool.total_count] = ss; g_shared_pool.total_count++; // Not counted as active until at least one slab is assigned. 

SuperSlab* shared_pool_acquire_superslab(void) {
    // Phase 12 debug note:
    // If the shared backend is disabled at the Box API level, this function should
    // not be called at all. While a bench SEGV was being isolated, a hard guard
    // (an unconditional "return -1") lived here so the legacy path could be
    // validated without touching shared-pool state; that guard has since been
    // removed and the function now operates normally.
    shared_pool_init();

    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    // For now, always allocate a fresh SuperSlab and register it.
    // More advanced reuse/GC comes later.
    SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    return ss;
}

int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out) {
    // Phase 12: the real shared backend is enabled; this function must be correct & safe.
    // Invariants (callers rely on):
    //   - On success, *ss_out != NULL and 0 <= *slab_idx_out < SLABS_PER_SUPERSLAB_MAX.
    //   - The chosen slab has meta->class_idx == class_idx and capacity > 0.
    if (!ss_out || !slab_idx_out) {
        return -1;
    }
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return -1;
    }

    shared_pool_init();

    // Fast-path hint: read without lock (best-effort).
    SuperSlab* hint = g_shared_pool.class_hints[class_idx];
    if (hint) {
        // Scan for a free, unassigned slab in this SuperSlab.
        uint32_t bitmap = hint->slab_bitmap;
        for (int i = 0; i < SLABS_PER_SUPERSLAB_MAX; i++) {
            uint32_t bit = (1u << i);
            if ((bitmap & bit) == 0 && hint->slabs[i].class_idx == 255) {
                // Tentative claim: upgrade under lock to avoid races.
                pthread_mutex_lock(&g_shared_pool.alloc_lock);
                // Re-check under lock.
                bitmap = hint->slab_bitmap;
                if ((bitmap & bit) == 0 && hint->slabs[i].class_idx == 255) {
                    hint->slab_bitmap |= bit;
                    hint->slabs[i].class_idx = (uint8_t)class_idx;
                    hint->active_slabs++;
                    if (hint->active_slabs == 1) {
                        g_shared_pool.active_count++;
                    }
                    *ss_out = hint;
                    *slab_idx_out = i;
                    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
                    return 0;
                }
                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
                break; // fall through to slow path
            }
        }
    }

    // Slow path: lock and scan all registered SuperSlabs.
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    for (uint32_t idx = 0; idx < g_shared_pool.total_count; idx++) {
        SuperSlab* ss = g_shared_pool.slabs[idx];
        if (!ss) {
            continue;
        }
        uint32_t bitmap = ss->slab_bitmap;
        for (int i = 0; i < SLABS_PER_SUPERSLAB_MAX; i++) {
            uint32_t bit = (1u << i);
            if ((bitmap & bit) == 0 && ss->slabs[i].class_idx == 255) {
                // Assign this slab to class_idx.
                ss->slab_bitmap |= bit;
                ss->slabs[i].class_idx = (uint8_t)class_idx;
                ss->active_slabs++;
                if (ss->active_slabs == 1) {
                    g_shared_pool.active_count++;
                }
                // Update hint.
                g_shared_pool.class_hints[class_idx] = ss;

                *ss_out = ss;
                *slab_idx_out = i;
                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
                return 0;
            }
        }
    }

    // No existing space: allocate a new SuperSlab and take its first slab.
    SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
    if (!ss) {
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1;
    }

    int slab_idx = 0;
    ss->slab_bitmap |= (1u << slab_idx);
    ss->slabs[slab_idx].class_idx = (uint8_t)class_idx;
    ss->active_slabs = 1;
    g_shared_pool.active_count++;
    g_shared_pool.class_hints[class_idx] = ss;

    *ss_out = ss;
    *slab_idx_out = slab_idx;
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    return 0;
}
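
/*
 * Usage sketch (hypothetical caller, not part of this file's API, never called):
 * how a refill path might obtain a slab for a size class and rely on the
 * invariants documented above. The function name and the surrounding refill
 * logic are assumptions; a real refill path would carve blocks from the slab.
 */
__attribute__((unused))
static int example_refill_for_class(int class_idx) {
    SuperSlab* ss = NULL;
    int slab_idx = -1;
    if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0) {
        return -1; // no SuperSlab available; caller falls back or fails
    }
    // Guaranteed on success:
    //   ss != NULL, 0 <= slab_idx < SLABS_PER_SUPERSLAB_MAX,
    //   ss->slabs[slab_idx].class_idx == class_idx.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
    (void)meta; // a real caller would initialize its freelist from this slab here
    return slab_idx;
}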

void shared_pool_release_slab(SuperSlab* ss, int slab_idx) {
    if (!ss) {
        return;
    }
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
        return;
    }

    // Lazy-init debug logging flag from HAKMEM_SS_FREE_DEBUG.
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }

    pthread_mutex_lock(&g_shared_pool.alloc_lock);

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    if (meta->used != 0) {
        // Not actually empty; nothing to do.
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return;
    }

    if (dbg == 1) {
        fprintf(stderr,
                "[SS_SLAB_EMPTY] ss=%p slab_idx=%d class=%d used=0 (releasing to pool)\n",
                (void*)ss, slab_idx, meta->class_idx);
    }

    uint32_t bit = (1u << slab_idx);
    if (ss->slab_bitmap & bit) {
        ss->slab_bitmap &= ~bit;
        uint8_t old_class = meta->class_idx;
        meta->class_idx = 255; // UNASSIGNED

        if (ss->active_slabs > 0) {
            ss->active_slabs--;
            if (ss->active_slabs == 0 && g_shared_pool.active_count > 0) {
                g_shared_pool.active_count--;
            }
        }

        // Invalidate the class hint if it pointed here. We could rescan ss for
        // another slab of the same class, but to keep this cheap we do a simple
        // best-effort clear and let the next acquire repopulate the hint.
        if (old_class < TINY_NUM_CLASSES_SS && g_shared_pool.class_hints[old_class] == ss) {
            g_shared_pool.class_hints[old_class] = NULL;
        }

        // DEBUG: check whether the SuperSlab is now completely empty.
        if (dbg == 1 && ss->active_slabs == 0) {
            fprintf(stderr,
                    "[SS_COMPLETELY_EMPTY] ss=%p active_slabs=0 (calling superslab_free)\n",
                    (void*)ss);
        }

        // Phase 12-4: free the SuperSlab when it becomes completely empty.
        if (ss->active_slabs == 0) {
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
            // superslab_free() either:
            //   1. caches the SuperSlab in the LRU (hak_ss_lru_push) - lazy deallocation, or
            //   2. munmaps it if the LRU is full - eager deallocation.
            extern void superslab_free(SuperSlab* ss);
            superslab_free(ss);
            return;
        }
    }

    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}
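
/*
 * Usage sketch (hypothetical caller, not part of this file's API, never called):
 * how a free path might hand an empty slab back to the shared pool. The trigger
 * condition shown here is an assumption; the real free path decides when
 * meta->used reaches zero, and shared_pool_release_slab() re-checks it under
 * the lock, so a stale or racy call is harmless.
 */
__attribute__((unused))
static void example_on_slab_empty(SuperSlab* ss, int slab_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
        return;
    }
    if (ss->slabs[slab_idx].used == 0) {
        // May also free or LRU-cache the whole SuperSlab if it becomes empty.
        shared_pool_release_slab(ss, slab_idx);
    }
}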