hakmem/core/hakmem_shared_pool_release.c

#include "hakmem_shared_pool_internal.h"
#include "hakmem_debug_master.h"
#include "box/ss_slab_meta_box.h"
#include "box/ss_hot_cold_box.h"

#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>

void
shared_pool_release_slab(SuperSlab* ss, int slab_idx)
{
    // Phase 12: SP-SLOT Box - Slot-based Release
    //
    // Flow:
    //   1. Validate inputs and check meta->used == 0
    //   2. Find SharedSSMeta for this SuperSlab
    //   3. Mark slot ACTIVE → EMPTY
    //   4. Push to per-class free list (enables same-class reuse)
    //   5. If all slots EMPTY → superslab_free() → LRU cache

    if (!ss) {
        return;
    }
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
        return;
    }

    // Debug logging
#if !HAKMEM_BUILD_RELEASE
    static int dbg = -1;
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#else
    static const int dbg = 0;
#endif

    // P0 instrumentation: count lock acquisitions
    lock_stats_init();
    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_stats_enabled, 1);
        atomic_fetch_add(&g_lock_release_slab_count, 1);
    }

    pthread_mutex_lock(&g_shared_pool.alloc_lock);

    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
    if (slab_meta->used != 0) {
        // Not actually empty; nothing to do
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return;
    }

    uint8_t class_idx = slab_meta->class_idx;

    #if !HAKMEM_BUILD_RELEASE
    if (dbg == 1) {
        fprintf(stderr, "[SP_SLOT_RELEASE] ss=%p slab_idx=%d class=%d used=0 (marking EMPTY)\n",
                (void*)ss, slab_idx, class_idx);
    }
    #endif

    // Find SharedSSMeta for this SuperSlab
    SharedSSMeta* sp_meta = NULL;
    uint32_t count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    for (uint32_t i = 0; i < count; i++) {
        // RACE FIX: Load pointer atomically
        SuperSlab* meta_ss = atomic_load_explicit(&g_shared_pool.ss_metadata[i].ss, memory_order_relaxed);
        if (meta_ss == ss) {
            sp_meta = &g_shared_pool.ss_metadata[i];
            break;
        }
    }

    if (!sp_meta) {
        // SuperSlab not in SP-SLOT system yet - create metadata
        sp_meta = sp_meta_find_or_create(ss);
        if (!sp_meta) {
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
            return;  // Failed to create metadata
        }
    }

    // Mark slot as EMPTY (ACTIVE → EMPTY)
    uint32_t slab_bit = (1u << slab_idx);
    SlotState slot_state = atomic_load_explicit(
        &sp_meta->slots[slab_idx].state,
        memory_order_acquire);
    if (slot_state != SLOT_ACTIVE && (ss->slab_bitmap & slab_bit)) {
        // Legacy path import: rebuild slot states from SuperSlab bitmap/class_map
        sp_meta_sync_slots_from_ss(sp_meta, ss);
        slot_state = atomic_load_explicit(
            &sp_meta->slots[slab_idx].state,
            memory_order_acquire);
    }

    if (slot_state != SLOT_ACTIVE || sp_slot_mark_empty(sp_meta, slab_idx) != 0) {
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return;  // Slot wasn't ACTIVE
    }

    // Update SuperSlab metadata
    uint32_t bit = (1u << slab_idx);
    if (ss->slab_bitmap & bit) {
        ss->slab_bitmap &= ~bit;
        slab_meta->class_idx = 255;  // UNASSIGNED
        // P1.1: Mark class_map as UNASSIGNED when releasing slab
        ss->class_map[slab_idx] = 255;

        if (ss->active_slabs > 0) {
            ss->active_slabs--;
            if (ss->active_slabs == 0 && g_shared_pool.active_count > 0) {
                g_shared_pool.active_count--;
            }
        }
        if (class_idx < TINY_NUM_CLASSES_SS &&
            g_shared_pool.class_active_slots[class_idx] > 0) {
            g_shared_pool.class_active_slots[class_idx]--;
        }
    }

    // P0-4: Push to lock-free per-class free list (enables reuse by same class)
    // Note: push BEFORE releasing mutex (slot state already updated under lock)
    if (class_idx < TINY_NUM_CLASSES_SS) {
        sp_freelist_push_lockfree(class_idx, sp_meta, slab_idx);

        #if !HAKMEM_BUILD_RELEASE
        if (dbg == 1) {
            fprintf(stderr, "[SP_SLOT_FREELIST_LOCKFREE] class=%d pushed slot (ss=%p slab=%d) active_slots=%u/%u\n",
                    class_idx, (void*)ss, slab_idx,
                    sp_meta->active_slots, sp_meta->total_slots);
        }
        #endif
    }

    // Check if SuperSlab is now completely empty (all slots EMPTY or UNUSED)
    if (sp_meta->active_slots == 0) {
        #if !HAKMEM_BUILD_RELEASE
        if (dbg == 1) {
            fprintf(stderr, "[SP_SLOT_COMPLETELY_EMPTY] ss=%p active_slots=0 (calling superslab_free)\n",
                    (void*)ss);
        }
        #endif

        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }

        // RACE FIX: Set meta->ss to NULL BEFORE unlocking mutex
        // This prevents Stage 2 from accessing freed SuperSlab
        atomic_store_explicit(&sp_meta->ss, NULL, memory_order_release);

        pthread_mutex_unlock(&g_shared_pool.alloc_lock);

        // Remove from legacy backend list (if present) to prevent dangling pointers
        extern void remove_superslab_from_legacy_head(SuperSlab* ss);
        remove_superslab_from_legacy_head(ss);

        // Free SuperSlab:
        // 1. Try LRU cache (hak_ss_lru_push) - lazy deallocation
        // 2. Or munmap if LRU is full - eager deallocation
        extern void superslab_free(SuperSlab* ss);
        superslab_free(ss);
        return;
    }

    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_release_count, 1);
    }
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}
Refactor: Split monolithic hakmem_shared_pool.c into acquire/release modules
- Split core/hakmem_shared_pool.c into acquire/release modules for maintainability.
- Introduced core/hakmem_shared_pool_internal.h for shared internal API.
- Fixed incorrect function name usage (superslab_alloc -> superslab_allocate).
- Increased SUPER_REG_SIZE to 1M to support large working sets (Phase 9-2 fix).
- Updated Makefile.
- Verified with benchmarks.
2025-11-30 18:11:08 +09:00
			`#include "hakmem_shared_pool_internal.h"`
			`#include "hakmem_debug_master.h"`
			`#include "box/ss_slab_meta_box.h"`
			`#include "box/ss_hot_cold_box.h"`

			`#include <stdlib.h>`
			`#include <stdio.h>`
			`#include <stdatomic.h>`

			`void`
			`shared_pool_release_slab(SuperSlab* ss, int slab_idx)`
			`{`
			`// Phase 12: SP-SLOT Box - Slot-based Release`
			`//`
			`// Flow:`
			`// 1. Validate inputs and check meta->used == 0`
			`// 2. Find SharedSSMeta for this SuperSlab`
			`// 3. Mark slot ACTIVE → EMPTY`
			`// 4. Push to per-class free list (enables same-class reuse)`
			`// 5. If all slots EMPTY → superslab_free() → LRU cache`

			`if (!ss) {`
			`return;`
			`}`
			`if (slab_idx < 0 \|\| slab_idx >= SLABS_PER_SUPERSLAB_MAX) {`
			`return;`
			`}`

			`// Debug logging`
			`#if !HAKMEM_BUILD_RELEASE`
			`static int dbg = -1;`
			`if (__builtin_expect(dbg == -1, 0)) {`
			`const char* e = getenv("HAKMEM_SS_FREE_DEBUG");`
			`dbg = (e && *e && *e != '0') ? 1 : 0;`
			`}`
			`#else`
			`static const int dbg = 0;`
			`#endif`

			`// P0 instrumentation: count lock acquisitions`
			`lock_stats_init();`
			`if (g_lock_stats_enabled == 1) {`
			`atomic_fetch_add(&g_lock_stats_enabled, 1);`
			`atomic_fetch_add(&g_lock_release_slab_count, 1);`
			`}`

			`pthread_mutex_lock(&g_shared_pool.alloc_lock);`

			`TinySlabMeta* slab_meta = &ss->slabs[slab_idx];`
			`if (slab_meta->used != 0) {`
			`// Not actually empty; nothing to do`
			`if (g_lock_stats_enabled == 1) {`
			`atomic_fetch_add(&g_lock_release_count, 1);`
			`}`
			`pthread_mutex_unlock(&g_shared_pool.alloc_lock);`
			`return;`
			`}`

			`uint8_t class_idx = slab_meta->class_idx;`

			`#if !HAKMEM_BUILD_RELEASE`
			`if (dbg == 1) {`
			`fprintf(stderr, "[SP_SLOT_RELEASE] ss=%p slab_idx=%d class=%d used=0 (marking EMPTY)\n",`
			`(void*)ss, slab_idx, class_idx);`
			`}`
			`#endif`

			`// Find SharedSSMeta for this SuperSlab`
			`SharedSSMeta* sp_meta = NULL;`
			`uint32_t count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);`
			`for (uint32_t i = 0; i < count; i++) {`
			`// RACE FIX: Load pointer atomically`
			`SuperSlab* meta_ss = atomic_load_explicit(&g_shared_pool.ss_metadata[i].ss, memory_order_relaxed);`
			`if (meta_ss == ss) {`
			`sp_meta = &g_shared_pool.ss_metadata[i];`
			`break;`
			`}`
			`}`

			`if (!sp_meta) {`
			`// SuperSlab not in SP-SLOT system yet - create metadata`
			`sp_meta = sp_meta_find_or_create(ss);`
			`if (!sp_meta) {`
			`pthread_mutex_unlock(&g_shared_pool.alloc_lock);`
			`return; // Failed to create metadata`
			`}`
			`}`

			`// Mark slot as EMPTY (ACTIVE → EMPTY)`
			`uint32_t slab_bit = (1u << slab_idx);`
			`SlotState slot_state = atomic_load_explicit(`
			`&sp_meta->slots[slab_idx].state,`
			`memory_order_acquire);`
			`if (slot_state != SLOT_ACTIVE && (ss->slab_bitmap & slab_bit)) {`
			`// Legacy path import: rebuild slot states from SuperSlab bitmap/class_map`
			`sp_meta_sync_slots_from_ss(sp_meta, ss);`
			`slot_state = atomic_load_explicit(`
			`&sp_meta->slots[slab_idx].state,`
			`memory_order_acquire);`
			`}`

			`if (slot_state != SLOT_ACTIVE \|\| sp_slot_mark_empty(sp_meta, slab_idx) != 0) {`
			`if (g_lock_stats_enabled == 1) {`
			`atomic_fetch_add(&g_lock_release_count, 1);`
			`}`
			`pthread_mutex_unlock(&g_shared_pool.alloc_lock);`
			`return; // Slot wasn't ACTIVE`
			`}`

			`// Update SuperSlab metadata`
			`uint32_t bit = (1u << slab_idx);`
			`if (ss->slab_bitmap & bit) {`
			`ss->slab_bitmap &= ~bit;`
			`slab_meta->class_idx = 255; // UNASSIGNED`
			`// P1.1: Mark class_map as UNASSIGNED when releasing slab`
			`ss->class_map[slab_idx] = 255;`

			`if (ss->active_slabs > 0) {`
			`ss->active_slabs--;`
			`if (ss->active_slabs == 0 && g_shared_pool.active_count > 0) {`
			`g_shared_pool.active_count--;`
			`}`
			`}`
			`if (class_idx < TINY_NUM_CLASSES_SS &&`
			`g_shared_pool.class_active_slots[class_idx] > 0) {`
			`g_shared_pool.class_active_slots[class_idx]--;`
			`}`
			`}`

			`// P0-4: Push to lock-free per-class free list (enables reuse by same class)`
			`// Note: push BEFORE releasing mutex (slot state already updated under lock)`
			`if (class_idx < TINY_NUM_CLASSES_SS) {`
			`sp_freelist_push_lockfree(class_idx, sp_meta, slab_idx);`

			`#if !HAKMEM_BUILD_RELEASE`
			`if (dbg == 1) {`
			`fprintf(stderr, "[SP_SLOT_FREELIST_LOCKFREE] class=%d pushed slot (ss=%p slab=%d) active_slots=%u/%u\n",`
			`class_idx, (void*)ss, slab_idx,`
			`sp_meta->active_slots, sp_meta->total_slots);`
			`}`
			`#endif`
			`}`

			`// Check if SuperSlab is now completely empty (all slots EMPTY or UNUSED)`
			`if (sp_meta->active_slots == 0) {`
			`#if !HAKMEM_BUILD_RELEASE`
			`if (dbg == 1) {`
			`fprintf(stderr, "[SP_SLOT_COMPLETELY_EMPTY] ss=%p active_slots=0 (calling superslab_free)\n",`
			`(void*)ss);`
			`}`
			`#endif`

			`if (g_lock_stats_enabled == 1) {`
			`atomic_fetch_add(&g_lock_release_count, 1);`
			`}`

			`// RACE FIX: Set meta->ss to NULL BEFORE unlocking mutex`
			`// This prevents Stage 2 from accessing freed SuperSlab`
			`atomic_store_explicit(&sp_meta->ss, NULL, memory_order_release);`

			`pthread_mutex_unlock(&g_shared_pool.alloc_lock);`

			`// Remove from legacy backend list (if present) to prevent dangling pointers`
			`extern void remove_superslab_from_legacy_head(SuperSlab* ss);`
			`remove_superslab_from_legacy_head(ss);`

			`// Free SuperSlab:`
			`// 1. Try LRU cache (hak_ss_lru_push) - lazy deallocation`
			`// 2. Or munmap if LRU is full - eager deallocation`
			`extern void superslab_free(SuperSlab* ss);`
			`superslab_free(ss);`
			`return;`
			`}`

			`if (g_lock_stats_enabled == 1) {`
			`atomic_fetch_add(&g_lock_release_count, 1);`
			`}`
			`pthread_mutex_unlock(&g_shared_pool.alloc_lock);`
			`}`