hakmem/core/box/warm_pool_prefill_box.h

// warm_pool_prefill_box.h - Warm Pool Prefill Box
// Purpose: Secondary prefill optimization - load multiple SuperSlabs when pool is empty
// License: MIT
// Date: 2025-12-04

#ifndef HAK_WARM_POOL_PREFILL_BOX_H
#define HAK_WARM_POOL_PREFILL_BOX_H

#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_tiny_superslab.h"
#include "../tiny_tls.h"
#include "../front/tiny_warm_pool.h"
#include "../box/warm_pool_stats_box.h"
#include "../box/warm_pool_rel_counters_box.h"

// Defined in another translation unit. Within this header it is only read
// (relaxed atomic load) and printed as the "magic=" field of the C7 meta log
// line; what the value represents is not visible from here.
extern _Atomic uintptr_t g_c7_stage3_magic_ss;

// Report whether warm-pool prefill diagnostics are enabled.
//
// The answer comes from the HAKMEM_TINY_WARM_LOG environment variable: any
// non-empty value whose first character is not '0' enables logging. The
// variable is looked up lazily on the first call and the result is cached, so
// later changes to the environment are not observed.
//
// Returns: 1 if logging is enabled, 0 otherwise.
//
// The cache is atomic so that concurrent first calls from several threads do
// not race on a plain int. Racing initialisers may each call getenv(), but
// (assuming the environment is not modified concurrently) they all compute
// and store the same value, so relaxed ordering is sufficient.
static inline int warm_prefill_log_enabled(void) {
    static _Atomic int g_warm_log = -1;  // -1 = not resolved yet
    int cached = atomic_load_explicit(&g_warm_log, memory_order_relaxed);
    if (__builtin_expect(cached == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_WARM_LOG");
        cached = (e && *e && *e != '0') ? 1 : 0;
        atomic_store_explicit(&g_warm_log, cached, memory_order_relaxed);
    }
    return cached;
}

// Print a one-line snapshot of the slab metadata currently referenced by `tls`.
//
// Parameters:
//   tag - short label inserted into the line prefix: "[REL_C7_<tag>]" when
//         HAKMEM_BUILD_RELEASE is set, "[DBG_C7_<tag>]" otherwise
//   tls - TLS slab descriptor to inspect; a NULL pointer or a descriptor with
//         no SuperSlab attached is silently ignored
//
// Nothing is printed unless warm_prefill_log_enabled() returns non-zero, and
// at most four lines are ever emitted. The line counter is a function-local
// static atomic, i.e. it is shared by all threads rather than per-thread.
//
// Release and debug builds share one body; only the three-letter prefix is
// selected at compile time, so the two variants cannot drift apart.
static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) {
    if (!tls || !tls->ss) return;
    if (!warm_prefill_log_enabled()) return;

#if HAKMEM_BUILD_RELEASE
    static const char build_prefix[] = "REL";
#else
    static const char build_prefix[] = "DBG";
#endif
    const uint32_t max_lines = 4;
    static _Atomic uint32_t emitted = 0;

    // Once the line budget is spent, return before touching the counter again.
    // This keeps the counter from wrapping around after 2^32 calls (which
    // would re-enable logging) and avoids a read-modify-write on a shared
    // cache line for every later call.
    if (atomic_load_explicit(&emitted, memory_order_relaxed) >= max_lines) return;
    if (atomic_fetch_add_explicit(&emitted, 1, memory_order_relaxed) >= max_lines) return;

    TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
    uintptr_t magic = atomic_load_explicit(&g_c7_stage3_magic_ss, memory_order_relaxed);
    fprintf(stderr,
            "[%s_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p magic=%#lx\n",
            build_prefix,
            tag,
            (void*)tls->ss,
            (unsigned)tls->slab_idx,
            (unsigned)meta->class_idx,
            (unsigned)meta->used,
            (unsigned)meta->capacity,
            (unsigned)meta->carved,
            meta->freelist,
            (unsigned long)magic);
}

// Forward declarations (both symbols are defined elsewhere)
//
// g_tls_slabs      - per-thread array of TINY_NUM_CLASSES TinyTLSSlab entries;
//                    warm_pool_do_prefill() indexes it with class_idx.
// superslab_refill - returns a SuperSlab pointer; the prefill code treats a
//                    NULL result as failure and re-reads g_tls_slabs[class_idx]
//                    after a successful call.
//                    NOTE(review): presumably it binds the new SuperSlab to
//                    that TLS entry - confirm against its definition.
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
extern SuperSlab* superslab_refill(int class_idx);

// ============================================================================
// Warm Pool Prefill Policy
// ============================================================================

// Prefill budget: number of SuperSlabs warm_pool_do_prefill() works through in
// one call when it finds the warm pool empty (with a non-empty pool it uses 1)
// - All slabs except the last are pushed to the warm pool; the last one stays
//   in TLS, so a budget of N parks at most N-1 slabs in the pool
// - This avoids repeated registry scans on rapid cache misses
// - Phase 2: Keep at 2 (increasing to 4 caused contention regression -1.5%)
#define WARM_POOL_PREFILL_BUDGET 2

// ============================================================================
// Warm Pool Prefill API (Inline for Cold Path)
// ============================================================================

// Perform secondary prefill when warm pool becomes empty
// Called from unified_cache_refill() cold path when warm_pool_count() == 0
//
// Algorithm:
//   1. Check if pool is empty
//   2. If yes, load PREFILL_BUDGET additional SuperSlabs via superslab_refill
//   3. Push all but the last to warm pool
//   4. Return the last one for immediate carving (in tls->ss)
//
// Returns: 0 on success, -1 if superslab_refill fails
//
// Performance: Only triggered when pool is empty, cold path cost
//
static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm_cap_hint) {
    const int is_c7 = (class_idx == 7);

#if HAKMEM_BUILD_RELEASE
    if (is_c7) {
        warm_pool_rel_c7_prefill_call();
    }
#endif

    // An empty pool gets the full budget (all but the last slab are parked in
    // the pool); otherwise a single slab is sought for the caller.
    int remaining = WARM_POOL_PREFILL_BUDGET;
    if (tiny_warm_pool_count(class_idx) != 0) {
        remaining = 1;
    }

    while (remaining > 0) {
        if (is_c7) {
            warm_prefill_log_c7_meta("PREFILL_META", tls);
        }

        if (tls->ss == NULL) {
            // Nothing bound to this TLS entry: request a fresh SuperSlab.
            if (superslab_refill(class_idx) == NULL) {
                return -1;  // Refill failed
            }
            // Continue with the per-class TLS entry as it is after the refill.
            tls = &g_tls_slabs[class_idx];
        }

        // Give up on a missing SuperSlab or one whose magic does not match.
        // NOTE(review): this path still returns 0 - confirm that callers
        // re-check tls->ss rather than relying on the return value alone.
        if (tls->ss == NULL || tls->ss->magic != SUPERSLAB_MAGIC) {
            break;
        }

        // C7 only: skip slabs that are not pristine (used > 0, carved > 0, or
        // a non-NULL freelist).
        // NOTE(review): this filter runs only while warm_prefill_log_enabled()
        // is true, so with logging off non-pristine C7 slabs are accepted -
        // confirm this gating is intentional.
        if (is_c7 && warm_prefill_log_enabled()) {
            TinySlabMeta* sm = &tls->ss->slabs[tls->slab_idx];
            const int not_pristine =
                sm->class_idx == 7 &&
                (sm->used > 0 || sm->carved > 0 || sm->freelist != NULL);

            if (not_pristine) {
#if HAKMEM_BUILD_RELEASE
                // Release builds report the first skip only.
                static _Atomic int rel_c7_skip_logged = 0;
                if (atomic_load_explicit(&rel_c7_skip_logged, memory_order_relaxed) == 0) {
                    fprintf(stderr,
                            "[REL_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
                            (void*)tls->ss,
                            (unsigned)tls->slab_idx,
                            (unsigned)sm->used,
                            (unsigned)sm->capacity,
                            (unsigned)sm->carved,
                            sm->freelist);
                    atomic_store_explicit(&rel_c7_skip_logged, 1, memory_order_relaxed);
                }
#else
                // Debug builds report up to four skips per thread.
                static __thread int dbg_c7_skip_logged = 0;
                if (dbg_c7_skip_logged < 4) {
                    fprintf(stderr,
                            "[DBG_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
                            (void*)tls->ss,
                            (unsigned)tls->slab_idx,
                            (unsigned)sm->used,
                            (unsigned)sm->capacity,
                            (unsigned)sm->carved,
                            sm->freelist);
                    dbg_c7_skip_logged++;
                }
#endif
                // Unbind the slab from TLS and spend one unit of budget on it.
                // If this was the last unit, the function returns 0 with
                // tls->ss == NULL.
                tls->ss = NULL;
                remaining--;
                continue;
            }
        }

        if (remaining <= 1) {
            // Final slab: leave it in tls->ss for immediate carving.
            break;
        }

        // More budget left: park this slab in the warm pool and fetch another.
        tiny_warm_pool_push_with_cap(class_idx, tls->ss, warm_cap_hint);
        warm_pool_record_prefilled(class_idx);
#if HAKMEM_BUILD_RELEASE
        if (is_c7) {
            warm_pool_rel_c7_prefill_slab();
        }
#else
        if (is_c7 && warm_prefill_log_enabled()) {
            // Debug builds report up to eight parked C7 slabs per thread.
            static __thread int dbg_c7_prefill_logs = 0;
            if (dbg_c7_prefill_logs < 8) {
                TinySlabMeta* sm = &tls->ss->slabs[tls->slab_idx];
                fprintf(stderr,
                        "[DBG_C7_PREFILL] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
                        (void*)tls->ss,
                        (unsigned)tls->slab_idx,
                        (unsigned)sm->used,
                        (unsigned)sm->capacity,
                        (unsigned)sm->carved,
                        sm->freelist);
                dbg_c7_prefill_logs++;
            }
        }
#endif
        tls->ss = NULL;  // Forces the next iteration to refill
        remaining--;
    }

    return 0;  // Success
}

#endif // HAK_WARM_POOL_PREFILL_BOX_H