// SuperSlab targeted queue (per-class lock-free ring)
#include "hakmem_tiny_ss_target.h"
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>
#include "hakmem_tiny.h" // debug counters externs

#ifndef SSQ_CAP
#define SSQ_CAP 1024u // power of two
#endif
#define SSQ_MASK (SSQ_CAP - 1u)
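
// Guard the "power of two" requirement noted above: the mask indexing silently
// corrupts slot selection if SSQ_CAP is overridden with a non-power-of-two.
_Static_assert(SSQ_CAP != 0u && (SSQ_CAP & (SSQ_CAP - 1u)) == 0u,
               "SSQ_CAP must be a non-zero power of two");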

typedef struct {
    _Atomic uint64_t head;               // producers fetch_add
    _Atomic uint64_t tail;               // consumers fetch_add
    _Atomic(uintptr_t) slots[SSQ_CAP];   // 0 == empty
} ClassQ;

static ClassQ g_q[TINY_NUM_CLASSES];
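
// head/tail are free-running 64-bit counters; only the low bits (pos & SSQ_MASK)
// ever index a slot, so 64-bit wraparound is not a practical concern.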

void ss_target_init(void) {
    for (int c = 0; c < TINY_NUM_CLASSES; c++) {
        atomic_store_explicit(&g_q[c].head, 0, memory_order_relaxed);
        atomic_store_explicit(&g_q[c].tail, 0, memory_order_relaxed);
        for (uint32_t i = 0; i < SSQ_CAP; i++) {
            atomic_store_explicit(&g_q[c].slots[i], (uintptr_t)0, memory_order_relaxed);
        }
    }
}

// Multi-producer enqueue (best-effort, drops on full)
void ss_target_enqueue(int class_idx, struct SuperSlab* ss) {
    if (!ss || class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return;
    ClassQ* q = &g_q[class_idx];
    // Try a few times in case of transient contention
    for (int attempt = 0; attempt < 4; attempt++) {
        uint64_t pos = atomic_fetch_add_explicit(&q->head, 1u, memory_order_acq_rel);
        uint32_t idx = (uint32_t)(pos & SSQ_MASK);
        uintptr_t expected = 0;
        if (atomic_compare_exchange_strong_explicit(&q->slots[idx], &expected,
                                                    (uintptr_t)ss,
                                                    memory_order_release,
                                                    memory_order_relaxed)) {
            atomic_fetch_add_explicit(&g_dbg_adopt_enq[class_idx], 1u, memory_order_relaxed);
            return; // enqueued
        }
        // slot busy, retry (head advanced; rare overflow tolerated)
    }
    // Drop on persistent contention to keep non-blocking
}
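
// Ordering note: the release CAS in ss_target_enqueue() pairs with the acquire
// exchange in ss_target_pop(), so writes to *ss made before a successful
// enqueue are visible to the thread that later pops the pointer.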

// Single-consumer pop (intended to be called by alloc slow path opportunistically)
struct SuperSlab* ss_target_pop(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return NULL;
    ClassQ* q = &g_q[class_idx];
    for (int tries = 0; tries < (int)SSQ_CAP; tries++) {
        uint64_t pos = atomic_fetch_add_explicit(&q->tail, 1u, memory_order_acq_rel);
        uint32_t idx = (uint32_t)(pos & SSQ_MASK);
        uintptr_t val = atomic_exchange_explicit(&q->slots[idx], (uintptr_t)0, memory_order_acquire);
        if (val != 0) {
            atomic_fetch_add_explicit(&g_dbg_adopt_pop[class_idx], 1u, memory_order_relaxed);
            return (struct SuperSlab*)val;
        }
        // empty; continue
    }
    atomic_fetch_add_explicit(&g_dbg_adopt_empty[class_idx], 1u, memory_order_relaxed);
    return NULL;
}
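
// Note: tail advances even when a probed slot is empty, so pops on an empty
// queue rotate tail past head. That is harmless here: occupancy is decided
// solely by the per-slot exchange, never by comparing head against tail.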

void ss_target_requeue(int class_idx, struct SuperSlab* ss) {
    ss_target_enqueue(class_idx, ss);
}
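
#ifdef SSQ_TARGET_DEMO
// Minimal round-trip sketch. SSQ_TARGET_DEMO is a hypothetical flag, not part
// of the existing build; linking still needs the rest of hakmem_tiny for the
// g_dbg_adopt_* counters. The dummy pointer is never dereferenced.
#include <stdio.h>
int main(void) {
    ss_target_init();
    struct SuperSlab* ss = (struct SuperSlab*)0x1000; // dummy value, only compared
    ss_target_enqueue(0, ss);                         // class 0 assumed valid
    struct SuperSlab* got = ss_target_pop(0);
    printf("round-trip: %s\n", got == ss ? "ok" : "lost");
    return got == ss ? 0 : 1;
}
#endif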