hakmem/core/hakmem_tiny_superslab.h
Moe Charm (CI) c9053a43ac Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP)
## Phase 6-2.3: Fix 4T Larson crash (active counter bug) 
**Problem:** 4T Larson crashed with "free(): invalid pointer" and OOM errors
**Root cause:** core/hakmem_tiny_refill_p0.inc.h:103
  - P0 batch refill moved freelist blocks to TLS cache
  - Active counter NOT incremented → double-decrement on free
  - Counter underflows → SuperSlab appears full → OOM → crash
**Fix:** Added ss_active_add(tls->ss, from_freelist); (sketch below)
**Result:** 4T stable at 838K ops/s 
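
A minimal sketch of the accounting fix. `ss_active_add()` is assumed to mirror `ss_active_dec_one()` from hakmem_tiny_superslab.h; the refill loop and the TLS helpers are paraphrased placeholders, not the actual code in core/hakmem_tiny_refill_p0.inc.h:

```c
#include <stdatomic.h>
#include <stdint.h>

typedef struct TinyTLS TinyTLS;               /* opaque placeholder for the TLS cache */
void tls_cache_push(TinyTLS* tls, void* blk); /* placeholder for the TLS-cache push   */

/* Assumed counterpart of ss_active_dec_one() in hakmem_tiny_superslab.h. */
static inline void ss_active_add(SuperSlab* ss, uint32_t n) {
    atomic_fetch_add_explicit(&ss->total_active_blocks, n, memory_order_relaxed);
}

/* Paraphrased shape of the P0 batch refill. */
static uint32_t p0_refill_from_freelist(SuperSlab* ss, TinySlabMeta* meta,
                                        TinyTLS* tls, uint32_t want) {
    uint32_t from_freelist = 0;
    while (from_freelist < want && meta->freelist != NULL) {
        void* blk = meta->freelist;
        meta->freelist = *(void**)blk;  /* pop one block off the slab freelist */
        tls_cache_push(tls, blk);       /* move it into the thread-local cache */
        from_freelist++;
    }
    ss_active_add(ss, from_freelist);   /* Phase 6-2.3 fix: blocks sitting in the TLS
                                         * cache count as active, so the decrement on
                                         * free() no longer underflows the counter */
    return from_freelist;
}
```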

## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks 
**Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV
**Root cause #1:** core/box/hak_free_api.inc.h:92-95
  - "Guess loop" dereferenced unmapped memory when registry lookup failed
**Root cause #2:** core/box/hak_free_api.inc.h:115
  - Header magic check dereferenced unmapped memory
**Fix:**
  1. Removed dangerous guess loop (lines 92-95)
  2. Added hak_is_memory_readable() check before dereferencing the header
     (core/hakmem_internal.h:277-294 - uses the mincore() syscall; sketch below)
**Result:**
  - random_mixed (2KB): SEGV → 2.22M ops/s 
  - random_mixed (4KB): SEGV → 2.58M ops/s 
  - Larson 4T: no regression (838K ops/s) 
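
For reference, a minimal sketch of a mincore()-based readability probe like the one described above. The real helper lives in core/hakmem_internal.h:277-294; its exact name, signature, and page handling may differ. Every call is a kernel round trip, which is exactly the per-free cost flagged in Phase 6-2.5:

```c
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>

/* Returns 1 if the page(s) covering [p, p+len) are mapped, 0 otherwise.
 * mincore() fails with ENOMEM when any page in the range is unmapped. */
static int is_memory_readable_sketch(const void* p, size_t len) {
    long page = sysconf(_SC_PAGESIZE);
    uintptr_t start = (uintptr_t)p & ~((uintptr_t)page - 1);
    uintptr_t end   = ((uintptr_t)p + len + (uintptr_t)page - 1) & ~((uintptr_t)page - 1);
    unsigned char vec[8];                   /* header checks span only a page or two */
    if ((end - start) / (size_t)page > sizeof(vec)) return 0;  /* stay conservative  */
    return mincore((void*)start, end - start, vec) == 0;
}
```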

## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️
**Problem:** Severe performance gaps (19-26x slower than system malloc)
**Investigation:** Task agent identified the root cause
  - hak_is_memory_readable() syscall overhead (100-300 cycles per free)
  - ALL frees hit the unmapped_header_fallback path
  - SuperSlab lookup NEVER called
  - Why? g_use_superslab = 0 (disabled by diet mode)

**Root cause:** core/hakmem_tiny_init.inc:104-105
  - Diet mode (default ON) disables SuperSlab
  - SuperSlab defaults to 1 (hakmem_config.c:334)
  - BUT diet mode overrides it to 0 during init

**Fix:** Separate SuperSlab from diet mode (sketch below)
  - SuperSlab: Performance-critical (fast alloc/free)
  - Diet mode: Memory efficiency (magazine capacity limits only)
  - Both are independent features, should not interfere
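
A sketch of the intended init-time separation. Only `g_use_superslab` is named in the investigation above; the diet-mode flag and magazine-cap knob are illustrative stand-ins, and the actual change is in core/hakmem_tiny_init.inc:99-109:

```c
/* g_use_superslab is the only identifier taken from the report;
 * g_diet_mode and g_tiny_magazine_cap are hypothetical stand-ins. */
extern int g_use_superslab;        /* defaults to 1 (hakmem_config.c:334)        */
extern int g_diet_mode;            /* defaults to ON                             */
extern int g_tiny_magazine_cap;    /* hypothetical magazine-capacity knob        */

static void tiny_init_apply_diet_mode(void) {
    if (g_diet_mode) {
        g_tiny_magazine_cap = 32;  /* diet mode: shrink magazine capacity only   */
        /* g_use_superslab = 0;       <- removed: diet mode no longer disables
                                          the SuperSlab fast path                */
    }
}
```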

**Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix
  - SuperSlab lookup now works (confirmed via debug output)
  - But benchmark crashes (Exit 139) after ~20 lookups
  - Needs further investigation

**Files modified:**
- core/hakmem_tiny_init.inc:99-109 - Removed diet mode override
- PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap)

**Next steps:**
- Investigate new SEGV (likely SuperSlab free path bug)
- OR: Revert Phase 6-2.5 changes if blocking progress

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 20:31:01 +09:00


// hakmem_tiny_superslab.h - SuperSlab allocator for Tiny Pool (Phase 6.22)
// Purpose: mimalloc-inspired 2MB aligned slab allocation for fast pointer→slab lookup
// License: MIT
// Date: 2025-10-24
#ifndef HAKMEM_TINY_SUPERSLAB_H
#define HAKMEM_TINY_SUPERSLAB_H
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <time.h> // Phase 8.3: For clock_gettime() in hak_now_ns()
#include <signal.h>
#include <stdio.h> // For fprintf() debugging
#include <inttypes.h> // For PRIxPTR in debug prints
#include <pthread.h>
#include "tiny_debug_ring.h"
#include "tiny_remote.h"
// Debug instrumentation flags (defined in hakmem_tiny.c)
extern int g_debug_remote_guard;
extern int g_tiny_safe_free_strict;
uint32_t tiny_remote_drain_threshold(void);
// ============================================================================
// SuperSlab Configuration
// ============================================================================
// Phase 8.3: ACE - Variable SuperSlab size (1MB ↔ 2MB)
#define SUPERSLAB_SIZE_MAX (2 * 1024 * 1024) // 2MB max size
#define SUPERSLAB_SIZE_MIN (1 * 1024 * 1024) // 1MB min size
#define SUPERSLAB_LG_MAX 21 // lg(2MB)
#define SUPERSLAB_LG_MIN 20 // lg(1MB)
#define SUPERSLAB_LG_DEFAULT 21 // Default: 2MB (syscall reduction, ACE will adapt)
#define SLAB_SIZE (64 * 1024) // 64KB per slab (fixed)
// Legacy defines (kept for backward compatibility, use lg_size instead)
#define SUPERSLAB_SIZE SUPERSLAB_SIZE_MAX // Default to 2MB (syscall reduction)
#define SUPERSLAB_MASK (SUPERSLAB_SIZE - 1)
// IMPORTANT: Support variable-size SuperSlab (1MB=16 slabs, 2MB=32 slabs)
// Arrays below must be sized for the MAX to avoid OOB when lg_size=21 (2MB)
#define SLABS_PER_SUPERSLAB_MIN (SUPERSLAB_SIZE_MIN / SLAB_SIZE) // 16 for 1MB
#define SLABS_PER_SUPERSLAB_MAX (SUPERSLAB_SIZE_MAX / SLAB_SIZE) // 32 for 2MB
// Magic number for validation
#define SUPERSLAB_MAGIC 0x48414B4D454D5353ULL // "HAKMEMSS"
// ============================================================================
// SuperSlab Metadata Structure
// ============================================================================
// Per-slab metadata (16 bytes)
typedef struct TinySlabMeta {
void* freelist; // Freelist head (NULL = linear mode, Phase 6.24)
uint16_t used; // Blocks currently used
uint16_t capacity; // Total blocks in slab
uint32_t owner_tid; // Owner thread ID (for same-thread fast path)
// Phase 6.24: freelist == NULL → linear allocation mode (lazy init)
// Linear mode: allocate sequentially without building freelist
// Freelist mode: use freelist after first free() call
} TinySlabMeta;
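// Illustrative sketch (not allocator code): how the two modes above are used by an
// alloc path. Helper names other than tiny_slab_base_for() are hypothetical, and the
// capacity check for linear mode is omitted.
//
//   TinySlabMeta* m = &ss->slabs[idx];
//   void* blk;
//   if (m->freelist == NULL) {
//       // Linear mode: hand out the next sequential block; no list to walk.
//       blk = tiny_slab_base_for(ss, idx) + (size_t)m->used * block_size;
//   } else {
//       // Freelist mode: pop the head; the next pointer is embedded in the block.
//       blk = m->freelist;
//       m->freelist = *(void**)blk;
//   }
//   m->used++;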
// SuperSlab header (cache-line aligned, 64B)
typedef struct SuperSlab {
// Header fields (64B total)
uint64_t magic; // Magic number (SUPERSLAB_MAGIC, ASCII "HAKMEMSS")
uint8_t size_class; // Size class (0-7 for 8-64B)
uint8_t active_slabs; // Number of active slabs (0-32 for 2MB, 0-16 for 1MB)
uint8_t lg_size; // Phase 8.3: ACE - SuperSlab size (20=1MB, 21=2MB)
uint8_t _pad0; // Padding
uint32_t slab_bitmap; // 32-bit bitmap (1=active, 0=free)
_Atomic uint32_t freelist_mask; // Bit i=1 when slab i freelist is non-empty (opt-in)
// Phase 6-2.1: ChatGPT Pro P0 optimization - O(1) non-empty slab lookup
uint32_t nonempty_mask; // Bit i = 1 if slabs[i].freelist != NULL (O(1) lookup via ctz)
// Phase 7.6: Deallocation support
atomic_uint total_active_blocks; // Total blocks in use (all slabs combined)
atomic_uint refcount; // MT-safe refcount for empty detection/free (reserved for future use)
atomic_uint listed; // 0/1: published to partial adopt ring (publish gating)
uint32_t partial_epoch; // Last partial madvise epoch (optional)
uint8_t publish_hint; // Best slab index hint for adopt (0..31), 0xFF=none
uint8_t _pad1[3]; // Padding
// Per-slab metadata (16B each)
// Sized for MAX; use ss->lg_size to bound loops at runtime
TinySlabMeta slabs[SLABS_PER_SUPERSLAB_MAX];
// Remote free queues (per slab): MPSC stack heads + counts
_Atomic(uintptr_t) remote_heads[SLABS_PER_SUPERSLAB_MAX];
_Atomic(uint32_t) remote_counts[SLABS_PER_SUPERSLAB_MAX];
// Per-slab publish state: 0/1 = not listed/listed (for slab-granular republish hints)
atomic_uint slab_listed[SLABS_PER_SUPERSLAB_MAX];
// Partial adopt overflow linkage (single-linked, best-effort)
struct SuperSlab* partial_next;
// Padding to fill remaining space (2MB - 64B - 512B)
// Note: Actual slab data starts at offset SLAB_SIZE (64KB)
} __attribute__((aligned(64))) SuperSlab;
static inline int ss_slabs_capacity(const SuperSlab* ss);
static inline int tiny_refill_failfast_level(void) {
static int g_failfast_level = -1;
if (__builtin_expect(g_failfast_level == -1, 0)) {
const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
if (env && *env) {
g_failfast_level = atoi(env);
} else {
g_failfast_level = 1;
}
}
return g_failfast_level;
}
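// Usage note (illustrative): levels >= 2 enable the trace/abort helpers below, e.g.
//   HAKMEM_TINY_REFILL_FAILFAST=2 ./bench_random_mixed_hakmem
// (the binary name is an example; only the env var name comes from this code).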
static inline void tiny_failfast_log(const char* stage,
int class_idx,
SuperSlab* ss,
TinySlabMeta* meta,
const void* node,
const void* next) {
if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
uintptr_t base = ss ? (uintptr_t)ss : 0;
size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
uintptr_t limit = base + size;
fprintf(stderr,
"[TRC_FREELIST_LOG] stage=%s cls=%d node=%p next=%p head=%p base=%p limit=%p\n",
stage ? stage : "(null)",
class_idx,
node,
next,
meta ? meta->freelist : NULL,
(void*)base,
(void*)limit);
fflush(stderr);
}
static inline void tiny_failfast_abort_ptr(const char* stage,
SuperSlab* ss,
int slab_idx,
const void* ptr,
const char* reason) {
if (__builtin_expect(tiny_refill_failfast_level() < 2, 1)) return;
uintptr_t base = ss ? (uintptr_t)ss : 0;
size_t size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
uintptr_t limit = base + size;
size_t cap = 0;
uint32_t used = 0;
if (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
cap = ss->slabs[slab_idx].capacity;
used = ss->slabs[slab_idx].used;
}
size_t offset = 0;
if (ptr && base && ptr >= (void*)base) {
offset = (size_t)((uintptr_t)ptr - base);
}
fprintf(stderr,
"[TRC_FAILFAST_PTR] stage=%s cls=%d slab_idx=%d ptr=%p reason=%s base=%p limit=%p cap=%zu used=%u offset=%zu\n",
stage ? stage : "(null)",
ss ? (int)ss->size_class : -1,
slab_idx,
ptr,
reason ? reason : "(null)",
(void*)base,
(void*)limit,
cap,
used,
offset);
fflush(stderr);
abort();
}
// Compile-time assertions
_Static_assert(sizeof(TinySlabMeta) == 16, "TinySlabMeta must be 16 bytes");
// Phase 8.3: Variable-size SuperSlab assertions (1MB=16 slabs, 2MB=32 slabs)
_Static_assert((SUPERSLAB_SIZE_MIN / SLAB_SIZE) == 16, "1MB SuperSlab must have 16 slabs");
_Static_assert((SUPERSLAB_SIZE_MAX / SLAB_SIZE) == 32, "2MB SuperSlab must have 32 slabs");
_Static_assert((SUPERSLAB_SIZE & SUPERSLAB_MASK) == 0, "SUPERSLAB_SIZE must be power of 2");
// ============================================================================
// Fast Inline Functions (mimalloc-style)
// ============================================================================
// DEPRECATED (Phase 1): This function causes false positives! Use hak_super_lookup() instead.
// Problem: L2.5 allocations at 1MB boundary are misidentified as SuperSlabs
// Solution: Use registry-based hak_super_lookup() from hakmem_super_registry.h
#if 0 // DISABLED - unsafe function removed in Phase 1
static inline SuperSlab* ptr_to_superslab(void* p) {
return (SuperSlab*)((uintptr_t)p & ~(uintptr_t)SUPERSLAB_MASK);
}
#endif
// Get slab index within SuperSlab (shift operation, 0-31)
// Deprecated: relies on the fixed SUPERSLAB_MASK, so it breaks for variable-size (1MB/2MB) SuperSlabs. Use slab_index_for() instead.
static inline int ptr_to_slab_index(void* p) {
uintptr_t offset = (uintptr_t)p & SUPERSLAB_MASK;
return (int)(offset >> 16); // Divide by 64KB (2^16)
}
// Runtime-safe slab count for a given SuperSlab
static inline int ss_slabs_capacity(const SuperSlab* ss) {
size_t ss_size = (size_t)1 << ss->lg_size;
return (int)(ss_size / SLAB_SIZE); // 16 or 32
}
// Safe slab index computation using SuperSlab base (supports 1MB/2MB)
static inline int slab_index_for(const SuperSlab* ss, const void* p) {
uintptr_t base = (uintptr_t)ss;
uintptr_t addr = (uintptr_t)p;
uintptr_t off = addr - base;
int idx = (int)(off >> 16); // 64KB
int cap = ss_slabs_capacity(ss);
return (idx >= 0 && idx < cap) ? idx : -1;
}
// DEPRECATED (Phase 1): Uses unsafe ptr_to_superslab() internally
// Use hak_super_lookup() + ptr_to_slab_index() instead
#if 0 // DISABLED - uses unsafe ptr_to_superslab()
static inline TinySlabMeta* ptr_to_slab_meta(void* p) {
SuperSlab* ss = ptr_to_superslab(p);
int idx = ptr_to_slab_index(p);
return &ss->slabs[idx];
}
#endif
// Get slab data start address
static inline void* slab_data_start(SuperSlab* ss, int slab_idx) {
return (char*)ss + (slab_idx * SLAB_SIZE);
}
static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx) {
uint8_t* base = (uint8_t*)slab_data_start(ss, slab_idx);
if (slab_idx == 0) base += 1024;
return base;
}
// DEPRECATED (Phase 1): Uses unsafe ptr_to_superslab() internally (false positives!)
// Use: SuperSlab* ss = hak_super_lookup(p); if (ss && ss->magic == SUPERSLAB_MAGIC) { ... }
#if 0 // DISABLED - uses unsafe ptr_to_superslab(), causes crashes on L2.5 boundaries
static inline int is_superslab_pointer(void* p) {
SuperSlab* ss = ptr_to_superslab(p);
return ss->magic == SUPERSLAB_MAGIC;
}
#endif
// Refcount helpers (for future MT-safe reclamation of empty SuperSlabs)
static inline void superslab_ref_inc(SuperSlab* ss) {
atomic_fetch_add_explicit(&ss->refcount, 1u, memory_order_relaxed);
}
static inline unsigned superslab_ref_dec(SuperSlab* ss) {
return atomic_fetch_sub_explicit(&ss->refcount, 1u, memory_order_acq_rel) - 1u;
}
static inline unsigned superslab_ref_get(SuperSlab* ss) {
return atomic_load_explicit(&ss->refcount, memory_order_acquire);
}
// Debug counter extern declaration
extern _Atomic uint64_t g_ss_active_dec_calls;
// Active block counter helpers (saturating decrement for free operations)
static inline void ss_active_dec_one(SuperSlab* ss) {
atomic_fetch_add_explicit(&g_ss_active_dec_calls, 1, memory_order_relaxed);
uint32_t old = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
while (old != 0) {
if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks, &old, old - 1u,
memory_order_relaxed, memory_order_relaxed)) {
break;
}
// CAS failed: old is reloaded by CAS intrinsic
}
}
// ============================================================================
// SuperSlab Management Functions
// ============================================================================
// Allocate a new SuperSlab (2MB aligned)
SuperSlab* superslab_allocate(uint8_t size_class);
// Free a SuperSlab
void superslab_free(SuperSlab* ss);
// Initialize a slab within SuperSlab
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid);
// Mark a slab as active
void superslab_activate_slab(SuperSlab* ss, int slab_idx);
// Mark a slab as inactive
void superslab_deactivate_slab(SuperSlab* ss, int slab_idx);
// Find first free slab index (-1 if none)
int superslab_find_free_slab(SuperSlab* ss);
// Statistics
void superslab_print_stats(SuperSlab* ss);
// Phase 8.3: ACE statistics
void superslab_ace_print_stats(void);
// ============================================================================
// Phase 8.3: ACE (Adaptive Cache Engine) - SuperSlab adaptive sizing
// ============================================================================
#define TINY_NUM_CLASSES_SS 8 // Same as TINY_NUM_CLASSES (avoid circular include)
// Per-class ACE state (lightweight observation + decision)
typedef struct {
uint8_t current_lg; // Current lg_size in use (20=1MB, 21=2MB)
uint8_t target_lg; // Target lg_size for next allocation (20/21)
uint16_t hot_score; // Hotness score (0-1000) for visualization
uint32_t alloc_count; // Allocs since last tick
uint32_t refill_count; // Refills since last tick
uint32_t spill_count; // Spills since last tick
uint32_t live_blocks; // Estimated live blocks (alloc-free EMA)
uint64_t last_tick_ns; // Last tick timestamp (ns)
} SuperSlabACEState;
// Global ACE state (one per tiny class)
extern SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS];
// ACE tick function (called periodically, ~150ms interval)
// Observes metrics and decides promotion (1MB→2MB) or demotion (2MB→1MB)
void hak_tiny_superslab_ace_tick(int class_idx, uint64_t now_ns);
// Phase 8.4: ACE Observer (called from Learner thread - zero hot-path overhead)
void hak_tiny_superslab_ace_observe_all(void);
// Low-cost timestamp (nanoseconds, monotonic) - inline for hot path
static inline uint64_t hak_now_ns(void) {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
// Get next lg_size for new SuperSlab allocation (uses target_lg)
static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) {
uint8_t lg = g_ss_ace[class_idx].target_lg ? g_ss_ace[class_idx].target_lg
: g_ss_ace[class_idx].current_lg;
return lg ? lg : SUPERSLAB_LG_DEFAULT; // Use default if uninitialized
}
// ----------------------------------------------------------------------------
// Partial SuperSlab adopt/publish (per-class single-slot)
// ----------------------------------------------------------------------------
// Publish a SuperSlab with available freelist for other threads to adopt.
void ss_partial_publish(int class_idx, SuperSlab* ss);
// Adopt published SuperSlab for the class (returns NULL if none).
SuperSlab* ss_partial_adopt(int class_idx);
// ----------------------------------------------------------------------------
// SuperSlab adopt gate (publish/adopt wiring helper)
// ----------------------------------------------------------------------------
// Environment-aware switch that keeps free/alloc sides in sync. Default:
// - Disabled until cross-thread free is observed.
// - `HAKMEM_TINY_SS_ADOPT=1` forces ON, `=0` forces OFF.
int tiny_adopt_gate_should_publish(void);
int tiny_adopt_gate_should_adopt(void);
void tiny_adopt_gate_on_remote_seen(int class_idx);
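// Example (illustrative): force publish/adopt ON for a multi-threaded run:
//   HAKMEM_TINY_SS_ADOPT=1 ./bench_mid_large_mt_hakmem
// or force it OFF to A/B the gate:
//   HAKMEM_TINY_SS_ADOPT=0 ./bench_mid_large_mt_hakmem
// (binary names are examples; only the env var is defined by this gate).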
// Remote free push (MPSC stack) - returns 1 if transitioned from empty
extern _Atomic int g_ss_remote_seen; // set to 1 on first remote free observed
extern int g_debug_remote_guard;
static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
extern _Atomic uint64_t g_ss_remote_push_calls;
atomic_fetch_add_explicit(&g_ss_remote_push_calls, 1, memory_order_relaxed);
static _Atomic int g_remote_push_count = 0;
int count = atomic_fetch_add_explicit(&g_remote_push_count, 1, memory_order_relaxed);
if (count < 5) {
fprintf(stderr, "[DEBUG ss_remote_push] Call #%d ss=%p slab_idx=%d\n", count+1, (void*)ss, slab_idx);
fflush(stderr);
}
if (g_debug_remote_guard && count < 5) {
fprintf(stderr, "[REMOTE_PUSH] ss=%p slab_idx=%d ptr=%p count=%d\n",
(void*)ss, slab_idx, ptr, count);
}
// Unconditional sanity checks (Fail-Fast without crashing)
{
uintptr_t ptr_val = (uintptr_t)ptr;
uintptr_t base = (uintptr_t)ss;
size_t ss_size = (size_t)1ULL << ss->lg_size;
int cap = ss_slabs_capacity(ss);
int in_range = (ptr_val >= base) && (ptr_val < base + ss_size);
int aligned = ((ptr_val & (sizeof(void*) - 1)) == 0);
if (!in_range || slab_idx < 0 || slab_idx >= cap || !aligned) {
uintptr_t code = 0xB001u;
if (!in_range) code |= 0x01u;
if (!aligned) code |= 0x02u;
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
(uint16_t)ss->size_class,
ptr,
((uintptr_t)slab_idx << 32) | code);
return 0;
}
}
// A/B: global disable for remote MPSC — fallback to legacy freelist push
do {
static int g_disable_remote_glob = -1;
if (__builtin_expect(g_disable_remote_glob == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
g_disable_remote_glob = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_disable_remote_glob, 0)) {
TinySlabMeta* meta = &ss->slabs[slab_idx];
void* prev = meta->freelist;
*(void**)ptr = prev;
meta->freelist = ptr;
// Reflect accounting (callers also decrement used; keep idempotent here)
ss_active_dec_one(ss);
if (prev == NULL) {
// first item: mark this slab visible to adopters
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
return 1;
}
return 0;
}
} while (0);
_Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
uintptr_t old;
do {
old = atomic_load_explicit(head, memory_order_acquire);
if (!g_remote_side_enable) {
*(void**)ptr = (void*)old; // legacy embedding
}
} while (!atomic_compare_exchange_weak_explicit(head, &old, (uintptr_t)ptr,
memory_order_release, memory_order_relaxed));
tiny_remote_side_set(ss, slab_idx, ptr, old);
tiny_remote_track_on_remote_push(ss, slab_idx, ptr, "remote_push", 0);
if (__builtin_expect(g_debug_remote_guard, 0)) {
// One-shot verify just-written next/ptr alignment and range
uintptr_t base = (uintptr_t)ss;
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t pv = (uintptr_t)ptr;
int ptr_in = (pv >= base && pv < base + ss_size);
int ptr_al = ((pv & (sizeof(void*) - 1)) == 0);
int old_in = (old == 0) || ((old >= base) && (old < base + ss_size));
int old_al = (old == 0) || ((old & (sizeof(void*) - 1)) == 0);
if (!ptr_in || !ptr_al || !old_in || !old_al) {
uintptr_t flags = ((uintptr_t)ptr_al << 3) | ((uintptr_t)ptr_in << 2) | ((uintptr_t)old_al << 1) | (uintptr_t)old_in;
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
(uint16_t)ss->size_class,
ptr,
0xB100u | (flags & 0xFu));
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
}
fprintf(stderr, "[REMOTE_PUSH] cls=%u slab=%d ptr=%p old=%p transitioned=%d\n",
ss->size_class, slab_idx, ptr, (void*)old, old == 0);
// Pack: [slab_idx<<32 | bit0:old==0 | bit1:old_al | bit2:ptr_al]
uintptr_t aux = ((uintptr_t)slab_idx << 32) | ((old == 0) ? 1u : 0u) | ((old_al ? 1u : 0u) << 1) | ((ptr_al ? 1u : 0u) << 2);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH,
(uint16_t)ss->size_class,
ptr,
aux);
} else {
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH,
(uint16_t)ss->size_class,
ptr,
((uintptr_t)slab_idx << 32) | (uint32_t)(old == 0));
}
atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1u, memory_order_relaxed);
ss_active_dec_one(ss); // Fix: Decrement active blocks on cross-thread free
atomic_store_explicit(&g_ss_remote_seen, 1, memory_order_relaxed);
int transitioned = (old == 0);
// (optional hint to Ready ring moved to mailbox/aggregator to avoid header coupling)
if (transitioned) {
// First remote observed for this slab: mark slab_listed and notify publisher paths
unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
(void)prev; // best-effort
extern void tiny_publish_notify(int class_idx, struct SuperSlab* ss, int slab_idx);
tiny_publish_notify((int)ss->size_class, ss, slab_idx);
} else {
// Optional: best-effort notify if already non-empty but not listed
extern int g_remote_force_notify;
if (__builtin_expect(g_remote_force_notify, 0)) {
unsigned listed = atomic_load_explicit(&ss->slab_listed[slab_idx], memory_order_acquire);
if (listed == 0) {
unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
(void)prev;
extern void tiny_publish_notify(int class_idx, struct SuperSlab* ss, int slab_idx);
tiny_publish_notify((int)ss->size_class, ss, slab_idx);
}
}
}
return transitioned;
}
// Drain remote queue into freelist (no change to used/active; already adjusted at free)
// INTERNAL UNSAFE VERSION - Only called by slab_handle.h after ownership verified!
// DO NOT call directly - use slab_drain_remote() via SlabHandle instead.
static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta) {
do { // one-shot debug print when enabled
static int en = -1; static _Atomic int printed;
if (__builtin_expect(en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
en = (e && *e && *e != '0') ? 1 : 0;
}
if (en) {
int exp = 0; if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
fprintf(stderr, "[DRAIN_OPT] chain splice active (cls=%u slab=%d)\n", ss ? ss->size_class : 0u, slab_idx);
}
}
} while (0);
_Atomic(uintptr_t)* head = &ss->remote_heads[slab_idx];
uintptr_t p = atomic_exchange_explicit(head, (uintptr_t)NULL, memory_order_acq_rel);
if (p == 0) return;
uint32_t drained = 0;
uintptr_t base = (uintptr_t)ss;
size_t ss_size = (size_t)1ULL << ss->lg_size;
uint32_t drain_tid = (uint32_t)(uintptr_t)pthread_self();
// Build a local chain then splice once into freelist to reduce writes
void* chain_head = NULL;
void* chain_tail = NULL;
while (p != 0) {
// Guard: range/alignment before deref
if (__builtin_expect(g_debug_remote_guard, 0)) {
if (p < base || p >= base + ss_size) {
uintptr_t aux = tiny_remote_pack_diag(0xA210u, base, ss_size, p);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
if ((p & (uintptr_t)(sizeof(void*) - 1)) != 0) {
uintptr_t aux = tiny_remote_pack_diag(0xA211u, base, ss_size, p);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
}
void* node = (void*)p;
uintptr_t next = tiny_remote_side_get(ss, slab_idx, node);
tiny_remote_watch_note("drain_pull", ss, slab_idx, node, 0xA238u, drain_tid, 0);
if (__builtin_expect(g_remote_side_enable, 0)) {
if (!tiny_remote_sentinel_ok(node)) {
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, (uintptr_t)node);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
tiny_remote_report_corruption("drain", node, observed);
TinySlabMeta* meta = &ss->slabs[slab_idx];
fprintf(stderr,
"[REMOTE_SENTINEL-DRAIN] cls=%u slab=%d node=%p drained=%u observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p\n",
ss->size_class,
slab_idx,
node,
drained,
observed,
meta->owner_tid,
(unsigned)meta->used,
meta->freelist);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
}
tiny_remote_side_clear(ss, slab_idx, node);
}
tiny_remote_watch_note("drain_link", ss, slab_idx, node, 0xA239u, drain_tid, 0);
tiny_remote_track_on_remote_drain(ss, slab_idx, node, "remote_drain", drain_tid);
if (__builtin_expect(g_debug_remote_guard && drained < 3, 0)) {
// First few nodes: record low info for triage
uintptr_t aux = ((uintptr_t)slab_idx << 32) | (uintptr_t)(drained & 0xFFFF);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, node, aux);
}
// Link into local chain (avoid touching meta->freelist per node)
if (chain_head == NULL) {
chain_head = node;
chain_tail = node;
*(void**)node = NULL;
} else {
*(void**)node = chain_head;
chain_head = node;
}
p = next;
drained++;
}
// Splice the drained chain into freelist (single meta write)
if (chain_head != NULL) {
if (chain_tail != NULL) {
*(void**)chain_tail = meta->freelist;
}
void* prev = meta->freelist;
meta->freelist = chain_head;
tiny_failfast_log("remote_drain", ss->size_class, ss, meta, chain_head, prev);
// Optional: set freelist bit when transitioning from empty
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0)) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
}
// Reset remote count after full drain
atomic_store_explicit(&ss->remote_counts[slab_idx], 0u, memory_order_relaxed);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN,
(uint16_t)ss->size_class,
ss,
((uintptr_t)slab_idx << 32) | drained);
}
// Legacy wrapper for compatibility (UNSAFE - ownership NOT checked!)
// DEPRECATED: Use slab_drain_remote() via SlabHandle instead
static inline void ss_remote_drain_to_freelist(SuperSlab* ss, int slab_idx) {
TinySlabMeta* meta = &ss->slabs[slab_idx];
_ss_remote_drain_to_freelist_unsafe(ss, slab_idx, meta);
}
// Try to acquire exclusive ownership of slab (REQUIRED before draining remote queue!)
// Returns 1 on success (now own slab), 0 on failure (another thread owns it)
// CRITICAL: Only succeeds if slab is unowned (owner_tid==0) or already owned by us.
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t self_tid) {
uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
if (cur == self_tid) return 1; // Already owner - success
if (cur != 0) return 0; // Another thread owns it - FAIL immediately
// Slab is unowned (cur==0) - try to claim it
uint32_t expected = 0;
return __atomic_compare_exchange_n(&m->owner_tid, &expected, self_tid, false,
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
// Drain remote queues where activity was observed (lightweight sweep).
// CRITICAL: Must acquire ownership before draining each slab!
static inline void ss_remote_drain_light(SuperSlab* ss) {
if (!ss) return;
uint32_t threshold = tiny_remote_drain_threshold();
uint32_t self_tid = (uint32_t)(uintptr_t)pthread_self();
int cap = ss_slabs_capacity(ss);
for (int s = 0; s < cap; s++) {
uint32_t rc = atomic_load_explicit(&ss->remote_counts[s], memory_order_relaxed);
if (rc <= threshold) continue;
if (atomic_load_explicit(&ss->remote_heads[s], memory_order_acquire) != 0) {
// BUGFIX: Must acquire ownership BEFORE draining!
// Without this, we can drain a slab owned by another thread → freelist corruption
TinySlabMeta* m = &ss->slabs[s];
if (!ss_owner_try_acquire(m, self_tid)) {
continue; // Failed to acquire - skip this slab
}
ss_remote_drain_to_freelist(ss, s);
}
}
}
// Best-effort CAS to transfer slab ownership (DEPRECATED - use ss_owner_try_acquire!)
static inline void ss_owner_cas(TinySlabMeta* m, uint32_t self_tid) {
(void)ss_owner_try_acquire(m, self_tid); // Ignore result (unsafe)
}
#endif // HAKMEM_TINY_SUPERSLAB_H