hakmem/core/tiny_remote.c

#include <stdlib.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <signal.h>
#include <pthread.h>
#include <inttypes.h>
#ifdef __GLIBC__
#include <execinfo.h>
#endif
#include <string.h>
#include "tiny_remote.h"
#include "hakmem_tiny_superslab.h"
#include "tiny_debug_ring.h"
#define REM_SIDE_LOG2 20
#define REM_SIDE_SIZE (1u<<REM_SIDE_LOG2)
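// Side table: open-addressed hash map (linear probing, power-of-two capacity)
// from a remote-queued node to its successor. key == 0 marks an empty slot, so
// a NULL node is never inserted. 2^20 entries x 16 bytes = 16 MiB of BSS on LP64.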
typedef struct {
_Atomic(uintptr_t) key; // node pointer
_Atomic(uintptr_t) val; // next pointer
} rem_side_entry;
static rem_side_entry g_rem_side[REM_SIDE_SIZE];
int g_remote_side_enable = 0;
extern int g_debug_remote_guard;
static _Atomic int g_remote_scribble_once = 0;
static _Atomic uintptr_t g_remote_watch_ptr = 0;
static _Atomic uint32_t g_remote_watch_tid = 0;
static inline uint32_t hmix(uintptr_t v);
static inline uint32_t tiny_remote_stage_hash(const char* stage);
static void tiny_remote_dump_backtrace(void);
#if !defined(HAKMEM_BUILD_RELEASE)
#define REM_TRACK_TABLE_LOG2 20
#define REM_TRACK_TABLE_SIZE (1u << REM_TRACK_TABLE_LOG2)
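// Track table (debug builds only): same open-addressing scheme as the side
// table; each entry records a node's lifecycle state plus the last stage
// string and the thread id that performed the transition.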
typedef struct {
_Atomic(uintptr_t) key;
_Atomic(uint32_t) state;
_Atomic(uintptr_t) stage;
_Atomic(uint32_t) tid;
} rem_track_entry;
static rem_track_entry g_rem_track[REM_TRACK_TABLE_SIZE];
static const char* tiny_remote_track_state_name(uint32_t state) {
switch (state) {
case TINY_REMOTE_TRACK_NONE: return "none";
case TINY_REMOTE_TRACK_ALLOC: return "alloc";
case TINY_REMOTE_TRACK_REMOTE: return "remote";
case TINY_REMOTE_TRACK_FREELIST: return "freelist";
default: return "unknown";
}
}
static inline uint32_t tiny_remote_track_tid_or_self(uint32_t tid_hint) {
if (tid_hint != 0) return tid_hint;
return (uint32_t)(uintptr_t)pthread_self();
}
static void tiny_remote_track_log_mismatch(const char* stage,
SuperSlab* ss,
int slab_idx,
void* node,
uint32_t prev_state,
uint32_t new_state,
uint32_t prev_tid,
uint32_t tid,
const char* prev_stage) {
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
uint16_t cls = ss ? (uint16_t)ss->size_class : 0;
uintptr_t base = ss ? (uintptr_t)ss : 0;
size_t ss_size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
fprintf(stderr,
"[REMOTE_TRACK_MISMATCH] stage=%s cls=%u slab=%d node=%p prev=%s tid=0x%08x last_stage=%s -> new=%s tid=0x%08x\n",
stage ? stage : "(null)",
cls,
slab_idx,
node,
tiny_remote_track_state_name(prev_state),
prev_tid,
prev_stage ? prev_stage : "(unknown)",
tiny_remote_track_state_name(new_state),
tid);
tiny_remote_watch_mark(node, stage, tid);
uint32_t code = 0xA240u | (new_state & 0x0Fu);
uintptr_t aux = tiny_remote_pack_diag(code, base, ss_size, (uintptr_t)node);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, cls, node, aux);
if (stage && prev_state == TINY_REMOTE_TRACK_REMOTE && new_state == TINY_REMOTE_TRACK_ALLOC && strcmp(stage, "alloc_ret") == 0) {
tiny_remote_dump_backtrace();
}
raise(SIGUSR2); // g_debug_remote_guard was already checked on entry
}
static void tiny_remote_track_transition(SuperSlab* ss,
int slab_idx,
void* node,
uint32_t expect_mask,
uint32_t new_state,
const char* stage,
uint32_t tid_hint) {
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
if (!node) return;
uint32_t tid = tiny_remote_track_tid_or_self(tid_hint);
const char* stage_ptr = stage ? stage : "unknown";
uintptr_t k = (uintptr_t)node;
uint32_t i = hmix(k) & (REM_TRACK_TABLE_SIZE - 1);
for (uint32_t probe = 0; probe < REM_TRACK_TABLE_SIZE; probe++, i = (i + 1) & (REM_TRACK_TABLE_SIZE - 1)) {
uintptr_t key = atomic_load_explicit(&g_rem_track[i].key, memory_order_acquire);
if (key == k) {
uint32_t prev_state = atomic_load_explicit(&g_rem_track[i].state, memory_order_relaxed);
uint32_t prev_tid = atomic_load_explicit(&g_rem_track[i].tid, memory_order_relaxed);
const char* prev_stage = (const char*)atomic_load_explicit(&g_rem_track[i].stage, memory_order_relaxed);
if ((expect_mask & (1u << prev_state)) == 0u) {
tiny_remote_track_log_mismatch(stage_ptr, ss, slab_idx, node, prev_state, new_state, prev_tid, tid, prev_stage);
}
atomic_store_explicit(&g_rem_track[i].state, new_state, memory_order_relaxed);
atomic_store_explicit(&g_rem_track[i].stage, (uintptr_t)stage_ptr, memory_order_relaxed);
atomic_store_explicit(&g_rem_track[i].tid, tid, memory_order_relaxed);
return;
}
if (key == 0) {
uintptr_t expect = 0;
if (!atomic_compare_exchange_strong_explicit(&g_rem_track[i].key,
&expect,
k,
memory_order_acq_rel,
memory_order_relaxed)) {
// Lost the race for this empty slot: re-examine it, since the winner
// may have installed exactly our key.
probe--;
continue;
}
atomic_store_explicit(&g_rem_track[i].state, new_state, memory_order_relaxed);
atomic_store_explicit(&g_rem_track[i].stage, (uintptr_t)stage_ptr, memory_order_relaxed);
atomic_store_explicit(&g_rem_track[i].tid, tid, memory_order_relaxed);
return;
}
}
fprintf(stderr,
"[REMOTE_TRACK_OVERFLOW] stage=%s node=%p tid=0x%08x\n",
stage ? stage : "(null)",
node,
tid);
}
#define REM_TRACK_MASK(state) (1u << (state))
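// Legal lifecycle transitions enforced by the wrappers below:
//   {none, freelist, alloc} --on_alloc-->        alloc
//   {alloc, remote}         --on_remote_push-->  remote
//   remote                  --on_remote_drain--> freelist
//   alloc                   --on_local_free-->   freelist
// Anything else is reported via tiny_remote_track_log_mismatch().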
void tiny_remote_track_on_alloc(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
tiny_remote_track_transition(ss,
slab_idx,
node,
REM_TRACK_MASK(TINY_REMOTE_TRACK_NONE) |
REM_TRACK_MASK(TINY_REMOTE_TRACK_FREELIST) |
REM_TRACK_MASK(TINY_REMOTE_TRACK_ALLOC),
TINY_REMOTE_TRACK_ALLOC,
stage ? stage : "alloc",
tid);
}
void tiny_remote_track_on_remote_push(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
tiny_remote_track_transition(ss,
slab_idx,
node,
REM_TRACK_MASK(TINY_REMOTE_TRACK_ALLOC) | REM_TRACK_MASK(TINY_REMOTE_TRACK_REMOTE),
TINY_REMOTE_TRACK_REMOTE,
stage ? stage : "remote_push",
tid);
}
void tiny_remote_track_on_remote_drain(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
tiny_remote_track_transition(ss,
slab_idx,
node,
REM_TRACK_MASK(TINY_REMOTE_TRACK_REMOTE),
TINY_REMOTE_TRACK_FREELIST,
stage ? stage : "remote_drain",
tid);
}
void tiny_remote_track_on_local_free(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
tiny_remote_track_transition(ss,
slab_idx,
node,
REM_TRACK_MASK(TINY_REMOTE_TRACK_ALLOC),
TINY_REMOTE_TRACK_FREELIST,
stage ? stage : "local_free",
tid);
}
static void tiny_remote_track_expect_state(SuperSlab* ss,
int slab_idx,
void* node,
uint32_t expect_mask,
const char* stage,
uint32_t tid_hint) {
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
if (!node) return;
uint32_t tid = tiny_remote_track_tid_or_self(tid_hint);
uintptr_t k = (uintptr_t)node;
uint32_t i = hmix(k) & (REM_TRACK_TABLE_SIZE - 1);
for (uint32_t probe = 0; probe < REM_TRACK_TABLE_SIZE; probe++, i = (i + 1) & (REM_TRACK_TABLE_SIZE - 1)) {
uintptr_t key = atomic_load_explicit(&g_rem_track[i].key, memory_order_acquire);
if (key == k) {
uint32_t prev_state = atomic_load_explicit(&g_rem_track[i].state, memory_order_relaxed);
uint32_t prev_tid = atomic_load_explicit(&g_rem_track[i].tid, memory_order_relaxed);
const char* prev_stage = (const char*)atomic_load_explicit(&g_rem_track[i].stage, memory_order_relaxed);
if ((expect_mask & (1u << prev_state)) == 0u) {
// An expect check does not transition state, so prev==new in the report;
// the logger itself raises SIGUSR2 while the guard is enabled.
tiny_remote_track_log_mismatch(stage, ss, slab_idx, node, prev_state, prev_state, prev_tid, tid, prev_stage);
}
return;
}
if (key == 0) {
if ((expect_mask & REM_TRACK_MASK(TINY_REMOTE_TRACK_NONE)) == 0u) {
// Untracked node where "untracked" was not an expected state; the logger
// raises SIGUSR2 itself while the guard is enabled.
tiny_remote_track_log_mismatch(stage, ss, slab_idx, node, TINY_REMOTE_TRACK_NONE, TINY_REMOTE_TRACK_NONE, 0, tid, "(untracked)");
}
return;
}
}
fprintf(stderr,
"[REMOTE_TRACK_OVERFLOW] expect stage=%s node=%p tid=0x%08x\n",
stage ? stage : "(null)",
node,
tid);
raise(SIGUSR2);
}
void tiny_remote_track_expect_alloc(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
tiny_remote_track_expect_state(ss,
slab_idx,
node,
REM_TRACK_MASK(TINY_REMOTE_TRACK_ALLOC),
stage ? stage : "expect_alloc",
tid);
}
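// Heuristic check that a node handed out by the allocator is not still sitting
// in a remote queue: consult the side table when enabled, otherwise look for
// the remote sentinel or the 0x6261 scribble signature in the node's first word.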
void tiny_remote_assert_not_remote(SuperSlab* ss, int slab_idx, void* node, const char* stage, uint32_t tid) {
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
if (!ss || !node) return;
int remote_hit = 0;
if (__builtin_expect(g_remote_side_enable, 0)) {
remote_hit = tiny_remote_side_contains(ss, slab_idx, node);
}
if (!remote_hit) {
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
if ((observed == TINY_REMOTE_SENTINEL) || ((observed & 0xFFFFu) == 0x6261u)) {
remote_hit = 1;
}
}
if (!remote_hit) return;
tiny_remote_watch_mark(node, stage, tid);
tiny_remote_watch_note(stage ? stage : "alloc_remote_detected",
ss,
slab_idx,
node,
0xA245u,
tid,
1);
}
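// Decide whether a free may go straight onto the local freelist. Always allowed
// when remote frees are disabled (HAKMEM_TINY_DISABLE_REMOTE) or the guard is
// off; otherwise the calling thread must match the slab's recorded owner_tid.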
int tiny_remote_guard_allow_local_push(SuperSlab* ss,
int slab_idx,
TinySlabMeta* meta,
void* node,
const char* stage,
uint32_t self_tid) {
// A/B: when remote is disabled, always allow local push to freelist
do {
static int g_disable_remote_guard = -1;
if (__builtin_expect(g_disable_remote_guard == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
g_disable_remote_guard = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_disable_remote_guard, 0)) return 1;
} while (0);
if (!__builtin_expect(g_debug_remote_guard, 0)) return 1;
uint32_t owner = __atomic_load_n(&meta->owner_tid, __ATOMIC_RELAXED);
if (owner == self_tid && owner != 0) {
return 1;
}
tiny_remote_watch_mark(node, stage, self_tid);
tiny_remote_watch_note(stage ? stage : "local_push_owner_mismatch",
ss,
slab_idx,
node,
0xA246u | ((uintptr_t)owner << 16),
self_tid,
1);
return 0;
}
#endif // !HAKMEM_BUILD_RELEASE
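// Pointer hash: the 64-bit finalizer (fmix64) from MurmurHash3, which mixes
// all input bits into the low bits used for table indexing.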
static inline uint32_t hmix(uintptr_t v) {
uint64_t x = (uint64_t)v;
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
x ^= x >> 33;
return (uint32_t)(x ^ (x >> 32));
}
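// Stage-name hash: 32-bit FNV-1a truncated to 16 bits, packed into diagnostics.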
static inline uint32_t tiny_remote_stage_hash(const char* stage) {
if (!stage) return 0;
uint32_t h = 2166136261u;
const unsigned char* p = (const unsigned char*)stage;
while (*p) {
h ^= *p++;
h *= 16777619u;
}
return h & 0xFFFFu;
}
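// Watch pointer: one global slot, claimed by the first tiny_remote_watch_mark()
// (CAS from 0) and released by tiny_remote_watch_clear(). While a node is
// watched, every event that touches it is logged and pushed into the debug ring.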
int tiny_remote_watch_is(void* node) {
if (!node) return 0;
uintptr_t current = atomic_load_explicit(&g_remote_watch_ptr, memory_order_acquire);
return current == (uintptr_t)node;
}
static void tiny_remote_watch_emit(const char* stage,
SuperSlab* ss,
int slab_idx,
void* node,
uint32_t code,
uint32_t tid,
int fatal) {
if (!tiny_remote_watch_is(node)) return;
(void)slab_idx;
uint32_t stage_hash = tiny_remote_stage_hash(stage);
uintptr_t aux;
uint16_t cls = 0;
if (ss) {
uintptr_t base = (uintptr_t)ss;
size_t sz = (size_t)1ULL << ss->lg_size;
uint32_t combined = (code & 0xFFFFu) | ((stage_hash & 0xFFFFu) << 16);
aux = tiny_remote_pack_diag(combined, base, sz, (uintptr_t)node);
cls = (uint16_t)ss->size_class;
} else {
aux = ((uintptr_t)(code & 0xFFFFu) << 32) | (uintptr_t)(stage_hash & 0xFFFFu);
}
uint32_t first_tid = atomic_load_explicit(&g_remote_watch_tid, memory_order_acquire);
if (tid == 0) {
tid = (uint32_t)(uintptr_t)pthread_self();
}
if (__builtin_expect(g_debug_remote_guard, 0)) {
if (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
TinySlabMeta* meta = &ss->slabs[slab_idx];
fprintf(stderr,
"[REMOTE_WATCH] stage=%s code=0x%04x cls=%u slab=%d node=%p owner=%u used=%u freelist=%p tid=0x%08x first_tid=0x%08x\n",
stage ? stage : "(null)",
(unsigned)code,
ss->size_class,
slab_idx,
node,
meta->owner_tid,
(unsigned)meta->used,
meta->freelist,
tid,
first_tid);
} else {
fprintf(stderr,
"[REMOTE_WATCH] stage=%s code=0x%04x node=%p tid=0x%08x first_tid=0x%08x\n",
stage ? stage : "(null)",
(unsigned)code,
node,
tid,
first_tid);
}
}
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, cls, node, aux);
if (fatal && __builtin_expect(g_tiny_safe_free_strict, 0)) {
raise(SIGUSR2);
}
}
void tiny_remote_watch_note(const char* stage,
SuperSlab* ss,
int slab_idx,
void* node,
uint32_t code,
uint32_t tid,
int fatal) {
tiny_remote_watch_emit(stage, ss, slab_idx, node, code, tid, fatal);
}
void tiny_remote_watch_mark(void* node, const char* stage, uint32_t tid) {
if (!node) return;
uintptr_t val = (uintptr_t)node;
uintptr_t expect = 0;
if (atomic_compare_exchange_strong_explicit(&g_remote_watch_ptr, &expect, val,
memory_order_acq_rel, memory_order_relaxed)) {
if (tid == 0) {
tid = (uint32_t)(uintptr_t)pthread_self();
}
atomic_store_explicit(&g_remote_watch_tid, tid, memory_order_release);
}
if (tiny_remote_watch_is(node)) {
tiny_remote_watch_emit(stage ? stage : "watch_mark", NULL, -1, node, 0xA230u, tid, 0);
}
}
void tiny_remote_watch_clear(void* node) {
if (!node) return;
uintptr_t val = (uintptr_t)node;
if (atomic_compare_exchange_strong_explicit(&g_remote_watch_ptr, &val, (uintptr_t)0,
memory_order_acq_rel, memory_order_relaxed)) {
atomic_store_explicit(&g_remote_watch_tid, 0u, memory_order_release);
}
}
static void tiny_remote_dump_backtrace(void) {
#ifdef __GLIBC__
void* frames[32];
int depth = backtrace(frames, 32);
char** symbols = backtrace_symbols(frames, depth);
if (symbols) {
for (int i = 0; i < depth; i++) {
fprintf(stderr, " bt[%d]=%s\n", i, symbols[i]);
}
free(symbols);
}
#else
fprintf(stderr, " (backtrace unavailable on this platform)\n");
#endif
}
static void tiny_remote_dump_queue_sample(SuperSlab* ss, int slab_idx) {
if (!g_debug_remote_guard || !ss) return;
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
unsigned rc = atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed);
fprintf(stderr,
"[REMOTE_QUEUE] cls=%u slab=%d head=%p rc=%u\n",
ss->size_class,
slab_idx,
(void*)head,
rc);
uintptr_t cur = head;
for (int n = 0; cur && n < 5; n++) {
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)cur, memory_order_relaxed);
uintptr_t next = 0;
if (g_remote_side_enable) {
next = tiny_remote_side_get(ss, slab_idx, (void*)cur);
}
if (next == 0 && observed != TINY_REMOTE_SENTINEL) {
next = observed;
}
fprintf(stderr,
" [REMOTE_QUEUE:%d] node=%p observed=0x%016" PRIxPTR " next=%p\n",
n,
(void*)cur,
observed,
(void*)next);
if (next == 0 || next == TINY_REMOTE_SENTINEL) {
break;
}
cur = next;
}
}
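// Classify a stage name by prefix ("se", "sc", "d", other) into one of four
// report classes; tiny_remote_report_scribble() traps each class at most once
// per run via the g_remote_scribble_once bitmask.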
static inline int tiny_remote_stage_mask(const char* stage) {
if (!stage) return 8;
if (stage[0] == 's' && stage[1] == 'e') return 1;
if (stage[0] == 's' && stage[1] == 'c') return 2;
if (stage[0] == 'd') return 4;
return 8;
}
static inline void tiny_remote_report_scribble(const char* stage, void* node, uintptr_t observed) {
if (!g_debug_remote_guard) return;
if ((observed & 0xFFFFu) != 0x6261u) return;
int mask = tiny_remote_stage_mask(stage);
int current = atomic_load_explicit(&g_remote_scribble_once, memory_order_relaxed);
while ((current & mask) == 0) {
if (atomic_compare_exchange_weak_explicit(&g_remote_scribble_once, &current, current | mask,
memory_order_acq_rel, memory_order_relaxed)) {
fprintf(stderr,
"[REMOTE_SENTINEL_TRAP:%s] node=%p observed=0x%016" PRIxPTR " tid=%lu\n",
stage ? stage : "unknown",
node,
observed,
(unsigned long)pthread_self());
tiny_remote_dump_backtrace();
raise(SIGUSR2);
break;
}
}
}
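// Stages beginning "pr" always attempt to mark the node for watching; "sc" and
// "d" stages mark it only when nothing is watched yet; an already-watched node
// otherwise just gets a note. All paths then feed the scribble trap.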
void tiny_remote_report_corruption(const char* stage, void* node, uintptr_t observed) {
if (stage && stage[0] == 'p' && stage[1] == 'r') {
tiny_remote_watch_mark(node, stage, 0);
} else if (stage && stage[0] == 's' && stage[1] == 'c' && !tiny_remote_watch_is(node)) {
tiny_remote_watch_mark(node, stage, 0);
} else if (stage && stage[0] == 'd' && !tiny_remote_watch_is(node)) {
tiny_remote_watch_mark(node, stage, 0);
} else if (tiny_remote_watch_is(node)) {
tiny_remote_watch_note(stage ? stage : "scribble", NULL, -1, node, 0xA235u, 0, 0);
}
tiny_remote_report_scribble(stage, node, observed);
}
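// A node parked in a remote queue carries TINY_REMOTE_SENTINEL in its first
// word (the real next pointer lives in the side table). Any other non-zero
// value seen here means someone wrote into a queued node.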
void tiny_remote_sentinel_set(void* node) {
uintptr_t prior = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
if (__builtin_expect(g_debug_remote_guard, 0)) {
if (prior != 0 && prior != TINY_REMOTE_SENTINEL) {
tiny_remote_report_scribble("set", node, prior);
}
}
atomic_store_explicit((_Atomic uintptr_t*)node, TINY_REMOTE_SENTINEL, memory_order_relaxed);
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t after = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
if (after != TINY_REMOTE_SENTINEL) {
tiny_remote_report_scribble("set_post", node, after);
}
}
}
int tiny_remote_sentinel_ok(void* node) {
uintptr_t v = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
return v == TINY_REMOTE_SENTINEL;
}
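// Remote-drain threshold, parsed once from HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD
// (unset or empty yields 0). Illustrative invocation, value chosen arbitrarily:
//   HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD=64 ./app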
uint32_t tiny_remote_drain_threshold(void) {
static _Atomic uint32_t g_thresh = (uint32_t)-1;
uint32_t v = atomic_load_explicit(&g_thresh, memory_order_acquire);
if (v == (uint32_t)-1) {
const char* e = getenv("HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD");
uint32_t t = (e && *e) ? (uint32_t)atoi(e) : 0u;
atomic_store_explicit(&g_thresh, t, memory_order_release);
v = t;
}
return v;
}
void tiny_remote_side_set(struct SuperSlab* ss, int slab_idx, void* node, uintptr_t next) {
if (!g_remote_side_enable) return;
uintptr_t k = (uintptr_t)node;
uintptr_t base = (uintptr_t)ss;
size_t ss_size = (size_t)1ULL << ss->lg_size;
uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
for (uint32_t n=0; n<REM_SIDE_SIZE; n++, i=(i+1)&(REM_SIDE_SIZE-1)) {
uintptr_t expect = 0;
if (atomic_compare_exchange_weak_explicit(&g_rem_side[i].key, &expect, k, memory_order_acq_rel, memory_order_relaxed)) {
atomic_store_explicit(&g_rem_side[i].val, next, memory_order_release);
tiny_remote_sentinel_set(node);
tiny_remote_watch_note("side_set", ss, slab_idx, node, 0xA233u, 0, 0);
return;
} else if (expect == k) {
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
tiny_remote_report_corruption("dup_push", node, observed);
uintptr_t aux = tiny_remote_pack_diag(0xA212u, base, ss_size, (uintptr_t)node);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
TinySlabMeta* meta = &ss->slabs[slab_idx];
fprintf(stderr,
"[REMOTE_DUP_PUSH] cls=%u slab=%d node=%p next=%p observed=0x%016" PRIxPTR " owner=%u rc=%u head=%p\n",
ss->size_class,
slab_idx,
node,
(void*)next,
observed,
meta->owner_tid,
(unsigned)atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
tiny_remote_watch_note("dup_push", ss, slab_idx, node, 0xA234u, 0, 1);
tiny_remote_dump_queue_sample(ss, slab_idx);
tiny_remote_dump_backtrace();
if (g_tiny_safe_free_strict) {
raise(SIGUSR2);
}
}
return;
}
}
if (__builtin_expect(g_debug_remote_guard, 0)) {
tiny_remote_report_scribble("side_overflow", node, atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed));
}
// Fallback: legacy embedding if side table saturated
*(uintptr_t*)node = next;
}
uintptr_t tiny_remote_side_get(struct SuperSlab* ss, int slab_idx, void* node) {
(void)ss; (void)slab_idx;
(void)g_remote_side_enable; // always true in caller
uintptr_t k = (uintptr_t)node;
uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
for (uint32_t n=0; n<REM_SIDE_SIZE; n++, i=(i+1)&(REM_SIDE_SIZE-1)) {
uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
if (key == k) {
return atomic_load_explicit(&g_rem_side[i].val, memory_order_acquire);
}
if (key == 0) break;
}
// Fallback: the side-table lookup failed (the table overflowed during push),
// so read the embedded next pointer from the node itself, matching the
// overflow fallback at the end of tiny_remote_side_set().
uintptr_t fallback_val = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_acquire);
// If the sentinel is still present, the node was tracked by the side table but
// its entry is gone; the next pointer cannot be recovered, so return 0 to stop
// chain traversal.
if (fallback_val == TINY_REMOTE_SENTINEL) {
return 0;
}
return fallback_val;
}
void tiny_remote_side_clear(struct SuperSlab* ss, int slab_idx, void* node) {
(void)ss; (void)slab_idx;
if (!g_remote_side_enable) return;
uintptr_t k = (uintptr_t)node;
uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
for (uint32_t n = 0; n < REM_SIDE_SIZE; n++, i = (i + 1) & (REM_SIDE_SIZE - 1)) {
uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
if (key == k) {
atomic_store_explicit(&g_rem_side[i].val, 0, memory_order_relaxed);
atomic_store_explicit(&g_rem_side[i].key, 0, memory_order_release);
tiny_remote_watch_clear(node);
return;
}
if (key == 0) break;
}
}
int tiny_remote_side_contains(struct SuperSlab* ss, int slab_idx, void* node) {
(void)ss; (void)slab_idx;
if (!g_remote_side_enable) return 0;
uintptr_t k = (uintptr_t)node;
uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
for (uint32_t n = 0; n < REM_SIDE_SIZE; n++, i = (i + 1) & (REM_SIDE_SIZE - 1)) {
uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
if (key == k) {
return 1;
}
if (key == 0) break;
}
return 0;
}
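// One-time init from the environment. The side table defaults to enabled; set
// HAKMEM_TINY_REMOTE_SIDE=0 to turn it off, except that the remote-guard debug
// mode forces it back on. Illustrative:
//   HAKMEM_TINY_REMOTE_SIDE=0 ./app   # run with legacy embedded next pointers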
void tiny_remote_side_init_from_env(void) {
static int g_side_init_once = 0;
if (__builtin_expect(g_side_init_once, 0)) return;
g_side_init_once = 1;
const char* side_env = getenv("HAKMEM_TINY_REMOTE_SIDE");
int enable = 1;
if (side_env && *side_env) {
enable = (atoi(side_env) != 0);
}
if (!enable && __builtin_expect(g_debug_remote_guard, 0)) {
enable = 1;
}
g_remote_side_enable = enable;
if (__builtin_expect(g_debug_remote_guard, 0)) {
fprintf(stderr, "[REMOTE_SIDE_INIT] enable=%d\n", enable);
}
if (!enable) return;
for (uint32_t i = 0; i < REM_SIDE_SIZE; i++) {
atomic_store_explicit(&g_rem_side[i].key, 0, memory_order_relaxed);
atomic_store_explicit(&g_rem_side[i].val, 0, memory_order_relaxed);
}
}