hakmem/core/tiny_superslab_free.inc.h
Moe Charm (CI) 8052e8b320 Phase 24-26: Hot path atomic telemetry prune (+2.00% cumulative)
Summary:
- Phase 24 (alloc stats): +0.93% GO
- Phase 25 (free stats): +1.07% GO
- Phase 26 (diagnostics): -0.33% NEUTRAL (code cleanliness)
- Total: 11 atomics compiled-out, +2.00% improvement

Phase 24: OBSERVE tax prune (tiny_class_stats_box.h)
- Added HAKMEM_TINY_CLASS_STATS_COMPILED (default: 0)
- Wrapped 5 stats functions: uc_miss, warm_hit, shared_lock, tls_carve_*
- Result: +0.93% (baseline 56.675M vs compiled-in 56.151M ops/s)
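
The gate pattern is the same in each phase: the relaxed atomic increment only exists when the corresponding *_COMPILED flag is set at build time. Below is a minimal sketch of that shape; tiny_stats_uc_miss_inc and g_tiny_uc_miss are hypothetical names used for illustration only (the real wrappers live in tiny_class_stats_box.h).

  #include <stdatomic.h>
  #include <stdint.h>

  #ifndef HAKMEM_TINY_CLASS_STATS_COMPILED
  #define HAKMEM_TINY_CLASS_STATS_COMPILED 0   /* default OFF: telemetry compiled out */
  #endif

  static inline void tiny_stats_uc_miss_inc(void) {
  #if HAKMEM_TINY_CLASS_STATS_COMPILED
      extern _Atomic uint64_t g_tiny_uc_miss;  /* hypothetical counter */
      atomic_fetch_add_explicit(&g_tiny_uc_miss, 1, memory_order_relaxed);
  #else
      (void)0;  /* no-op: zero per-op cost in production builds */
  #endif
  }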

Phase 25: Tiny free stats prune (tiny_superslab_free.inc.h)
- Added HAKMEM_TINY_FREE_STATS_COMPILED (default: 0)
- Wrapped g_free_ss_enter atomic in free hot path
- Result: +1.07% (baseline 57.017M vs compiled-in 56.415M ops/s)

Phase 26: Hot path diagnostic atomics prune
- Added 5 compile gates for low-frequency error counters:
  - HAKMEM_TINY_C7_FREE_COUNT_COMPILED
  - HAKMEM_TINY_HDR_MISMATCH_LOG_COMPILED
  - HAKMEM_TINY_HDR_META_MISMATCH_COMPILED
  - HAKMEM_TINY_METRIC_BAD_CLASS_COMPILED
  - HAKMEM_TINY_HDR_META_FAST_COMPILED
- Result: -0.33% NEUTRAL (within noise, kept for cleanliness)
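
The Phase 26 gates follow the same shape but wrap rarely-hit error counters whose only purpose is rate-limiting diagnostic logs. A minimal sketch, using the HAKMEM_HDR_MISMATCH_LOG_COMPILED gate as it appears in the Phase 26B block later in this file; report_hdr_mismatch and g_seen are hypothetical names for illustration:

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  #ifndef HAKMEM_HDR_MISMATCH_LOG_COMPILED
  #define HAKMEM_HDR_MISMATCH_LOG_COMPILED 0
  #endif

  static inline void report_hdr_mismatch(unsigned cls, void* p) {
  #if HAKMEM_HDR_MISMATCH_LOG_COMPILED
      /* Research build: count occurrences so logging stays rate-limited. */
      static _Atomic uint32_t g_seen = 0;
      uint32_t n = atomic_fetch_add_explicit(&g_seen, 1, memory_order_relaxed);
  #else
      /* Production build: counter compiled out; n stays 0 and no atomic runs on this path. */
      uint32_t n = 0;
  #endif
      if (n < 8) {
          fprintf(stderr, "[HDR_MISMATCH] cls=%u ptr=%p\n", cls, p);
      }
  }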

Alignment with mimalloc principles:
- "No atomics on hot path" - telemetry moved to compile-time opt-in
- Eliminates the fixed per-operation telemetry tax
- Production builds: maximum performance (atomics compiled-out)
- Research builds: full diagnostics (COMPILED=1)
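
In practice the production/research split reduces to how the gates default in hakmem_build_flags.h. The following is an assumed sketch of that header's shape (its actual contents are not shown here); a research build opts back in per gate via a compile-time define.

  /* hakmem_build_flags.h (assumed shape, not the actual file contents) */
  #ifndef HAKMEM_TINY_CLASS_STATS_COMPILED
  #define HAKMEM_TINY_CLASS_STATS_COMPILED 0   /* Phase 24: alloc stats */
  #endif
  #ifndef HAKMEM_TINY_FREE_STATS_COMPILED
  #define HAKMEM_TINY_FREE_STATS_COMPILED 0    /* Phase 25: free stats */
  #endif
  /* Phase 26 diagnostic gates default to 0 as well; a research build
     opts back in per gate, e.g. -DHAKMEM_TINY_FREE_STATS_COMPILED=1. */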

Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-16 05:35:11 +09:00


// tiny_superslab_free.inc.h - SuperSlab Free Layer
// Purpose: Same-thread and cross-thread free handling
// Extracted from: hakmem_tiny_free.inc lines 1171-1475
// Box Theory: Box 6 (Free Fast Path) + Box 2 (Remote Queue) integration
//
// Public functions:
// - hak_tiny_free_superslab(): Main SuperSlab free entry point
#include <stdatomic.h>
#include "hakmem_build_flags.h" // Phase 25: Compile-time feature switches
#include "box/ptr_type_box.h" // Phase 10
#include "box/free_remote_box.h"
#include "box/free_local_box.h"
#include "hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
// Phase 6.22-B: SuperSlab fast free path
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
// Route trace: count SuperSlab free entries (diagnostics only)
// Phase 25: Compile-out free stats atomic (default OFF)
#if HAKMEM_TINY_FREE_STATS_COMPILED
extern _Atomic uint64_t g_free_ss_enter;
atomic_fetch_add_explicit(&g_free_ss_enter, 1, memory_order_relaxed);
#else
(void)0; // No-op when compiled out
#endif
ROUTE_MARK(16); // free_enter
HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees
// Phase 10: Convert USER → BASE at entry point (single conversion)
hak_user_ptr_t user_ptr = HAK_USER_FROM_RAW(ptr);
hak_base_ptr_t base_ptr = hak_user_to_base(user_ptr);
void* base = HAK_BASE_TO_RAW(base_ptr);
// Get slab index (supports 1MB/2MB SuperSlabs)
// CRITICAL: Use BASE pointer for slab_index calculation!
int slab_idx = slab_index_for(ss, base);
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t ss_base = (uintptr_t)ss;
if (__builtin_expect(slab_idx < 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xFFu, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
uint8_t cls = meta->class_idx;
// Debug: Log first C7 alloc/free for path verification
// Phase 26A: Compile-out c7_free_count atomic (default OFF)
if (cls == 7) {
#if HAKMEM_C7_FREE_COUNT_COMPILED
static _Atomic int c7_free_count = 0;
int count = atomic_fetch_add_explicit(&c7_free_count, 1, memory_order_relaxed);
if (count == 0) {
#if !HAKMEM_BUILD_RELEASE && HAKMEM_DEBUG_VERBOSE
fprintf(stderr, "[C7_FIRST_FREE] ptr=%p base=%p slab_idx=%d\n", ptr, base, slab_idx);
#endif
}
#else
// No-op when compiled out (Phase 26A)
(void)0;
#endif
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
#if !HAKMEM_BUILD_RELEASE
extern __thread TinyTLSSlab g_tls_slabs[];
tiny_alloc_dump_tls_state(cls, "watch_free_enter", &g_tls_slabs[cls]);
extern __thread TinyTLSMag g_tls_mags[];
TinyTLSMag* watch_mag = &g_tls_mags[cls];
fprintf(stderr,
"[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
cls,
watch_mag->top,
watch_mag->cap);
#endif
}
// BUGFIX (Phase 12): Validate class_idx before using as array index
if (__builtin_expect(cls >= TINY_NUM_CLASSES, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)cls);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// ChatGPT Pro Optimization: Move safety checks to debug mode only
// In release builds, these checks are completely eliminated by the compiler
// Expected impact: -10~-15% CPU (eliminates O(n) duplicate scan)
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[cls];
uint8_t* slab_base_ptr = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)slab_base_ptr;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uint32_t code = 0xA100u;
if (align_ok) code |= 0x2u;
if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// Duplicate in freelist (best-effort scan up to 64)
// NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
void* scan = meta->freelist; int scanned = 0; int dup = 0;
while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = tiny_next_read(cls, scan); scanned++; }
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
#endif // !HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: C7 now has headers like other classes
// Validation must check base pointer (ptr-1) alignment, not user ptr
if (__builtin_expect(cls == 7, 0)) {
size_t blk = g_tiny_class_sizes[cls];
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)base - (uintptr_t)slab_base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uintptr_t aux = tiny_remote_pack_diag(0xA107u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
#if !HAKMEM_BUILD_RELEASE
// Debug build: Print diagnostic info before failing
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] ptr=%p base=%p slab_base=%p\n", ptr, base, (void*)slab_base);
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] delta=%zu blk=%zu delta%%blk=%zu\n",
(size_t)delta, blk, (size_t)(delta % blk));
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] align_ok=%d range_ok=%d cap=%u capacity=%u\n",
align_ok, range_ok, (unsigned)(delta / blk), (unsigned)meta->capacity);
#endif
// BUGFIX: Guard with g_tiny_safe_free_strict like other validation checks
// Fail-fast in class7 to avoid silent SLL/freelist corruption (only if strict mode enabled)
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
return;
}
}
// Header vs meta class mismatch detector (env: HAKMEM_TINY_SLL_DIAG, logs only the first few occurrences)
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
#if !HAKMEM_BUILD_RELEASE
do {
if (__builtin_expect(HAK_ENV_TINY_SLL_DIAG(), 0)) {
uint8_t hdr = *(uint8_t*)base;
uint8_t expect = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
if (__builtin_expect(hdr != expect, 0)) {
// Phase 26B: Compile-out g_hdr_mismatch_log atomic (default OFF)
#if HAKMEM_HDR_MISMATCH_LOG_COMPILED
static _Atomic uint32_t g_hdr_mismatch_log = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
#else
uint32_t n = 0; // No-op when compiled out
#endif
if (n < 8) {
fprintf(stderr,
"[TLS_HDR_MISMATCH] cls=%u slab_idx=%d hdr=0x%02x expect=0x%02x ptr=%p\n",
(unsigned)cls, slab_idx, hdr, expect, ptr);
}
}
}
} while (0);
#endif
// Phase 6.23: Same-thread check (Phase 12: owner_tid_low)
uint32_t my_tid = tiny_self_u32();
uint8_t my_tid_low = (uint8_t)((my_tid >> 8) & 0xFFu);
const int debug_guard = g_debug_remote_guard;
static __thread int g_debug_free_count = 0;
// If owner is not set yet, claim ownership (low 8 bits) to avoid spurious remote path in 1T
if (!g_tiny_force_remote && meta->owner_tid_low == 0) {
meta->owner_tid_low = my_tid_low;
}
if (!g_tiny_force_remote && meta->owner_tid_low != 0 && meta->owner_tid_low == my_tid_low) {
ROUTE_MARK(17); // free_same_thread
// Fast path: Direct freelist push (same-thread)
if (0 && debug_guard && g_debug_free_count < 1) {
fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n",
meta->owner_tid_low, my_tid);
g_debug_free_count++;
}
// Header/meta class cross-check (triage)
#if HAKMEM_TINY_HEADER_CLASSIDX
do {
uint8_t hdr_cls = tiny_region_id_read_header(ptr);
uint8_t meta_cls = meta->class_idx;
if (__builtin_expect(hdr_cls != meta_cls, 0)) {
// Phase 26C: Compile-out g_hdr_meta_mismatch atomic (default OFF)
#if HAKMEM_HDR_META_MISMATCH_COMPILED
static _Atomic uint32_t g_hdr_meta_mismatch = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch, 1, memory_order_relaxed);
#else
uint32_t n = 0; // No-op when compiled out
#endif
if (n < 16) {
fprintf(stderr, "[SLAB_HDR_META_MISMATCH] slab_push cls_meta=%u hdr_cls=%u ptr=%p slab_idx=%d ss=%p freelist=%p used=%u\n",
(unsigned)meta_cls, (unsigned)hdr_cls, ptr, slab_idx, (void*)ss, meta->freelist, (unsigned)meta->used);
}
}
} while (0);
#endif
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, base_ptr, my_tid);
if (transitioned) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[cls]++;
// Free-side route: remote transition observed
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
do {
if (HAK_ENV_TINY_ROUTE_FREE()) route_free_commit((int)cls, (1ull<<18), 0xE2);
} while (0);
}
return;
}
// Optional: MidTC (TLS tcache for 128..1024B) — allow bypass via env HAKMEM_TINY_FREE_TO_SS=1
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
do {
if (!HAK_ENV_TINY_FREE_TO_SS()) {
int mid_cls = (int)cls;
if (midtc_enabled() && mid_cls >= 4) {
if (midtc_push(mid_cls, base)) {
// Treat as returned to TLS cache (not SS freelist)
meta->used--;
ss_active_dec_one(ss);
return;
}
}
}
} while (0);
#if !HAKMEM_BUILD_RELEASE
// DEBUG LOGGING - Track freelist operations
// Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
static __thread int free_count = 0;
if (HAK_ENV_SS_FREE_DEBUG() && (free_count++ % 1000) == 0) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_LOCAL] cls=%u slab=%d meta->used=%u (before dec)\n",
cls, slab_idx, meta->used);
}
// Perform freelist push (+first-free publish if applicable)
void* prev_before = meta->freelist;
// Phase 10: Use base_ptr
tiny_free_local_box(ss, slab_idx, meta, base_ptr, my_tid);
if (prev_before == NULL) {
ROUTE_MARK(19); // first_free_transition
extern unsigned long long g_first_free_transitions[];
g_first_free_transitions[cls]++;
ROUTE_MARK(20); // mailbox_publish
// Free-side route commit (one-shot)
do {
static int g_route_free = -1;
#if HAKMEM_BUILD_RELEASE
g_route_free = 0;
#else
// Priority-2: Use cached ENV (eliminate lazy-init static overhead)
g_route_free = HAK_ENV_TINY_ROUTE_FREE();
#endif
if (g_route_free) route_free_commit(cls, (1ull<<19) | (1ull<<20), 0xE1);
} while (0);
}
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
cls, slab_idx, meta->owner_tid_low, my_tid, ptr, prev_before, meta->used);
}
// Empty-slab detection is handled separately (kept off the hot path)
// Phase 12: when the slab becomes empty, return it to the shared pool
if (meta->used == 0) {
// DEBUG LOGGING - Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
if (HAK_ENV_SS_FREE_DEBUG()) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_PATH] meta->used=0 detected: cls=%u ss=%p slab_idx=%d\n",
cls, (void*)ss, slab_idx);
}
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
shared_pool_release_slab(ss, slab_idx);
}
} else {
ROUTE_MARK(18); // free_remote_transition
if (__builtin_expect(meta->owner_tid_low == my_tid_low && meta->owner_tid_low == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (debug_guard) {
fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
cls, slab_idx, ptr, my_tid, (unsigned)meta->used);
}
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
// Slow path: Remote free (cross-thread)
if (0 && debug_guard && g_debug_free_count < 5) {
fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
meta->owner_tid_low, my_tid, slab_idx);
g_debug_free_count++;
}
if (__builtin_expect(g_tiny_safe_free, 0)) {
// Best-effort duplicate scan in remote stack (up to 64 nodes)
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
uintptr_t base_addr = ss_base;
int scanned = 0; int dup = 0;
uintptr_t cur = head;
while (cur && scanned < 64) {
if ((cur < base_addr) || (cur >= base_addr + ss_size)) {
uintptr_t aux = tiny_remote_pack_diag(0xA200u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
if ((void*)cur == base) { dup = 1; break; } // Check against BASE
if (__builtin_expect(g_remote_side_enable, 0)) {
if (!tiny_remote_sentinel_ok((void*)cur)) {
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
tiny_remote_report_corruption("scan", (void*)cur, observed);
if (__builtin_expect(g_debug_remote_guard, 0)) {
fprintf(stderr,
"[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
cls,
slab_idx,
(void*)cur,
(void*)head,
ptr,
scanned,
observed,
meta->owner_tid_low,
(unsigned)meta->used,
meta->freelist,
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
}
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = tiny_remote_side_get(ss, slab_idx, (void*)cur);
} else {
if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
uintptr_t aux = tiny_remote_pack_diag(0xA201u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = (uintptr_t)tiny_next_read(cls, (void*)cur);
}
scanned++;
}
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
static int g_ss_adopt_en2 = -1; // env cached
if (g_ss_adopt_en2 == -1) {
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
// Default: use the remote queue (1); overridden only when the env variable is set.
g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0);
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)");
}
}
// A/B gate: disable remote MPSC (use legacy freelist push)
do {
static int g_disable_remote = -1;
if (__builtin_expect(g_disable_remote == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
g_disable_remote = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_disable_remote, 0)) {
g_ss_adopt_en2 = 0;
}
} while (0);
if (g_ss_adopt_en2) {
// Use remote queue
uintptr_t head_word = __atomic_load_n((uintptr_t*)base, __ATOMIC_RELAXED);
if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
cls,
slab_idx,
meta->owner_tid_low,
my_tid,
ptr,
(unsigned)meta->used,
atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
head_word);
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, base);
if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, base);
}
if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
}
if (dup_remote) {
uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
tiny_failfast_abort_ptr("double_free_remote", ss, slab_idx, ptr, "remote_side_contains");
}
if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
// TLS guard scribble detected on the node's first word → same-pointer double free across routes
uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
tiny_remote_watch_mark(ptr, "pre_push", my_tid);
tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
tiny_remote_report_corruption("pre_push", ptr, head_word);
tiny_failfast_abort_ptr("double_free_scribble", ss, slab_idx, ptr, "scribble_6261");
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
}
// Phase 10: Use base_ptr
int was_empty = tiny_free_remote_box(ss, slab_idx, meta, base_ptr, my_tid);
meta->used--;
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
if (was_empty) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[cls]++;
ss_partial_publish((int)cls, ss);
}
} else {
// Fallback: direct freelist push (legacy)
if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
void* prev = meta->freelist;
tiny_next_write(cls, base, prev);
meta->freelist = base;
tiny_failfast_log("free_local_legacy", cls, ss, meta, ptr, prev);
// Priority-2: Use cached ENV (eliminate lazy-init static overhead)
do {
#if !HAKMEM_BUILD_RELEASE
int g_mask_en = (HAK_ENV_TINY_FREELIST_MASK() != 0) ? 1 : 0;
#else
int g_mask_en = 0;
#endif
if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
meta->used--;
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)cls, ss);
}
}
// Empty-slab detection is handled separately (kept off the hot path)
// Phase 12: when the slab becomes empty, return it to the shared pool
if (meta->used == 0) {
// DEBUG LOGGING - Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
if (HAK_ENV_SS_FREE_DEBUG()) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_PATH] meta->used=0 detected: cls=%u ss=%p slab_idx=%d\n",
cls, (void*)ss, slab_idx);
}
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
shared_pool_release_slab(ss, slab_idx);
}
}
}