Files
hakmem/core/tiny_superslab_free.inc.h

356 lines
19 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_superslab_free.inc.h - SuperSlab Free Layer
// Purpose: Same-thread and cross-thread free handling
// Extracted from: hakmem_tiny_free.inc lines 1171-1475
// Box Theory: Box 6 (Free Fast Path) + Box 2 (Remote Queue) integration
//
// Public functions:
// - hak_tiny_free_superslab(): Main SuperSlab free entry point
// Phase 6.22-B: SuperSlab fast free path
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
// Route trace: count SuperSlab free entries (diagnostics only)
extern _Atomic uint64_t g_free_ss_enter;
atomic_fetch_add_explicit(&g_free_ss_enter, 1, memory_order_relaxed);
ROUTE_MARK(16); // free_enter
HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees
// Get slab index (supports 1MB/2MB SuperSlabs)
int slab_idx = slab_index_for(ss, ptr);
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t ss_base = (uintptr_t)ss;
if (__builtin_expect(slab_idx < 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
extern __thread TinyTLSSlab g_tls_slabs[];
tiny_alloc_dump_tls_state(ss->size_class, "watch_free_enter", &g_tls_slabs[ss->size_class]);
#if !HAKMEM_BUILD_RELEASE
extern __thread TinyTLSMag g_tls_mags[];
TinyTLSMag* watch_mag = &g_tls_mags[ss->size_class];
fprintf(stderr,
"[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
ss->size_class,
watch_mag->top,
watch_mag->cap);
#endif
}
// BUGFIX: Validate size_class before using as array index (prevents OOB)
if (__builtin_expect(ss->size_class < 0 || ss->size_class >= TINY_NUM_CLASSES, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)ss->size_class);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// ChatGPT Pro Optimization: Move safety checks to debug mode only
// In release builds, these checks are completely eliminated by the compiler
// Expected impact: -10~-15% CPU (eliminates O(n) duplicate scan)
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[ss->size_class];
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uint32_t code = 0xA100u;
if (align_ok) code |= 0x2u;
if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// Duplicate in freelist (best-effort scan up to 64)
// NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
void* scan = meta->freelist; int scanned = 0; int dup = 0;
while (scan && scanned < 64) { if (scan == ptr) { dup = 1; break; } scan = *(void**)scan; scanned++; }
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
#endif // !HAKMEM_BUILD_RELEASE
// Lightweight guard always-on for class7 (headerless, 1024B): prevent corrupted pointer writes in release
if (__builtin_expect(ss->size_class == 7, 0)) {
size_t blk = g_tiny_class_sizes[ss->size_class];
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uintptr_t aux = tiny_remote_pack_diag(0xA107u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
// Fail-fast in class7 to avoid silent SLL/freelist corruption
raise(SIGUSR2);
return;
}
}
// Phase 6.23: Same-thread check
uint32_t my_tid = tiny_self_u32();
const int debug_guard = g_debug_remote_guard;
static __thread int g_debug_free_count = 0;
// If owner is not set yet, claim ownership to avoid spurious remote path in 1T
if (!g_tiny_force_remote && meta->owner_tid == 0) {
meta->owner_tid = my_tid;
}
if (!g_tiny_force_remote && meta->owner_tid != 0 && meta->owner_tid == my_tid) {
ROUTE_MARK(17); // free_same_thread
// Fast path: Direct freelist push (same-thread)
if (0 && debug_guard && g_debug_free_count < 1) {
fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n",
meta->owner_tid, my_tid);
g_debug_free_count++;
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
#include "box/free_remote_box.h"
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, ptr, my_tid);
if (transitioned) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[ss->size_class]++;
// Free-side route: remote transition observed
do {
static int g_route_free = -1; if (__builtin_expect(g_route_free == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
g_route_free = (e && *e && *e != '0') ? 1 : 0; }
if (g_route_free) route_free_commit((int)ss->size_class, (1ull<<18), 0xE2);
} while (0);
}
return;
}
// Optional: MidTC (TLS tcache for 128..1024B) — allow bypass via env HAKMEM_TINY_FREE_TO_SS=1
do {
static int g_free_to_ss = -1;
if (__builtin_expect(g_free_to_ss == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREE_TO_SS");
g_free_to_ss = (e && *e && *e != '0') ? 1 : 0; // default OFF
}
if (!g_free_to_ss) {
int cls = (int)ss->size_class;
if (midtc_enabled() && cls >= 4) {
if (midtc_push(cls, ptr)) {
// Treat as returned to TLS cache (not SS freelist)
meta->used--;
ss_active_dec_one(ss);
return;
}
}
}
} while (0);
#include "box/free_local_box.h"
// Perform freelist push (+first-free publish if applicable)
void* prev_before = meta->freelist;
tiny_free_local_box(ss, slab_idx, meta, ptr, my_tid);
if (prev_before == NULL) {
ROUTE_MARK(19); // first_free_transition
extern unsigned long long g_first_free_transitions[];
g_first_free_transitions[ss->size_class]++;
ROUTE_MARK(20); // mailbox_publish
// Free-side route commit (one-shot)
do {
static int g_route_free = -1; if (__builtin_expect(g_route_free == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
g_route_free = (e && *e && *e != '0') ? 1 : 0; }
int cls = (int)ss->size_class;
if (g_route_free) route_free_commit(cls, (1ull<<19) | (1ull<<20), 0xE1);
} while (0);
}
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, prev_before, meta->used);
}
// 空検出は別途(ホットパス除外)
} else {
ROUTE_MARK(18); // free_remote_transition
if (__builtin_expect(meta->owner_tid == my_tid && meta->owner_tid == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (debug_guard) {
fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
ss->size_class, slab_idx, ptr, my_tid, (unsigned)meta->used);
}
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
// Slow path: Remote free (cross-thread)
if (0 && debug_guard && g_debug_free_count < 5) {
fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
meta->owner_tid, my_tid, slab_idx);
g_debug_free_count++;
}
if (__builtin_expect(g_tiny_safe_free, 0)) {
// Best-effort duplicate scan in remote stack (up to 64 nodes)
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
uintptr_t base = ss_base;
int scanned = 0; int dup = 0;
uintptr_t cur = head;
while (cur && scanned < 64) {
if ((cur < base) || (cur >= base + ss_size)) {
uintptr_t aux = tiny_remote_pack_diag(0xA200u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
if ((void*)cur == ptr) { dup = 1; break; }
if (__builtin_expect(g_remote_side_enable, 0)) {
if (!tiny_remote_sentinel_ok((void*)cur)) {
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
tiny_remote_report_corruption("scan", (void*)cur, observed);
if (__builtin_expect(g_debug_remote_guard, 0)) {
fprintf(stderr,
"[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
ss->size_class,
slab_idx,
(void*)cur,
(void*)head,
ptr,
scanned,
observed,
meta->owner_tid,
(unsigned)meta->used,
meta->freelist,
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
}
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = tiny_remote_side_get(ss, slab_idx, (void*)cur);
} else {
if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
uintptr_t aux = tiny_remote_pack_diag(0xA201u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = (uintptr_t)(*(void**)(void*)cur);
}
scanned++;
}
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
static int g_ss_adopt_en2 = -1; // env cached
if (g_ss_adopt_en2 == -1) {
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
// 既定: Remote Queueを使う1。env指定時のみ上書き。
g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0);
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)");
}
}
// A/B gate: disable remote MPSC (use legacy freelist push)
do {
static int g_disable_remote = -1;
if (__builtin_expect(g_disable_remote == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
g_disable_remote = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_disable_remote, 0)) {
g_ss_adopt_en2 = 0;
}
} while (0);
if (g_ss_adopt_en2) {
// Use remote queue
uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED);
if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
ss->size_class,
slab_idx,
meta->owner_tid,
my_tid,
ptr,
(unsigned)meta->used,
atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
head_word);
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr);
if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr);
}
if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
}
if (dup_remote) {
uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
tiny_failfast_abort_ptr("double_free_remote", ss, slab_idx, ptr, "remote_side_contains");
}
if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
// TLS guard scribble detected on the node's first word → same-pointer double free across routes
uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
tiny_remote_watch_mark(ptr, "pre_push", my_tid);
tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
tiny_remote_report_corruption("pre_push", ptr, head_word);
tiny_failfast_abort_ptr("double_free_scribble", ss, slab_idx, ptr, "scribble_6261");
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
}
int was_empty = ss_remote_push(ss, slab_idx, ptr); // ss_active_dec_one() called inside
meta->used--;
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
if (was_empty) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[ss->size_class]++;
ss_partial_publish((int)ss->size_class, ss);
}
} else {
// Fallback: direct freelist push (legacy)
if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
void* prev = meta->freelist;
*(void**)ptr = prev;
meta->freelist = ptr;
tiny_failfast_log("free_local_legacy", ss->size_class, ss, meta, ptr, prev);
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
meta->used--;
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)ss->size_class, ss);
}
}
// 空検出は別途(ホットパス除外)
}
}