// tiny_superslab_free.inc.h - SuperSlab Free Layer
// Purpose: Same-thread and cross-thread free handling
// Extracted from: hakmem_tiny_free.inc lines 1171-1475
// Box Theory: Box 6 (Free Fast Path) + Box 2 (Remote Queue) integration
//
// Public functions:
// - hak_tiny_free_superslab(): Main SuperSlab free entry point

// Phase 6.22-B: SuperSlab fast free path
//
// Frees `ptr`, which must belong to SuperSlab `ss`, choosing one of two routes:
//   * Same-thread (owner) path: push onto the slab's local freelist (optionally
//     diverted into the MidTC TLS cache for classes >= 4), with first-free
//     mailbox publication.
//   * Cross-thread path: push onto the slab's MPSC remote queue (or, when the
//     remote queue is disabled via env, a legacy direct freelist push).
// All validation failures are recorded in the debug ring; when
// g_tiny_safe_free_strict is set they additionally raise SIGUSR2 before
// returning. On non-strict builds invalid frees are silently dropped.
//
// Env knobs read here (each cached in a static after first read):
//   HAKMEM_TINY_ROUTE_FREE     - enable route_free_commit() route tracing
//   HAKMEM_TINY_FREE_TO_SS     - bypass MidTC and free straight to SS freelist
//   HAKMEM_TINY_SS_ADOPT       - use remote MPSC queue (default ON)
//   HAKMEM_TINY_DISABLE_REMOTE - A/B gate: force legacy freelist push
//   HAKMEM_TINY_FREELIST_MASK  - maintain ss->freelist_mask bits (legacy path)
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
    // Route trace: count SuperSlab free entries (diagnostics only)
    extern _Atomic uint64_t g_free_ss_enter;
    atomic_fetch_add_explicit(&g_free_ss_enter, 1, memory_order_relaxed);
    ROUTE_MARK(16); // free_enter
    HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees

    // Get slab index (supports 1MB/2MB SuperSlabs)
    int slab_idx = slab_index_for(ss, ptr);
    size_t ss_size = (size_t)1ULL << ss->lg_size;
    uintptr_t ss_base = (uintptr_t)ss;
    if (__builtin_expect(slab_idx < 0, 0)) {
        // ptr does not map to any slab of this SuperSlab: record diag 0xBAD1 and bail
        uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
        if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
        return;
    }
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Watchpoint diagnostics: if this exact pointer is being watched, dump
    // TLS slab (and, in debug builds, TLS magazine) state at free entry.
    if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
        tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
        extern __thread TinyTLSSlab g_tls_slabs[];
        tiny_alloc_dump_tls_state(ss->size_class, "watch_free_enter", &g_tls_slabs[ss->size_class]);
#if !HAKMEM_BUILD_RELEASE
        extern __thread TinyTLSMag g_tls_mags[];
        TinyTLSMag* watch_mag = &g_tls_mags[ss->size_class];
        fprintf(stderr, "[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
                ss->size_class, watch_mag->top, watch_mag->cap);
#endif
    }

    // BUGFIX: Validate size_class before using as array index (prevents OOB)
    // NOTE(review): if ss->size_class is an unsigned type (it is cast to
    // uint16_t and printed with %u elsewhere), the `< 0` arm is always false
    // and only the upper-bound check is live — confirm the field's type.
    if (__builtin_expect(ss->size_class < 0 || ss->size_class >= TINY_NUM_CLASSES, 0)) {
        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)ss->size_class);
        if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
        return;
    }

    // Optional safe-free validation: alignment/range check against the slab
    // base plus a best-effort duplicate scan of the local freelist.
    if (__builtin_expect(g_tiny_safe_free, 0)) {
        size_t blk = g_tiny_class_sizes[ss->size_class];
        uint8_t* base = tiny_slab_base_for(ss, slab_idx);
        uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
        int cap_ok = (meta->capacity > 0) ? 1 : 0;
        int align_ok = (delta % blk) == 0;
        int range_ok = cap_ok && (delta / blk) < meta->capacity;
        if (!align_ok || !range_ok) {
            // diag code 0xA100 | (align_ok<<1) | range_ok encodes which check failed
            uint32_t code = 0xA100u;
            if (align_ok) code |= 0x2u;
            if (range_ok) code |= 0x1u;
            uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
            if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            return;
        }
        // Duplicate in freelist (best-effort scan up to 64)
        void* scan = meta->freelist;
        int scanned = 0;
        int dup = 0;
        while (scan && scanned < 64) {
            if (scan == ptr) { dup = 1; break; }
            scan = *(void**)scan;
            scanned++;
        }
        if (dup) {
            // Double free onto the local freelist: diag 0xDF
            uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
            if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            return;
        }
    }

    // Phase 6.23: Same-thread check
    uint32_t my_tid = tiny_self_u32();
    const int debug_guard = g_debug_remote_guard;
    static __thread int g_debug_free_count = 0;

    // If owner is not set yet, claim ownership to avoid spurious remote path in 1T
    if (!g_tiny_force_remote && meta->owner_tid == 0) {
        meta->owner_tid = my_tid;
    }

    if (!g_tiny_force_remote && meta->owner_tid != 0 && meta->owner_tid == my_tid) {
        ROUTE_MARK(17); // free_same_thread
        // Fast path: Direct freelist push (same-thread)
        // (intentionally disabled debug print — `0 &&` keeps it dead)
        if (0 && debug_guard && g_debug_free_count < 1) {
            fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n", meta->owner_tid, my_tid);
            g_debug_free_count++;
        }
        // used==0 means nothing is outstanding in this slab: freeing now is invalid (diag 0x00)
        if (__builtin_expect(meta->used == 0, 0)) {
            uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
            if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            return;
        }
        tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
        // Guard may veto the local push (e.g. remote activity detected);
        // in that case route through the remote box instead.
#include "box/free_remote_box.h"
        if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
            int transitioned = tiny_free_remote_box(ss, slab_idx, meta, ptr, my_tid);
            if (transitioned) {
                extern unsigned long long g_remote_free_transitions[];
                g_remote_free_transitions[ss->size_class]++;
                // Free-side route: remote transition observed
                do {
                    static int g_route_free = -1;
                    if (__builtin_expect(g_route_free == -1, 0)) {
                        const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
                        g_route_free = (e && *e && *e != '0') ? 1 : 0;
                    }
                    if (g_route_free) route_free_commit((int)ss->size_class, (1ull<<18), 0xE2);
                } while (0);
            }
            return;
        }
        // Optional: MidTC (TLS tcache for 128..1024B) — allow bypass via env HAKMEM_TINY_FREE_TO_SS=1
        do {
            static int g_free_to_ss = -1;
            if (__builtin_expect(g_free_to_ss == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_FREE_TO_SS");
                g_free_to_ss = (e && *e && *e != '0') ? 1 : 0; // default OFF
            }
            if (!g_free_to_ss) {
                int cls = (int)ss->size_class;
                if (midtc_enabled() && cls >= 4) {
                    if (midtc_push(cls, ptr)) {
                        // Treat as returned to TLS cache (not SS freelist)
                        meta->used--;
                        ss_active_dec_one(ss);
                        return;
                    }
                }
            }
        } while (0);
#include "box/free_local_box.h"
        // Perform freelist push (+first-free publish if applicable)
        void* prev_before = meta->freelist;
        tiny_free_local_box(ss, slab_idx, meta, ptr, my_tid);
        if (prev_before == NULL) {
            // Slab went from "no free blocks" to "has a free block":
            // bump transition counter and publish to the partial mailbox.
            ROUTE_MARK(19); // first_free_transition
            extern unsigned long long g_first_free_transitions[];
            g_first_free_transitions[ss->size_class]++;
            ROUTE_MARK(20); // mailbox_publish
            // Free-side route commit (one-shot)
            do {
                static int g_route_free = -1;
                if (__builtin_expect(g_route_free == -1, 0)) {
                    const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
                    g_route_free = (e && *e && *e != '0') ? 1 : 0;
                }
                int cls = (int)ss->size_class;
                if (g_route_free) route_free_commit(cls, (1ull<<19) | (1ull<<20), 0xE1);
            } while (0);
        }
        if (__builtin_expect(debug_guard, 0)) {
            fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
                    ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, prev_before, meta->used);
        }
        // Empty-slab detection handled elsewhere (kept off the hot path)
    } else {
        ROUTE_MARK(18); // free_remote_transition
        // NOTE(review): this condition requires owner_tid == my_tid AND
        // owner_tid == 0, i.e. it only fires when my_tid itself is 0 —
        // likely intended to be just `meta->owner_tid == 0` (owner-zero
        // reaching the remote path, possible under g_tiny_force_remote).
        // Diagnostics only; confirm intent before changing.
        if (__builtin_expect(meta->owner_tid == my_tid && meta->owner_tid == 0, 0)) {
            uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
            if (debug_guard) {
                fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
                        ss->size_class, slab_idx, ptr, my_tid, (unsigned)meta->used);
            }
        }
        tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
        // Slow path: Remote free (cross-thread)
        // (intentionally disabled debug print — `0 &&` keeps it dead)
        if (0 && debug_guard && g_debug_free_count < 5) {
            fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
                    meta->owner_tid, my_tid, slab_idx);
            g_debug_free_count++;
        }
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            // Best-effort duplicate scan in remote stack (up to 64 nodes).
            // Each node is validated before being dereferenced: range check,
            // then either the side-table sentinel (g_remote_side_enable) or
            // pointer alignment (legacy in-node next links).
            uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
            uintptr_t base = ss_base;
            int scanned = 0;
            int dup = 0;
            uintptr_t cur = head;
            while (cur && scanned < 64) {
                if ((cur < base) || (cur >= base + ss_size)) {
                    // Node outside this SuperSlab: corruption (diag 0xA200)
                    uintptr_t aux = tiny_remote_pack_diag(0xA200u, base, ss_size, cur);
                    tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
                    if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                    break;
                }
                if ((void*)cur == ptr) { dup = 1; break; }
                if (__builtin_expect(g_remote_side_enable, 0)) {
                    if (!tiny_remote_sentinel_ok((void*)cur)) {
                        // Node's sentinel word destroyed: report corruption (diag 0xA202)
                        uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, cur);
                        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
                        uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
                        tiny_remote_report_corruption("scan", (void*)cur, observed);
                        fprintf(stderr, "[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
                                ss->size_class, slab_idx, (void*)cur, (void*)head, ptr, scanned, observed,
                                meta->owner_tid, (unsigned)meta->used, meta->freelist,
                                (void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
                        if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                        break;
                    }
                    // next link lives in the side table, not in the node itself
                    cur = tiny_remote_side_get(ss, slab_idx, (void*)cur);
                } else {
                    if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
                        // Misaligned node pointer: corruption (diag 0xA201)
                        uintptr_t aux = tiny_remote_pack_diag(0xA201u, base, ss_size, cur);
                        tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
                        if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                        break;
                    }
                    // legacy: next link stored in the node's first word
                    cur = (uintptr_t)(*(void**)(void*)cur);
                }
                scanned++;
            }
            if (dup) {
                // Double free onto the remote stack: diag 0xD1
                uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                return;
            }
        }
        // used==0 on the remote path is likewise an invalid free (diag 0x01)
        if (__builtin_expect(meta->used == 0, 0)) {
            uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
            tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
            if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
            return;
        }
        static int g_ss_adopt_en2 = -1; // env cached
        if (g_ss_adopt_en2 == -1) {
            char* e = getenv("HAKMEM_TINY_SS_ADOPT");
            // Default: use the Remote Queue (1); env overrides only when set.
            g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0);
            if (__builtin_expect(debug_guard, 0)) {
                fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)");
            }
        }
        // A/B gate: disable remote MPSC (use legacy freelist push)
        do {
            static int g_disable_remote = -1;
            if (__builtin_expect(g_disable_remote == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
                g_disable_remote = (e && *e && *e != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_disable_remote, 0)) { g_ss_adopt_en2 = 0; }
        } while (0);
        if (g_ss_adopt_en2) {
            // Use remote queue. Peek at the node's first word for
            // double-free fingerprints before pushing.
            uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED);
            if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
                    ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, (unsigned)meta->used,
                    atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
                    (void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed), head_word);
            int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr);
            if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
                // A node still carrying the remote sentinel, or present in the
                // side table, is already queued → duplicate.
                dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr);
            }
            if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
                // Sentinel present but guard scan missed it — log for the watcher
                tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
            }
            if (dup_remote) {
                // Prevented a remote double free: diag 0xA214
                uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
                tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
                tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                return;
            }
            if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
                // TLS guard scribble detected on the node's first word → same-pointer double free across routes
                uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
                tiny_remote_watch_mark(ptr, "pre_push", my_tid);
                tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
                tiny_remote_report_corruption("pre_push", ptr, head_word);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                return;
            }
            if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
                tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
            }
            int was_empty = ss_remote_push(ss, slab_idx, ptr); // ss_active_dec_one() called inside
            meta->used--;
            // ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
            if (was_empty) {
                // First remote node for this slab: count the transition and
                // publish the SuperSlab to the partial list for adoption.
                extern unsigned long long g_remote_free_transitions[];
                g_remote_free_transitions[ss->size_class]++;
                ss_partial_publish((int)ss->size_class, ss);
            }
        } else {
            // Fallback: direct freelist push (legacy)
            // NOTE(review): this writes meta->freelist from a non-owner
            // thread without synchronization — presumably only safe under
            // the A/B test configuration that disables the remote queue;
            // confirm before relying on it in production.
            if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
            void* prev = meta->freelist;
            *(void**)ptr = prev;
            meta->freelist = ptr;
            tiny_failfast_log("free_local_legacy", ss->size_class, ss, meta, ptr, prev);
            do {
                static int g_mask_en = -1;
                if (__builtin_expect(g_mask_en == -1, 0)) {
                    const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
                    g_mask_en = (e && *e && *e != '0') ? 1 : 0;
                }
                if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
                    // Record "slab has a free block" in the per-SS bitmask
                    uint32_t bit = (1u << slab_idx);
                    atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
                }
            } while (0);
            meta->used--;
            ss_active_dec_one(ss);
            if (prev == NULL) {
                ss_partial_publish((int)ss->size_class, ss);
            }
        }
        // Empty-slab detection handled elsewhere (kept off the hot path)
    }
}