#include "tiny_remote.h"
#include "slab_handle.h"
#include "tiny_refill.h"
#include "tiny_tls_guard.h"
#include "mid_tcache.h"

extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];

#if !HAKMEM_BUILD_RELEASE
#include "hakmem_tiny_magazine.h"
#endif

extern int g_tiny_force_remote;

// ENV: HAKMEM_TINY_DRAIN_TO_SLL (0=off): at the adopt/bind boundary, splice up to N freelist nodes into the TLS SLL
static inline int tiny_drain_to_sll_budget(void) {
    static int v = -1;
    if (__builtin_expect(v == -1, 0)) {
        const char* s = getenv("HAKMEM_TINY_DRAIN_TO_SLL");
        int parsed = (s && *s) ? atoi(s) : 0;
        if (parsed < 0) parsed = 0;
        if (parsed > 256) parsed = 256;
        v = parsed;
    }
    return v;
}

static inline void tiny_drain_freelist_to_sll_once(SuperSlab* ss, int slab_idx, int class_idx) {
    int budget = tiny_drain_to_sll_budget();
    if (__builtin_expect(budget <= 0, 1)) return;
    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
    if (slab_idx < 0) return;
    TinySlabMeta* m = &ss->slabs[slab_idx];
    int moved = 0;
    while (m->freelist && moved < budget) {
        void* p = m->freelist;
        m->freelist = *(void**)p;
        *(void**)p = g_tls_sll_head[class_idx];
        g_tls_sll_head[class_idx] = p;
        g_tls_sll_count[class_idx]++;
        moved++;
    }
}

static inline int tiny_remote_queue_contains_guard(SuperSlab* ss, int slab_idx, void* target) {
    if (!ss || slab_idx < 0) return 0;
    uintptr_t cur = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
    int limit = 8192;
    while (cur && limit-- > 0) {
        if ((void*)cur == target) { return 1; }
        uintptr_t next;
        if (__builtin_expect(g_remote_side_enable, 0)) {
            next = tiny_remote_side_get(ss, slab_idx, (void*)cur);
        } else {
            next = atomic_load_explicit((_Atomic uintptr_t*)cur, memory_order_relaxed);
        }
        cur = next;
    }
    if (limit <= 0) {
        return 1; // fail-safe: treat unbounded traversal as duplicate
    }
    return 0;
}

// Phase 6.12.1: Free with pre-calculated slab (Option C - avoids duplicate lookup)
void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
    // Phase 7.6: slab == NULL means SuperSlab mode (Magazine integration)
    if (!slab) {
        // SuperSlab path: Get class_idx from SuperSlab
        SuperSlab* ss = hak_super_lookup(ptr);
        if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
        int class_idx = ss->size_class;
        size_t ss_size = (size_t)1ULL << ss->lg_size;
        uintptr_t ss_base = (uintptr_t)ss;
        if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
            tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFFu, ss, (uintptr_t)ss->size_class);
            return;
        }
        // Optional: cross-lookup TinySlab owner and detect class mismatch early
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            TinySlab* ts = hak_tiny_owner_slab(ptr);
            if (ts) {
                int ts_cls = ts->class_idx;
                if (ts_cls >= 0 && ts_cls < TINY_NUM_CLASSES && ts_cls != class_idx) {
                    uint32_t code = 0xAA00u | ((uint32_t)ts_cls & 0xFFu);
                    uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
                    tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
                    if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
                }
            }
        }
        tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
        // Detect cross-thread: cross-thread free MUST go via superslab path
        int slab_idx = slab_index_for(ss, ptr);
        int ss_cap = ss_slabs_capacity(ss);
        if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
            tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
            return;
        }
        TinySlabMeta* meta = &ss->slabs[slab_idx];
        if
(__builtin_expect(g_tiny_safe_free, 0)) { size_t blk = g_tiny_class_sizes[class_idx]; uint8_t* base = tiny_slab_base_for(ss, slab_idx); uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base; int cap_ok = (meta->capacity > 0) ? 1 : 0; int align_ok = (delta % blk) == 0; int range_ok = cap_ok && (delta / blk) < meta->capacity; if (!align_ok || !range_ok) { uint32_t code = 0xA104u; if (align_ok) code |= 0x2u; if (range_ok) code |= 0x1u; uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } } uint32_t self_tid = tiny_self_u32(); if (__builtin_expect(meta->owner_tid != self_tid, 0)) { // route directly to superslab (remote queue / freelist) uintptr_t ptr_val = (uintptr_t)ptr; uintptr_t ss_base = (uintptr_t)ss; size_t ss_size = (size_t)1ULL << ss->lg_size; if (__builtin_expect(ptr_val < ss_base || ptr_val >= ss_base + ss_size, 0)) { tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFDu, ss, ptr_val); return; } tiny_debug_ring_record(TINY_RING_EVENT_FREE_REMOTE, (uint16_t)class_idx, ss, (uintptr_t)ptr); hak_tiny_free_superslab(ptr, ss); HAK_STAT_FREE(class_idx); return; } if (__builtin_expect(g_debug_fast0, 0)) { tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx); void* prev = meta->freelist; *(void**)ptr = prev; meta->freelist = ptr; meta->used--; ss_active_dec_one(ss); if (prev == NULL) { ss_partial_publish((int)ss->size_class, ss); } tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx); HAK_STAT_FREE(class_idx); return; } if (g_fast_enable && g_fast_cap[class_idx] != 0) { if (tiny_fast_push(class_idx, ptr)) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx); HAK_STAT_FREE(class_idx); return; } } if (g_tls_list_enable) { TinyTLSList* tls = &g_tls_lists[class_idx]; uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed); if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) { tiny_tls_refresh_params(class_idx, tls); } // TinyHotMag front push(8/16/32B, A/B) if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) { if (hotmag_push(class_idx, ptr)) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1); HAK_STAT_FREE(class_idx); return; } } if (tls->count < tls->cap) { tiny_tls_list_guard_push(class_idx, tls, ptr); tls_list_push(tls, ptr); tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0); HAK_STAT_FREE(class_idx); return; } seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed); if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) { tiny_tls_refresh_params(class_idx, tls); } tiny_tls_list_guard_push(class_idx, tls, ptr); tls_list_push(tls, ptr); if (tls_list_should_spill(tls)) { tls_list_spill_excess(class_idx, tls); } tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 2); HAK_STAT_FREE(class_idx); return; } #if !HAKMEM_BUILD_RELEASE // SuperSlab uses Magazine for TLS caching (same as TinySlab) tiny_small_mags_init_once(); if (class_idx > 3) tiny_mag_init_if_needed(class_idx); TinyTLSMag* mag = &g_tls_mags[class_idx]; int cap = mag->cap; // 32/64B: SLL優先(mag優先は無効化) // Prefer TinyQuickSlot (compile-out if HAKMEM_TINY_NO_QUICK) #if !defined(HAKMEM_TINY_NO_QUICK) if (g_quick_enable && class_idx <= 4) { TinyQuickSlot* 
qs = &g_tls_quick[class_idx]; if (__builtin_expect(qs->top < QUICK_CAP, 1)) { qs->items[qs->top++] = ptr; HAK_STAT_FREE(class_idx); return; } } #endif // Fast path: TLS SLL push for hottest classes if (!g_tls_list_enable && g_tls_sll_enable && g_tls_sll_count[class_idx] < sll_cap_for_class(class_idx, (uint32_t)cap)) { *(void**)ptr = g_tls_sll_head[class_idx]; g_tls_sll_head[class_idx] = ptr; g_tls_sll_count[class_idx]++; // Active → Inactive: count down immediately (TLS保管中は"使用中"ではない) ss_active_dec_one(ss); HAK_TP1(sll_push, class_idx); tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 3); HAK_STAT_FREE(class_idx); return; } // Next: Magazine push(必要ならmag→SLLへバルク転送で空きを作る) // Hysteresis: allow slight overfill before deciding to spill under lock if (mag->top >= cap && g_spill_hyst > 0) { (void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2); } if (mag->top < cap + g_spill_hyst) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = NULL; // SuperSlab owner not a TinySlab; leave NULL #endif mag->top++; #if HAKMEM_DEBUG_COUNTERS g_magazine_push_count++; // Phase 7.6: Track pushes #endif // Active → Inactive: decrement now(アプリ解放時に非アクティブ扱い) ss_active_dec_one(ss); HAK_TP1(mag_push, class_idx); tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 2); HAK_STAT_FREE(class_idx); return; } // Background spill: queue to BG thread instead of locking (when enabled) if (g_bg_spill_enable) { uint32_t qlen = atomic_load_explicit(&g_bg_spill_len[class_idx], memory_order_relaxed); if ((int)qlen < g_bg_spill_target) { // Build a small chain: include current ptr and pop from mag up to limit int limit = g_bg_spill_max_batch; if (limit > cap/2) limit = cap/2; if (limit > 32) limit = 32; // keep free-path bounded void* head = ptr; *(void**)head = NULL; void* tail = head; // current tail int taken = 1; while (taken < limit && mag->top > 0) { void* p2 = mag->items[--mag->top].ptr; *(void**)p2 = head; head = p2; taken++; } // Push chain to spill queue (single CAS) bg_spill_push_chain(class_idx, head, tail, taken); tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 3); HAK_STAT_FREE(class_idx); return; } } // Spill half (SuperSlab version - simpler than TinySlab) pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; hkm_prof_begin(NULL); pthread_mutex_lock(lock); // Batch spill: reduce lock frequency and work per call int spill = cap / 2; int over = mag->top - (cap + g_spill_hyst); if (over > 0 && over < spill) spill = over; for (int i = 0; i < spill && mag->top > 0; i++) { TinyMagItem it = mag->items[--mag->top]; // Phase 7.6: SuperSlab spill - return to freelist SuperSlab* owner_ss = hak_super_lookup(it.ptr); if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) { // Direct freelist push (same as old hak_tiny_free_superslab) int slab_idx = slab_index_for(owner_ss, it.ptr); TinySlabMeta* meta = &owner_ss->slabs[slab_idx]; *(void**)it.ptr = meta->freelist; meta->freelist = it.ptr; meta->used--; // Decrement SuperSlab active counter (spill returns blocks to SS) ss_active_dec_one(owner_ss); // Phase 8.4: Empty SuperSlab detection (will use meta->used scan) // TODO: Implement scan-based empty detection // Empty SuperSlab detection/munmapは別途フラッシュAPIで実施(ホットパスから除外) } } pthread_mutex_unlock(lock); hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss); // Adaptive increase of cap after spill int max_cap = tiny_cap_max_for_class(class_idx); if (mag->cap < max_cap) { int new_cap = mag->cap + (mag->cap / 2); if 
(new_cap > max_cap) new_cap = max_cap; if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP; mag->cap = new_cap; } // Finally, try FastCache push first (≤128B) — compile-out if HAKMEM_TINY_NO_FRONT_CACHE #if !defined(HAKMEM_TINY_NO_FRONT_CACHE) if (g_fastcache_enable && class_idx <= 4) { if (fastcache_push(class_idx, ptr)) { HAK_TP1(front_push, class_idx); HAK_STAT_FREE(class_idx); return; } } #endif // Then TLS SLL if room, else magazine if (g_tls_sll_enable && g_tls_sll_count[class_idx] < sll_cap_for_class(class_idx, (uint32_t)mag->cap)) { *(void**)ptr = g_tls_sll_head[class_idx]; g_tls_sll_head[class_idx] = ptr; g_tls_sll_count[class_idx]++; } else { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; } #if HAKMEM_DEBUG_COUNTERS g_magazine_push_count++; // Phase 7.6: Track pushes #endif HAK_STAT_FREE(class_idx); return; #endif // HAKMEM_BUILD_RELEASE } // Phase 7.6: TinySlab path (original) //g_tiny_free_with_slab_count++; // Phase 7.6: Track calls - DISABLED due to segfault // Same-thread → TLS magazine; remote-thread → MPSC stack if (pthread_equal(slab->owner_tid, tiny_self_pt())) { int class_idx = slab->class_idx; if (g_tls_list_enable) { TinyTLSList* tls = &g_tls_lists[class_idx]; uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed); if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) { tiny_tls_refresh_params(class_idx, tls); } // TinyHotMag front push(8/16/32B, A/B) if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) { if (hotmag_push(class_idx, ptr)) { HAK_STAT_FREE(class_idx); return; } } if (tls->count < tls->cap) { tiny_tls_list_guard_push(class_idx, tls, ptr); tls_list_push(tls, ptr); HAK_STAT_FREE(class_idx); return; } seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed); if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) { tiny_tls_refresh_params(class_idx, tls); } tiny_tls_list_guard_push(class_idx, tls, ptr); tls_list_push(tls, ptr); if (tls_list_should_spill(tls)) { tls_list_spill_excess(class_idx, tls); } HAK_STAT_FREE(class_idx); return; } tiny_mag_init_if_needed(class_idx); TinyTLSMag* mag = &g_tls_mags[class_idx]; int cap = mag->cap; // 32/64B: SLL優先(mag優先は無効化) // Fast path: FastCache push (preferred for ≤128B), then TLS SLL if (g_fastcache_enable && class_idx <= 4) { if (fastcache_push(class_idx, ptr)) { HAK_STAT_FREE(class_idx); return; } } // Fast path: TLS SLL push (preferred) if (!g_tls_list_enable && g_tls_sll_enable && class_idx <= 5) { uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap); if (g_tls_sll_count[class_idx] < sll_cap) { *(void**)ptr = g_tls_sll_head[class_idx]; g_tls_sll_head[class_idx] = ptr; g_tls_sll_count[class_idx]++; HAK_STAT_FREE(class_idx); return; } } // Next: if magazine has room, push immediately and return(満杯ならmag→SLLへバルク) if (mag->top >= cap) { (void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2); } // Remote-drain can be handled opportunistically on future calls. if (mag->top < cap) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; #if HAKMEM_DEBUG_COUNTERS g_magazine_push_count++; // Phase 7.6: Track pushes #endif // Note: SuperSlab uses separate path (slab == NULL branch above) HAK_STAT_FREE(class_idx); // Phase 3 return; } // Magazine full: before spilling, opportunistically drain remotes once under lock. 
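// Spill sizing note (worked example; the cap value is illustrative, not a default
// taken from this file): with cap == 64, the spill loop below pops spill == cap/2
// == 32 items per lock acquisition, and high_water == (cap*3)/4 == 48, so the
// mini-magazine fast return is only attempted while mag->top is below 48; at or
// above that, spilled items go straight to the owner slab's bitmap path.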
if (atomic_load_explicit(&slab->remote_count, memory_order_relaxed) >= (unsigned)g_remote_drain_thresh_per_class[class_idx] || atomic_load_explicit(&slab->remote_head, memory_order_acquire)) { pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; pthread_mutex_lock(lock); HAK_TP1(remote_drain, class_idx); tiny_remote_drain_locked(slab); pthread_mutex_unlock(lock); } // Spill half under class lock pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m; pthread_mutex_lock(lock); int spill = cap / 2; // Phase 4.2: High-water threshold for gating Phase 4 logic int high_water = (cap * 3) / 4; // 75% of capacity for (int i = 0; i < spill && mag->top > 0; i++) { TinyMagItem it = mag->items[--mag->top]; // Phase 7.6: Check for SuperSlab first (mixed Magazine support) SuperSlab* ss_owner = hak_super_lookup(it.ptr); if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) { // SuperSlab spill - return to freelist int slab_idx = slab_index_for(ss_owner, it.ptr); TinySlabMeta* meta = &ss_owner->slabs[slab_idx]; *(void**)it.ptr = meta->freelist; meta->freelist = it.ptr; meta->used--; // 空SuperSlab処理はフラッシュ/バックグラウンドで対応(ホットパス除外) HAK_STAT_FREE(class_idx); continue; // Skip TinySlab processing } TinySlab* owner = #if HAKMEM_TINY_MAG_OWNER it.owner; #else NULL; #endif if (!owner) { owner = tls_active_owner_for_ptr(class_idx, it.ptr); } if (!owner) { owner = hak_tiny_owner_slab(it.ptr); } if (!owner) continue; // Phase 4.2: Adaptive gating - skip Phase 4 when TLS Magazine is high-water // Rationale: When mag->top >= 75%, next alloc will come from TLS anyway // so pushing to mini-mag is wasted work int is_high_water = (mag->top >= high_water); if (!is_high_water) { // Low-water: Phase 4.1 logic (try mini-magazine first) uint8_t cidx = owner->class_idx; // Option A: 1回だけ読む TinySlab* tls_a = g_tls_active_slab_a[cidx]; TinySlab* tls_b = g_tls_active_slab_b[cidx]; // Option B: Branch prediction hint (spill → TLS-active への戻りが likely) if (__builtin_expect((owner == tls_a || owner == tls_b) && !mini_mag_is_full(&owner->mini_mag), 1)) { // Fast path: mini-magazineに戻す(bitmap触らない) mini_mag_push(&owner->mini_mag, it.ptr); HAK_TP1(spill_tiny, cidx); HAK_STAT_FREE(cidx); continue; // bitmap操作スキップ } } // High-water or Phase 4.1 mini-mag full: fall through to bitmap // Slow path: bitmap直接書き込み(既存ロジック) size_t bs = g_tiny_class_sizes[owner->class_idx]; int idx = ((uintptr_t)it.ptr - (uintptr_t)owner->base) / bs; if (hak_tiny_is_used(owner, idx)) { hak_tiny_set_free(owner, idx); int was_full = (owner->free_count == 0); owner->free_count++; if (was_full) move_to_free_list(owner->class_idx, owner); if (owner->free_count == owner->total_count) { // If this slab is TLS-active for this thread, clear the pointer before releasing if (g_tls_active_slab_a[owner->class_idx] == owner) g_tls_active_slab_a[owner->class_idx] = NULL; if (g_tls_active_slab_b[owner->class_idx] == owner) g_tls_active_slab_b[owner->class_idx] = NULL; TinySlab** headp = &g_tiny_pool.free_slabs[owner->class_idx]; TinySlab* prev = NULL; for (TinySlab* s = *headp; s; prev = s, s = s->next) { if (s == owner) { if (prev) prev->next = s->next; else *headp = s->next; break; } } release_slab(owner); } HAK_TP1(spill_tiny, owner->class_idx); HAK_STAT_FREE(owner->class_idx); } } pthread_mutex_unlock(lock); hkm_prof_end(ss, HKP_TINY_SPILL, &tss); // Adaptive increase of cap after spill int max_cap = tiny_cap_max_for_class(class_idx); if (mag->cap < max_cap) { int new_cap = mag->cap + (mag->cap / 2); if (new_cap > max_cap) new_cap = max_cap; if (new_cap > TINY_TLS_MAG_CAP) 
new_cap = TINY_TLS_MAG_CAP; mag->cap = new_cap; } // Finally: prefer TinyQuickSlot → SLL → UltraFront → HotMag → Magazine(順序で局所性を確保) #if !HAKMEM_BUILD_RELEASE && !defined(HAKMEM_TINY_NO_QUICK) if (g_quick_enable && class_idx <= 4) { TinyQuickSlot* qs = &g_tls_quick[class_idx]; if (__builtin_expect(qs->top < QUICK_CAP, 1)) { qs->items[qs->top++] = ptr; } else if (g_tls_sll_enable) { uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap); if (g_tls_sll_count[class_idx] < sll_cap2) { *(void**)ptr = g_tls_sll_head[class_idx]; g_tls_sll_head[class_idx] = ptr; g_tls_sll_count[class_idx]++; } else if (!tiny_optional_push(class_idx, ptr)) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; } } else { if (!tiny_optional_push(class_idx, ptr)) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; } } } else #endif { if (g_tls_sll_enable && class_idx <= 5) { uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap); if (g_tls_sll_count[class_idx] < sll_cap2) { *(void**)ptr = g_tls_sll_head[class_idx]; g_tls_sll_head[class_idx] = ptr; g_tls_sll_count[class_idx]++; } else if (!tiny_optional_push(class_idx, ptr)) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; } } else { if (!tiny_optional_push(class_idx, ptr)) { mag->items[mag->top].ptr = ptr; #if HAKMEM_TINY_MAG_OWNER mag->items[mag->top].owner = slab; #endif mag->top++; } } } #if HAKMEM_DEBUG_COUNTERS g_magazine_push_count++; // Phase 7.6: Track pushes #endif // Note: SuperSlab uses separate path (slab == NULL branch above) HAK_STAT_FREE(class_idx); // Phase 3 return; } else { tiny_remote_push(slab, ptr); } } // ============================================================================ // Phase 6.23: SuperSlab Allocation Helpers // ============================================================================ // Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation) static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) { TinySlabMeta* meta = &ss->slabs[slab_idx]; // Ensure remote queue is drained before handing blocks back to TLS if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0) { uint32_t self_tid = tiny_self_u32(); SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid); if (slab_is_valid(&h)) { slab_drain_remote_full(&h); int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0; if (__builtin_expect(pending, 0)) { if (__builtin_expect(g_debug_remote_guard, 0)) { uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed); tiny_remote_watch_note("alloc_pending_remote", ss, slab_idx, (void*)head, 0xA243u, self_tid, 0); } slab_release(&h); return NULL; } slab_release(&h); } else { if (__builtin_expect(g_debug_remote_guard, 0)) { tiny_remote_watch_note("alloc_acquire_fail", ss, slab_idx, meta, 0xA244u, self_tid, 0); } return NULL; } } if (__builtin_expect(g_debug_remote_guard, 0)) { uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire); if (head_pending != 0) { tiny_remote_watch_note("alloc_remote_pending", ss, slab_idx, (void*)head_pending, 0xA247u, tiny_self_u32(), 1); return NULL; } } // Phase 6.24: Linear allocation mode (freelist == NULL) // This avoids the 4000-8000 cycle cost of building freelist on init if (meta->freelist == NULL && meta->used < 
meta->capacity) { // Linear allocation: sequential memory access (cache-friendly!) size_t block_size = g_tiny_class_sizes[ss->size_class]; void* slab_start = slab_data_start(ss, slab_idx); // First slab: skip SuperSlab header if (slab_idx == 0) { slab_start = (char*)slab_start + 1024; } void* block = (char*)slab_start + (meta->used * block_size); meta->used++; tiny_remote_track_on_alloc(ss, slab_idx, block, "linear_alloc", 0); tiny_remote_assert_not_remote(ss, slab_idx, block, "linear_alloc_ret", 0); return block; // Fast path: O(1) pointer arithmetic } // Freelist mode (after first free()) if (meta->freelist) { void* block = meta->freelist; meta->freelist = *(void**)block; // Pop from freelist meta->used++; tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0); tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0); return block; } return NULL; // Slab is full } // Phase 6.24 & 7.6: Refill TLS SuperSlab (with unified TLS cache + deferred allocation) static SuperSlab* superslab_refill(int class_idx) { #if HAKMEM_DEBUG_COUNTERS g_superslab_refill_calls_dbg[class_idx]++; #endif TinyTLSSlab* tls = &g_tls_slabs[class_idx]; static int g_ss_adopt_en = -1; // env: HAKMEM_TINY_SS_ADOPT=1; default auto-on if remote seen if (g_ss_adopt_en == -1) { char* e = getenv("HAKMEM_TINY_SS_ADOPT"); if (e) { g_ss_adopt_en = (*e != '0') ? 1 : 0; } else { extern _Atomic int g_ss_remote_seen; g_ss_adopt_en = (atomic_load_explicit(&g_ss_remote_seen, memory_order_relaxed) != 0) ? 1 : 0; } } extern int g_adopt_cool_period; extern __thread int g_tls_adopt_cd[]; if (g_adopt_cool_period == -1) { char* cd = getenv("HAKMEM_TINY_SS_ADOPT_COOLDOWN"); int v = (cd ? atoi(cd) : 0); if (v < 0) v = 0; if (v > 1024) v = 1024; g_adopt_cool_period = v; } static int g_superslab_refill_debug_once = 0; SuperSlab* prev_ss = tls->ss; TinySlabMeta* prev_meta = tls->meta; uint8_t prev_slab_idx = tls->slab_idx; uint8_t prev_active = prev_ss ? prev_ss->active_slabs : 0; uint32_t prev_bitmap = prev_ss ? prev_ss->slab_bitmap : 0; uint32_t prev_meta_used = prev_meta ? prev_meta->used : 0; uint32_t prev_meta_cap = prev_meta ? 
prev_meta->capacity : 0; int free_idx_attempted = -2; // -2 = not evaluated, -1 = none, >=0 = chosen int reused_slabs = 0; // Optional: Mid-size simple refill to avoid multi-layer scans (class>=4) do { static int g_mid_simple_warn = 0; if (class_idx >= 4 && tiny_mid_refill_simple_enabled()) { // If current TLS has a SuperSlab, prefer taking a virgin slab directly if (tls->ss) { int tls_cap = ss_slabs_capacity(tls->ss); if (tls->ss->active_slabs < tls_cap) { int free_idx = superslab_find_free_slab(tls->ss); if (free_idx >= 0) { uint32_t my_tid = tiny_self_u32(); superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid); tiny_tls_bind_slab(tls, tls->ss, free_idx); return tls->ss; } } } // Otherwise allocate a fresh SuperSlab and bind first slab SuperSlab* ssn = superslab_allocate((uint8_t)class_idx); if (!ssn) { if (!g_superslab_refill_debug_once && g_mid_simple_warn < 2) { g_mid_simple_warn++; int err = errno; fprintf(stderr, "[DEBUG] mid_simple_refill OOM class=%d errno=%d\n", class_idx, err); } return NULL; } uint32_t my_tid = tiny_self_u32(); superslab_init_slab(ssn, 0, g_tiny_class_sizes[class_idx], my_tid); SuperSlab* old = tls->ss; tiny_tls_bind_slab(tls, ssn, 0); superslab_ref_inc(ssn); if (old && old != ssn) { superslab_ref_dec(old); } return ssn; } } while (0); // First, try to adopt a published partial SuperSlab for this class if (g_ss_adopt_en) { if (g_adopt_cool_period > 0) { if (g_tls_adopt_cd[class_idx] > 0) { g_tls_adopt_cd[class_idx]--; } else { // eligible to adopt } } if (g_adopt_cool_period == 0 || g_tls_adopt_cd[class_idx] == 0) { SuperSlab* adopt = ss_partial_adopt(class_idx); if (adopt && adopt->magic == SUPERSLAB_MAGIC) { int best = -1; uint32_t best_score = 0; int adopt_cap = ss_slabs_capacity(adopt); for (int s = 0; s < adopt_cap; s++) { TinySlabMeta* m = &adopt->slabs[s]; uint32_t rc = atomic_load_explicit(&adopt->remote_counts[s], memory_order_relaxed); int has_remote = (atomic_load_explicit(&adopt->remote_heads[s], memory_order_acquire) != 0); uint32_t score = rc + (m->freelist ? (1u<<30) : 0u) + (has_remote ? 1u : 0u); if (score > best_score) { best_score = score; best = s; } } if (best >= 0) { // Box: Try to acquire ownership atomically uint32_t self = tiny_self_u32(); SlabHandle h = slab_try_acquire(adopt, best, self); if (slab_is_valid(&h)) { slab_drain_remote_full(&h); if (slab_remote_pending(&h)) { if (__builtin_expect(g_debug_remote_guard, 0)) { uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed); tiny_remote_watch_note("adopt_remote_pending", h.ss, h.slab_idx, (void*)head, 0xA255u, self, 0); } // Remote still pending; give up adopt path and fall through to normal refill. 
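// Note: slab_drain_remote_full() and the pending check above are not atomic as a
// pair, so a remote free may have been pushed in between. The Box 4 boundary
// requires remote_head == 0 before binding, so the handle is released here and
// the normal refill path (virgin slab / registry adopt / fresh SuperSlab) runs
// instead; slab_is_safe_to_bind() below re-checks the same invariant in a
// TOCTOU-safe way before any bind.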
slab_release(&h); } // Box 4 Boundary: bind は remote_head==0 を保証する必要がある // slab_is_safe_to_bind() で TOCTOU-safe にチェック if (slab_is_safe_to_bind(&h)) { // Optional: move a few nodes to Front SLL to boost next hits tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx); // 安全に bind 可能(freelist 存在 && remote_head==0 保証) tiny_tls_bind_slab(tls, h.ss, h.slab_idx); if (g_adopt_cool_period > 0) { g_tls_adopt_cd[class_idx] = g_adopt_cool_period; } return h.ss; } // Safe to bind 失敗(freelist なしor remote pending)→ adopt 中止 slab_release(&h); } // Failed to acquire or no freelist - continue searching } // If no freelist found, ignore and continue (optional: republish) } } } // Phase 7.6 Step 4: Check existing SuperSlab with priority order if (tls->ss) { // Priority 1: Reuse slabs with freelist (already freed blocks) int tls_cap = ss_slabs_capacity(tls->ss); uint32_t nonempty_mask = 0; do { static int g_mask_en = -1; if (__builtin_expect(g_mask_en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_FREELIST_MASK"); g_mask_en = (e && *e && *e != '0') ? 1 : 0; } if (__builtin_expect(g_mask_en, 0)) { nonempty_mask = atomic_load_explicit(&tls->ss->freelist_mask, memory_order_acquire); break; } for (int i = 0; i < tls_cap; i++) { if (tls->ss->slabs[i].freelist) nonempty_mask |= (1u << i); } } while (0); // O(1) lookup: scan mask with ctz (1 instruction!) while (__builtin_expect(nonempty_mask != 0, 1)) { int i = __builtin_ctz(nonempty_mask); // Find first non-empty slab (O(1)) nonempty_mask &= ~(1u << i); // Clear bit for next iteration // FIX #1 DELETED (Race condition fix): // Previous drain without ownership caused concurrent freelist corruption. // Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h). // Remote frees will be drained when the slab is adopted (see tiny_refill.h paths). uint32_t self_tid = tiny_self_u32(); SlabHandle h = slab_try_acquire(tls->ss, i, self_tid); if (slab_is_valid(&h)) { if (slab_remote_pending(&h)) { slab_drain_remote_full(&h); if (__builtin_expect(g_debug_remote_guard, 0)) { uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed); tiny_remote_watch_note("reuse_remote_pending", h.ss, h.slab_idx, (void*)head, 0xA254u, self_tid, 0); } slab_release(&h); continue; } // Box 4 Boundary: bind は remote_head==0 を保証する必要がある if (slab_is_safe_to_bind(&h)) { // Optional: move a few nodes to Front SLL to boost next hits tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx); reused_slabs = 1; tiny_tls_bind_slab(tls, h.ss, h.slab_idx); return h.ss; } // Safe to bind 失敗 → 次の slab を試す slab_release(&h); } } // Priority 2: Use unused slabs (virgin slabs) if (tls->ss->active_slabs < tls_cap) { // Find next free slab int free_idx = superslab_find_free_slab(tls->ss); free_idx_attempted = free_idx; if (free_idx >= 0) { // Initialize this slab uint32_t my_tid = tiny_self_u32(); superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid); // Update TLS cache (unified update) tiny_tls_bind_slab(tls, tls->ss, free_idx); return tls->ss; } } } // Try to adopt a partial SuperSlab from registry (one-shot, cheap scan) // This reduces pressure to allocate new SS when other threads freed blocks. 
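// If none of the reuse paths above returned a slab, the remaining order is:
//   1. when no SuperSlab is bound to this thread (tls->ss == NULL), scan a small
//      window of the SuperSlab registry for a partial SS of this class and try to
//      acquire/bind one of its freelist slabs,
//   2. tiny_must_adopt_gate() (sticky/hot/bench/mailbox/registry small-window),
//   3. superslab_allocate() a fresh SuperSlab and bind its first slab.
// The registry walk is capped by tiny_reg_scan_max(), so this stays a bounded
// scan rather than a full SUPER_REG_SIZE sweep.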
if (!tls->ss) { // Best-effort: scan a small window of registry for our class extern SuperRegEntry g_super_reg[]; int scanned = 0; const int scan_max = tiny_reg_scan_max(); for (int i = 0; i < SUPER_REG_SIZE && scanned < scan_max; i++) { SuperRegEntry* e = &g_super_reg[i]; uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire); if (base == 0) continue; SuperSlab* ss = atomic_load_explicit(&e->ss, memory_order_acquire); if (!ss || ss->magic != SUPERSLAB_MAGIC) continue; if ((int)ss->size_class != class_idx) { scanned++; continue; } // Pick first slab with freelist (Box 4: 所有権取得 + remote check) int reg_cap = ss_slabs_capacity(ss); uint32_t self_tid = tiny_self_u32(); for (int s = 0; s < reg_cap; s++) { if (ss->slabs[s].freelist) { SlabHandle h = slab_try_acquire(ss, s, self_tid); if (slab_is_valid(&h)) { slab_drain_remote_full(&h); if (slab_is_safe_to_bind(&h)) { tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx); tiny_tls_bind_slab(tls, ss, s); return ss; } slab_release(&h); } } } scanned++; } } // Must-adopt-before-mmap gate: attempt sticky/hot/bench/mailbox/registry small-window { SuperSlab* gate_ss = tiny_must_adopt_gate(class_idx, tls); if (gate_ss) return gate_ss; } // Allocate new SuperSlab SuperSlab* ss = superslab_allocate((uint8_t)class_idx); if (!ss) { if (!g_superslab_refill_debug_once) { g_superslab_refill_debug_once = 1; int err = errno; fprintf(stderr, "[DEBUG] superslab_refill NULL detail: class=%d prev_ss=%p active=%u bitmap=0x%08x prev_meta=%p used=%u cap=%u slab_idx=%u reused_freelist=%d free_idx=%d errno=%d\n", class_idx, (void*)prev_ss, (unsigned)prev_active, prev_bitmap, (void*)prev_meta, (unsigned)prev_meta_used, (unsigned)prev_meta_cap, (unsigned)prev_slab_idx, reused_slabs, free_idx_attempted, err); } return NULL; // OOM } // Initialize first slab uint32_t my_tid = tiny_self_u32(); superslab_init_slab(ss, 0, g_tiny_class_sizes[class_idx], my_tid); // Cache in unified TLS(前のSS参照を解放) SuperSlab* old = tls->ss; tiny_tls_bind_slab(tls, ss, 0); // Maintain refcount(将来の空回収に備え、TLS参照をカウント) superslab_ref_inc(ss); if (old && old != ss) { superslab_ref_dec(old); } return ss; } // Phase 6.24: SuperSlab-based allocation (TLS unified, Medium fix) static inline void* hak_tiny_alloc_superslab(int class_idx) { // DEBUG: Function entry trace tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, 0x01, (void*)(uintptr_t)class_idx, 0); // MidTC fast path: 128..1024B(class>=4)はTLS tcacheを最優先 do { void* mp = midtc_pop(class_idx); if (mp) { HAK_RET_ALLOC(class_idx, mp); } } while (0); // Phase 6.24: 1 TLS read (down from 3) TinyTLSSlab* tls = &g_tls_slabs[class_idx]; TinySlabMeta* meta = tls->meta; int slab_idx = tls->slab_idx; if (meta && slab_idx >= 0 && tls->ss) { // A/B: Relaxed read for remote head presence check static int g_alloc_remote_relax = -1; // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 → relaxed if (__builtin_expect(g_alloc_remote_relax == -1, 0)) { const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX"); g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0; } uintptr_t pending = atomic_load_explicit(&tls->ss->remote_heads[slab_idx], g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire); if (__builtin_expect(pending != 0, 0)) { uint32_t self_tid = tiny_self_u32(); if (ss_owner_try_acquire(meta, self_tid)) { _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta); } } } // FIX #2 DELETED (Race condition fix): // Previous drain-all-slabs without ownership caused concurrent freelist corruption. 
// Problem: Thread A owns slab 5, Thread B drains all slabs including 5 → both modify freelist → crash. // Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h). // Remote frees will be drained when the slab is adopted via refill paths. // Fast path: Direct metadata access (no repeated TLS reads!) if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) { // Linear allocation (lazy init) size_t block_size = g_tiny_class_sizes[tls->ss->size_class]; void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size)); meta->used++; // Track active blocks in SuperSlab for conservative reclamation ss_active_inc(tls->ss); HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead } if (meta && meta->freelist) { // Freelist allocation void* block = meta->freelist; // Safety: bounds/alignment check (debug) if (__builtin_expect(g_tiny_safe_free, 0)) { size_t blk = g_tiny_class_sizes[tls->ss->size_class]; uint8_t* base = tiny_slab_base_for(tls->ss, tls->slab_idx); uintptr_t delta = (uintptr_t)block - (uintptr_t)base; int align_ok = ((delta % blk) == 0); int range_ok = (delta / blk) < meta->capacity; if (!align_ok || !range_ok) { uintptr_t info = ((uintptr_t)(align_ok ? 1u : 0u) << 32) | (uint32_t)(range_ok ? 1u : 0u); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)tls->ss->size_class, block, info | 0xA100u); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; } return NULL; } } void* next = *(void**)block; meta->freelist = next; meta->used++; // Optional: clear freelist bit when becomes empty do { static int g_mask_en = -1; if (__builtin_expect(g_mask_en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_FREELIST_MASK"); g_mask_en = (e && *e && *e != '0') ? 1 : 0; } if (__builtin_expect(g_mask_en, 0) && next == NULL) { uint32_t bit = (1u << slab_idx); atomic_fetch_and_explicit(&tls->ss->freelist_mask, ~bit, memory_order_release); } } while (0); // Track active blocks in SuperSlab for conservative reclamation ss_active_inc(tls->ss); HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead } // Slow path: Refill TLS slab SuperSlab* ss = superslab_refill(class_idx); if (!ss) { static int log_oom = 0; if (log_oom < 2) { fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n"); log_oom++; } return NULL; // OOM } // Retry allocation (metadata already cached in superslab_refill) meta = tls->meta; // DEBUG: Check each condition (disabled for benchmarks) // static int log_retry = 0; // if (log_retry < 2) { // fprintf(stderr, "[DEBUG] Retry alloc: meta=%p, freelist=%p, used=%u, capacity=%u, slab_base=%p\n", // (void*)meta, meta ? meta->freelist : NULL, // meta ? meta->used : 0, meta ? 
meta->capacity : 0, // (void*)tls->slab_base); // log_retry++; // } if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) { size_t block_size = g_tiny_class_sizes[ss->size_class]; void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size)); // Disabled for benchmarks // static int log_success = 0; // if (log_success < 2) { // fprintf(stderr, "[DEBUG] Superslab alloc SUCCESS: ptr=%p, class=%d, used=%u->%u\n", // block, class_idx, meta->used, meta->used + 1); // log_success++; // } meta->used++; // Track active blocks in SuperSlab for conservative reclamation ss_active_inc(ss); HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead } // Disabled for benchmarks // static int log_fail = 0; // if (log_fail < 2) { // fprintf(stderr, "[DEBUG] Retry alloc FAILED - returning NULL\n"); // log_fail++; // } return NULL; } // Phase 6.22-B: SuperSlab fast free path static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) { HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees // Get slab index (supports 1MB/2MB SuperSlabs) int slab_idx = slab_index_for(ss, ptr); size_t ss_size = (size_t)1ULL << ss->lg_size; uintptr_t ss_base = (uintptr_t)ss; if (__builtin_expect(slab_idx < 0, 0)) { uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } TinySlabMeta* meta = &ss->slabs[slab_idx]; if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) { tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0); extern __thread TinyTLSSlab g_tls_slabs[]; tiny_alloc_dump_tls_state(ss->size_class, "watch_free_enter", &g_tls_slabs[ss->size_class]); #if !HAKMEM_BUILD_RELEASE extern __thread TinyTLSMag g_tls_mags[]; TinyTLSMag* watch_mag = &g_tls_mags[ss->size_class]; fprintf(stderr, "[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n", ss->size_class, watch_mag->top, watch_mag->cap); #endif } if (__builtin_expect(g_tiny_safe_free, 0)) { size_t blk = g_tiny_class_sizes[ss->size_class]; uint8_t* base = tiny_slab_base_for(ss, slab_idx); uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base; int cap_ok = (meta->capacity > 0) ? 
1 : 0; int align_ok = (delta % blk) == 0; int range_ok = cap_ok && (delta / blk) < meta->capacity; if (!align_ok || !range_ok) { uint32_t code = 0xA100u; if (align_ok) code |= 0x2u; if (range_ok) code |= 0x1u; uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } // Duplicate in freelist (best-effort scan up to 64) void* scan = meta->freelist; int scanned = 0; int dup = 0; while (scan && scanned < 64) { if (scan == ptr) { dup = 1; break; } scan = *(void**)scan; scanned++; } if (dup) { uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } } // Phase 6.23: Same-thread check uint32_t my_tid = tiny_self_u32(); const int debug_guard = g_debug_remote_guard; static __thread int g_debug_free_count = 0; if (!g_tiny_force_remote && meta->owner_tid != 0 && meta->owner_tid == my_tid) { // Fast path: Direct freelist push (same-thread) if (g_debug_free_count < 1) { fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n", meta->owner_tid, my_tid); g_debug_free_count++; } if (__builtin_expect(meta->used == 0, 0)) { uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid); if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) { int transitioned = ss_remote_push(ss, slab_idx, ptr); meta->used--; ss_active_dec_one(ss); if (transitioned) { ss_partial_publish((int)ss->size_class, ss); } return; } // Optional: MidTC (TLS tcache for 128..1024B) do { int cls = (int)ss->size_class; if (midtc_enabled() && cls >= 4) { if (midtc_push(cls, ptr)) { // Treat as returned to TLS cache (not SS freelist) meta->used--; ss_active_dec_one(ss); return; } } } while (0); void* prev = meta->freelist; *(void**)ptr = prev; meta->freelist = ptr; do { static int g_mask_en = -1; if (__builtin_expect(g_mask_en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_FREELIST_MASK"); g_mask_en = (e && *e && *e != '0') ? 
1 : 0; } if (__builtin_expect(g_mask_en, 0) && prev == NULL) { uint32_t bit = (1u << slab_idx); atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release); } } while (0); tiny_remote_track_on_local_free(ss, slab_idx, ptr, "local_free", my_tid); meta->used--; // Decrement SuperSlab active counter (actual return to SS) ss_active_dec_one(ss); if (prev == NULL) { ss_partial_publish((int)ss->size_class, ss); } if (__builtin_expect(debug_guard, 0)) { fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n", ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, prev, meta->used); } // 空検出は別途(ホットパス除外) } else { if (__builtin_expect(meta->owner_tid == my_tid && meta->owner_tid == 0, 0)) { uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (debug_guard) { fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n", ss->size_class, slab_idx, ptr, my_tid, (unsigned)meta->used); } } tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid); // Slow path: Remote free (cross-thread) if (g_debug_free_count < 5) { fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n", meta->owner_tid, my_tid, slab_idx); g_debug_free_count++; } if (__builtin_expect(g_tiny_safe_free, 0)) { // Best-effort duplicate scan in remote stack (up to 64 nodes) uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire); uintptr_t base = ss_base; int scanned = 0; int dup = 0; uintptr_t cur = head; while (cur && scanned < 64) { if ((cur < base) || (cur >= base + ss_size)) { uintptr_t aux = tiny_remote_pack_diag(0xA200u, base, ss_size, cur); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } break; } if ((void*)cur == ptr) { dup = 1; break; } if (__builtin_expect(g_remote_side_enable, 0)) { if (!tiny_remote_sentinel_ok((void*)cur)) { uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, cur); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux); uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed); tiny_remote_report_corruption("scan", (void*)cur, observed); fprintf(stderr, "[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n", ss->size_class, slab_idx, (void*)cur, (void*)head, ptr, scanned, observed, meta->owner_tid, (unsigned)meta->used, meta->freelist, (void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed)); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } break; } cur = tiny_remote_side_get(ss, slab_idx, (void*)cur); } else { if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) { uintptr_t aux = tiny_remote_pack_diag(0xA201u, base, ss_size, cur); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } break; } cur = (uintptr_t)(*(void**)(void*)cur); } scanned++; } if (dup) { uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } } if (__builtin_expect(meta->used == 0, 0)) { uintptr_t aux = 
tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } static int g_ss_adopt_en2 = -1; // env cached if (g_ss_adopt_en2 == -1) { char* e = getenv("HAKMEM_TINY_SS_ADOPT"); // 既定: Remote Queueを使う(1)。env指定時のみ上書き。 g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0); if (__builtin_expect(debug_guard, 0)) { fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)"); } } if (g_ss_adopt_en2) { // Use remote queue uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED); fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n", ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, (unsigned)meta->used, atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed), (void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed), head_word); int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr); if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) { dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr); } if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) { tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0); } if (dup_remote) { uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr); tiny_remote_watch_mark(ptr, "dup_prevent", my_tid); tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) { // TLS guard scribble detected on the node's first word → same-pointer double free across routes uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux); tiny_remote_watch_mark(ptr, "pre_push", my_tid); tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0); tiny_remote_report_corruption("pre_push", ptr, head_word); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) { tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0); } int was_empty = ss_remote_push(ss, slab_idx, ptr); meta->used--; ss_active_dec_one(ss); if (was_empty) { ss_partial_publish((int)ss->size_class, ss); } } else { // Fallback: direct freelist push (legacy) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n"); void* prev = meta->freelist; *(void**)ptr = prev; meta->freelist = ptr; do { static int g_mask_en = -1; if (__builtin_expect(g_mask_en == -1, 0)) { const char* e = getenv("HAKMEM_TINY_FREELIST_MASK"); g_mask_en = (e && *e && *e != '0') ? 
1 : 0; }
        if (__builtin_expect(g_mask_en, 0) && prev == NULL) { uint32_t bit = (1u << slab_idx); atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release); }
    } while (0);
    meta->used--;
    ss_active_dec_one(ss);
    if (prev == NULL) { ss_partial_publish((int)ss->size_class, ss); }
    }
    // Empty-slab detection is handled separately (kept out of the hot path)
    }
}

void hak_tiny_free(void* ptr) {
    if (!ptr || !g_tiny_initialized) return;
    hak_tiny_stats_poll();
    tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, 0, ptr, 0);
#ifdef HAKMEM_TINY_BENCH_SLL_ONLY
    // Bench-only SLL-only free: push to TLS SLL for ≤64B when possible
    {
        int class_idx = -1;
        if (g_use_superslab) {
            // FIXED: use registry-backed hak_super_lookup() to avoid false positives
            SuperSlab* ss = hak_super_lookup(ptr);
            if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
        }
        if (class_idx < 0) { TinySlab* slab = hak_tiny_owner_slab(ptr); if (slab) class_idx = slab->class_idx; }
        if (class_idx >= 0 && class_idx <= 3) {
            uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
            if ((int)g_tls_sll_count[class_idx] < (int)sll_cap) {
                *(void**)ptr = g_tls_sll_head[class_idx];
                g_tls_sll_head[class_idx] = ptr;
                g_tls_sll_count[class_idx]++;
                return;
            }
        }
    }
#endif
    if (g_tiny_ultra) {
        int class_idx = -1;
        if (g_use_superslab) {
            // FIXED: use registry-backed hak_super_lookup() to avoid false positives
            SuperSlab* ss = hak_super_lookup(ptr);
            if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
        }
        if (class_idx < 0) { TinySlab* slab = hak_tiny_owner_slab(ptr); if (slab) class_idx = slab->class_idx; }
        if (class_idx >= 0) {
            // Ultra free: push directly to TLS SLL without magazine init
            int sll_cap = ultra_sll_cap_for_class(class_idx);
            if ((int)g_tls_sll_count[class_idx] < sll_cap) {
                *(void**)ptr = g_tls_sll_head[class_idx];
                g_tls_sll_head[class_idx] = ptr;
                g_tls_sll_count[class_idx]++;
                return;
            }
        }
        // Fallback to existing path if class resolution fails
    }
    SuperSlab* fast_ss = NULL;
    TinySlab* fast_slab = NULL;
    int fast_class_idx = -1;
    if (g_use_superslab) {
        fast_ss = hak_super_lookup(ptr);
        if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) { fast_class_idx = fast_ss->size_class; } else { fast_ss = NULL; }
    }
    if (fast_class_idx < 0) { fast_slab = hak_tiny_owner_slab(ptr); if (fast_slab) fast_class_idx = fast_slab->class_idx; }
    // Safety: detect class mismatch (SS vs TinySlab) early
    if (__builtin_expect(g_tiny_safe_free && fast_class_idx >= 0, 0)) {
        int ss_cls = -1, ts_cls = -1;
        SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
        if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) ss_cls = chk_ss->size_class;
        TinySlab* chk_slab = fast_slab ?
fast_slab : hak_tiny_owner_slab(ptr); if (chk_slab) ts_cls = chk_slab->class_idx; if (ss_cls >= 0 && ts_cls >= 0 && ss_cls != ts_cls) { uintptr_t packed = ((uintptr_t)(uint16_t)ss_cls << 16) | (uint16_t)ts_cls; tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)fast_class_idx, ptr, packed); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } } } if (fast_class_idx >= 0) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)fast_class_idx, ptr, 1); } if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) { if (tiny_fast_push(fast_class_idx, ptr)) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)fast_class_idx, ptr, 0); HAK_STAT_FREE(fast_class_idx); return; } } // SuperSlab detection: prefer fast mask-based check when available SuperSlab* ss = fast_ss; if (!ss && g_use_superslab) { ss = hak_super_lookup(ptr); if (!(ss && ss->magic == SUPERSLAB_MAGIC)) { ss = NULL; } } if (ss && ss->magic == SUPERSLAB_MAGIC) { // Direct SuperSlab free (avoid second lookup TOCTOU) hak_tiny_free_superslab(ptr, ss); HAK_STAT_FREE(ss->size_class); return; } // Fallback to TinySlab only when SuperSlab is not in use TinySlab* slab = fast_slab; if (!slab) slab = hak_tiny_owner_slab(ptr); if (!slab) return; // Not managed by Tiny Pool if (__builtin_expect(g_use_superslab, 0)) { // In SS mode, a pointer that resolves only to TinySlab is suspicious → treat as invalid free tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xEE, ptr, 0xF1u); if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } return; } hak_tiny_free_with_slab(ptr, slab); } // ============================================================================ // EXTRACTED TO hakmem_tiny_query.c (Phase 2B-1) // ============================================================================ // EXTRACTED: int hak_tiny_is_managed(void* ptr) { // EXTRACTED: if (!ptr || !g_tiny_initialized) return 0; // EXTRACTED: // Phase 6.12.1: O(1) slab lookup via registry/list // EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL || hak_super_lookup(ptr) != NULL; // EXTRACTED: } // Phase 7.6: Check if pointer is managed by Tiny Pool (TinySlab OR SuperSlab) // EXTRACTED: int hak_tiny_is_managed_superslab(void* ptr) { // EXTRACTED: if (!ptr || !g_tiny_initialized) return 0; // EXTRACTED: // EXTRACTED: // Safety: Only check if g_use_superslab is enabled // EXTRACTED: if (g_use_superslab) { // EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr); // EXTRACTED: // Phase 8.2 optimization: Use alignment check instead of mincore() // EXTRACTED: // SuperSlabs are always SUPERSLAB_SIZE-aligned (2MB) // EXTRACTED: if (ss && ((uintptr_t)ss & (SUPERSLAB_SIZE - 1)) == 0) { // EXTRACTED: if (ss->magic == SUPERSLAB_MAGIC) { // EXTRACTED: return 1; // Valid SuperSlab pointer // EXTRACTED: } // EXTRACTED: } // EXTRACTED: } // EXTRACTED: // EXTRACTED: // Fallback to TinySlab check // EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL; // EXTRACTED: } // Return the usable size for a Tiny-managed pointer (0 if unknown/not tiny). // Prefer SuperSlab metadata when available; otherwise use TinySlab owner class. 
// EXTRACTED: size_t hak_tiny_usable_size(void* ptr) { // EXTRACTED: if (!ptr || !g_tiny_initialized) return 0; // EXTRACTED: // EXTRACTED: // Check SuperSlab first via registry (safe under direct link and LD) // EXTRACTED: if (g_use_superslab) { // EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr); // EXTRACTED: if (ss && ss->magic == SUPERSLAB_MAGIC) { // EXTRACTED: int k = (int)ss->size_class; // EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) { // EXTRACTED: return g_tiny_class_sizes[k]; // EXTRACTED: } // EXTRACTED: } // EXTRACTED: } // EXTRACTED: // EXTRACTED: // Fallback: TinySlab owner lookup // EXTRACTED: TinySlab* slab = hak_tiny_owner_slab(ptr); // EXTRACTED: if (slab) { // EXTRACTED: int k = slab->class_idx; // EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) { // EXTRACTED: return g_tiny_class_sizes[k]; // EXTRACTED: } // EXTRACTED: } // EXTRACTED: return 0; // EXTRACTED: } // ============================================================================ // Statistics and Debug Functions - Extracted to hakmem_tiny_stats.c // ============================================================================ // (Phase 2B API headers moved to top of file) // Optional shutdown hook to stop background components (e.g., Intelligence Engine) void hak_tiny_shutdown(void) { // Release TLS SuperSlab references (dec refcount) before stopping BG/INT for (int k = 0; k < TINY_NUM_CLASSES; k++) { TinyTLSSlab* tls = &g_tls_slabs[k]; if (tls->ss) { superslab_ref_dec(tls->ss); tls->ss = NULL; tls->meta = NULL; tls->slab_base = NULL; } } if (g_bg_bin_started) { g_bg_bin_stop = 1; if (!pthread_equal(tiny_self_pt(), g_bg_bin_thread)) { pthread_join(g_bg_bin_thread, NULL); } g_bg_bin_started = 0; g_bg_bin_enable = 0; } tiny_obs_shutdown(); if (g_int_engine && g_int_started) { g_int_stop = 1; // Best-effort join; avoid deadlock if called from within the thread if (!pthread_equal(tiny_self_pt(), g_int_thread)) { pthread_join(g_int_thread, NULL); } g_int_started = 0; g_int_engine = 0; } } // Always-available: Trim empty slabs (release fully-free slabs)